1 // Copyright 2024 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 //go:build !purego
6
7 #include "textflag.h"
8
9 // SHA512 block routine. See sha512block.go for Go equivalent.
10
// Scratch registers shared by all macros below. Every macro writes a scratch
// register before reading it, with one exception: REGTMP4 carries the
// schedule word W[i] from LOAD0/LOAD1 into SHA512T1, and then T1 from
// SHA512T1 into the tail of SHA512ROUND.
//
// REGTMP5 is R6, which block also uses for the rounded input length; that
// value is folded into R25 before the first round, so the reuse is safe.
//
// NOTE(review): R30 is named REGTMP here. If the assembler itself uses R30
// while expanding `ADDV $const, h` with a 64-bit constant, that is harmless
// only because no macro holds a live value in REGTMP across that
// instruction - confirm against the Go assembler guide before reordering.
11 #define REGTMP R30
12 #define REGTMP1 R16
13 #define REGTMP2 R17
14 #define REGTMP3 R18
15 #define REGTMP4 R7
16 #define REGTMP5 R6
17
// Message schedule.
//
// Only the 16 most recent schedule words are kept, in a ring of sixteen
// 8-byte slots in the stack frame: W[i] lives in slot (i & 0xf), at byte
// offset 8 + (i & 0xf)*8 from R3 (the hardware stack pointer).
//
// The ring starts at 8(R3), not 0(R3). For a function that declares a
// non-zero frame the assembler saves the return address at 0(R3); storing
// W[0] there would make any stack unwind taken while the hash is running
// (profiling signal, crash traceback) read message data as a return PC.
// With the $128 frame declared by block, the ring occupies 8(R3)..135(R3).
//
// Both macros leave W[i] in REGTMP4 for the round that follows.
//
18 // W[i] = M[i]; for 0 <= i <= 15
// R5 points at the current 128-byte input block. The word is byte-reversed
// after loading because SHA-512 message words are big-endian.
// Clobbers: REGTMP4.
19 #define LOAD0(index) \
20 MOVV (index*8)(R5), REGTMP4; \
21 REVBV REGTMP4, REGTMP4; \
22 MOVV REGTMP4, (8+index*8)(R3)
23
24 // W[i] = SIGMA1(W[i-2]) + W[i-7] + SIGMA0(W[i-15]) + W[i-16]; for 16 <= i <= 79
25 // SIGMA0(x) = ROTR(1,x) XOR ROTR(8,x) XOR SHR(7,x)
26 // SIGMA1(x) = ROTR(19,x) XOR ROTR(61,x) XOR SHR(6,x)
// SIGMA1(W[i-2]) + W[i-7] is accumulated in REGTMP4, and
// SIGMA0(W[i-15]) + W[i-16] in REGTMP5; the two are then summed.
// W[i-16] shares its ring slot with W[i], so it is read before the final
// store overwrites it.
// Clobbers: REGTMP, REGTMP1, REGTMP2, REGTMP3, REGTMP4, REGTMP5.
27 #define LOAD1(index) \
28 MOVV (8+((index-2)&0xf)*8)(R3), REGTMP4; \
29 MOVV (8+((index-15)&0xf)*8)(R3), REGTMP1; \
30 MOVV (8+((index-7)&0xf)*8)(R3), REGTMP; \
31 MOVV REGTMP4, REGTMP2; \
32 MOVV REGTMP4, REGTMP3; \
33 ROTRV $19, REGTMP4; \
34 ROTRV $61, REGTMP2; \
35 SRLV $6, REGTMP3; \
36 XOR REGTMP2, REGTMP4; \
37 XOR REGTMP3, REGTMP4; \
38 ROTRV $1, REGTMP1, REGTMP5; \
39 SRLV $7, REGTMP1, REGTMP2; \
40 ROTRV $8, REGTMP1; \
41 ADDV REGTMP, REGTMP4; \
42 MOVV (8+((index-16)&0xf)*8)(R3), REGTMP; \
43 XOR REGTMP1, REGTMP5; \
44 XOR REGTMP2, REGTMP5; \
45 ADDV REGTMP, REGTMP5; \
46 ADDV REGTMP5, REGTMP4; \
47 MOVV REGTMP4, (8+(index&0xf)*8)(R3)
48
// SHA512T1(const, e, f, g, h): first half of a compression round.
//   const    - round constant K[i], added as an immediate
//   e, f, g  - registers holding the working variables e, f, g (read only)
//   h        - register holding h; overwritten (see below)
// In:  REGTMP4 = W[i], left there by LOAD0/LOAD1.
// Out: REGTMP4 = T1.
// Clobbers: REGTMP, REGTMP2, REGTMP3, REGTMP5 and h. On exit h holds
// h + K[i] + W[i] + BIGSIGMA1(e), i.e. T1 without the Ch term; it is a
// partial sum only, and SHA512ROUND overwrites it afterwards.
// REGTMP1 is not touched.
49 // h is also used as an accumulator. Wt is passed in REGTMP4.
50 // T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + K[i] + W[i]
51 // BIGSIGMA1(x) = ROTR(14,x) XOR ROTR(18,x) XOR ROTR(41,x)
52 // Ch(x, y, z) = (x AND y) XOR (NOT x AND z)
53 // = ((y XOR z) AND x) XOR z
54 // Calculate T1 in REGTMP4
// BIGSIGMA1(e) is built in REGTMP3 and Ch(e, f, g) in REGTMP2; the two
// computations are interleaved, so the instruction order matters.
55 #define SHA512T1(const, e, f, g, h) \
56 ADDV $const, h; \
57 ADDV REGTMP4, h; \
58 ROTRV $14, e, REGTMP5; \
59 ROTRV $18, e, REGTMP; \
60 ROTRV $41, e, REGTMP3; \
61 XOR f, g, REGTMP2; \
62 XOR REGTMP, REGTMP5; \
63 AND e, REGTMP2; \
64 XOR REGTMP5, REGTMP3; \
65 XOR g, REGTMP2; \
66 ADDV REGTMP3, h; \
67 ADDV h, REGTMP2, REGTMP4
68
// SHA512T2(a, b, c): second half of a compression round.
//   a, b, c - registers holding the working variables a, b, c (read only)
// Out: REGTMP1 = T2.
// Clobbers: REGTMP, REGTMP1, REGTMP2, REGTMP3, REGTMP5.
// REGTMP4 is not touched, so the T1 left there by SHA512T1 survives.
69 // T2 = BIGSIGMA0(a) + Maj(a, b, c)
70 // BIGSIGMA0(x) = ROTR(28,x) XOR ROTR(34,x) XOR ROTR(39,x)
71 // Maj(x, y, z) = (x AND y) XOR (x AND z) XOR (y AND z)
72 // = ((y XOR z) AND x) XOR (y AND z)
73 // Calculate T2 in REGTMP1
// BIGSIGMA0(a) is built in REGTMP5 and Maj(a, b, c) in REGTMP1; the two
// computations are interleaved, so the instruction order matters.
74 #define SHA512T2(a, b, c) \
75 ROTRV $28, a, REGTMP5; \
76 ROTRV $34, a, REGTMP3; \
77 ROTRV $39, a, REGTMP2; \
78 XOR b, c, REGTMP; \
79 AND b, c, REGTMP1; \
80 XOR REGTMP3, REGTMP5; \
81 AND REGTMP, a, REGTMP; \
82 XOR REGTMP2, REGTMP5; \
83 XOR REGTMP, REGTMP1; \
84 ADDV REGTMP5, REGTMP1
85
// SHA512ROUND(const, a, b, c, d, e, f, g, h): one full compression round.
// In:  REGTMP4 = W[i]; a..h name the registers holding the working variables.
// Out: d = d + T1 (the next round's e), h = T1 + T2 (the next round's a).
//      Registers a, b, c, e, f, g are left unchanged.
// No register-to-register moves are made: the caller renames the registers
// by shifting the argument list one place on the next invocation.
// Clobbers: REGTMP, REGTMP1, REGTMP2, REGTMP3, REGTMP4, REGTMP5.
86 // Calculate T1 and T2, then e = d + T1 and a = T1 + T2.
87 // The values for e and a are stored in d and h, ready for rotation.
88 #define SHA512ROUND(const, a, b, c, d, e, f, g, h) \
89 SHA512T1(const, e, f, g, h); \
90 SHA512T2(a, b, c); \
91 ADDV REGTMP4, d; \
92 ADDV REGTMP1, REGTMP4, h
93
// Round for 0 <= idx <= 15: fetch the schedule word directly from the input
// block with LOAD0, then run one compression round on it.
//   idx    - round number, also the index of the message word to load
//   kt     - round constant for this round
//   ra..rh - registers currently playing the roles of a..h
94 #define SHA512ROUND0(idx, kt, ra, rb, rc, rd, re, rf, rg, rh) \
95 LOAD0(idx); \
96 SHA512ROUND(kt, ra, rb, rc, rd, re, rf, rg, rh)
97
// Round for 16 <= idx <= 79: derive the schedule word from earlier ones with
// LOAD1, then run one compression round on it.
//   idx    - round number, used by LOAD1 to select the schedule words
//   kt     - round constant for this round
//   ra..rh - registers currently playing the roles of a..h
98 #define SHA512ROUND1(idx, kt, ra, rb, rc, rd, re, rf, rg, rh) \
99 LOAD1(idx); \
100 SHA512ROUND(kt, ra, rb, rc, rd, re, rf, rg, rh)
101
// block hashes every complete 128-byte block of p into the eight 64-bit
// state words stored at the start of *dig. Trailing bytes that do not make
// up a whole block are ignored, and if p holds no whole block the function
// returns without reading dig.
//
// Register use:
//   R4       dig
//   R5       read pointer into p, advanced by 128 per iteration
//   R6       rounded length on entry only; REGTMP5 inside the loop
//   R25      address just past the last whole block (loop bound)
//   R8..R15  working variables a..h, kept in registers across blocks
//   R3       stack pointer; LOAD0/LOAD1 keep the 16-word message schedule
//            in this function's frame
//   REGTMP..REGTMP5  scratch, see the macros above
//
102 // A stack frame size of 128 bytes is required here, because
103 // the frame size used for data expansion is 128 bytes.
104 // See the definition of the macro LOAD1 above (8 bytes * 16 entries).
105 //
106 // func block(dig *Digest, p []byte)
107 TEXT ·block(SB),NOSPLIT,$128-32
108 MOVV p_len+16(FP), R6
109 MOVV p_base+8(FP), R5
110 AND $~127, R6 // round len(p) down to a multiple of 128
111 BEQ R6, end // no complete block: nothing to do
112
113 // p_len >= 128
114 MOVV dig+0(FP), R4
115 ADDV R5, R6, R25 // R25 = p_base + rounded length
116 MOVV (0*8)(R4), R8 // a = H0
117 MOVV (1*8)(R4), R9 // b = H1
118 MOVV (2*8)(R4), R10 // c = H2
119 MOVV (3*8)(R4), R11 // d = H3
120 MOVV (4*8)(R4), R12 // e = H4
121 MOVV (5*8)(R4), R13 // f = H5
122 MOVV (6*8)(R4), R14 // g = H6
123 MOVV (7*8)(R4), R15 // h = H7
124
125 loop:
// 80 fully unrolled rounds. Each round writes the new a into the register
// passed as h and the new e into the register passed as d, so the next
// invocation shifts the register list one place instead of moving data.
// The list returns to its starting order every 8 rounds.
//
// Rounds 0-15 take W[i] from the input block (SHA512ROUND0).
126 SHA512ROUND0( 0, 0x428a2f98d728ae22, R8, R9, R10, R11, R12, R13, R14, R15)
127 SHA512ROUND0( 1, 0x7137449123ef65cd, R15, R8, R9, R10, R11, R12, R13, R14)
128 SHA512ROUND0( 2, 0xb5c0fbcfec4d3b2f, R14, R15, R8, R9, R10, R11, R12, R13)
129 SHA512ROUND0( 3, 0xe9b5dba58189dbbc, R13, R14, R15, R8, R9, R10, R11, R12)
130 SHA512ROUND0( 4, 0x3956c25bf348b538, R12, R13, R14, R15, R8, R9, R10, R11)
131 SHA512ROUND0( 5, 0x59f111f1b605d019, R11, R12, R13, R14, R15, R8, R9, R10)
132 SHA512ROUND0( 6, 0x923f82a4af194f9b, R10, R11, R12, R13, R14, R15, R8, R9)
133 SHA512ROUND0( 7, 0xab1c5ed5da6d8118, R9, R10, R11, R12, R13, R14, R15, R8)
134 SHA512ROUND0( 8, 0xd807aa98a3030242, R8, R9, R10, R11, R12, R13, R14, R15)
135 SHA512ROUND0( 9, 0x12835b0145706fbe, R15, R8, R9, R10, R11, R12, R13, R14)
136 SHA512ROUND0(10, 0x243185be4ee4b28c, R14, R15, R8, R9, R10, R11, R12, R13)
137 SHA512ROUND0(11, 0x550c7dc3d5ffb4e2, R13, R14, R15, R8, R9, R10, R11, R12)
138 SHA512ROUND0(12, 0x72be5d74f27b896f, R12, R13, R14, R15, R8, R9, R10, R11)
139 SHA512ROUND0(13, 0x80deb1fe3b1696b1, R11, R12, R13, R14, R15, R8, R9, R10)
140 SHA512ROUND0(14, 0x9bdc06a725c71235, R10, R11, R12, R13, R14, R15, R8, R9)
141 SHA512ROUND0(15, 0xc19bf174cf692694, R9, R10, R11, R12, R13, R14, R15, R8)
142
// Rounds 16-79 derive W[i] from the previous 16 schedule words (SHA512ROUND1).
143 SHA512ROUND1(16, 0xe49b69c19ef14ad2, R8, R9, R10, R11, R12, R13, R14, R15)
144 SHA512ROUND1(17, 0xefbe4786384f25e3, R15, R8, R9, R10, R11, R12, R13, R14)
145 SHA512ROUND1(18, 0x0fc19dc68b8cd5b5, R14, R15, R8, R9, R10, R11, R12, R13)
146 SHA512ROUND1(19, 0x240ca1cc77ac9c65, R13, R14, R15, R8, R9, R10, R11, R12)
147 SHA512ROUND1(20, 0x2de92c6f592b0275, R12, R13, R14, R15, R8, R9, R10, R11)
148 SHA512ROUND1(21, 0x4a7484aa6ea6e483, R11, R12, R13, R14, R15, R8, R9, R10)
149 SHA512ROUND1(22, 0x5cb0a9dcbd41fbd4, R10, R11, R12, R13, R14, R15, R8, R9)
150 SHA512ROUND1(23, 0x76f988da831153b5, R9, R10, R11, R12, R13, R14, R15, R8)
151 SHA512ROUND1(24, 0x983e5152ee66dfab, R8, R9, R10, R11, R12, R13, R14, R15)
152 SHA512ROUND1(25, 0xa831c66d2db43210, R15, R8, R9, R10, R11, R12, R13, R14)
153 SHA512ROUND1(26, 0xb00327c898fb213f, R14, R15, R8, R9, R10, R11, R12, R13)
154 SHA512ROUND1(27, 0xbf597fc7beef0ee4, R13, R14, R15, R8, R9, R10, R11, R12)
155 SHA512ROUND1(28, 0xc6e00bf33da88fc2, R12, R13, R14, R15, R8, R9, R10, R11)
156 SHA512ROUND1(29, 0xd5a79147930aa725, R11, R12, R13, R14, R15, R8, R9, R10)
157 SHA512ROUND1(30, 0x06ca6351e003826f, R10, R11, R12, R13, R14, R15, R8, R9)
158 SHA512ROUND1(31, 0x142929670a0e6e70, R9, R10, R11, R12, R13, R14, R15, R8)
159 SHA512ROUND1(32, 0x27b70a8546d22ffc, R8, R9, R10, R11, R12, R13, R14, R15)
160 SHA512ROUND1(33, 0x2e1b21385c26c926, R15, R8, R9, R10, R11, R12, R13, R14)
161 SHA512ROUND1(34, 0x4d2c6dfc5ac42aed, R14, R15, R8, R9, R10, R11, R12, R13)
162 SHA512ROUND1(35, 0x53380d139d95b3df, R13, R14, R15, R8, R9, R10, R11, R12)
163 SHA512ROUND1(36, 0x650a73548baf63de, R12, R13, R14, R15, R8, R9, R10, R11)
164 SHA512ROUND1(37, 0x766a0abb3c77b2a8, R11, R12, R13, R14, R15, R8, R9, R10)
165 SHA512ROUND1(38, 0x81c2c92e47edaee6, R10, R11, R12, R13, R14, R15, R8, R9)
166 SHA512ROUND1(39, 0x92722c851482353b, R9, R10, R11, R12, R13, R14, R15, R8)
167 SHA512ROUND1(40, 0xa2bfe8a14cf10364, R8, R9, R10, R11, R12, R13, R14, R15)
168 SHA512ROUND1(41, 0xa81a664bbc423001, R15, R8, R9, R10, R11, R12, R13, R14)
169 SHA512ROUND1(42, 0xc24b8b70d0f89791, R14, R15, R8, R9, R10, R11, R12, R13)
170 SHA512ROUND1(43, 0xc76c51a30654be30, R13, R14, R15, R8, R9, R10, R11, R12)
171 SHA512ROUND1(44, 0xd192e819d6ef5218, R12, R13, R14, R15, R8, R9, R10, R11)
172 SHA512ROUND1(45, 0xd69906245565a910, R11, R12, R13, R14, R15, R8, R9, R10)
173 SHA512ROUND1(46, 0xf40e35855771202a, R10, R11, R12, R13, R14, R15, R8, R9)
174 SHA512ROUND1(47, 0x106aa07032bbd1b8, R9, R10, R11, R12, R13, R14, R15, R8)
175 SHA512ROUND1(48, 0x19a4c116b8d2d0c8, R8, R9, R10, R11, R12, R13, R14, R15)
176 SHA512ROUND1(49, 0x1e376c085141ab53, R15, R8, R9, R10, R11, R12, R13, R14)
177 SHA512ROUND1(50, 0x2748774cdf8eeb99, R14, R15, R8, R9, R10, R11, R12, R13)
178 SHA512ROUND1(51, 0x34b0bcb5e19b48a8, R13, R14, R15, R8, R9, R10, R11, R12)
179 SHA512ROUND1(52, 0x391c0cb3c5c95a63, R12, R13, R14, R15, R8, R9, R10, R11)
180 SHA512ROUND1(53, 0x4ed8aa4ae3418acb, R11, R12, R13, R14, R15, R8, R9, R10)
181 SHA512ROUND1(54, 0x5b9cca4f7763e373, R10, R11, R12, R13, R14, R15, R8, R9)
182 SHA512ROUND1(55, 0x682e6ff3d6b2b8a3, R9, R10, R11, R12, R13, R14, R15, R8)
183 SHA512ROUND1(56, 0x748f82ee5defb2fc, R8, R9, R10, R11, R12, R13, R14, R15)
184 SHA512ROUND1(57, 0x78a5636f43172f60, R15, R8, R9, R10, R11, R12, R13, R14)
185 SHA512ROUND1(58, 0x84c87814a1f0ab72, R14, R15, R8, R9, R10, R11, R12, R13)
186 SHA512ROUND1(59, 0x8cc702081a6439ec, R13, R14, R15, R8, R9, R10, R11, R12)
187 SHA512ROUND1(60, 0x90befffa23631e28, R12, R13, R14, R15, R8, R9, R10, R11)
188 SHA512ROUND1(61, 0xa4506cebde82bde9, R11, R12, R13, R14, R15, R8, R9, R10)
189 SHA512ROUND1(62, 0xbef9a3f7b2c67915, R10, R11, R12, R13, R14, R15, R8, R9)
190 SHA512ROUND1(63, 0xc67178f2e372532b, R9, R10, R11, R12, R13, R14, R15, R8)
191 SHA512ROUND1(64, 0xca273eceea26619c, R8, R9, R10, R11, R12, R13, R14, R15)
192 SHA512ROUND1(65, 0xd186b8c721c0c207, R15, R8, R9, R10, R11, R12, R13, R14)
193 SHA512ROUND1(66, 0xeada7dd6cde0eb1e, R14, R15, R8, R9, R10, R11, R12, R13)
194 SHA512ROUND1(67, 0xf57d4f7fee6ed178, R13, R14, R15, R8, R9, R10, R11, R12)
195 SHA512ROUND1(68, 0x06f067aa72176fba, R12, R13, R14, R15, R8, R9, R10, R11)
196 SHA512ROUND1(69, 0x0a637dc5a2c898a6, R11, R12, R13, R14, R15, R8, R9, R10)
197 SHA512ROUND1(70, 0x113f9804bef90dae, R10, R11, R12, R13, R14, R15, R8, R9)
198 SHA512ROUND1(71, 0x1b710b35131c471b, R9, R10, R11, R12, R13, R14, R15, R8)
199 SHA512ROUND1(72, 0x28db77f523047d84, R8, R9, R10, R11, R12, R13, R14, R15)
200 SHA512ROUND1(73, 0x32caab7b40c72493, R15, R8, R9, R10, R11, R12, R13, R14)
201 SHA512ROUND1(74, 0x3c9ebe0a15c9bebc, R14, R15, R8, R9, R10, R11, R12, R13)
202 SHA512ROUND1(75, 0x431d67c49c100d4c, R13, R14, R15, R8, R9, R10, R11, R12)
203 SHA512ROUND1(76, 0x4cc5d4becb3e42b6, R12, R13, R14, R15, R8, R9, R10, R11)
204 SHA512ROUND1(77, 0x597f299cfc657e2a, R11, R12, R13, R14, R15, R8, R9, R10)
205 SHA512ROUND1(78, 0x5fcb6fab3ad6faec, R10, R11, R12, R13, R14, R15, R8, R9)
206 SHA512ROUND1(79, 0x6c44198c4a475817, R9, R10, R11, R12, R13, R14, R15, R8)
207
// 80 rounds = 10 full rotations of the register list, so a..h are back in
// R8..R15. Add the digest words this block started from and write the sums
// back. R8..R15 then hold the new H0..H7 and serve unchanged as the starting
// a..h of the next block, which is why the state is not reloaded at loop.
208 MOVV (0*8)(R4), REGTMP
209 MOVV (1*8)(R4), REGTMP1
210 MOVV (2*8)(R4), REGTMP2
211 MOVV (3*8)(R4), REGTMP3
212 ADDV REGTMP, R8 // H0 = a + H0
213 ADDV REGTMP1, R9 // H1 = b + H1
214 ADDV REGTMP2, R10 // H2 = c + H2
215 ADDV REGTMP3, R11 // H3 = d + H3
216 MOVV R8, (0*8)(R4)
217 MOVV R9, (1*8)(R4)
218 MOVV R10, (2*8)(R4)
219 MOVV R11, (3*8)(R4)
220 MOVV (4*8)(R4), REGTMP
221 MOVV (5*8)(R4), REGTMP1
222 MOVV (6*8)(R4), REGTMP2
223 MOVV (7*8)(R4), REGTMP3
224 ADDV REGTMP, R12 // H4 = e + H4
225 ADDV REGTMP1, R13 // H5 = f + H5
226 ADDV REGTMP2, R14 // H6 = g + H6
227 ADDV REGTMP3, R15 // H7 = h + H7
228 MOVV R12, (4*8)(R4)
229 MOVV R13, (5*8)(R4)
230 MOVV R14, (6*8)(R4)
231 MOVV R15, (7*8)(R4)
232
233 ADDV $128, R5 // advance to the next input block
234 BNE R5, R25, loop // repeat until the last whole block is consumed
235
236 end:
237 RET
238
View as plain text