1 // Copyright 2024 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 //go:build !purego
6
7 #include "textflag.h"
8
9 // SHA256 block routine. See sha256block.go for Go equivalent.
10 //
11 // The algorithm is detailed in FIPS 180-4:
12 //
13 // https://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
14 //
15 // W[i] = M[i]; for 0 <= i <= 15
16 // W[i] = SIGMA1(W[i-2]) + W[i-7] + SIGMA0(W[i-15]) + W[i-16]; for 16 <= i <= 63
17 //
18 // a = H0
19 // b = H1
20 // c = H2
21 // d = H3
22 // e = H4
23 // f = H5
24 // g = H6
25 // h = H7
26 //
27 // for i = 0 to 63 {
28 //     T1 = h + BIGSIGMA1(e) + Ch(e,f,g) + K[i] + W[i]
29 //     T2 = BIGSIGMA0(a) + Maj(a,b,c)
30 //     h = g
31 //     g = f
32 //     f = e
33 //     e = d + T1
34 //     d = c
35 //     c = b
36 //     b = a
37 //     a = T1 + T2
38 // }
39 //
40 // H0 = a + H0
41 // H1 = b + H1
42 // H2 = c + H2
43 // H3 = d + H3
44 // H4 = e + H4
45 // H5 = f + H5
46 // H6 = g + H6
47 // H7 = h + H7
48
// Scratch registers shared by the macros below. Each round writes every one
// of them before reading it, so no value is carried from round to round.
//   REGTMP4 carries W[i] from LOAD0/LOAD1 into SHA256T1, which replaces it with T1.
//   REGTMP1 receives T2 from SHA256T2.
//   REGTMP5 is R6, which ·block also uses for the byte count before the first round.
49 #define REGTMP R30
50 #define REGTMP1 R16
51 #define REGTMP2 R17
52 #define REGTMP3 R18
53 #define REGTMP4 R7
54 #define REGTMP5 R6
55
56 // W[i] = M[i]; for 0 <= i <= 15
//
// LOAD0 reads 32-bit message word `index` from the input at R5, byte-swaps
// it with REVB2W, and stores it in slot `index` of the schedule buffer at R3.
// The swapped word is also left in REGTMP4, which is where SHA256T1 reads W[i].
57 #define LOAD0(index) \
58 MOVW (index*4)(R5), REGTMP4; \
59 REVB2W REGTMP4, REGTMP4; \
60 MOVW REGTMP4, (index*4)(R3)
61
62 // W[i] = SIGMA1(W[i-2]) + W[i-7] + SIGMA0(W[i-15]) + W[i-16]; for 16 <= i <= 63
63 //   SIGMA0(x) = ROTR(7,x) XOR ROTR(18,x) XOR SHR(3,x)
64 //   SIGMA1(x) = ROTR(17,x) XOR ROTR(19,x) XOR SHR(10,x)
//
// The schedule buffer at R3 is used as a 16-entry ring: every slot index is
// reduced with &0xf, so W[i] is written to the slot that held W[i-16]. That
// old value is loaded before the final store overwrites it.
//
// Register roles inside the macro:
//   REGTMP4  W[i-2], then SIGMA1(W[i-2]), then the running sum; W[i] on exit
//   REGTMP1  W[i-15]
//   REGTMP   W[i-7], later reloaded with W[i-16]
//   REGTMP5  SIGMA0(W[i-15]), then SIGMA0(W[i-15]) + W[i-16]
//   REGTMP2, REGTMP3  rotate/shift intermediates
// W[i] is left in REGTMP4, which is where SHA256T1 reads it.
65 #define LOAD1(index) \
66 MOVW (((index-2)&0xf)*4)(R3), REGTMP4; \
67 MOVW (((index-15)&0xf)*4)(R3), REGTMP1; \
68 MOVW (((index-7)&0xf)*4)(R3), REGTMP; \
69 MOVW REGTMP4, REGTMP2; \
70 MOVW REGTMP4, REGTMP3; \
71 ROTR $17, REGTMP4; \
72 ROTR $19, REGTMP2; \
73 SRL $10, REGTMP3; \
74 XOR REGTMP2, REGTMP4; \
75 XOR REGTMP3, REGTMP4; \
76 ROTR $7, REGTMP1, REGTMP5; \
77 SRL $3, REGTMP1, REGTMP3; \
78 ROTR $18, REGTMP1, REGTMP2; \
79 ADD REGTMP, REGTMP4; \
80 MOVW (((index-16)&0xf)*4)(R3), REGTMP; \
81 XOR REGTMP3, REGTMP5; \
82 XOR REGTMP2, REGTMP5; \
83 ADD REGTMP, REGTMP5; \
84 ADD REGTMP5, REGTMP4; \
85 MOVW REGTMP4, ((index&0xf)*4)(R3)
86
87 // T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + K[i] + W[i]
88 //   BIGSIGMA1(x) = ROTR(6,x) XOR ROTR(11,x) XOR ROTR(25,x)
89 //   Ch(x, y, z) = (x AND y) XOR (NOT x AND z)
90 //               = ((y XOR z) AND x) XOR z
91 // Calculate T1 in REGTMP4
//
// On entry REGTMP4 holds W[i] and const is K[i]. The second form of Ch is
// the one implemented here.
//
// h is used as the accumulator and is overwritten: on exit it holds
// h + K[i] + W[i] + BIGSIGMA1(e), and REGTMP4 holds that sum plus Ch(e, f, g),
// i.e. T1. e, f and g are only read. REGTMP, REGTMP2, REGTMP3 and REGTMP5
// are clobbered.
92 #define SHA256T1(const, e, f, g, h) \
93 ADDV $const, h; \
94 ADD REGTMP4, h; \
95 ROTR $6, e, REGTMP5; \
96 ROTR $11, e, REGTMP; \
97 ROTR $25, e, REGTMP3; \
98 XOR f, g, REGTMP2; \
99 XOR REGTMP, REGTMP5; \
100 AND e, REGTMP2; \
101 XOR REGTMP5, REGTMP3; \
102 XOR g, REGTMP2; \
103 ADD REGTMP3, h; \
104 ADD h, REGTMP2, REGTMP4
105
106 // T2 = BIGSIGMA0(a) + Maj(a, b, c)
107 //   BIGSIGMA0(x) = ROTR(2,x) XOR ROTR(13,x) XOR ROTR(22,x)
108 //   Maj(x, y, z) = (x AND y) XOR (x AND z) XOR (y AND z)
109 //                = ((y XOR z) AND x) XOR (y AND z)
110 // Calculate T2 in REGTMP1
//
// The second form of Maj is the one implemented here. a, b and c are only
// read. BIGSIGMA0(a) is built in REGTMP5 and Maj(a, b, c) in REGTMP1 before
// the final add. REGTMP, REGTMP2, REGTMP3 and REGTMP5 are clobbered.
// REGTMP4 is not touched, so a T1 left there by SHA256T1 survives.
111 #define SHA256T2(a, b, c) \
112 ROTR $2, a, REGTMP5; \
113 ROTR $13, a, REGTMP3; \
114 ROTR $22, a, REGTMP2; \
115 XOR b, c, REGTMP; \
116 AND b, c, REGTMP1; \
117 XOR REGTMP3, REGTMP5; \
118 AND REGTMP, a, REGTMP; \
119 XOR REGTMP2, REGTMP5; \
120 XOR REGTMP, REGTMP1; \
121 ADD REGTMP5, REGTMP1
122
123 // Calculate T1 and T2, then e = d + T1 and a = T1 + T2.
124 // The values for e and a are stored in d and h, ready for rotation.
//
// W[i] must already be in REGTMP4 (see LOAD0/LOAD1) and const is K[i].
// After SHA256T1 and SHA256T2, T1 is in REGTMP4 and T2 is in REGTMP1.
// The register passed as d receives d + T1, and the register passed as h,
// which SHA256T1 used as an accumulator, is overwritten with T1 + T2.
// a, b, c, e, f and g are left unchanged; the caller performs the
// "h = g, g = f, ..." shuffle by rotating the argument list on the next round.
125 #define SHA256ROUND(const, a, b, c, d, e, f, g, h) \
126 SHA256T1(const, e, f, g, h); \
127 SHA256T2(a, b, c); \
128 ADD REGTMP4, d; \
129 ADD REGTMP1, REGTMP4, h
130
// One round for 0 <= index <= 15: LOAD0 takes W[index] directly from the
// message and leaves it in REGTMP4, then SHA256ROUND runs with const as K[index].
131 #define SHA256ROUND0(index, const, a, b, c, d, e, f, g, h) \
132 LOAD0(index); \
133 SHA256ROUND(const, a, b, c, d, e, f, g, h)
134
// One round for 16 <= index <= 63: LOAD1 derives W[index] from earlier
// schedule entries and leaves it in REGTMP4, then SHA256ROUND runs with
// const as K[index].
135 #define SHA256ROUND1(index, const, a, b, c, d, e, f, g, h) \
136 LOAD1(index); \
137 SHA256ROUND(const, a, b, c, d, e, f, g, h)
138
// block folds every complete 64-byte chunk of p into the eight 32-bit state
// words H0..H7 read from and written back to offsets 0..28 of dig. Bytes past
// the last multiple of 64 are ignored, and dig is not touched at all when p
// holds less than one chunk.
//
// Register use:
//   R4       dig
//   R5       current chunk of p
//   R25      address just past the last complete chunk
//   R8..R15  working variables a..h
//   R3       base of the 16-word message schedule used by LOAD0/LOAD1
//   R6       p_len during setup only; it is REGTMP5 once the rounds start
//
139 // A stack frame size of 64 bytes is required here, because
140 // the frame size used for data expansion is 64 bytes.
141 // See the definition of the macro LOAD1 above (4 bytes * 16 entries).
// NOTE(review): the macros address that area as offsets 0..60 from R3;
// confirm this range lies wholly inside the declared frame and does not
// overlap anything the toolchain keeps at the base of it.
142 //
143 //func block(dig *Digest, p []byte)
144 TEXT ·block(SB),NOSPLIT,$64-32
145 MOVV p_base+8(FP), R5 // R5 = start of p
146 MOVV p_len+16(FP), R6 // R6 = len(p)
147 AND $~63, R6 // round the length down to a multiple of 64
148 BEQ R6, end // no complete chunk: return without touching dig
149
150 // p_len >= 64
151 MOVV dig+0(FP), R4 // R4 = dig
152 ADDV R5, R6, R25 // R25 = end of the last complete chunk; R6 is free after this
153 MOVW (0*4)(R4), R8 // a = H0
154 MOVW (1*4)(R4), R9 // b = H1
155 MOVW (2*4)(R4), R10 // c = H2
156 MOVW (3*4)(R4), R11 // d = H3
157 MOVW (4*4)(R4), R12 // e = H4
158 MOVW (5*4)(R4), R13 // f = H5
159 MOVW (6*4)(R4), R14 // g = H6
160 MOVW (7*4)(R4), R15 // h = H7
161
162 loop:
// Rounds 0-15: W[i] is loaded from the message at R5.
// The register list shifts one place to the right on each round, so the
// registers a round writes as d and h are read as e and a by the next one.
// After every 8 rounds the list is back to R8..R15.
163 SHA256ROUND0(0, 0x428a2f98, R8, R9, R10, R11, R12, R13, R14, R15)
164 SHA256ROUND0(1, 0x71374491, R15, R8, R9, R10, R11, R12, R13, R14)
165 SHA256ROUND0(2, 0xb5c0fbcf, R14, R15, R8, R9, R10, R11, R12, R13)
166 SHA256ROUND0(3, 0xe9b5dba5, R13, R14, R15, R8, R9, R10, R11, R12)
167 SHA256ROUND0(4, 0x3956c25b, R12, R13, R14, R15, R8, R9, R10, R11)
168 SHA256ROUND0(5, 0x59f111f1, R11, R12, R13, R14, R15, R8, R9, R10)
169 SHA256ROUND0(6, 0x923f82a4, R10, R11, R12, R13, R14, R15, R8, R9)
170 SHA256ROUND0(7, 0xab1c5ed5, R9, R10, R11, R12, R13, R14, R15, R8)
171 SHA256ROUND0(8, 0xd807aa98, R8, R9, R10, R11, R12, R13, R14, R15)
172 SHA256ROUND0(9, 0x12835b01, R15, R8, R9, R10, R11, R12, R13, R14)
173 SHA256ROUND0(10, 0x243185be, R14, R15, R8, R9, R10, R11, R12, R13)
174 SHA256ROUND0(11, 0x550c7dc3, R13, R14, R15, R8, R9, R10, R11, R12)
175 SHA256ROUND0(12, 0x72be5d74, R12, R13, R14, R15, R8, R9, R10, R11)
176 SHA256ROUND0(13, 0x80deb1fe, R11, R12, R13, R14, R15, R8, R9, R10)
177 SHA256ROUND0(14, 0x9bdc06a7, R10, R11, R12, R13, R14, R15, R8, R9)
178 SHA256ROUND0(15, 0xc19bf174, R9, R10, R11, R12, R13, R14, R15, R8)
179
// Rounds 16-63: W[i] is computed from the 16-entry schedule ring at R3.
180 SHA256ROUND1(16, 0xe49b69c1, R8, R9, R10, R11, R12, R13, R14, R15)
181 SHA256ROUND1(17, 0xefbe4786, R15, R8, R9, R10, R11, R12, R13, R14)
182 SHA256ROUND1(18, 0x0fc19dc6, R14, R15, R8, R9, R10, R11, R12, R13)
183 SHA256ROUND1(19, 0x240ca1cc, R13, R14, R15, R8, R9, R10, R11, R12)
184 SHA256ROUND1(20, 0x2de92c6f, R12, R13, R14, R15, R8, R9, R10, R11)
185 SHA256ROUND1(21, 0x4a7484aa, R11, R12, R13, R14, R15, R8, R9, R10)
186 SHA256ROUND1(22, 0x5cb0a9dc, R10, R11, R12, R13, R14, R15, R8, R9)
187 SHA256ROUND1(23, 0x76f988da, R9, R10, R11, R12, R13, R14, R15, R8)
188 SHA256ROUND1(24, 0x983e5152, R8, R9, R10, R11, R12, R13, R14, R15)
189 SHA256ROUND1(25, 0xa831c66d, R15, R8, R9, R10, R11, R12, R13, R14)
190 SHA256ROUND1(26, 0xb00327c8, R14, R15, R8, R9, R10, R11, R12, R13)
191 SHA256ROUND1(27, 0xbf597fc7, R13, R14, R15, R8, R9, R10, R11, R12)
192 SHA256ROUND1(28, 0xc6e00bf3, R12, R13, R14, R15, R8, R9, R10, R11)
193 SHA256ROUND1(29, 0xd5a79147, R11, R12, R13, R14, R15, R8, R9, R10)
194 SHA256ROUND1(30, 0x06ca6351, R10, R11, R12, R13, R14, R15, R8, R9)
195 SHA256ROUND1(31, 0x14292967, R9, R10, R11, R12, R13, R14, R15, R8)
196 SHA256ROUND1(32, 0x27b70a85, R8, R9, R10, R11, R12, R13, R14, R15)
197 SHA256ROUND1(33, 0x2e1b2138, R15, R8, R9, R10, R11, R12, R13, R14)
198 SHA256ROUND1(34, 0x4d2c6dfc, R14, R15, R8, R9, R10, R11, R12, R13)
199 SHA256ROUND1(35, 0x53380d13, R13, R14, R15, R8, R9, R10, R11, R12)
200 SHA256ROUND1(36, 0x650a7354, R12, R13, R14, R15, R8, R9, R10, R11)
201 SHA256ROUND1(37, 0x766a0abb, R11, R12, R13, R14, R15, R8, R9, R10)
202 SHA256ROUND1(38, 0x81c2c92e, R10, R11, R12, R13, R14, R15, R8, R9)
203 SHA256ROUND1(39, 0x92722c85, R9, R10, R11, R12, R13, R14, R15, R8)
204 SHA256ROUND1(40, 0xa2bfe8a1, R8, R9, R10, R11, R12, R13, R14, R15)
205 SHA256ROUND1(41, 0xa81a664b, R15, R8, R9, R10, R11, R12, R13, R14)
206 SHA256ROUND1(42, 0xc24b8b70, R14, R15, R8, R9, R10, R11, R12, R13)
207 SHA256ROUND1(43, 0xc76c51a3, R13, R14, R15, R8, R9, R10, R11, R12)
208 SHA256ROUND1(44, 0xd192e819, R12, R13, R14, R15, R8, R9, R10, R11)
209 SHA256ROUND1(45, 0xd6990624, R11, R12, R13, R14, R15, R8, R9, R10)
210 SHA256ROUND1(46, 0xf40e3585, R10, R11, R12, R13, R14, R15, R8, R9)
211 SHA256ROUND1(47, 0x106aa070, R9, R10, R11, R12, R13, R14, R15, R8)
212 SHA256ROUND1(48, 0x19a4c116, R8, R9, R10, R11, R12, R13, R14, R15)
213 SHA256ROUND1(49, 0x1e376c08, R15, R8, R9, R10, R11, R12, R13, R14)
214 SHA256ROUND1(50, 0x2748774c, R14, R15, R8, R9, R10, R11, R12, R13)
215 SHA256ROUND1(51, 0x34b0bcb5, R13, R14, R15, R8, R9, R10, R11, R12)
216 SHA256ROUND1(52, 0x391c0cb3, R12, R13, R14, R15, R8, R9, R10, R11)
217 SHA256ROUND1(53, 0x4ed8aa4a, R11, R12, R13, R14, R15, R8, R9, R10)
218 SHA256ROUND1(54, 0x5b9cca4f, R10, R11, R12, R13, R14, R15, R8, R9)
219 SHA256ROUND1(55, 0x682e6ff3, R9, R10, R11, R12, R13, R14, R15, R8)
220 SHA256ROUND1(56, 0x748f82ee, R8, R9, R10, R11, R12, R13, R14, R15)
221 SHA256ROUND1(57, 0x78a5636f, R15, R8, R9, R10, R11, R12, R13, R14)
222 SHA256ROUND1(58, 0x84c87814, R14, R15, R8, R9, R10, R11, R12, R13)
223 SHA256ROUND1(59, 0x8cc70208, R13, R14, R15, R8, R9, R10, R11, R12)
224 SHA256ROUND1(60, 0x90befffa, R12, R13, R14, R15, R8, R9, R10, R11)
225 SHA256ROUND1(61, 0xa4506ceb, R11, R12, R13, R14, R15, R8, R9, R10)
226 SHA256ROUND1(62, 0xbef9a3f7, R10, R11, R12, R13, R14, R15, R8, R9)
227 SHA256ROUND1(63, 0xc67178f2, R9, R10, R11, R12, R13, R14, R15, R8)
228
// Add the previous state into a..h, four words at a time, and store the
// sums back to dig. R8..R15 are left holding the new H0..H7, so they are
// already the starting a..h if the loop runs again.
229 MOVW (0*4)(R4), REGTMP
230 MOVW (1*4)(R4), REGTMP1
231 MOVW (2*4)(R4), REGTMP2
232 MOVW (3*4)(R4), REGTMP3
233 ADD REGTMP, R8 // H0 = a + H0
234 ADD REGTMP1, R9 // H1 = b + H1
235 ADD REGTMP2, R10 // H2 = c + H2
236 ADD REGTMP3, R11 // H3 = d + H3
237 MOVW R8, (0*4)(R4)
238 MOVW R9, (1*4)(R4)
239 MOVW R10, (2*4)(R4)
240 MOVW R11, (3*4)(R4)
241 MOVW (4*4)(R4), REGTMP
242 MOVW (5*4)(R4), REGTMP1
243 MOVW (6*4)(R4), REGTMP2
244 MOVW (7*4)(R4), REGTMP3
245 ADD REGTMP, R12 // H4 = e + H4
246 ADD REGTMP1, R13 // H5 = f + H5
247 ADD REGTMP2, R14 // H6 = g + H6
248 ADD REGTMP3, R15 // H7 = h + H7
249 MOVW R12, (4*4)(R4)
250 MOVW R13, (5*4)(R4)
251 MOVW R14, (6*4)(R4)
252 MOVW R15, (7*4)(R4)
253
254 ADDV $64, R5 // advance to the next 64-byte chunk
255 BNE R5, R25, loop // repeat until R5 reaches the end computed above
256
257 end:
258 RET
259
View as plain text