1 // Copyright 2023 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 //go:build !purego
6
7 #include "textflag.h"
8
9 // SHA512 block routine. See sha512block.go for Go equivalent.
10 //
11 // The algorithm is detailed in FIPS 180-4:
12 //
13 // https://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
14 //
15 // Wt = Mt; for 0 <= t <= 15
16 // Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 79
17 //
18 // a = H0
19 // b = H1
20 // c = H2
21 // d = H3
22 // e = H4
23 // f = H5
24 // g = H6
25 // h = H7
26 //
27 // for t = 0 to 79 {
28 // T1 = h + BIGSIGMA1(e) + Ch(e,f,g) + Kt + Wt
29 // T2 = BIGSIGMA0(a) + Maj(a,b,c)
30 // h = g
31 // g = f
32 // f = e
33 // e = d + T1
34 // d = c
35 // c = b
36 // b = a
37 // a = T1 + T2
38 // }
39 //
40 // H0 = a + H0
41 // H1 = b + H1
42 // H2 = c + H2
43 // H3 = d + H3
44 // H4 = e + H4
45 // H5 = f + H5
46 // H6 = g + H6
47 // H7 = h + H7
48
49 // Wt = Mt; for 0 <= t <= 15. Reads the 8 bytes of message word `index` from the block at X29, most significant byte first, leaving Wt in X5 and in schedule slot (index*8)(X19). Scratch: X6-X9.
50 #define MSGSCHEDULE0(index) \
51 MOVBU ((index*8)+0)(X29), X5; \ // byte 0 -> bits 63..56
52 MOVBU ((index*8)+1)(X29), X6; \ // byte 1 -> bits 55..48
53 MOVBU ((index*8)+2)(X29), X7; \ // byte 2 -> bits 47..40
54 MOVBU ((index*8)+3)(X29), X8; \ // byte 3 -> bits 39..32
55 SLL $56, X5; \
56 SLL $48, X6; \
57 OR X5, X6, X5; \ // X5 accumulates the word as each shifted byte is ORed in
58 SLL $40, X7; \
59 OR X5, X7, X5; \
60 SLL $32, X8; \
61 OR X5, X8, X5; \ // upper 32 bits of Wt are now complete
62 MOVBU ((index*8)+4)(X29), X9; \ // byte 4 -> bits 31..24 (X9, because X5 holds the partial word)
63 MOVBU ((index*8)+5)(X29), X6; \ // byte 5 -> bits 23..16
64 MOVBU ((index*8)+6)(X29), X7; \ // byte 6 -> bits 15..8
65 MOVBU ((index*8)+7)(X29), X8; \ // byte 7 -> bits 7..0, needs no shift
66 SLL $24, X9; \
67 OR X5, X9, X5; \
68 SLL $16, X6; \
69 OR X5, X6, X5; \
70 SLL $8, X7; \
71 OR X5, X7, X5; \
72 OR X5, X8, X5; \ // X5 = Wt
73 MOV X5, (index*8)(X19) // save Wt in the message schedule for later MSGSCHEDULE1 rounds
74
75 // Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 79
76 // SIGMA0(x) = ROTR(1,x) XOR ROTR(8,x) XOR SHR(7,x)
77 // SIGMA1(x) = ROTR(19,x) XOR ROTR(61,x) XOR SHR(6,x)
78 #define MSGSCHEDULE1(index) \ // The schedule at X19 is a 16-entry ring indexed by t&0xf. Result is left in X5 and stored in the ring. Scratch: X6-X9, X21.
79 MOV (((index-2)&0xf)*8)(X19), X5; \ // X5 = Wt-2
80 MOV (((index-15)&0xf)*8)(X19), X6; \ // X6 = Wt-15
81 MOV (((index-7)&0xf)*8)(X19), X9; \ // X9 = Wt-7
82 MOV (((index-16)&0xf)*8)(X19), X21; \ // X21 = Wt-16; (index-16)&0xf == index&0xf, so this is the slot the new Wt overwrites
83 ROR $19, X5, X7; \ // X7 = ROTR(19, Wt-2)
84 ROR $61, X5, X8; \ // X8 = ROTR(61, Wt-2)
85 SRL $6, X5; \ // X5 = SHR(6, Wt-2)
86 XOR X7, X5; \
87 XOR X8, X5; \ // X5 = SIGMA1(Wt-2)
88 ADD X9, X5; \ // X5 += Wt-7
89 ROR $1, X6, X7; \ // X7 = ROTR(1, Wt-15)
90 ROR $8, X6, X8; \ // X8 = ROTR(8, Wt-15)
91 SRL $7, X6; \ // X6 = SHR(7, Wt-15)
92 XOR X7, X6; \
93 XOR X8, X6; \ // X6 = SIGMA0(Wt-15)
94 ADD X6, X5; \ // X5 += SIGMA0(Wt-15)
95 ADD X21, X5; \ // X5 += Wt-16, giving Wt
96 MOV X5, ((index&0xf)*8)(X19) // store Wt back into the ring
97
98 // Calculate T1 in X5.
99 // h is also used as an accumulator (it is left holding h + Wt + Kt + BIGSIGMA1(e)). Wt is passed in X5. Scratch: X6-X8.
100 // T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + Kt + Wt
101 // BIGSIGMA1(x) = ROTR(14,x) XOR ROTR(18,x) XOR ROTR(41,x)
102 // Ch(x, y, z) = (x AND y) XOR (NOT x AND z)
103 // = ((y XOR z) AND x) XOR z
104 #define SHA512T1(index, e, f, g, h) \
105 MOV (index*8)(X18), X8; \ // X8 = Kt, entry `index` of the constant table at X18
106 ADD X5, h; \ // h += Wt
107 ROR $14, e, X6; \ // X6 = ROTR(14, e)
108 ADD X8, h; \ // h += Kt
109 ROR $18, e, X7; \ // X7 = ROTR(18, e)
110 ROR $41, e, X8; \ // X8 = ROTR(41, e); Kt has already been added, so X8 is free
111 XOR X7, X6; \
112 XOR f, g, X5; \ // X5 = f XOR g; Wt has already been added, so X5 is free
113 XOR X8, X6; \ // X6 = BIGSIGMA1(e)
114 AND e, X5; \ // X5 = (f XOR g) AND e
115 ADD X6, h; \ // h += BIGSIGMA1(e)
116 XOR g, X5; \ // X5 = Ch(e, f, g)
117 ADD h, X5 // X5 = T1
118
119 // Calculate T2 in X6. Scratch: X7-X9. X5 (T1) and the a, b, c registers are not written.
120 // T2 = BIGSIGMA0(a) + Maj(a, b, c)
121 // BIGSIGMA0(x) = ROTR(28,x) XOR ROTR(34,x) XOR ROTR(39,x)
122 // Maj(x, y, z) = (x AND y) XOR (x AND z) XOR (y AND z)
123 // = ((y XOR z) AND x) XOR (y AND z)
124 #define SHA512T2(a, b, c) \
125 ROR $28, a, X6; \ // X6 = ROTR(28, a)
126 ROR $34, a, X7; \ // X7 = ROTR(34, a)
127 ROR $39, a, X8; \ // X8 = ROTR(39, a)
128 XOR X7, X6; \
129 XOR b, c, X9; \ // X9 = b XOR c
130 AND b, c, X7; \ // X7 = b AND c (the ROTR(34) value in X7 was consumed on the previous XOR)
131 AND a, X9; \ // X9 = (b XOR c) AND a
132 XOR X8, X6; \ // X6 = BIGSIGMA0(a)
133 XOR X7, X9; \ // X9 = Maj(a, b, c)
134 ADD X9, X6 // X6 = T2
135
136 // Calculate T1 and T2, then e = d + T1 and a = T1 + T2. Expects Wt in X5 on entry.
137 // The values for e and a are stored in d and h, ready for rotation: the caller renames registers for the next round instead of moving values.
138 #define SHA512ROUND(index, a, b, c, d, e, f, g, h) \
139 SHA512T1(index, e, f, g, h); \ // X5 = T1 (modifies h)
140 SHA512T2(a, b, c); \ // X6 = T2
141 ADD X5, d; \ // d += T1: this register holds the new e
142 ADD X6, X5, h // h = T1 + T2: this register holds the new a
143
144 #define SHA512ROUND0(index, a, b, c, d, e, f, g, h) \ // One round for 0 <= t <= 15: Wt is read directly from the input block.
145 MSGSCHEDULE0(index); \ // X5 = Wt = Mt, also saved in the schedule
146 SHA512ROUND(index, a, b, c, d, e, f, g, h)
147
148 #define SHA512ROUND1(index, a, b, c, d, e, f, g, h) \ // One round for 16 <= t <= 79: Wt is derived from earlier schedule entries.
149 MSGSCHEDULE1(index); \ // X5 = Wt, also saved in the schedule ring
150 SHA512ROUND(index, a, b, c, d, e, f, g, h)
151
152 // func block(dig *Digest, p []byte)
153 TEXT ·block(SB),0,$128-32 // 128-byte frame, 32 bytes of arguments (dig pointer + p slice header)
154 MOV p_base+8(FP), X29 // X29 = current position in p
155 MOV p_len+16(FP), X30 // X30 = len(p)
156 SRL $7, X30
157 SLL $7, X30 // round the length down to a multiple of 128; trailing bytes are not processed
158
159 ADD X29, X30, X28 // X28 = end of the last whole 128-byte block
160 BEQ X28, X29, end // no whole block: return without touching dig
161
162 MOV $·_K(SB), X18 // const table
163 ADD $8, X2, X19 // message schedule
164 // Load the eight 64-bit words at dig into the working variables. X20 keeps dig for the final update.
165 MOV dig+0(FP), X20
166 MOV (0*8)(X20), X10 // a = H0
167 MOV (1*8)(X20), X11 // b = H1
168 MOV (2*8)(X20), X12 // c = H2
169 MOV (3*8)(X20), X13 // d = H3
170 MOV (4*8)(X20), X14 // e = H4
171 MOV (5*8)(X20), X15 // f = H5
172 MOV (6*8)(X20), X16 // g = H6
173 MOV (7*8)(X20), X17 // h = H7
174 // Each iteration processes the 128-byte block at X29. The register arguments shift by one position per round, so a..h are renamed rather than moved.
175 loop:
176 SHA512ROUND0(0, X10, X11, X12, X13, X14, X15, X16, X17)
177 SHA512ROUND0(1, X17, X10, X11, X12, X13, X14, X15, X16)
178 SHA512ROUND0(2, X16, X17, X10, X11, X12, X13, X14, X15)
179 SHA512ROUND0(3, X15, X16, X17, X10, X11, X12, X13, X14)
180 SHA512ROUND0(4, X14, X15, X16, X17, X10, X11, X12, X13)
181 SHA512ROUND0(5, X13, X14, X15, X16, X17, X10, X11, X12)
182 SHA512ROUND0(6, X12, X13, X14, X15, X16, X17, X10, X11)
183 SHA512ROUND0(7, X11, X12, X13, X14, X15, X16, X17, X10)
184 SHA512ROUND0(8, X10, X11, X12, X13, X14, X15, X16, X17)
185 SHA512ROUND0(9, X17, X10, X11, X12, X13, X14, X15, X16)
186 SHA512ROUND0(10, X16, X17, X10, X11, X12, X13, X14, X15)
187 SHA512ROUND0(11, X15, X16, X17, X10, X11, X12, X13, X14)
188 SHA512ROUND0(12, X14, X15, X16, X17, X10, X11, X12, X13)
189 SHA512ROUND0(13, X13, X14, X15, X16, X17, X10, X11, X12)
190 SHA512ROUND0(14, X12, X13, X14, X15, X16, X17, X10, X11)
191 SHA512ROUND0(15, X11, X12, X13, X14, X15, X16, X17, X10)
192 // Rounds 16-79 compute Wt from the 16-entry schedule ring instead of reading the input.
193 SHA512ROUND1(16, X10, X11, X12, X13, X14, X15, X16, X17)
194 SHA512ROUND1(17, X17, X10, X11, X12, X13, X14, X15, X16)
195 SHA512ROUND1(18, X16, X17, X10, X11, X12, X13, X14, X15)
196 SHA512ROUND1(19, X15, X16, X17, X10, X11, X12, X13, X14)
197 SHA512ROUND1(20, X14, X15, X16, X17, X10, X11, X12, X13)
198 SHA512ROUND1(21, X13, X14, X15, X16, X17, X10, X11, X12)
199 SHA512ROUND1(22, X12, X13, X14, X15, X16, X17, X10, X11)
200 SHA512ROUND1(23, X11, X12, X13, X14, X15, X16, X17, X10)
201 SHA512ROUND1(24, X10, X11, X12, X13, X14, X15, X16, X17)
202 SHA512ROUND1(25, X17, X10, X11, X12, X13, X14, X15, X16)
203 SHA512ROUND1(26, X16, X17, X10, X11, X12, X13, X14, X15)
204 SHA512ROUND1(27, X15, X16, X17, X10, X11, X12, X13, X14)
205 SHA512ROUND1(28, X14, X15, X16, X17, X10, X11, X12, X13)
206 SHA512ROUND1(29, X13, X14, X15, X16, X17, X10, X11, X12)
207 SHA512ROUND1(30, X12, X13, X14, X15, X16, X17, X10, X11)
208 SHA512ROUND1(31, X11, X12, X13, X14, X15, X16, X17, X10)
209 SHA512ROUND1(32, X10, X11, X12, X13, X14, X15, X16, X17)
210 SHA512ROUND1(33, X17, X10, X11, X12, X13, X14, X15, X16)
211 SHA512ROUND1(34, X16, X17, X10, X11, X12, X13, X14, X15)
212 SHA512ROUND1(35, X15, X16, X17, X10, X11, X12, X13, X14)
213 SHA512ROUND1(36, X14, X15, X16, X17, X10, X11, X12, X13)
214 SHA512ROUND1(37, X13, X14, X15, X16, X17, X10, X11, X12)
215 SHA512ROUND1(38, X12, X13, X14, X15, X16, X17, X10, X11)
216 SHA512ROUND1(39, X11, X12, X13, X14, X15, X16, X17, X10)
217 SHA512ROUND1(40, X10, X11, X12, X13, X14, X15, X16, X17)
218 SHA512ROUND1(41, X17, X10, X11, X12, X13, X14, X15, X16)
219 SHA512ROUND1(42, X16, X17, X10, X11, X12, X13, X14, X15)
220 SHA512ROUND1(43, X15, X16, X17, X10, X11, X12, X13, X14)
221 SHA512ROUND1(44, X14, X15, X16, X17, X10, X11, X12, X13)
222 SHA512ROUND1(45, X13, X14, X15, X16, X17, X10, X11, X12)
223 SHA512ROUND1(46, X12, X13, X14, X15, X16, X17, X10, X11)
224 SHA512ROUND1(47, X11, X12, X13, X14, X15, X16, X17, X10)
225 SHA512ROUND1(48, X10, X11, X12, X13, X14, X15, X16, X17)
226 SHA512ROUND1(49, X17, X10, X11, X12, X13, X14, X15, X16)
227 SHA512ROUND1(50, X16, X17, X10, X11, X12, X13, X14, X15)
228 SHA512ROUND1(51, X15, X16, X17, X10, X11, X12, X13, X14)
229 SHA512ROUND1(52, X14, X15, X16, X17, X10, X11, X12, X13)
230 SHA512ROUND1(53, X13, X14, X15, X16, X17, X10, X11, X12)
231 SHA512ROUND1(54, X12, X13, X14, X15, X16, X17, X10, X11)
232 SHA512ROUND1(55, X11, X12, X13, X14, X15, X16, X17, X10)
233 SHA512ROUND1(56, X10, X11, X12, X13, X14, X15, X16, X17)
234 SHA512ROUND1(57, X17, X10, X11, X12, X13, X14, X15, X16)
235 SHA512ROUND1(58, X16, X17, X10, X11, X12, X13, X14, X15)
236 SHA512ROUND1(59, X15, X16, X17, X10, X11, X12, X13, X14)
237 SHA512ROUND1(60, X14, X15, X16, X17, X10, X11, X12, X13)
238 SHA512ROUND1(61, X13, X14, X15, X16, X17, X10, X11, X12)
239 SHA512ROUND1(62, X12, X13, X14, X15, X16, X17, X10, X11)
240 SHA512ROUND1(63, X11, X12, X13, X14, X15, X16, X17, X10)
241 SHA512ROUND1(64, X10, X11, X12, X13, X14, X15, X16, X17)
242 SHA512ROUND1(65, X17, X10, X11, X12, X13, X14, X15, X16)
243 SHA512ROUND1(66, X16, X17, X10, X11, X12, X13, X14, X15)
244 SHA512ROUND1(67, X15, X16, X17, X10, X11, X12, X13, X14)
245 SHA512ROUND1(68, X14, X15, X16, X17, X10, X11, X12, X13)
246 SHA512ROUND1(69, X13, X14, X15, X16, X17, X10, X11, X12)
247 SHA512ROUND1(70, X12, X13, X14, X15, X16, X17, X10, X11)
248 SHA512ROUND1(71, X11, X12, X13, X14, X15, X16, X17, X10)
249 SHA512ROUND1(72, X10, X11, X12, X13, X14, X15, X16, X17)
250 SHA512ROUND1(73, X17, X10, X11, X12, X13, X14, X15, X16)
251 SHA512ROUND1(74, X16, X17, X10, X11, X12, X13, X14, X15)
252 SHA512ROUND1(75, X15, X16, X17, X10, X11, X12, X13, X14)
253 SHA512ROUND1(76, X14, X15, X16, X17, X10, X11, X12, X13)
254 SHA512ROUND1(77, X13, X14, X15, X16, X17, X10, X11, X12)
255 SHA512ROUND1(78, X12, X13, X14, X15, X16, X17, X10, X11)
256 SHA512ROUND1(79, X11, X12, X13, X14, X15, X16, X17, X10)
257 // 80 rounds is a multiple of 8, so a..h are back in X10..X17. Add them into the digest, four words at a time.
258 MOV (0*8)(X20), X5
259 MOV (1*8)(X20), X6
260 MOV (2*8)(X20), X7
261 MOV (3*8)(X20), X8
262 ADD X5, X10 // H0 = a + H0
263 ADD X6, X11 // H1 = b + H1
264 ADD X7, X12 // H2 = c + H2
265 ADD X8, X13 // H3 = d + H3
266 MOV X10, (0*8)(X20)
267 MOV X11, (1*8)(X20)
268 MOV X12, (2*8)(X20)
269 MOV X13, (3*8)(X20)
270 MOV (4*8)(X20), X5
271 MOV (5*8)(X20), X6
272 MOV (6*8)(X20), X7
273 MOV (7*8)(X20), X8
274 ADD X5, X14 // H4 = e + H4
275 ADD X6, X15 // H5 = f + H5
276 ADD X7, X16 // H6 = g + H6
277 ADD X8, X17 // H7 = h + H7
278 MOV X14, (4*8)(X20)
279 MOV X15, (5*8)(X20)
280 MOV X16, (6*8)(X20)
281 MOV X17, (7*8)(X20)
282 // X10..X17 now hold the updated H0..H7, which are the starting a..h for the next block.
283 ADD $128, X29 // advance to the next 128-byte block
284 BNE X28, X29, loop // repeat until the end of the last whole block
285
286 end:
287 RET
288
View as plain text