1 // Copyright 2023 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 //go:build !purego
6
7 #include "textflag.h"
8
9 // SHA256 block routine. See sha256block.go for Go equivalent.
10 //
11 // The algorithm is detailed in FIPS 180-4:
12 //
13 // https://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
14 //
15 // Wt = Mt; for 0 <= t <= 15
16 // Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 63
17 //
18 // a = H0
19 // b = H1
20 // c = H2
21 // d = H3
22 // e = H4
23 // f = H5
24 // g = H6
25 // h = H7
26 //
27 // for t = 0 to 63 {
28 // T1 = h + BIGSIGMA1(e) + Ch(e,f,g) + Kt + Wt
29 // T2 = BIGSIGMA0(a) + Maj(a,b,c)
30 // h = g
31 // g = f
32 // f = e
33 // e = d + T1
34 // d = c
35 // c = b
36 // b = a
37 // a = T1 + T2
38 // }
39 //
40 // H0 = a + H0
41 // H1 = b + H1
42 // H2 = c + H2
43 // H3 = d + H3
44 // H4 = e + H4
45 // H5 = f + H5
46 // H6 = g + H6
47 // H7 = h + H7
48
49 // Wt = Mt; for 0 <= t <= 15
//
// MSGSCHEDULE0 reads message word `index` of the current block as four
// individual bytes at X29+index*4 and assembles them most-significant
// byte first: (b0 << 24) | (b1 << 16) | (b2 << 8) | b3.
// The resulting Wt is left in X5 and is also stored to slot `index` of
// the message schedule buffer at X19.
//
// In:       X29 = pointer to the current 64-byte block, X19 = schedule buffer
// Out:      X5 = Wt
// Clobbers: X5, X6, X7, X8
50 #define MSGSCHEDULE0(index) \
51 MOVBU ((index*4)+0)(X29), X5; \
52 MOVBU ((index*4)+1)(X29), X6; \
53 MOVBU ((index*4)+2)(X29), X7; \
54 MOVBU ((index*4)+3)(X29), X8; \
55 SLL $24, X5; \
56 SLL $16, X6; \
57 OR X5, X6, X5; \
58 SLL $8, X7; \
59 OR X5, X7, X5; \
60 OR X5, X8, X5; \
61 MOVW X5, (index*4)(X19)
62
63 // Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 63
64 // SIGMA0(x) = ROTR(7,x) XOR ROTR(18,x) XOR SHR(3,x)
65 // SIGMA1(x) = ROTR(17,x) XOR ROTR(19,x) XOR SHR(10,x)
//
// The message schedule at X19 holds only the 16 most recent words, so
// every access is reduced modulo 16 with `& 0xf`. Because
// (index-16)&0xf == index&0xf, the new Wt overwrites the slot that held
// Wt-16, which is read into X21 before the final store.
//
// The inputs are loaded with MOVWU (zero-extended), so the 64-bit SRL
// instructions produce the 32-bit SHR results required by SIGMA0/SIGMA1.
//
// In:       X19 = schedule buffer
// Out:      X5 = Wt (also stored to slot index&0xf)
// Clobbers: X5, X6, X7, X8, X9, X21
66 #define MSGSCHEDULE1(index) \
67 MOVWU (((index-2)&0xf)*4)(X19), X5; \
68 MOVWU (((index-15)&0xf)*4)(X19), X6; \
69 MOVWU (((index-7)&0xf)*4)(X19), X9; \
70 MOVWU (((index-16)&0xf)*4)(X19), X21; \
71 RORW $17, X5, X7; \
72 RORW $19, X5, X8; \
73 SRL $10, X5; \
74 XOR X7, X5; \
75 XOR X8, X5; \
76 ADD X9, X5; \
77 RORW $7, X6, X7; \
78 RORW $18, X6, X8; \
79 SRL $3, X6; \
80 XOR X7, X6; \
81 XOR X8, X6; \
82 ADD X6, X5; \
83 ADD X21, X5; \
84 MOVW X5, ((index&0xf)*4)(X19)
85
86 // Calculate T1 in X5.
87 // h is also used as an accumulator. Wt is passed in X5.
88 // T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + Kt + Wt
89 // BIGSIGMA1(x) = ROTR(6,x) XOR ROTR(11,x) XOR ROTR(25,x)
90 // Ch(x, y, z) = (x AND y) XOR (NOT x AND z)
91 // = ((y XOR z) AND x) XOR z
//
// Kt is loaded from entry `index` of the constant table at X18 into X8.
// It is added to h before X8 is reused as the ROTR(25,e) temporary, so
// the order of those two instructions matters.
//
// On exit h no longer holds its input value: it contains
// h + Wt + Kt + BIGSIGMA1(e). The registers passed as e, f and g are
// only read.
//
// In:       X5 = Wt, X18 = constant table
// Out:      X5 = T1
// Clobbers: X5, X6, X7, X8, h
92 #define SHA256T1(index, e, f, g, h) \
93 MOVWU (index*4)(X18), X8; \
94 ADD X5, h; \
95 RORW $6, e, X6; \
96 ADD X8, h; \
97 RORW $11, e, X7; \
98 RORW $25, e, X8; \
99 XOR X7, X6; \
100 XOR f, g, X5; \
101 XOR X8, X6; \
102 AND e, X5; \
103 ADD X6, h; \
104 XOR g, X5; \
105 ADD h, X5
106
107 // Calculate T2 in X6.
108 // T2 = BIGSIGMA0(a) + Maj(a, b, c)
109 // BIGSIGMA0(x) = ROTR(2,x) XOR ROTR(13,x) XOR ROTR(22,x)
110 // Maj(x, y, z) = (x AND y) XOR (x AND z) XOR (y AND z)
111 // = ((y XOR z) AND x) XOR (y AND z)
//
// BIGSIGMA0(a) is accumulated in X6 and Maj(a, b, c) in X9 (with X7
// holding b AND c); the two are summed into X6 at the end.
// The registers passed as a, b and c are only read, and X5 is not
// touched, so a T1 value left there by SHA256T1 survives this macro.
//
// Out:      X6 = T2
// Clobbers: X6, X7, X8, X9
112 #define SHA256T2(a, b, c) \
113 RORW $2, a, X6; \
114 RORW $13, a, X7; \
115 RORW $22, a, X8; \
116 XOR X7, X6; \
117 XOR b, c, X9; \
118 AND b, c, X7; \
119 AND a, X9; \
120 XOR X8, X6; \
121 XOR X7, X9; \
122 ADD X9, X6
123
124 // Calculate T1 and T2, then e = d + T1 and a = T1 + T2.
125 // The values for e and a are stored in d and h, ready for rotation.
//
// Wt must already be in X5 on entry (see SHA256T1). No register-to-register
// moves implement the "h = g, g = f, ..." shuffle of the algorithm; instead
// each successive invocation in the function body passes the same eight
// registers shifted by one position, so the register that received the new
// a (the old h) is named a in the next round, and likewise d becomes e.
//
// Clobbers: X5, X6, X7, X8, X9 (via SHA256T1 and SHA256T2), plus d and h
126 #define SHA256ROUND(index, a, b, c, d, e, f, g, h) \
127 SHA256T1(index, e, f, g, h); \
128 SHA256T2(a, b, c); \
129 ADD X5, d; \
130 ADD X6, X5, h
131
// SHA256ROUND0 performs one round for 0 <= index <= 15: Wt is taken
// directly from the message by MSGSCHEDULE0, which leaves it in X5 for
// the SHA256ROUND that follows.
132 #define SHA256ROUND0(index, a, b, c, d, e, f, g, h) \
133 MSGSCHEDULE0(index); \
134 SHA256ROUND(index, a, b, c, d, e, f, g, h)
135
// SHA256ROUND1 performs one round for 16 <= index <= 63: Wt is derived
// from earlier schedule words by MSGSCHEDULE1, which leaves it in X5 for
// the SHA256ROUND that follows.
136 #define SHA256ROUND1(index, a, b, c, d, e, f, g, h) \
137 MSGSCHEDULE1(index); \
138 SHA256ROUND(index, a, b, c, d, e, f, g, h)
139
140 // Note that 64 bytes of stack space is used as a circular buffer
141 // for the message schedule (4 bytes * 16 entries).
142 //
// block folds every whole 64-byte block of p into the eight 32-bit state
// words read from, and written back to, offsets 0..28 of *dig. The length
// is rounded down to a multiple of 64, so trailing bytes of p are ignored,
// and the function returns without touching *dig if there is no whole block.
//
// Register use:
//   X29         pointer to the current 64-byte block of p
//   X28         pointer just past the last whole block (loop bound)
//   X30         len(p) rounded down to a multiple of 64 (setup only)
//   X18         address of the _K round-constant table
//   X19         base of the 16-word message schedule buffer on the stack
//   X20         dig
//   X10-X17     working variables a-h
//   X5-X9, X21  temporaries used by the round macros
//
143 // func block(dig *Digest, p []byte)
144 TEXT ·block(SB),0,$64-32
145 MOV p_base+8(FP), X29 // X29 = start of p
146 MOV p_len+16(FP), X30 // X30 = len(p)
147 SRL $6, X30
148 SLL $6, X30 // clear the low 6 bits: whole blocks only
149
150 ADD X29, X30, X28 // X28 = end of the last whole block
151 BEQ X28, X29, end // no whole block to process
152
// NOTE(review): X2 is presumably the stack pointer, with the 8-byte offset
// skipping the slot below the locals (saved return address) — confirm.
153 MOV $·_K(SB), X18 // const table
154 ADD $8, X2, X19 // message schedule
155
156 MOV dig+0(FP), X20
157 MOVWU (0*4)(X20), X10 // a = H0
158 MOVWU (1*4)(X20), X11 // b = H1
159 MOVWU (2*4)(X20), X12 // c = H2
160 MOVWU (3*4)(X20), X13 // d = H3
161 MOVWU (4*4)(X20), X14 // e = H4
162 MOVWU (5*4)(X20), X15 // f = H5
163 MOVWU (6*4)(X20), X16 // g = H6
164 MOVWU (7*4)(X20), X17 // h = H7
165
// One iteration per 64-byte block. The register arguments rotate by one
// position per round, with a period of eight rounds.
166 loop:
// Rounds 0-15: Wt comes straight from the message.
167 SHA256ROUND0(0, X10, X11, X12, X13, X14, X15, X16, X17)
168 SHA256ROUND0(1, X17, X10, X11, X12, X13, X14, X15, X16)
169 SHA256ROUND0(2, X16, X17, X10, X11, X12, X13, X14, X15)
170 SHA256ROUND0(3, X15, X16, X17, X10, X11, X12, X13, X14)
171 SHA256ROUND0(4, X14, X15, X16, X17, X10, X11, X12, X13)
172 SHA256ROUND0(5, X13, X14, X15, X16, X17, X10, X11, X12)
173 SHA256ROUND0(6, X12, X13, X14, X15, X16, X17, X10, X11)
174 SHA256ROUND0(7, X11, X12, X13, X14, X15, X16, X17, X10)
175 SHA256ROUND0(8, X10, X11, X12, X13, X14, X15, X16, X17)
176 SHA256ROUND0(9, X17, X10, X11, X12, X13, X14, X15, X16)
177 SHA256ROUND0(10, X16, X17, X10, X11, X12, X13, X14, X15)
178 SHA256ROUND0(11, X15, X16, X17, X10, X11, X12, X13, X14)
179 SHA256ROUND0(12, X14, X15, X16, X17, X10, X11, X12, X13)
180 SHA256ROUND0(13, X13, X14, X15, X16, X17, X10, X11, X12)
181 SHA256ROUND0(14, X12, X13, X14, X15, X16, X17, X10, X11)
182 SHA256ROUND0(15, X11, X12, X13, X14, X15, X16, X17, X10)
183
// Rounds 16-63: Wt is computed from the circular schedule buffer.
184 SHA256ROUND1(16, X10, X11, X12, X13, X14, X15, X16, X17)
185 SHA256ROUND1(17, X17, X10, X11, X12, X13, X14, X15, X16)
186 SHA256ROUND1(18, X16, X17, X10, X11, X12, X13, X14, X15)
187 SHA256ROUND1(19, X15, X16, X17, X10, X11, X12, X13, X14)
188 SHA256ROUND1(20, X14, X15, X16, X17, X10, X11, X12, X13)
189 SHA256ROUND1(21, X13, X14, X15, X16, X17, X10, X11, X12)
190 SHA256ROUND1(22, X12, X13, X14, X15, X16, X17, X10, X11)
191 SHA256ROUND1(23, X11, X12, X13, X14, X15, X16, X17, X10)
192 SHA256ROUND1(24, X10, X11, X12, X13, X14, X15, X16, X17)
193 SHA256ROUND1(25, X17, X10, X11, X12, X13, X14, X15, X16)
194 SHA256ROUND1(26, X16, X17, X10, X11, X12, X13, X14, X15)
195 SHA256ROUND1(27, X15, X16, X17, X10, X11, X12, X13, X14)
196 SHA256ROUND1(28, X14, X15, X16, X17, X10, X11, X12, X13)
197 SHA256ROUND1(29, X13, X14, X15, X16, X17, X10, X11, X12)
198 SHA256ROUND1(30, X12, X13, X14, X15, X16, X17, X10, X11)
199 SHA256ROUND1(31, X11, X12, X13, X14, X15, X16, X17, X10)
200 SHA256ROUND1(32, X10, X11, X12, X13, X14, X15, X16, X17)
201 SHA256ROUND1(33, X17, X10, X11, X12, X13, X14, X15, X16)
202 SHA256ROUND1(34, X16, X17, X10, X11, X12, X13, X14, X15)
203 SHA256ROUND1(35, X15, X16, X17, X10, X11, X12, X13, X14)
204 SHA256ROUND1(36, X14, X15, X16, X17, X10, X11, X12, X13)
205 SHA256ROUND1(37, X13, X14, X15, X16, X17, X10, X11, X12)
206 SHA256ROUND1(38, X12, X13, X14, X15, X16, X17, X10, X11)
207 SHA256ROUND1(39, X11, X12, X13, X14, X15, X16, X17, X10)
208 SHA256ROUND1(40, X10, X11, X12, X13, X14, X15, X16, X17)
209 SHA256ROUND1(41, X17, X10, X11, X12, X13, X14, X15, X16)
210 SHA256ROUND1(42, X16, X17, X10, X11, X12, X13, X14, X15)
211 SHA256ROUND1(43, X15, X16, X17, X10, X11, X12, X13, X14)
212 SHA256ROUND1(44, X14, X15, X16, X17, X10, X11, X12, X13)
213 SHA256ROUND1(45, X13, X14, X15, X16, X17, X10, X11, X12)
214 SHA256ROUND1(46, X12, X13, X14, X15, X16, X17, X10, X11)
215 SHA256ROUND1(47, X11, X12, X13, X14, X15, X16, X17, X10)
216 SHA256ROUND1(48, X10, X11, X12, X13, X14, X15, X16, X17)
217 SHA256ROUND1(49, X17, X10, X11, X12, X13, X14, X15, X16)
218 SHA256ROUND1(50, X16, X17, X10, X11, X12, X13, X14, X15)
219 SHA256ROUND1(51, X15, X16, X17, X10, X11, X12, X13, X14)
220 SHA256ROUND1(52, X14, X15, X16, X17, X10, X11, X12, X13)
221 SHA256ROUND1(53, X13, X14, X15, X16, X17, X10, X11, X12)
222 SHA256ROUND1(54, X12, X13, X14, X15, X16, X17, X10, X11)
223 SHA256ROUND1(55, X11, X12, X13, X14, X15, X16, X17, X10)
224 SHA256ROUND1(56, X10, X11, X12, X13, X14, X15, X16, X17)
225 SHA256ROUND1(57, X17, X10, X11, X12, X13, X14, X15, X16)
226 SHA256ROUND1(58, X16, X17, X10, X11, X12, X13, X14, X15)
227 SHA256ROUND1(59, X15, X16, X17, X10, X11, X12, X13, X14)
228 SHA256ROUND1(60, X14, X15, X16, X17, X10, X11, X12, X13)
229 SHA256ROUND1(61, X13, X14, X15, X16, X17, X10, X11, X12)
230 SHA256ROUND1(62, X12, X13, X14, X15, X16, X17, X10, X11)
231 SHA256ROUND1(63, X11, X12, X13, X14, X15, X16, X17, X10)
232
// 64 is a multiple of 8, so the rotating assignment is back to
// a=X10 ... h=X17. Add the previous state, reloaded from dig, and store
// the sums back. X10-X17 are not reloaded at the top of the loop, so the
// sums also serve as the initial a-h for the next block.
233 MOVWU (0*4)(X20), X5
234 MOVWU (1*4)(X20), X6
235 MOVWU (2*4)(X20), X7
236 MOVWU (3*4)(X20), X8
237 ADD X5, X10 // H0 = a + H0
238 ADD X6, X11 // H1 = b + H1
239 ADD X7, X12 // H2 = c + H2
240 ADD X8, X13 // H3 = d + H3
241 MOVW X10, (0*4)(X20)
242 MOVW X11, (1*4)(X20)
243 MOVW X12, (2*4)(X20)
244 MOVW X13, (3*4)(X20)
245 MOVWU (4*4)(X20), X5
246 MOVWU (5*4)(X20), X6
247 MOVWU (6*4)(X20), X7
248 MOVWU (7*4)(X20), X8
249 ADD X5, X14 // H4 = e + H4
250 ADD X6, X15 // H5 = f + H5
251 ADD X7, X16 // H6 = g + H6
252 ADD X8, X17 // H7 = h + H7
253 MOVW X14, (4*4)(X20)
254 MOVW X15, (5*4)(X20)
255 MOVW X16, (6*4)(X20)
256 MOVW X17, (7*4)(X20)
257
258 ADD $64, X29 // advance to the next block
259 BNE X28, X29, loop // until the end of the last whole block
260
261 end:
262 RET
263
View as plain text