Text file
src/math/big/arith_loong64.s
1 // Copyright 2025 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // Code generated by 'go generate' (with ./internal/asmgen). DO NOT EDIT.
6
7 //go:build !math_big_pure_go
8
9 #include "textflag.h"
10
11 // func addVV(z, x, y []Word) (c Word)
//
// addVV adds x and y word by word (8-byte words) into z, propagating a carry
// from each word to the next, and returns the final carry in c.
// The word count is taken from len(z) only; len(x) and len(y) are not read.
// NOTE(review): presumably callers guarantee x and y hold at least len(z)
// words — confirm against the Go-side declaration.
//
// Register use:
//   R4      = len(z), then the number of 4-word iterations (len(z) >> 2)
//   R5, R6  = read pointers into x and y
//   R7      = write pointer into z
//   R8      = number of single-word iterations (len(z) & 3);
//             reused as a data register inside the 4X loop
//   R28     = running carry
//   R30     = partial carry of the first add in each add-with-carry
//
// No condition flags are used. Each add-with-carry ("ADCS" in the
// annotations) is two adds, each followed by an unsigned compare (SGTU) that
// detects wraparound: the sum wrapped exactly when the addend is greater than
// the result. At most one of the two adds can wrap, so the combined carry
// stays 0 or 1.
12 TEXT ·addVV(SB), NOSPLIT, $0
13 MOVV z_len+8(FP), R4 // R4 = len(z)
14 MOVV x_base+24(FP), R5 // R5 = &x[0]
15 MOVV y_base+48(FP), R6 // R6 = &y[0]
16 MOVV z_base+0(FP), R7 // R7 = &z[0]
17 // compute unrolled loop lengths
18 AND $3, R4, R8 // R8 = len(z) % 4: words handled one at a time
19 SRLV $2, R4 // R4 = len(z) / 4: iterations of the 4X loop
20 XOR R28, R28 // clear carry
// The leftover (len % 4) words are processed first, then the 4X groups.
21 loop1:
22 BEQ R8, loop1done // nothing to do if there are no leftover words
23 loop1cont:
24 // unroll 1X
25 MOVV 0(R5), R9 // R9 = x word
26 MOVV 0(R6), R10 // R10 = y word
27 ADDVU R10, R9 // ADCS R10, R9, R9 (cr=R28)
28 SGTU R10, R9, R30 // ... R30 = 1 if x+y wrapped (y > sum)
29 ADDVU R28, R9 // ... add the incoming carry
30 SGTU R28, R9, R28 // ... R28 = 1 if adding the carry wrapped
31 ADDVU R30, R28 // ... R28 = carry out of this word
32 MOVV R9, 0(R7) // store the sum word into z
33 ADDVU $8, R5 // advance all three pointers by one word
34 ADDVU $8, R6
35 ADDVU $8, R7
36 SUBVU $1, R8
37 BNE R8, loop1cont
38 loop1done:
39 loop4:
40 BEQ R4, loop4done // skip if fewer than four words remain
41 loop4cont:
42 // unroll 4X
// Load four words of x (R8-R11) and four words of y (R12-R15) up front.
43 MOVV 0(R5), R8
44 MOVV 8(R5), R9
45 MOVV 16(R5), R10
46 MOVV 24(R5), R11
47 MOVV 0(R6), R12
48 MOVV 8(R6), R13
49 MOVV 16(R6), R14
50 MOVV 24(R6), R15
// Four add-with-carry steps, lowest word first; each is the same five
// instruction sequence as in the 1X loop and chains the carry through R28.
51 ADDVU R12, R8 // ADCS R12, R8, R8 (cr=R28)
52 SGTU R12, R8, R30 // ...
53 ADDVU R28, R8 // ...
54 SGTU R28, R8, R28 // ...
55 ADDVU R30, R28 // ...
56 ADDVU R13, R9 // ADCS R13, R9, R9 (cr=R28)
57 SGTU R13, R9, R30 // ...
58 ADDVU R28, R9 // ...
59 SGTU R28, R9, R28 // ...
60 ADDVU R30, R28 // ...
61 ADDVU R14, R10 // ADCS R14, R10, R10 (cr=R28)
62 SGTU R14, R10, R30 // ...
63 ADDVU R28, R10 // ...
64 SGTU R28, R10, R28 // ...
65 ADDVU R30, R28 // ...
66 ADDVU R15, R11 // ADCS R15, R11, R11 (cr=R28)
67 SGTU R15, R11, R30 // ...
68 ADDVU R28, R11 // ...
69 SGTU R28, R11, R28 // ...
70 ADDVU R30, R28 // ...
// Store the four sum words and advance the pointers by four words (32 bytes).
71 MOVV R8, 0(R7)
72 MOVV R9, 8(R7)
73 MOVV R10, 16(R7)
74 MOVV R11, 24(R7)
75 ADDVU $32, R5
76 ADDVU $32, R6
77 ADDVU $32, R7
78 SUBVU $1, R4
79 BNE R4, loop4cont
80 loop4done:
81 MOVV R28, c+72(FP) // return the final carry
82 RET
83
84 // func subVV(z, x, y []Word) (c Word)
//
// subVV computes x - y word by word (8-byte words) into z, propagating a
// borrow from each word to the next, and returns the final borrow in c.
// The word count is taken from len(z) only; len(x) and len(y) are not read.
// NOTE(review): presumably callers guarantee x and y hold at least len(z)
// words — confirm against the Go-side declaration.
//
// Register use:
//   R4      = len(z), then the number of 4-word iterations (len(z) >> 2)
//   R5, R6  = read pointers into x and y
//   R7      = write pointer into z
//   R8      = number of single-word iterations (len(z) & 3);
//             reused as a data register inside the 4X loop
//   R28     = running borrow (named "carry" in the existing comments)
//   R30     = partial borrow of the first subtract in each step
//
// No condition flags are used. Each subtract-with-borrow ("SBCS" in the
// annotations) is two subtracts, each preceded by an unsigned compare (SGTU)
// that tests whether the subtrahend exceeds the current value, i.e. whether
// the subtract will wrap. The two partial borrows are then added together.
85 TEXT ·subVV(SB), NOSPLIT, $0
86 MOVV z_len+8(FP), R4 // R4 = len(z)
87 MOVV x_base+24(FP), R5 // R5 = &x[0]
88 MOVV y_base+48(FP), R6 // R6 = &y[0]
89 MOVV z_base+0(FP), R7 // R7 = &z[0]
90 // compute unrolled loop lengths
91 AND $3, R4, R8 // R8 = len(z) % 4: words handled one at a time
92 SRLV $2, R4 // R4 = len(z) / 4: iterations of the 4X loop
93 XOR R28, R28 // clear carry
// The leftover (len % 4) words are processed first, then the 4X groups.
94 loop1:
95 BEQ R8, loop1done // nothing to do if there are no leftover words
96 loop1cont:
97 // unroll 1X
98 MOVV 0(R5), R9 // R9 = x word
99 MOVV 0(R6), R10 // R10 = y word
100 SGTU R28, R9, R30 // SBCS R10, R9, R9
101 SUBVU R28, R9 // ... subtract the incoming borrow (R30 recorded whether this wraps)
102 SGTU R10, R9, R28 // ... R28 = 1 if subtracting y will wrap
103 SUBVU R10, R9 // ... R9 = x - borrow - y
104 ADDVU R30, R28 // ... R28 = borrow out of this word
105 MOVV R9, 0(R7) // store the difference word into z
106 ADDVU $8, R5 // advance all three pointers by one word
107 ADDVU $8, R6
108 ADDVU $8, R7
109 SUBVU $1, R8
110 BNE R8, loop1cont
111 loop1done:
112 loop4:
113 BEQ R4, loop4done // skip if fewer than four words remain
114 loop4cont:
115 // unroll 4X
// Load four words of x (R8-R11) and four words of y (R12-R15) up front.
116 MOVV 0(R5), R8
117 MOVV 8(R5), R9
118 MOVV 16(R5), R10
119 MOVV 24(R5), R11
120 MOVV 0(R6), R12
121 MOVV 8(R6), R13
122 MOVV 16(R6), R14
123 MOVV 24(R6), R15
// Four subtract-with-borrow steps, lowest word first; each is the same five
// instruction sequence as in the 1X loop and chains the borrow through R28.
124 SGTU R28, R8, R30 // SBCS R12, R8, R8
125 SUBVU R28, R8 // ...
126 SGTU R12, R8, R28 // ...
127 SUBVU R12, R8 // ...
128 ADDVU R30, R28 // ...
129 SGTU R28, R9, R30 // SBCS R13, R9, R9
130 SUBVU R28, R9 // ...
131 SGTU R13, R9, R28 // ...
132 SUBVU R13, R9 // ...
133 ADDVU R30, R28 // ...
134 SGTU R28, R10, R30 // SBCS R14, R10, R10
135 SUBVU R28, R10 // ...
136 SGTU R14, R10, R28 // ...
137 SUBVU R14, R10 // ...
138 ADDVU R30, R28 // ...
139 SGTU R28, R11, R30 // SBCS R15, R11, R11
140 SUBVU R28, R11 // ...
141 SGTU R15, R11, R28 // ...
142 SUBVU R15, R11 // ...
143 ADDVU R30, R28 // ...
// Store the four difference words and advance the pointers by 32 bytes.
144 MOVV R8, 0(R7)
145 MOVV R9, 8(R7)
146 MOVV R10, 16(R7)
147 MOVV R11, 24(R7)
148 ADDVU $32, R5
149 ADDVU $32, R6
150 ADDVU $32, R7
151 SUBVU $1, R4
152 BNE R4, loop4cont
153 loop4done:
154 MOVV R28, c+72(FP) // return the final borrow
155 RET
156
157 // func lshVU(z, x []Word, s uint) (c Word)
//
// lshVU writes x shifted left by s bits into z, working from the highest word
// down to the lowest, and returns in c the bits shifted out of the highest
// word. When len(z) is 0 it returns c = 0 without reading x or writing z.
// The word count is taken from len(z) only; len(x) is not read.
// NOTE(review): the code forms the complementary shift count 64-s, so it
// presumably relies on callers passing 0 < s < 64 — confirm against the
// Go-side declaration.
//
// Register use:
//   R4  = len(z), then len(z)-1, then the number of 4-word iterations
//   R5  = s (left shift count)
//   R9  = 64 - s (complementary right shift count)
//   R6  = pointer into x, R7 = pointer into z; both start one word past the
//         end and move downward
//   R8  = the most recently loaded source word shifted left by s, still
//         waiting for the top bits of the next lower word
//   R10 = temporary for c, then the number of single-word iterations;
//         reused as a data register inside the 4X loop
158 TEXT ·lshVU(SB), NOSPLIT, $0
159 MOVV z_len+8(FP), R4 // R4 = len(z)
160 BEQ R4, ret0 // empty input: just return c = 0
161 MOVV s+48(FP), R5 // R5 = s
162 MOVV x_base+24(FP), R6 // R6 = &x[0]
163 MOVV z_base+0(FP), R7 // R7 = &z[0]
164 // run loop backward
165 SLLV $3, R4, R8 // R8 = len(z) * 8 bytes
166 ADDVU R8, R6 // R6 = one past the last word of x
167 SLLV $3, R4, R8
168 ADDVU R8, R7 // R7 = one past the last word of z
169 // shift first word into carry
170 MOVV -8(R6), R8 // R8 = highest word of x
171 MOVV $64, R9
172 SUBVU R5, R9 // R9 = 64 - s
173 SRLV R9, R8, R10 // R10 = bits shifted out of the highest word
174 SLLV R5, R8 // R8 = highest word << s (low bits still missing)
175 MOVV R10, c+56(FP) // the result c is stored now; R10 is reused below
176 // shift remaining words
177 SUBVU $1, R4 // one source word has already been consumed
178 // compute unrolled loop lengths
179 AND $3, R4, R10 // R10 = remaining % 4: words handled one at a time
180 SRLV $2, R4 // R4 = remaining / 4: iterations of the 4X loop
181 loop1:
182 BEQ R10, loop1done
183 loop1cont:
184 // unroll 1X
185 MOVV -16(R6), R11 // R11 = next lower source word
186 SRLV R9, R11, R12 // R12 = its top s bits, moved to the bottom
187 OR R8, R12 // combine with the pending higher word
188 SLLV R5, R11, R8 // R8 = new pending word (lower word << s)
189 MOVV R12, -8(R7) // store the completed output word
190 ADDVU $-8, R6 // step both pointers down by one word
191 ADDVU $-8, R7
192 SUBVU $1, R10
193 BNE R10, loop1cont
194 loop1done:
195 loop4:
196 BEQ R4, loop4done
197 loop4cont:
198 // unroll 4X
// Load the next four lower source words, highest first.
199 MOVV -16(R6), R10
200 MOVV -24(R6), R11
201 MOVV -32(R6), R12
202 MOVV -40(R6), R13
// Four shift/merge steps. Each source register is overwritten with an
// output word once its own shifted value has been moved into R8, so the
// outputs end up in R14, R10, R11, R12 (highest first).
203 SRLV R9, R10, R14
204 OR R8, R14
205 SLLV R5, R10, R8
206 SRLV R9, R11, R10
207 OR R8, R10
208 SLLV R5, R11, R8
209 SRLV R9, R12, R11
210 OR R8, R11
211 SLLV R5, R12, R8
212 SRLV R9, R13, R12
213 OR R8, R12
214 SLLV R5, R13, R8
// Store the four completed output words and step the pointers down by 32 bytes.
215 MOVV R14, -8(R7)
216 MOVV R10, -16(R7)
217 MOVV R11, -24(R7)
218 MOVV R12, -32(R7)
219 ADDVU $-32, R6
220 ADDVU $-32, R7
221 SUBVU $1, R4
222 BNE R4, loop4cont
223 loop4done:
224 // store final shifted bits
// R7 is now one word past z[0], so this writes the lowest output word,
// which has no lower source word to merge with.
225 MOVV R8, -8(R7)
226 RET
227 ret0:
228 MOVV R0, c+56(FP) // len(z) == 0: return c = 0 (R0 is the zero register)
229 RET
230
231 // func rshVU(z, x []Word, s uint) (c Word)
//
// rshVU writes x shifted right by s bits into z, working from the lowest word
// up to the highest, and returns in c the bits shifted out of the lowest
// word (positioned at the top of c). When len(z) is 0 it returns c = 0
// without reading x or writing z.
// The word count is taken from len(z) only; len(x) is not read.
// NOTE(review): the code forms the complementary shift count 64-s, so it
// presumably relies on callers passing 0 < s < 64 — confirm against the
// Go-side declaration.
//
// Register use:
//   R4  = len(z), then len(z)-1, then the number of 4-word iterations
//   R5  = s (right shift count)
//   R9  = 64 - s (complementary left shift count)
//   R6  = pointer into x, R7 = pointer into z; both move upward
//   R8  = the most recently loaded source word shifted right by s, still
//         waiting for the low bits of the next higher word
//   R10 = temporary for c, then the number of single-word iterations;
//         reused as a data register inside the 4X loop
232 TEXT ·rshVU(SB), NOSPLIT, $0
233 MOVV z_len+8(FP), R4 // R4 = len(z)
234 BEQ R4, ret0 // empty input: just return c = 0
235 MOVV s+48(FP), R5 // R5 = s
236 MOVV x_base+24(FP), R6 // R6 = &x[0]
237 MOVV z_base+0(FP), R7 // R7 = &z[0]
238 // shift first word into carry
239 MOVV 0(R6), R8 // R8 = lowest word of x
240 MOVV $64, R9
241 SUBVU R5, R9 // R9 = 64 - s
242 SLLV R9, R8, R10 // R10 = bits shifted out of the lowest word
243 SRLV R5, R8 // R8 = lowest word >> s (high bits still missing)
244 MOVV R10, c+56(FP) // the result c is stored now; R10 is reused below
245 // shift remaining words
246 SUBVU $1, R4 // one source word has already been consumed
247 // compute unrolled loop lengths
248 AND $3, R4, R10 // R10 = remaining % 4: words handled one at a time
249 SRLV $2, R4 // R4 = remaining / 4: iterations of the 4X loop
250 loop1:
251 BEQ R10, loop1done
252 loop1cont:
253 // unroll 1X
254 MOVV 8(R6), R11 // R11 = next higher source word
255 SLLV R9, R11, R12 // R12 = its low s bits, moved to the top
256 OR R8, R12 // combine with the pending lower word
257 SRLV R5, R11, R8 // R8 = new pending word (higher word >> s)
258 MOVV R12, 0(R7) // store the completed output word
259 ADDVU $8, R6 // step both pointers up by one word
260 ADDVU $8, R7
261 SUBVU $1, R10
262 BNE R10, loop1cont
263 loop1done:
264 loop4:
265 BEQ R4, loop4done
266 loop4cont:
267 // unroll 4X
// Load the next four higher source words, lowest first.
268 MOVV 8(R6), R10
269 MOVV 16(R6), R11
270 MOVV 24(R6), R12
271 MOVV 32(R6), R13
// Four shift/merge steps. Each source register is overwritten with an
// output word once its own shifted value has been moved into R8, so the
// outputs end up in R14, R10, R11, R12 (lowest first).
272 SLLV R9, R10, R14
273 OR R8, R14
274 SRLV R5, R10, R8
275 SLLV R9, R11, R10
276 OR R8, R10
277 SRLV R5, R11, R8
278 SLLV R9, R12, R11
279 OR R8, R11
280 SRLV R5, R12, R8
281 SLLV R9, R13, R12
282 OR R8, R12
283 SRLV R5, R13, R8
// Store the four completed output words and step the pointers up by 32 bytes.
284 MOVV R14, 0(R7)
285 MOVV R10, 8(R7)
286 MOVV R11, 16(R7)
287 MOVV R12, 24(R7)
288 ADDVU $32, R6
289 ADDVU $32, R7
290 SUBVU $1, R4
291 BNE R4, loop4cont
292 loop4done:
293 // store final shifted bits
// R7 now points at the last word of z, which has no higher source word to
// merge with.
294 MOVV R8, 0(R7)
295 RET
296 ret0:
297 MOVV R0, c+56(FP) // len(z) == 0: return c = 0 (R0 is the zero register)
298 RET
299
300 // func mulAddVWW(z, x []Word, m, a Word) (c Word)
//
// mulAddVWW computes z = x*m + a, one 8-byte word at a time from the lowest
// word up, and returns the final carry word in c. For each word the low half
// of the 128-bit product plus the incoming carry is stored into z, and the
// high half plus the carry bit of that addition becomes the next carry.
// The word count is taken from len(z) only; len(x) is not read.
// NOTE(review): presumably callers guarantee x holds at least len(z) words —
// confirm against the Go-side declaration.
//
// Register use:
//   R4  = m (multiplier)
//   R5  = running carry word, initialised to a; returned as c
//   R6  = len(z), then the number of 4-word iterations (len(z) >> 2)
//   R7  = read pointer into x, R8 = write pointer into z
//   R9  = number of single-word iterations (len(z) & 3);
//         reused as a data register inside the 4X loop
//   R28 = carry bit out of the low-half addition
301 TEXT ·mulAddVWW(SB), NOSPLIT, $0
302 MOVV m+48(FP), R4 // R4 = m
303 MOVV a+56(FP), R5 // R5 = a, the initial carry word
304 MOVV z_len+8(FP), R6 // R6 = len(z)
305 MOVV x_base+24(FP), R7 // R7 = &x[0]
306 MOVV z_base+0(FP), R8 // R8 = &z[0]
307 // compute unrolled loop lengths
308 AND $3, R6, R9 // R9 = len(z) % 4: words handled one at a time
309 SRLV $2, R6 // R6 = len(z) / 4: iterations of the 4X loop
// The leftover (len % 4) words are processed first, then the 4X groups.
310 loop1:
311 BEQ R9, loop1done
312 loop1cont:
313 // unroll 1X
314 MOVV 0(R7), R10 // R10 = x word
315 // synthetic carry, one column at a time
316 MULV R4, R10, R11 // R11 = low 64 bits of m * x word
317 MULHVU R4, R10, R12 // R12 = high 64 bits of the unsigned product
318 ADDVU R5, R11, R10 // ADDS R5, R11, R10 (cr=R28)
319 SGTU R5, R10, R28 // ... R28 = 1 if low half + carry wrapped
320 ADDVU R28, R12, R5 // ADC $0, R12, R5
321 MOVV R10, 0(R8) // store the result word into z
322 ADDVU $8, R7 // advance both pointers by one word
323 ADDVU $8, R8
324 SUBVU $1, R9
325 BNE R9, loop1cont
326 loop1done:
327 loop4:
328 BEQ R6, loop4done // skip if fewer than four words remain
329 loop4cont:
330 // unroll 4X
// Load four words of x into R9-R12.
331 MOVV 0(R7), R9
332 MOVV 8(R7), R10
333 MOVV 16(R7), R11
334 MOVV 24(R7), R12
335 // synthetic carry, one column at a time
// Four multiply-add steps, lowest word first. R13/R14 hold the low/high
// product halves; each result overwrites its source register and the carry
// word is chained through R5.
336 MULV R4, R9, R13
337 MULHVU R4, R9, R14
338 ADDVU R5, R13, R9 // ADDS R5, R13, R9 (cr=R28)
339 SGTU R5, R9, R28 // ...
340 ADDVU R28, R14, R5 // ADC $0, R14, R5
341 MULV R4, R10, R13
342 MULHVU R4, R10, R14
343 ADDVU R5, R13, R10 // ADDS R5, R13, R10 (cr=R28)
344 SGTU R5, R10, R28 // ...
345 ADDVU R28, R14, R5 // ADC $0, R14, R5
346 MULV R4, R11, R13
347 MULHVU R4, R11, R14
348 ADDVU R5, R13, R11 // ADDS R5, R13, R11 (cr=R28)
349 SGTU R5, R11, R28 // ...
350 ADDVU R28, R14, R5 // ADC $0, R14, R5
351 MULV R4, R12, R13
352 MULHVU R4, R12, R14
353 ADDVU R5, R13, R12 // ADDS R5, R13, R12 (cr=R28)
354 SGTU R5, R12, R28 // ...
355 ADDVU R28, R14, R5 // ADC $0, R14, R5
// Store the four result words and advance the pointers by 32 bytes.
356 MOVV R9, 0(R8)
357 MOVV R10, 8(R8)
358 MOVV R11, 16(R8)
359 MOVV R12, 24(R8)
360 ADDVU $32, R7
361 ADDVU $32, R8
362 SUBVU $1, R6
363 BNE R6, loop4cont
364 loop4done:
365 MOVV R5, c+64(FP) // return the final carry word
366 RET
367
368 // func addMulVVWW(z, x, y []Word, m, a Word) (c Word)
//
// addMulVVWW computes z = x + y*m + a, one 8-byte word at a time from the
// lowest word up, and returns the final carry word in c. For each word the
// 128-bit product of m and the y word is formed, the x word and then the
// incoming carry are added to its low half, and each carry bit from those two
// additions is folded into the high half, which becomes the next carry.
// The word count is taken from len(z) only; len(x) and len(y) are not read.
// NOTE(review): presumably callers guarantee x and y hold at least len(z)
// words — confirm against the Go-side declaration.
//
// Register use:
//   R4  = m (multiplier)
//   R5  = running carry word, initialised to a; returned as c
//   R6  = len(z), then the number of 4-word iterations (len(z) >> 2)
//   R7  = read pointer into x, R8 = read pointer into y
//   R9  = write pointer into z
//   R10 = number of single-word iterations (len(z) & 3);
//         reused as a data register inside the 4X loop
//   R28 = carry bit out of each low-half addition
369 TEXT ·addMulVVWW(SB), NOSPLIT, $0
370 MOVV m+72(FP), R4 // R4 = m
371 MOVV a+80(FP), R5 // R5 = a, the initial carry word
372 MOVV z_len+8(FP), R6 // R6 = len(z)
373 MOVV x_base+24(FP), R7 // R7 = &x[0]
374 MOVV y_base+48(FP), R8 // R8 = &y[0]
375 MOVV z_base+0(FP), R9 // R9 = &z[0]
376 // compute unrolled loop lengths
377 AND $3, R6, R10 // R10 = len(z) % 4: words handled one at a time
378 SRLV $2, R6 // R6 = len(z) / 4: iterations of the 4X loop
// The leftover (len % 4) words are processed first, then the 4X groups.
379 loop1:
380 BEQ R10, loop1done
381 loop1cont:
382 // unroll 1X
383 MOVV 0(R7), R11 // R11 = x word
384 MOVV 0(R8), R12 // R12 = y word
385 // synthetic carry, one column at a time
386 MULV R4, R12, R13 // R13 = low 64 bits of m * y word
387 MULHVU R4, R12, R14 // R14 = high 64 bits of the unsigned product
388 ADDVU R11, R13 // ADDS R11, R13, R13 (cr=R28)
389 SGTU R11, R13, R28 // ... R28 = 1 if low half + x word wrapped
390 ADDVU R28, R14 // ADC $0, R14, R14
391 ADDVU R5, R13, R12 // ADDS R5, R13, R12 (cr=R28)
392 SGTU R5, R12, R28 // ... R28 = 1 if adding the carry word wrapped
393 ADDVU R28, R14, R5 // ADC $0, R14, R5
394 MOVV R12, 0(R9) // store the result word into z
395 ADDVU $8, R7 // advance all three pointers by one word
396 ADDVU $8, R8
397 ADDVU $8, R9
398 SUBVU $1, R10
399 BNE R10, loop1cont
400 loop1done:
401 loop4:
402 BEQ R6, loop4done // skip if fewer than four words remain
403 loop4cont:
404 // unroll 4X
// Load four words of x (R10-R13) and four words of y (R14-R17) up front.
405 MOVV 0(R7), R10
406 MOVV 8(R7), R11
407 MOVV 16(R7), R12
408 MOVV 24(R7), R13
409 MOVV 0(R8), R14
410 MOVV 8(R8), R15
411 MOVV 16(R8), R16
412 MOVV 24(R8), R17
413 // synthetic carry, one column at a time
// Four multiply-add steps, lowest word first. R18/R19 hold the low/high
// product halves; each result overwrites its y register (R14-R17) and the
// carry word is chained through R5.
414 MULV R4, R14, R18
415 MULHVU R4, R14, R19
416 ADDVU R10, R18 // ADDS R10, R18, R18 (cr=R28)
417 SGTU R10, R18, R28 // ...
418 ADDVU R28, R19 // ADC $0, R19, R19
419 ADDVU R5, R18, R14 // ADDS R5, R18, R14 (cr=R28)
420 SGTU R5, R14, R28 // ...
421 ADDVU R28, R19, R5 // ADC $0, R19, R5
422 MULV R4, R15, R18
423 MULHVU R4, R15, R19
424 ADDVU R11, R18 // ADDS R11, R18, R18 (cr=R28)
425 SGTU R11, R18, R28 // ...
426 ADDVU R28, R19 // ADC $0, R19, R19
427 ADDVU R5, R18, R15 // ADDS R5, R18, R15 (cr=R28)
428 SGTU R5, R15, R28 // ...
429 ADDVU R28, R19, R5 // ADC $0, R19, R5
430 MULV R4, R16, R18
431 MULHVU R4, R16, R19
432 ADDVU R12, R18 // ADDS R12, R18, R18 (cr=R28)
433 SGTU R12, R18, R28 // ...
434 ADDVU R28, R19 // ADC $0, R19, R19
435 ADDVU R5, R18, R16 // ADDS R5, R18, R16 (cr=R28)
436 SGTU R5, R16, R28 // ...
437 ADDVU R28, R19, R5 // ADC $0, R19, R5
438 MULV R4, R17, R18
439 MULHVU R4, R17, R19
440 ADDVU R13, R18 // ADDS R13, R18, R18 (cr=R28)
441 SGTU R13, R18, R28 // ...
442 ADDVU R28, R19 // ADC $0, R19, R19
443 ADDVU R5, R18, R17 // ADDS R5, R18, R17 (cr=R28)
444 SGTU R5, R17, R28 // ...
445 ADDVU R28, R19, R5 // ADC $0, R19, R5
// Store the four result words and advance the pointers by 32 bytes.
446 MOVV R14, 0(R9)
447 MOVV R15, 8(R9)
448 MOVV R16, 16(R9)
449 MOVV R17, 24(R9)
450 ADDVU $32, R7
451 ADDVU $32, R8
452 ADDVU $32, R9
453 SUBVU $1, R6
454 BNE R6, loop4cont
455 loop4done:
456 MOVV R5, c+88(FP) // return the final carry word
457 RET
458
View as plain text