Text file
src/math/big/arith_mips64x.s
1 // Copyright 2025 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // Code generated by 'go generate' (with ./internal/asmgen). DO NOT EDIT.
6
7 //go:build !math_big_pure_go && (mips64 || mips64le)
8
9 #include "textflag.h"
10
11 // func addVV(z, x, y []Word) (c Word)
//
// addVV stores x[i] + y[i] + carry into z[i] for each of the len(z) words,
// walking from the lowest address upward, and returns the final carry in c.
// Only z_len is loaded; x and y are walked with that same count.
// The carry is tracked explicitly in R24 (see the cr=R24 annotations): each
// add-with-carry is built from two ADDVU/SGTU pairs whose carry-outs are summed.
//
// Register roles:
//   R1  = len(z), then len(z)/4 (4X loop count)
//   R2  = x pointer, R3 = y pointer, R4 = z pointer
//   R5  = len(z)%4 (1X loop count); reused as a data register in the 4X loop
//   R23 = scratch carry-out, R24 = running carry
12 TEXT ·addVV(SB), NOSPLIT, $0
13 MOVV z_len+8(FP), R1 // R1 = len(z)
14 MOVV x_base+24(FP), R2 // R2 = &x[0]
15 MOVV y_base+48(FP), R3 // R3 = &y[0]
16 MOVV z_base+0(FP), R4 // R4 = &z[0]
17 // compute unrolled loop lengths
18 AND $3, R1, R5 // R5 = len(z) & 3: words handled one at a time
19 SRLV $2, R1 // R1 = len(z) >> 2: iterations of the 4X loop
20 XOR R24, R24 // clear carry
21 loop1:
22 BEQ R5, loop1done // no leftover words: go straight to the 4X loop
23 loop1cont:
24 // unroll 1X
25 MOVV 0(R2), R6 // R6 = current x word
26 MOVV 0(R3), R7 // R7 = current y word
27 ADDVU R7, R6 // ADCS R7, R6, R6 (cr=R24): R6 = x + y
28 SGTU R7, R6, R23 // R23 = 1 if y > sum (unsigned), i.e. x + y wrapped
29 ADDVU R24, R6 // R6 += carry in
30 SGTU R24, R6, R24 // R24 = 1 if adding the carry in wrapped
31 ADDVU R23, R24 // carry out = sum of the two partial carries
32 MOVV R6, 0(R4) // store result word to z
33 ADDVU $8, R2 // advance x, y, z by one 8-byte word
34 ADDVU $8, R3
35 ADDVU $8, R4
36 SUBVU $1, R5
37 BNE R5, loop1cont // repeat while leftover words remain
38 loop1done:
39 loop4:
40 BEQ R1, loop4done // no groups of four: done
41 loop4cont:
42 // unroll 4X
43 MOVV 0(R2), R5 // R5..R8 = four consecutive x words
44 MOVV 8(R2), R6
45 MOVV 16(R2), R7
46 MOVV 24(R2), R8
47 MOVV 0(R3), R9 // R9..R12 = four consecutive y words
48 MOVV 8(R3), R10
49 MOVV 16(R3), R11
50 MOVV 24(R3), R12
51 ADDVU R9, R5 // ADCS R9, R5, R5 (cr=R24): word 0, same pattern as the 1X loop
52 SGTU R9, R5, R23 // carry out of x + y
53 ADDVU R24, R5 // add carry in
54 SGTU R24, R5, R24 // carry out of adding the carry in
55 ADDVU R23, R24 // combined carry into word 1
56 ADDVU R10, R6 // ADCS R10, R6, R6 (cr=R24): word 1
57 SGTU R10, R6, R23 // carry out of x + y
58 ADDVU R24, R6 // add carry in
59 SGTU R24, R6, R24 // carry out of adding the carry in
60 ADDVU R23, R24 // combined carry into word 2
61 ADDVU R11, R7 // ADCS R11, R7, R7 (cr=R24): word 2
62 SGTU R11, R7, R23 // carry out of x + y
63 ADDVU R24, R7 // add carry in
64 SGTU R24, R7, R24 // carry out of adding the carry in
65 ADDVU R23, R24 // combined carry into word 3
66 ADDVU R12, R8 // ADCS R12, R8, R8 (cr=R24): word 3
67 SGTU R12, R8, R23 // carry out of x + y
68 ADDVU R24, R8 // add carry in
69 SGTU R24, R8, R24 // carry out of adding the carry in
70 ADDVU R23, R24 // combined carry into the next group
71 MOVV R5, 0(R4) // store the four result words to z
72 MOVV R6, 8(R4)
73 MOVV R7, 16(R4)
74 MOVV R8, 24(R4)
75 ADDVU $32, R2 // advance x, y, z by four words (32 bytes)
76 ADDVU $32, R3
77 ADDVU $32, R4
78 SUBVU $1, R1
79 BNE R1, loop4cont // repeat while groups of four remain
80 loop4done:
81 MOVV R24, c+72(FP) // return the final carry
82 RET
83
84 // func subVV(z, x, y []Word) (c Word)
//
// subVV stores x[i] - y[i] - borrow into z[i] for each of the len(z) words,
// walking from the lowest address upward, and returns the final borrow in c.
// Only z_len is loaded; x and y are walked with that same count.
// The borrow is tracked explicitly in R24: each subtract-with-borrow compares
// before subtracting (SGTU then SUBVU) twice and sums the two borrow-outs.
//
// Register roles:
//   R1  = len(z), then len(z)/4 (4X loop count)
//   R2  = x pointer, R3 = y pointer, R4 = z pointer
//   R5  = len(z)%4 (1X loop count); reused as a data register in the 4X loop
//   R23 = scratch borrow-out, R24 = running borrow
85 TEXT ·subVV(SB), NOSPLIT, $0
86 MOVV z_len+8(FP), R1 // R1 = len(z)
87 MOVV x_base+24(FP), R2 // R2 = &x[0]
88 MOVV y_base+48(FP), R3 // R3 = &y[0]
89 MOVV z_base+0(FP), R4 // R4 = &z[0]
90 // compute unrolled loop lengths
91 AND $3, R1, R5 // R5 = len(z) & 3: words handled one at a time
92 SRLV $2, R1 // R1 = len(z) >> 2: iterations of the 4X loop
93 XOR R24, R24 // clear carry
94 loop1:
95 BEQ R5, loop1done // no leftover words: go straight to the 4X loop
96 loop1cont:
97 // unroll 1X
98 MOVV 0(R2), R6 // R6 = current x word
99 MOVV 0(R3), R7 // R7 = current y word
100 SGTU R24, R6, R23 // SBCS R7, R6, R6: R23 = 1 if borrow in > x (x - borrow wraps)
101 SUBVU R24, R6 // R6 = x - borrow in
102 SGTU R7, R6, R24 // R24 = 1 if y > R6 (subtracting y wraps)
103 SUBVU R7, R6 // R6 = x - borrow in - y
104 ADDVU R23, R24 // borrow out = sum of the two partial borrows
105 MOVV R6, 0(R4) // store result word to z
106 ADDVU $8, R2 // advance x, y, z by one 8-byte word
107 ADDVU $8, R3
108 ADDVU $8, R4
109 SUBVU $1, R5
110 BNE R5, loop1cont // repeat while leftover words remain
111 loop1done:
112 loop4:
113 BEQ R1, loop4done // no groups of four: done
114 loop4cont:
115 // unroll 4X
116 MOVV 0(R2), R5 // R5..R8 = four consecutive x words
117 MOVV 8(R2), R6
118 MOVV 16(R2), R7
119 MOVV 24(R2), R8
120 MOVV 0(R3), R9 // R9..R12 = four consecutive y words
121 MOVV 8(R3), R10
122 MOVV 16(R3), R11
123 MOVV 24(R3), R12
124 SGTU R24, R5, R23 // SBCS R9, R5, R5: word 0, same pattern as the 1X loop
125 SUBVU R24, R5 // subtract borrow in
126 SGTU R9, R5, R24 // borrow out of subtracting y
127 SUBVU R9, R5 // subtract y
128 ADDVU R23, R24 // combined borrow into word 1
129 SGTU R24, R6, R23 // SBCS R10, R6, R6: word 1
130 SUBVU R24, R6 // subtract borrow in
131 SGTU R10, R6, R24 // borrow out of subtracting y
132 SUBVU R10, R6 // subtract y
133 ADDVU R23, R24 // combined borrow into word 2
134 SGTU R24, R7, R23 // SBCS R11, R7, R7: word 2
135 SUBVU R24, R7 // subtract borrow in
136 SGTU R11, R7, R24 // borrow out of subtracting y
137 SUBVU R11, R7 // subtract y
138 ADDVU R23, R24 // combined borrow into word 3
139 SGTU R24, R8, R23 // SBCS R12, R8, R8: word 3
140 SUBVU R24, R8 // subtract borrow in
141 SGTU R12, R8, R24 // borrow out of subtracting y
142 SUBVU R12, R8 // subtract y
143 ADDVU R23, R24 // combined borrow into the next group
144 MOVV R5, 0(R4) // store the four result words to z
145 MOVV R6, 8(R4)
146 MOVV R7, 16(R4)
147 MOVV R8, 24(R4)
148 ADDVU $32, R2 // advance x, y, z by four words (32 bytes)
149 ADDVU $32, R3
150 ADDVU $32, R4
151 SUBVU $1, R1
152 BNE R1, loop4cont // repeat while groups of four remain
153 loop4done:
154 MOVV R24, c+72(FP) // return the final borrow
155 RET
156
157 // func lshVU(z, x []Word, s uint) (c Word)
//
// lshVU writes x shifted left by s bits into z, treating the len(z) words as
// one multi-word value, and returns in c the bits shifted out of the highest
// word (x[len(z)-1] >> (64-s)). When len(z) == 0 it returns c = 0.
// The loop runs from the highest word down to the lowest, so each output word
// combines the low bits of one input word with the high bits of the next
// lower one.
// NOTE(review): the complementary shift count is computed as 64-s and neither
// s == 0 nor s >= 64 is handled here — presumably callers guarantee
// 0 < s < 64; confirm against the Go callers.
//
// Register roles:
//   R1 = len(z), then (len(z)-1)/4 (4X loop count)
//   R2 = s, R6 = 64-s
//   R3 = x pointer, R4 = z pointer (both start one past the last word)
//   R5 = pending bits: (most recently read word) << s, not yet stored
//   R7 = (len(z)-1)%4 (1X loop count); reused as a data register in the 4X loop
158 TEXT ·lshVU(SB), NOSPLIT, $0
159 MOVV z_len+8(FP), R1 // R1 = len(z)
160 BEQ R1, ret0 // empty vector: return c = 0
161 MOVV s+48(FP), R2 // R2 = s
162 MOVV x_base+24(FP), R3 // R3 = &x[0]
163 MOVV z_base+0(FP), R4 // R4 = &z[0]
164 // run loop backward
165 SLLV $3, R1, R5 // R5 = len(z) * 8 bytes
166 ADDVU R5, R3 // R3 = one past the last x word (using len(z))
167 SLLV $3, R1, R5
168 ADDVU R5, R4 // R4 = one past the last z word
169 // shift first word into carry
170 MOVV -8(R3), R5 // R5 = highest x word
171 MOVV $64, R6
172 SUBVU R2, R6 // R6 = 64 - s
173 SRLV R6, R5, R7 // R7 = bits shifted out of the top word
174 SLLV R2, R5 // R5 = top word << s (pending, stored later)
175 MOVV R7, c+56(FP) // return value is already final here
176 // shift remaining words
177 SUBVU $1, R1 // one word already consumed
178 // compute unrolled loop lengths
179 AND $3, R1, R7 // R7 = remaining & 3: words handled one at a time
180 SRLV $2, R1 // R1 = remaining >> 2: iterations of the 4X loop
181 loop1:
182 BEQ R7, loop1done // no leftover words: go straight to the 4X loop
183 loop1cont:
184 // unroll 1X
185 MOVV -16(R3), R8 // R8 = next lower x word
186 SRLV R6, R8, R9 // R9 = its high s bits, moved to the bottom
187 OR R5, R9 // combine with the pending bits from the higher word
188 SLLV R2, R8, R5 // new pending bits = this word << s
189 MOVV R9, -8(R4) // store the completed higher z word
190 ADDVU $-8, R3 // step x and z down by one word
191 ADDVU $-8, R4
192 SUBVU $1, R7
193 BNE R7, loop1cont // repeat while leftover words remain
194 loop1done:
195 loop4:
196 BEQ R1, loop4done // no groups of four: finish up
197 loop4cont:
198 // unroll 4X
199 MOVV -16(R3), R7 // R7..R10 = the next four lower x words, highest first
200 MOVV -24(R3), R8
201 MOVV -32(R3), R9
202 MOVV -40(R3), R10
203 SRLV R6, R7, R11 // R11 = output word: high bits of R7 ...
204 OR R5, R11 // ... combined with the pending bits
205 SLLV R2, R7, R5 // pending = R7 << s; R7 is now free for reuse
206 SRLV R6, R8, R7 // R7 = next output word (input was R8)
207 OR R5, R7
208 SLLV R2, R8, R5 // pending = R8 << s; R8 is now free
209 SRLV R6, R9, R8 // R8 = next output word (input was R9)
210 OR R5, R8
211 SLLV R2, R9, R5 // pending = R9 << s; R9 is now free
212 SRLV R6, R10, R9 // R9 = next output word (input was R10)
213 OR R5, R9
214 SLLV R2, R10, R5 // pending = R10 << s, carried into the next iteration
215 MOVV R11, -8(R4) // store the four completed z words, highest first
216 MOVV R7, -16(R4)
217 MOVV R8, -24(R4)
218 MOVV R9, -32(R4)
219 ADDVU $-32, R3 // step x and z down by four words (32 bytes)
220 ADDVU $-32, R4
221 SUBVU $1, R1
222 BNE R1, loop4cont // repeat while groups of four remain
223 loop4done:
224 // store final shifted bits
225 MOVV R5, -8(R4) // lowest z word = lowest x word << s
226 RET
227 ret0:
228 MOVV R0, c+56(FP) // c = 0 for an empty vector
229 RET
230
231 // func rshVU(z, x []Word, s uint) (c Word)
//
// rshVU writes x shifted right by s bits into z, treating the len(z) words as
// one multi-word value, and returns in c the bits shifted out of the lowest
// word (x[0] << (64-s)). When len(z) == 0 it returns c = 0.
// The loop runs from the lowest word up to the highest, so each output word
// combines the high bits of one input word with the low bits of the next
// higher one.
// NOTE(review): the complementary shift count is computed as 64-s and neither
// s == 0 nor s >= 64 is handled here — presumably callers guarantee
// 0 < s < 64; confirm against the Go callers.
//
// Register roles:
//   R1 = len(z), then (len(z)-1)/4 (4X loop count)
//   R2 = s, R6 = 64-s
//   R3 = x pointer, R4 = z pointer
//   R5 = pending bits: (most recently read word) >> s, not yet stored
//   R7 = (len(z)-1)%4 (1X loop count); reused as a data register in the 4X loop
232 TEXT ·rshVU(SB), NOSPLIT, $0
233 MOVV z_len+8(FP), R1 // R1 = len(z)
234 BEQ R1, ret0 // empty vector: return c = 0
235 MOVV s+48(FP), R2 // R2 = s
236 MOVV x_base+24(FP), R3 // R3 = &x[0]
237 MOVV z_base+0(FP), R4 // R4 = &z[0]
238 // shift first word into carry
239 MOVV 0(R3), R5 // R5 = lowest x word
240 MOVV $64, R6
241 SUBVU R2, R6 // R6 = 64 - s
242 SLLV R6, R5, R7 // R7 = bits shifted out of the bottom word
243 SRLV R2, R5 // R5 = bottom word >> s (pending, stored later)
244 MOVV R7, c+56(FP) // return value is already final here
245 // shift remaining words
246 SUBVU $1, R1 // one word already consumed
247 // compute unrolled loop lengths
248 AND $3, R1, R7 // R7 = remaining & 3: words handled one at a time
249 SRLV $2, R1 // R1 = remaining >> 2: iterations of the 4X loop
250 loop1:
251 BEQ R7, loop1done // no leftover words: go straight to the 4X loop
252 loop1cont:
253 // unroll 1X
254 MOVV 8(R3), R8 // R8 = next higher x word
255 SLLV R6, R8, R9 // R9 = its low s bits, moved to the top
256 OR R5, R9 // combine with the pending bits from the lower word
257 SRLV R2, R8, R5 // new pending bits = this word >> s
258 MOVV R9, 0(R4) // store the completed lower z word
259 ADDVU $8, R3 // step x and z up by one word
260 ADDVU $8, R4
261 SUBVU $1, R7
262 BNE R7, loop1cont // repeat while leftover words remain
263 loop1done:
264 loop4:
265 BEQ R1, loop4done // no groups of four: finish up
266 loop4cont:
267 // unroll 4X
268 MOVV 8(R3), R7 // R7..R10 = the next four higher x words, lowest first
269 MOVV 16(R3), R8
270 MOVV 24(R3), R9
271 MOVV 32(R3), R10
272 SLLV R6, R7, R11 // R11 = output word: low bits of R7 ...
273 OR R5, R11 // ... combined with the pending bits
274 SRLV R2, R7, R5 // pending = R7 >> s; R7 is now free for reuse
275 SLLV R6, R8, R7 // R7 = next output word (input was R8)
276 OR R5, R7
277 SRLV R2, R8, R5 // pending = R8 >> s; R8 is now free
278 SLLV R6, R9, R8 // R8 = next output word (input was R9)
279 OR R5, R8
280 SRLV R2, R9, R5 // pending = R9 >> s; R9 is now free
281 SLLV R6, R10, R9 // R9 = next output word (input was R10)
282 OR R5, R9
283 SRLV R2, R10, R5 // pending = R10 >> s, carried into the next iteration
284 MOVV R11, 0(R4) // store the four completed z words, lowest first
285 MOVV R7, 8(R4)
286 MOVV R8, 16(R4)
287 MOVV R9, 24(R4)
288 ADDVU $32, R3 // step x and z up by four words (32 bytes)
289 ADDVU $32, R4
290 SUBVU $1, R1
291 BNE R1, loop4cont // repeat while groups of four remain
292 loop4done:
293 // store final shifted bits
294 MOVV R5, 0(R4) // highest z word = highest x word >> s
295 RET
296 ret0:
297 MOVV R0, c+56(FP) // c = 0 for an empty vector
298 RET
299
300 // func mulAddVWW(z, x []Word, m, a Word) (c Word)
//
// mulAddVWW stores the low word of x[i]*m + carry into z[i] for each of the
// len(z) words, walking from the lowest address upward. The carry starts as a
// and, after each word, becomes the high word of that word's result. The final
// carry is returned in c.
// Only z_len is loaded; x is walked with that same count.
//
// Register roles:
//   R1  = m (multiplier), R2 = running carry (initially a)
//   R3  = len(z), then len(z)/4 (4X loop count)
//   R4  = x pointer, R5 = z pointer
//   R6  = len(z)%4 (1X loop count); reused as a data register in the 4X loop
//   R24 = carry out of the low-word addition
301 TEXT ·mulAddVWW(SB), NOSPLIT, $0
302 MOVV m+48(FP), R1 // R1 = m
303 MOVV a+56(FP), R2 // R2 = a, the initial carry
304 MOVV z_len+8(FP), R3 // R3 = len(z)
305 MOVV x_base+24(FP), R4 // R4 = &x[0]
306 MOVV z_base+0(FP), R5 // R5 = &z[0]
307 // compute unrolled loop lengths
308 AND $3, R3, R6 // R6 = len(z) & 3: words handled one at a time
309 SRLV $2, R3 // R3 = len(z) >> 2: iterations of the 4X loop
310 loop1:
311 BEQ R6, loop1done // no leftover words: go straight to the 4X loop
312 loop1cont:
313 // unroll 1X
314 MOVV 0(R4), R7 // R7 = current x word
315 // synthetic carry, one column at a time
316 MULVU R1, R7 // HI:LO = m * x word (unsigned)
317 MOVV LO, R8 // R8 = low word of the product
318 MOVV HI, R9 // R9 = high word of the product
319 ADDVU R2, R8, R7 // ADDS R2, R8, R7 (cr=R24): R7 = low + carry in
320 SGTU R2, R7, R24 // R24 = 1 if carry in > sum, i.e. the addition wrapped
321 ADDVU R24, R9, R2 // ADC $0, R9, R2: new carry = high + R24
322 MOVV R7, 0(R5) // store result word to z
323 ADDVU $8, R4 // advance x and z by one 8-byte word
324 ADDVU $8, R5
325 SUBVU $1, R6
326 BNE R6, loop1cont // repeat while leftover words remain
327 loop1done:
328 loop4:
329 BEQ R3, loop4done // no groups of four: done
330 loop4cont:
331 // unroll 4X
332 MOVV 0(R4), R6 // R6..R9 = four consecutive x words
333 MOVV 8(R4), R7
334 MOVV 16(R4), R8
335 MOVV 24(R4), R9
336 // synthetic carry, one column at a time
337 MULVU R1, R6 // word 0: HI:LO = m * x word
338 MOVV LO, R10 // R10 = low word, R11 = high word (scratch, reused per word)
339 MOVV HI, R11
340 ADDVU R2, R10, R6 // ADDS R2, R10, R6 (cr=R24): result = low + carry in
341 SGTU R2, R6, R24 // carry out of that addition
342 ADDVU R24, R11, R2 // ADC $0, R11, R2: carry into word 1
343 MULVU R1, R7 // word 1
344 MOVV LO, R10
345 MOVV HI, R11
346 ADDVU R2, R10, R7 // ADDS R2, R10, R7 (cr=R24)
347 SGTU R2, R7, R24 // carry out of that addition
348 ADDVU R24, R11, R2 // ADC $0, R11, R2: carry into word 2
349 MULVU R1, R8 // word 2
350 MOVV LO, R10
351 MOVV HI, R11
352 ADDVU R2, R10, R8 // ADDS R2, R10, R8 (cr=R24)
353 SGTU R2, R8, R24 // carry out of that addition
354 ADDVU R24, R11, R2 // ADC $0, R11, R2: carry into word 3
355 MULVU R1, R9 // word 3
356 MOVV LO, R10
357 MOVV HI, R11
358 ADDVU R2, R10, R9 // ADDS R2, R10, R9 (cr=R24)
359 SGTU R2, R9, R24 // carry out of that addition
360 ADDVU R24, R11, R2 // ADC $0, R11, R2: carry into the next group
361 MOVV R6, 0(R5) // store the four result words to z
362 MOVV R7, 8(R5)
363 MOVV R8, 16(R5)
364 MOVV R9, 24(R5)
365 ADDVU $32, R4 // advance x and z by four words (32 bytes)
366 ADDVU $32, R5
367 SUBVU $1, R3
368 BNE R3, loop4cont // repeat while groups of four remain
369 loop4done:
370 MOVV R2, c+64(FP) // return the final carry
371 RET
372
373 // func addMulVVWW(z, x, y []Word, m, a Word) (c Word)
//
// addMulVVWW stores the low word of x[i] + y[i]*m + carry into z[i] for each
// of the len(z) words, walking from the lowest address upward. The carry
// starts as a and, after each word, becomes the high word of that word's
// result. The final carry is returned in c.
// Only z_len is loaded; x and y are walked with that same count.
//
// Register roles:
//   R1  = m (multiplier), R2 = running carry (initially a)
//   R3  = len(z), then len(z)/4 (4X loop count)
//   R4  = x pointer, R5 = y pointer, R6 = z pointer
//   R7  = len(z)%4 (1X loop count); reused as a data register in the 4X loop
//   R24 = carry out of each low-word addition
374 TEXT ·addMulVVWW(SB), NOSPLIT, $0
375 MOVV m+72(FP), R1 // R1 = m
376 MOVV a+80(FP), R2 // R2 = a, the initial carry
377 MOVV z_len+8(FP), R3 // R3 = len(z)
378 MOVV x_base+24(FP), R4 // R4 = &x[0]
379 MOVV y_base+48(FP), R5 // R5 = &y[0]
380 MOVV z_base+0(FP), R6 // R6 = &z[0]
381 // compute unrolled loop lengths
382 AND $3, R3, R7 // R7 = len(z) & 3: words handled one at a time
383 SRLV $2, R3 // R3 = len(z) >> 2: iterations of the 4X loop
384 loop1:
385 BEQ R7, loop1done // no leftover words: go straight to the 4X loop
386 loop1cont:
387 // unroll 1X
388 MOVV 0(R4), R8 // R8 = current x word (addend)
389 MOVV 0(R5), R9 // R9 = current y word (multiplicand)
390 // synthetic carry, one column at a time
391 MULVU R1, R9 // HI:LO = m * y word (unsigned)
392 MOVV LO, R10 // R10 = low word of the product
393 MOVV HI, R11 // R11 = high word of the product
394 ADDVU R8, R10 // ADDS R8, R10, R10 (cr=R24): low += x word
395 SGTU R8, R10, R24 // R24 = 1 if x word > sum, i.e. the addition wrapped
396 ADDVU R24, R11 // ADC $0, R11, R11: fold that carry into the high word
397 ADDVU R2, R10, R9 // ADDS R2, R10, R9 (cr=R24): result = low + carry in
398 SGTU R2, R9, R24 // R24 = 1 if carry in > result, i.e. the addition wrapped
399 ADDVU R24, R11, R2 // ADC $0, R11, R2: new carry = high + R24
400 MOVV R9, 0(R6) // store result word to z
401 ADDVU $8, R4 // advance x, y, z by one 8-byte word
402 ADDVU $8, R5
403 ADDVU $8, R6
404 SUBVU $1, R7
405 BNE R7, loop1cont // repeat while leftover words remain
406 loop1done:
407 loop4:
408 BEQ R3, loop4done // no groups of four: done
409 loop4cont:
410 // unroll 4X
411 MOVV 0(R4), R7 // R7..R10 = four consecutive x words (addends)
412 MOVV 8(R4), R8
413 MOVV 16(R4), R9
414 MOVV 24(R4), R10
415 MOVV 0(R5), R11 // R11..R14 = four consecutive y words (multiplicands)
416 MOVV 8(R5), R12
417 MOVV 16(R5), R13
418 MOVV 24(R5), R14
419 // synthetic carry, one column at a time
420 MULVU R1, R11 // word 0: HI:LO = m * y word
421 MOVV LO, R15 // R15 = low word, R16 = high word (scratch, reused per word)
422 MOVV HI, R16
423 ADDVU R7, R15 // ADDS R7, R15, R15 (cr=R24): low += x word
424 SGTU R7, R15, R24 // carry out of adding the x word
425 ADDVU R24, R16 // ADC $0, R16, R16: fold it into the high word
426 ADDVU R2, R15, R11 // ADDS R2, R15, R11 (cr=R24): result = low + carry in
427 SGTU R2, R11, R24 // carry out of adding the carry in
428 ADDVU R24, R16, R2 // ADC $0, R16, R2: carry into word 1
429 MULVU R1, R12 // word 1
430 MOVV LO, R15
431 MOVV HI, R16
432 ADDVU R8, R15 // ADDS R8, R15, R15 (cr=R24)
433 SGTU R8, R15, R24 // carry out of adding the x word
434 ADDVU R24, R16 // ADC $0, R16, R16
435 ADDVU R2, R15, R12 // ADDS R2, R15, R12 (cr=R24)
436 SGTU R2, R12, R24 // carry out of adding the carry in
437 ADDVU R24, R16, R2 // ADC $0, R16, R2: carry into word 2
438 MULVU R1, R13 // word 2
439 MOVV LO, R15
440 MOVV HI, R16
441 ADDVU R9, R15 // ADDS R9, R15, R15 (cr=R24)
442 SGTU R9, R15, R24 // carry out of adding the x word
443 ADDVU R24, R16 // ADC $0, R16, R16
444 ADDVU R2, R15, R13 // ADDS R2, R15, R13 (cr=R24)
445 SGTU R2, R13, R24 // carry out of adding the carry in
446 ADDVU R24, R16, R2 // ADC $0, R16, R2: carry into word 3
447 MULVU R1, R14 // word 3
448 MOVV LO, R15
449 MOVV HI, R16
450 ADDVU R10, R15 // ADDS R10, R15, R15 (cr=R24)
451 SGTU R10, R15, R24 // carry out of adding the x word
452 ADDVU R24, R16 // ADC $0, R16, R16
453 ADDVU R2, R15, R14 // ADDS R2, R15, R14 (cr=R24)
454 SGTU R2, R14, R24 // carry out of adding the carry in
455 ADDVU R24, R16, R2 // ADC $0, R16, R2: carry into the next group
456 MOVV R11, 0(R6) // store the four result words to z
457 MOVV R12, 8(R6)
458 MOVV R13, 16(R6)
459 MOVV R14, 24(R6)
460 ADDVU $32, R4 // advance x, y, z by four words (32 bytes)
461 ADDVU $32, R5
462 ADDVU $32, R6
463 SUBVU $1, R3
464 BNE R3, loop4cont // repeat while groups of four remain
465 loop4done:
466 MOVV R2, c+88(FP) // return the final carry
467 RET
468
View as plain text