// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Code generated by 'go generate' (with ./internal/asmgen). DO NOT EDIT.

//go:build !math_big_pure_go

#include "textflag.h"

// func addVV(z, x, y []Word) (c Word)
//
// Adds the words of x and y pairwise, propagating a carry from one word
// to the next, and stores each sum into z. z_len words are processed and
// the carry out of the last word is returned in c.
//
// Register roles:
//	R4  = number of 4-word groups (z_len / 4)
//	R5  = cursor into x
//	R6  = cursor into y
//	R7  = cursor into z
//	R8  = number of leftover single words (z_len % 4), handled first
//	R28 = running carry
//	R30 = scratch for the first partial carry of each word
TEXT ·addVV(SB), NOSPLIT, $0
	MOVV z_len+8(FP), R4
	MOVV x_base+24(FP), R5
	MOVV y_base+48(FP), R6
	MOVV z_base+0(FP), R7
	// split the length into single-word and 4-word trip counts
	AND $3, R4, R8
	SRLV $2, R4
	XOR R28, R28	// carry = 0
	BEQ R8, tailDone
tailLoop:
	// one word per iteration
	MOVV 0(R5), R9
	MOVV 0(R6), R10
	ADDVU R10, R9	// R9 = x + y
	SGTU R10, R9, R30	// R30 = 1 if that add wrapped (y > sum)
	ADDVU R28, R9	// R9 += carry in
	SGTU R28, R9, R28	// R28 = 1 if adding the carry wrapped
	ADDVU R30, R28	// carry out = sum of both partial carries
	MOVV R9, 0(R7)
	ADDVU $8, R5
	ADDVU $8, R6
	ADDVU $8, R7
	SUBVU $1, R8
	BNE R8, tailLoop
tailDone:
	BEQ R4, quadDone
quadLoop:
	// four words per iteration: load everything, then chain the carries
	MOVV 0(R5), R8
	MOVV 8(R5), R9
	MOVV 16(R5), R10
	MOVV 24(R5), R11
	MOVV 0(R6), R12
	MOVV 8(R6), R13
	MOVV 16(R6), R14
	MOVV 24(R6), R15
	// word 0
	ADDVU R12, R8
	SGTU R12, R8, R30
	ADDVU R28, R8
	SGTU R28, R8, R28
	ADDVU R30, R28
	// word 1
	ADDVU R13, R9
	SGTU R13, R9, R30
	ADDVU R28, R9
	SGTU R28, R9, R28
	ADDVU R30, R28
	// word 2
	ADDVU R14, R10
	SGTU R14, R10, R30
	ADDVU R28, R10
	SGTU R28, R10, R28
	ADDVU R30, R28
	// word 3
	ADDVU R15, R11
	SGTU R15, R11, R30
	ADDVU R28, R11
	SGTU R28, R11, R28
	ADDVU R30, R28
	MOVV R8, 0(R7)
	MOVV R9, 8(R7)
	MOVV R10, 16(R7)
	MOVV R11, 24(R7)
	ADDVU $32, R5
	ADDVU $32, R6
	ADDVU $32, R7
	SUBVU $1, R4
	BNE R4, quadLoop
quadDone:
	MOVV R28, c+72(FP)
	RET

// func subVV(z, x, y []Word) (c Word)
//
// Subtracts the words of y from the words of x pairwise, propagating a
// borrow from one word to the next, and stores each difference into z.
// z_len words are processed and the borrow out of the last word is
// returned in c.
//
// Register roles:
//	R4  = number of 4-word groups (z_len / 4)
//	R5  = cursor into x
//	R6  = cursor into y
//	R7  = cursor into z
//	R8  = number of leftover single words (z_len % 4), handled first
//	R28 = running borrow
//	R30 = scratch for the first partial borrow of each word
TEXT ·subVV(SB), NOSPLIT, $0
	MOVV z_len+8(FP), R4
	MOVV x_base+24(FP), R5
	MOVV y_base+48(FP), R6
	MOVV z_base+0(FP), R7
	// split the length into single-word and 4-word trip counts
	AND $3, R4, R8
	SRLV $2, R4
	XOR R28, R28	// borrow = 0
	BEQ R8, tailDone
tailLoop:
	// one word per iteration
	MOVV 0(R5), R9
	MOVV 0(R6), R10
	SGTU R28, R9, R30	// R30 = 1 if removing the borrow will wrap (borrow > x)
	SUBVU R28, R9	// R9 = x - borrow in
	SGTU R10, R9, R28	// R28 = 1 if removing y will wrap (y > R9)
	SUBVU R10, R9	// R9 -= y
	ADDVU R30, R28	// borrow out = sum of both partial borrows
	MOVV R9, 0(R7)
	ADDVU $8, R5
	ADDVU $8, R6
	ADDVU $8, R7
	SUBVU $1, R8
	BNE R8, tailLoop
tailDone:
	BEQ R4, quadDone
quadLoop:
	// four words per iteration: load everything, then chain the borrows
	MOVV 0(R5), R8
	MOVV 8(R5), R9
	MOVV 16(R5), R10
	MOVV 24(R5), R11
	MOVV 0(R6), R12
	MOVV 8(R6), R13
	MOVV 16(R6), R14
	MOVV 24(R6), R15
	// word 0
	SGTU R28, R8, R30
	SUBVU R28, R8
	SGTU R12, R8, R28
	SUBVU R12, R8
	ADDVU R30, R28
	// word 1
	SGTU R28, R9, R30
	SUBVU R28, R9
	SGTU R13, R9, R28
	SUBVU R13, R9
	ADDVU R30, R28
	// word 2
	SGTU R28, R10, R30
	SUBVU R28, R10
	SGTU R14, R10, R28
	SUBVU R14, R10
	ADDVU R30, R28
	// word 3
	SGTU R28, R11, R30
	SUBVU R28, R11
	SGTU R15, R11, R28
	SUBVU R15, R11
	ADDVU R30, R28
	MOVV R8, 0(R7)
	MOVV R9, 8(R7)
	MOVV R10, 16(R7)
	MOVV R11, 24(R7)
	ADDVU $32, R5
	ADDVU $32, R6
	ADDVU $32, R7
	SUBVU $1, R4
	BNE R4, quadLoop
quadDone:
	MOVV R28, c+72(FP)
	RET

// func lshVU(z, x []Word, s uint) (c Word)
//
// Shifts the z_len-word vector x left by s bits into z, walking from the
// highest word down to the lowest. The bits shifted out of the highest
// word are returned in c. When z_len is zero nothing is read or written
// and c is 0.
//
// NOTE(review): the complementary shift count is computed as 64-s, so
// this presumably relies on callers passing 0 < s < 64 - confirm.
//
// Register roles:
//	R4  = remaining word count, later the number of 4-word groups
//	R5  = s
//	R6  = cursor into x, starting one past its last word
//	R7  = cursor into z, starting one past its last word
//	R8  = left-shifted bits of the previous word, not yet stored
//	R9  = 64 - s
//	R10 = number of leftover single words, handled first
TEXT ·lshVU(SB), NOSPLIT, $0
	MOVV z_len+8(FP), R4
	BEQ R4, empty
	MOVV s+48(FP), R5
	MOVV x_base+24(FP), R6
	MOVV z_base+0(FP), R7
	// point both cursors just past the end (8 bytes per word)
	SLLV $3, R4, R8
	ADDVU R8, R6
	SLLV $3, R4, R8
	ADDVU R8, R7
	// top word: its high bits are the result, its low bits are pending
	MOVV -8(R6), R8
	MOVV $64, R9
	SUBVU R5, R9
	SRLV R9, R8, R10
	SLLV R5, R8
	MOVV R10, c+56(FP)
	// one word is consumed; count the rest
	SUBVU $1, R4
	// split the remainder into single-word and 4-word trip counts
	AND $3, R4, R10
	SRLV $2, R4
	BEQ R10, tailDone
tailLoop:
	// one word per iteration
	MOVV -16(R6), R11
	SRLV R9, R11, R12	// high bits of this word ...
	OR R8, R12	// ... complete the pending output word
	SLLV R5, R11, R8	// low bits become the new pending word
	MOVV R12, -8(R7)
	ADDVU $-8, R6
	ADDVU $-8, R7
	SUBVU $1, R10
	BNE R10, tailLoop
tailDone:
	BEQ R4, quadDone
quadLoop:
	// four words per iteration; input registers are reused for outputs
	// as soon as their value has been consumed
	MOVV -16(R6), R10
	MOVV -24(R6), R11
	MOVV -32(R6), R12
	MOVV -40(R6), R13
	SRLV R9, R10, R14
	OR R8, R14
	SLLV R5, R10, R8
	SRLV R9, R11, R10
	OR R8, R10
	SLLV R5, R11, R8
	SRLV R9, R12, R11
	OR R8, R11
	SLLV R5, R12, R8
	SRLV R9, R13, R12
	OR R8, R12
	SLLV R5, R13, R8
	MOVV R14, -8(R7)
	MOVV R10, -16(R7)
	MOVV R11, -24(R7)
	MOVV R12, -32(R7)
	ADDVU $-32, R6
	ADDVU $-32, R7
	SUBVU $1, R4
	BNE R4, quadLoop
quadDone:
	// the pending bits form the lowest output word
	MOVV R8, -8(R7)
	RET
empty:
	MOVV R0, c+56(FP)
	RET

// func rshVU(z, x []Word, s uint) (c Word)
//
// Shifts the z_len-word vector x right by s bits into z, walking from the
// lowest word up to the highest. The bits shifted out of the lowest word
// are returned in c, positioned at the top of the word. When z_len is
// zero nothing is read or written and c is 0.
//
// NOTE(review): the complementary shift count is computed as 64-s, so
// this presumably relies on callers passing 0 < s < 64 - confirm.
//
// Register roles:
//	R4  = remaining word count, later the number of 4-word groups
//	R5  = s
//	R6  = cursor into x
//	R7  = cursor into z
//	R8  = right-shifted bits of the previous word, not yet stored
//	R9  = 64 - s
//	R10 = number of leftover single words, handled first
TEXT ·rshVU(SB), NOSPLIT, $0
	MOVV z_len+8(FP), R4
	BEQ R4, empty
	MOVV s+48(FP), R5
	MOVV x_base+24(FP), R6
	MOVV z_base+0(FP), R7
	// bottom word: its low bits are the result, its high bits are pending
	MOVV 0(R6), R8
	MOVV $64, R9
	SUBVU R5, R9
	SLLV R9, R8, R10
	SRLV R5, R8
	MOVV R10, c+56(FP)
	// one word is consumed; count the rest
	SUBVU $1, R4
	// split the remainder into single-word and 4-word trip counts
	AND $3, R4, R10
	SRLV $2, R4
	BEQ R10, tailDone
tailLoop:
	// one word per iteration
	MOVV 8(R6), R11
	SLLV R9, R11, R12	// low bits of this word ...
	OR R8, R12	// ... complete the pending output word
	SRLV R5, R11, R8	// high bits become the new pending word
	MOVV R12, 0(R7)
	ADDVU $8, R6
	ADDVU $8, R7
	SUBVU $1, R10
	BNE R10, tailLoop
tailDone:
	BEQ R4, quadDone
quadLoop:
	// four words per iteration; input registers are reused for outputs
	// as soon as their value has been consumed
	MOVV 8(R6), R10
	MOVV 16(R6), R11
	MOVV 24(R6), R12
	MOVV 32(R6), R13
	SLLV R9, R10, R14
	OR R8, R14
	SRLV R5, R10, R8
	SLLV R9, R11, R10
	OR R8, R10
	SRLV R5, R11, R8
	SLLV R9, R12, R11
	OR R8, R11
	SRLV R5, R12, R8
	SLLV R9, R13, R12
	OR R8, R12
	SRLV R5, R13, R8
	MOVV R14, 0(R7)
	MOVV R10, 8(R7)
	MOVV R11, 16(R7)
	MOVV R12, 24(R7)
	ADDVU $32, R6
	ADDVU $32, R7
	SUBVU $1, R4
	BNE R4, quadLoop
quadDone:
	// the pending bits form the highest output word
	MOVV R8, 0(R7)
	RET
empty:
	MOVV R0, c+56(FP)
	RET

// func mulAddVWW(z, x []Word, m, a Word) (c Word)
//
// Multiplies each word of x by m, adds the running carry (initially a),
// and stores the low word of the result into z. The high word plus any
// carry from the addition becomes the carry for the next word. z_len
// words are processed and the final carry is returned in c.
//
// Register roles:
//	R4  = m
//	R5  = running carry word, seeded with a
//	R6  = number of 4-word groups (z_len / 4)
//	R7  = cursor into x
//	R8  = cursor into z
//	R9  = number of leftover single words (z_len % 4), handled first
//	R28 = scratch carry bit
TEXT ·mulAddVWW(SB), NOSPLIT, $0
	MOVV m+48(FP), R4
	MOVV a+56(FP), R5
	MOVV z_len+8(FP), R6
	MOVV x_base+24(FP), R7
	MOVV z_base+0(FP), R8
	// split the length into single-word and 4-word trip counts
	AND $3, R6, R9
	SRLV $2, R6
	BEQ R9, tailDone
tailLoop:
	// one word per iteration
	MOVV 0(R7), R10
	MULV R4, R10, R11	// low half of m*x
	MULHVU R4, R10, R12	// high half of m*x
	ADDVU R5, R11, R10	// low half + carry in
	SGTU R5, R10, R28	// R28 = 1 if that add wrapped
	ADDVU R28, R12, R5	// carry out = high half + wrap bit
	MOVV R10, 0(R8)
	ADDVU $8, R7
	ADDVU $8, R8
	SUBVU $1, R9
	BNE R9, tailLoop
tailDone:
	BEQ R6, quadDone
quadLoop:
	// four words per iteration; R13/R14 hold each product in turn
	MOVV 0(R7), R9
	MOVV 8(R7), R10
	MOVV 16(R7), R11
	MOVV 24(R7), R12
	// word 0
	MULV R4, R9, R13
	MULHVU R4, R9, R14
	ADDVU R5, R13, R9
	SGTU R5, R9, R28
	ADDVU R28, R14, R5
	// word 1
	MULV R4, R10, R13
	MULHVU R4, R10, R14
	ADDVU R5, R13, R10
	SGTU R5, R10, R28
	ADDVU R28, R14, R5
	// word 2
	MULV R4, R11, R13
	MULHVU R4, R11, R14
	ADDVU R5, R13, R11
	SGTU R5, R11, R28
	ADDVU R28, R14, R5
	// word 3
	MULV R4, R12, R13
	MULHVU R4, R12, R14
	ADDVU R5, R13, R12
	SGTU R5, R12, R28
	ADDVU R28, R14, R5
	MOVV R9, 0(R8)
	MOVV R10, 8(R8)
	MOVV R11, 16(R8)
	MOVV R12, 24(R8)
	ADDVU $32, R7
	ADDVU $32, R8
	SUBVU $1, R6
	BNE R6, quadLoop
quadDone:
	MOVV R5, c+64(FP)
	RET

// func addMulVVWW(z, x, y []Word, m, a Word) (c Word)
//
// For each word, multiplies the word of y by m, adds the corresponding
// word of x and the running carry (initially a), and stores the low word
// of the result into z. The high word plus the carries from both
// additions becomes the carry for the next word. z_len words are
// processed and the final carry is returned in c.
//
// Register roles:
//	R4  = m
//	R5  = running carry word, seeded with a
//	R6  = number of 4-word groups (z_len / 4)
//	R7  = cursor into x
//	R8  = cursor into y
//	R9  = cursor into z
//	R10 = number of leftover single words (z_len % 4), handled first
//	R28 = scratch carry bit
TEXT ·addMulVVWW(SB), NOSPLIT, $0
	MOVV m+72(FP), R4
	MOVV a+80(FP), R5
	MOVV z_len+8(FP), R6
	MOVV x_base+24(FP), R7
	MOVV y_base+48(FP), R8
	MOVV z_base+0(FP), R9
	// split the length into single-word and 4-word trip counts
	AND $3, R6, R10
	SRLV $2, R6
	BEQ R10, tailDone
tailLoop:
	// one word per iteration
	MOVV 0(R7), R11
	MOVV 0(R8), R12
	MULV R4, R12, R13	// low half of m*y
	MULHVU R4, R12, R14	// high half of m*y
	ADDVU R11, R13	// low half += x
	SGTU R11, R13, R28	// R28 = 1 if that add wrapped
	ADDVU R28, R14	// fold the wrap bit into the high half
	ADDVU R5, R13, R12	// low half + carry in
	SGTU R5, R12, R28	// R28 = 1 if that add wrapped
	ADDVU R28, R14, R5	// carry out = high half + wrap bit
	MOVV R12, 0(R9)
	ADDVU $8, R7
	ADDVU $8, R8
	ADDVU $8, R9
	SUBVU $1, R10
	BNE R10, tailLoop
tailDone:
	BEQ R6, quadDone
quadLoop:
	// four words per iteration; R18/R19 hold each product in turn
	MOVV 0(R7), R10
	MOVV 8(R7), R11
	MOVV 16(R7), R12
	MOVV 24(R7), R13
	MOVV 0(R8), R14
	MOVV 8(R8), R15
	MOVV 16(R8), R16
	MOVV 24(R8), R17
	// word 0
	MULV R4, R14, R18
	MULHVU R4, R14, R19
	ADDVU R10, R18
	SGTU R10, R18, R28
	ADDVU R28, R19
	ADDVU R5, R18, R14
	SGTU R5, R14, R28
	ADDVU R28, R19, R5
	// word 1
	MULV R4, R15, R18
	MULHVU R4, R15, R19
	ADDVU R11, R18
	SGTU R11, R18, R28
	ADDVU R28, R19
	ADDVU R5, R18, R15
	SGTU R5, R15, R28
	ADDVU R28, R19, R5
	// word 2
	MULV R4, R16, R18
	MULHVU R4, R16, R19
	ADDVU R12, R18
	SGTU R12, R18, R28
	ADDVU R28, R19
	ADDVU R5, R18, R16
	SGTU R5, R16, R28
	ADDVU R28, R19, R5
	// word 3
	MULV R4, R17, R18
	MULHVU R4, R17, R19
	ADDVU R13, R18
	SGTU R13, R18, R28
	ADDVU R28, R19
	ADDVU R5, R18, R17
	SGTU R5, R17, R28
	ADDVU R28, R19, R5
	MOVV R14, 0(R9)
	MOVV R15, 8(R9)
	MOVV R16, 16(R9)
	MOVV R17, 24(R9)
	ADDVU $32, R7
	ADDVU $32, R8
	ADDVU $32, R9
	SUBVU $1, R6
	BNE R6, quadLoop
quadDone:
	MOVV R5, c+88(FP)
	RET