Text file src/math/big/arith_loong64.s

     1  // Copyright 2025 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Code generated by 'go generate' (with ./internal/asmgen). DO NOT EDIT.
     6  
     7  //go:build !math_big_pure_go
     8  
     9  #include "textflag.h"
    10  
    11  // func addVV(z, x, y []Word) (c Word)
        //
        // addVV stores x[i] + y[i] + carry into z[i] for i in [0, z_len) and
        // returns the carry out of the last word in c. Only z_len is loaded;
        // x and y are read for that many words without looking at their lengths.
        //
        // Register roles:
        //   R4  = number of 4-word groups (z_len >> 2)
        //   R5  = x pointer, R6 = y pointer, R7 = z pointer (advanced as we go)
        //   R8  = number of leftover single words (z_len & 3); reused as data in loop4
        //   R28 = running carry (0 or 1), R30 = scratch for the first partial carry
        //
        // The carry is kept in R28 and rebuilt after every add with SGTU, which
        // sets its last operand to 1 when the first operand is greater than the
        // second (unsigned): after a wrapping add, "addend > sum" means overflow.
    12  TEXT ·addVV(SB), NOSPLIT, $0
    13  	MOVV z_len+8(FP), R4
    14  	MOVV x_base+24(FP), R5
    15  	MOVV y_base+48(FP), R6
    16  	MOVV z_base+0(FP), R7
    17  	// compute unrolled loop lengths: R8 = z_len & 3, R4 = z_len >> 2
    18  	AND $3, R4, R8
    19  	SRLV $2, R4
    20  	XOR R28, R28	// clear carry
    21  loop1:
    22  	BEQ R8, loop1done
    23  loop1cont:
    24  	// unroll 1X: handle the z_len & 3 leading words one at a time
    25  	MOVV 0(R5), R9
    26  	MOVV 0(R6), R10
    27  	ADDVU R10, R9	// ADCS R10, R9, R9 (cr=R28): R9 = x + y
    28  	SGTU R10, R9, R30	// ... R30 = 1 if x + y wrapped (y > sum)
    29  	ADDVU R28, R9	// ... R9 += carry-in
    30  	SGTU R28, R9, R28	// ... R28 = 1 if adding the carry-in wrapped
    31  	ADDVU R30, R28	// ... carry-out = R30 + R28
    32  	MOVV R9, 0(R7)
    33  	ADDVU $8, R5
    34  	ADDVU $8, R6
    35  	ADDVU $8, R7
    36  	SUBVU $1, R8
    37  	BNE R8, loop1cont
    38  loop1done:
    39  loop4:
    40  	BEQ R4, loop4done
    41  loop4cont:
    42  	// unroll 4X: same add-with-carry sequence on four word pairs per
        	// iteration; all eight loads are issued before the four stores
    43  	MOVV 0(R5), R8
    44  	MOVV 8(R5), R9
    45  	MOVV 16(R5), R10
    46  	MOVV 24(R5), R11
    47  	MOVV 0(R6), R12
    48  	MOVV 8(R6), R13
    49  	MOVV 16(R6), R14
    50  	MOVV 24(R6), R15
    51  	ADDVU R12, R8	// ADCS R12, R8, R8 (cr=R28)
    52  	SGTU R12, R8, R30	// ...
    53  	ADDVU R28, R8	// ...
    54  	SGTU R28, R8, R28	// ...
    55  	ADDVU R30, R28	// ...
    56  	ADDVU R13, R9	// ADCS R13, R9, R9 (cr=R28)
    57  	SGTU R13, R9, R30	// ...
    58  	ADDVU R28, R9	// ...
    59  	SGTU R28, R9, R28	// ...
    60  	ADDVU R30, R28	// ...
    61  	ADDVU R14, R10	// ADCS R14, R10, R10 (cr=R28)
    62  	SGTU R14, R10, R30	// ...
    63  	ADDVU R28, R10	// ...
    64  	SGTU R28, R10, R28	// ...
    65  	ADDVU R30, R28	// ...
    66  	ADDVU R15, R11	// ADCS R15, R11, R11 (cr=R28)
    67  	SGTU R15, R11, R30	// ...
    68  	ADDVU R28, R11	// ...
    69  	SGTU R28, R11, R28	// ...
    70  	ADDVU R30, R28	// ...
    71  	MOVV R8, 0(R7)
    72  	MOVV R9, 8(R7)
    73  	MOVV R10, 16(R7)
    74  	MOVV R11, 24(R7)
    75  	ADDVU $32, R5
    76  	ADDVU $32, R6
    77  	ADDVU $32, R7
    78  	SUBVU $1, R4
    79  	BNE R4, loop4cont
    80  loop4done:
    81  	MOVV R28, c+72(FP)	// return the final carry
    82  	RET
    83  
    84  // func subVV(z, x, y []Word) (c Word)
        //
        // subVV stores x[i] - y[i] - borrow into z[i] for i in [0, z_len) and
        // returns the borrow out of the last word in c. Only z_len is loaded;
        // x and y are read for that many words without looking at their lengths.
        //
        // Register roles:
        //   R4  = number of 4-word groups (z_len >> 2)
        //   R5  = x pointer, R6 = y pointer, R7 = z pointer (advanced as we go)
        //   R8  = number of leftover single words (z_len & 3); reused as data in loop4
        //   R28 = running borrow (0 or 1), R30 = scratch for the first partial borrow
        //
        // Each borrow is computed with SGTU *before* the matching SUBVU: the
        // subtraction wraps exactly when the subtrahend is greater (unsigned)
        // than the current value.
    85  TEXT ·subVV(SB), NOSPLIT, $0
    86  	MOVV z_len+8(FP), R4
    87  	MOVV x_base+24(FP), R5
    88  	MOVV y_base+48(FP), R6
    89  	MOVV z_base+0(FP), R7
    90  	// compute unrolled loop lengths: R8 = z_len & 3, R4 = z_len >> 2
    91  	AND $3, R4, R8
    92  	SRLV $2, R4
    93  	XOR R28, R28	// clear carry
    94  loop1:
    95  	BEQ R8, loop1done
    96  loop1cont:
    97  	// unroll 1X: handle the z_len & 3 leading words one at a time
    98  	MOVV 0(R5), R9
    99  	MOVV 0(R6), R10
   100  	SGTU R28, R9, R30	// SBCS R10, R9, R9: R30 = 1 if borrow-in > x
   101  	SUBVU R28, R9	// ... R9 = x - borrow-in
   102  	SGTU R10, R9, R28	// ... R28 = 1 if y > R9 (next subtract wraps)
   103  	SUBVU R10, R9	// ... R9 -= y
   104  	ADDVU R30, R28	// ... borrow-out = R30 + R28
   105  	MOVV R9, 0(R7)
   106  	ADDVU $8, R5
   107  	ADDVU $8, R6
   108  	ADDVU $8, R7
   109  	SUBVU $1, R8
   110  	BNE R8, loop1cont
   111  loop1done:
   112  loop4:
   113  	BEQ R4, loop4done
   114  loop4cont:
   115  	// unroll 4X: same subtract-with-borrow sequence on four word pairs per
        	// iteration; all eight loads are issued before the four stores
   116  	MOVV 0(R5), R8
   117  	MOVV 8(R5), R9
   118  	MOVV 16(R5), R10
   119  	MOVV 24(R5), R11
   120  	MOVV 0(R6), R12
   121  	MOVV 8(R6), R13
   122  	MOVV 16(R6), R14
   123  	MOVV 24(R6), R15
   124  	SGTU R28, R8, R30	// SBCS R12, R8, R8
   125  	SUBVU R28, R8	// ...
   126  	SGTU R12, R8, R28	// ...
   127  	SUBVU R12, R8	// ...
   128  	ADDVU R30, R28	// ...
   129  	SGTU R28, R9, R30	// SBCS R13, R9, R9
   130  	SUBVU R28, R9	// ...
   131  	SGTU R13, R9, R28	// ...
   132  	SUBVU R13, R9	// ...
   133  	ADDVU R30, R28	// ...
   134  	SGTU R28, R10, R30	// SBCS R14, R10, R10
   135  	SUBVU R28, R10	// ...
   136  	SGTU R14, R10, R28	// ...
   137  	SUBVU R14, R10	// ...
   138  	ADDVU R30, R28	// ...
   139  	SGTU R28, R11, R30	// SBCS R15, R11, R11
   140  	SUBVU R28, R11	// ...
   141  	SGTU R15, R11, R28	// ...
   142  	SUBVU R15, R11	// ...
   143  	ADDVU R30, R28	// ...
   144  	MOVV R8, 0(R7)
   145  	MOVV R9, 8(R7)
   146  	MOVV R10, 16(R7)
   147  	MOVV R11, 24(R7)
   148  	ADDVU $32, R5
   149  	ADDVU $32, R6
   150  	ADDVU $32, R7
   151  	SUBVU $1, R4
   152  	BNE R4, loop4cont
   153  loop4done:
   154  	MOVV R28, c+72(FP)	// return the final borrow
   155  	RET
   156  
   157  // func lshVU(z, x []Word, s uint) (c Word)
        //
        // lshVU shifts the z_len-word vector x left by s bits into z, walking from
        // the highest word down to the lowest, and returns in c the bits shifted
        // out of the top word. When z_len is 0 nothing is read or written and c = 0.
        //
        // Register roles:
        //   R4  = words still to process, then number of 4-word groups
        //   R5  = s, R9 = 64 - s (complementary shift count)
        //   R6  = x pointer, R7 = z pointer; both start one past the last word
        //         and move downward
        //   R8  = pending bits: the previous (higher) word already shifted left by s,
        //         waiting to be OR-ed with the top bits of the next lower word
        //   R10 = leftover single-word count in loop1; reused as data in loop4
        //
        // NOTE(review): presumably callers guarantee 1 <= s <= 63; with s == 0 the
        // complementary count in R9 would be 64 - confirm against the callers.
   158  TEXT ·lshVU(SB), NOSPLIT, $0
   159  	MOVV z_len+8(FP), R4
   160  	BEQ R4, ret0
   161  	MOVV s+48(FP), R5
   162  	MOVV x_base+24(FP), R6
   163  	MOVV z_base+0(FP), R7
   164  	// run loop backward: point R6 and R7 just past the last word (base + 8*len)
   165  	SLLV $3, R4, R8
   166  	ADDVU R8, R6
   167  	SLLV $3, R4, R8
   168  	ADDVU R8, R7
   169  	// shift first word into carry: c = top word >> (64-s), R8 = top word << s
   170  	MOVV -8(R6), R8
   171  	MOVV $64, R9
   172  	SUBVU R5, R9
   173  	SRLV R9, R8, R10
   174  	SLLV R5, R8
   175  	MOVV R10, c+56(FP)
   176  	// shift remaining words (the top word has already been consumed)
   177  	SUBVU $1, R4
   178  	// compute unrolled loop lengths: R10 = remaining & 3, R4 = remaining >> 2
   179  	AND $3, R4, R10
   180  	SRLV $2, R4
   181  loop1:
   182  	BEQ R10, loop1done
   183  loop1cont:
   184  	// unroll 1X: load the next lower word, combine its top bits with the
        	// pending bits to finish the word above it, then keep its low part pending
   185  	MOVV -16(R6), R11
   186  	SRLV R9, R11, R12
   187  	OR R8, R12
   188  	SLLV R5, R11, R8
   189  	MOVV R12, -8(R7)
   190  	ADDVU $-8, R6
   191  	ADDVU $-8, R7
   192  	SUBVU $1, R10
   193  	BNE R10, loop1cont
   194  loop1done:
   195  loop4:
   196  	BEQ R4, loop4done
   197  loop4cont:
   198  	// unroll 4X: same step for four words per iteration; the four loads are
        	// issued before the four stores, and R8 carries the pending bits throughout
   199  	MOVV -16(R6), R10
   200  	MOVV -24(R6), R11
   201  	MOVV -32(R6), R12
   202  	MOVV -40(R6), R13
   203  	SRLV R9, R10, R14
   204  	OR R8, R14
   205  	SLLV R5, R10, R8
   206  	SRLV R9, R11, R10
   207  	OR R8, R10
   208  	SLLV R5, R11, R8
   209  	SRLV R9, R12, R11
   210  	OR R8, R11
   211  	SLLV R5, R12, R8
   212  	SRLV R9, R13, R12
   213  	OR R8, R12
   214  	SLLV R5, R13, R8
   215  	MOVV R14, -8(R7)
   216  	MOVV R10, -16(R7)
   217  	MOVV R11, -24(R7)
   218  	MOVV R12, -32(R7)
   219  	ADDVU $-32, R6
   220  	ADDVU $-32, R7
   221  	SUBVU $1, R4
   222  	BNE R4, loop4cont
   223  loop4done:
   224  	// store final shifted bits: the lowest word receives only the pending bits
   225  	MOVV R8, -8(R7)
   226  	RET
   227  ret0:
        	// empty vector: return c = 0 (R0 is the zero register)
   228  	MOVV R0, c+56(FP)
   229  	RET
   230  
   231  // func rshVU(z, x []Word, s uint) (c Word)
        //
        // rshVU shifts the z_len-word vector x right by s bits into z, walking from
        // the lowest word up to the highest, and returns in c the bits shifted out
        // of the bottom word (placed at the top of c). When z_len is 0 nothing is
        // read or written and c = 0.
        //
        // Register roles:
        //   R4  = words still to process, then number of 4-word groups
        //   R5  = s, R9 = 64 - s (complementary shift count)
        //   R6  = x pointer, R7 = z pointer; both move upward
        //   R8  = pending bits: the previous (lower) word already shifted right by s,
        //         waiting to be OR-ed with the low bits of the next higher word
        //   R10 = leftover single-word count in loop1; reused as data in loop4
        //
        // NOTE(review): presumably callers guarantee 1 <= s <= 63; with s == 0 the
        // complementary count in R9 would be 64 - confirm against the callers.
   232  TEXT ·rshVU(SB), NOSPLIT, $0
   233  	MOVV z_len+8(FP), R4
   234  	BEQ R4, ret0
   235  	MOVV s+48(FP), R5
   236  	MOVV x_base+24(FP), R6
   237  	MOVV z_base+0(FP), R7
   238  	// shift first word into carry: c = x[0] << (64-s), R8 = x[0] >> s
   239  	MOVV 0(R6), R8
   240  	MOVV $64, R9
   241  	SUBVU R5, R9
   242  	SLLV R9, R8, R10
   243  	SRLV R5, R8
   244  	MOVV R10, c+56(FP)
   245  	// shift remaining words (the bottom word has already been consumed)
   246  	SUBVU $1, R4
   247  	// compute unrolled loop lengths: R10 = remaining & 3, R4 = remaining >> 2
   248  	AND $3, R4, R10
   249  	SRLV $2, R4
   250  loop1:
   251  	BEQ R10, loop1done
   252  loop1cont:
   253  	// unroll 1X: load the next higher word, combine its low bits with the
        	// pending bits to finish the word below it, then keep its high part pending
   254  	MOVV 8(R6), R11
   255  	SLLV R9, R11, R12
   256  	OR R8, R12
   257  	SRLV R5, R11, R8
   258  	MOVV R12, 0(R7)
   259  	ADDVU $8, R6
   260  	ADDVU $8, R7
   261  	SUBVU $1, R10
   262  	BNE R10, loop1cont
   263  loop1done:
   264  loop4:
   265  	BEQ R4, loop4done
   266  loop4cont:
   267  	// unroll 4X: same step for four words per iteration; the four loads are
        	// issued before the four stores, and R8 carries the pending bits throughout
   268  	MOVV 8(R6), R10
   269  	MOVV 16(R6), R11
   270  	MOVV 24(R6), R12
   271  	MOVV 32(R6), R13
   272  	SLLV R9, R10, R14
   273  	OR R8, R14
   274  	SRLV R5, R10, R8
   275  	SLLV R9, R11, R10
   276  	OR R8, R10
   277  	SRLV R5, R11, R8
   278  	SLLV R9, R12, R11
   279  	OR R8, R11
   280  	SRLV R5, R12, R8
   281  	SLLV R9, R13, R12
   282  	OR R8, R12
   283  	SRLV R5, R13, R8
   284  	MOVV R14, 0(R7)
   285  	MOVV R10, 8(R7)
   286  	MOVV R11, 16(R7)
   287  	MOVV R12, 24(R7)
   288  	ADDVU $32, R6
   289  	ADDVU $32, R7
   290  	SUBVU $1, R4
   291  	BNE R4, loop4cont
   292  loop4done:
   293  	// store final shifted bits: the highest word receives only the pending bits
   294  	MOVV R8, 0(R7)
   295  	RET
   296  ret0:
        	// empty vector: return c = 0 (R0 is the zero register)
   297  	MOVV R0, c+56(FP)
   298  	RET
   299  
   300  // func mulAddVWW(z, x []Word, m, a Word) (c Word)
        //
        // mulAddVWW computes, word by word from low to high, x[i]*m + carry, where
        // the carry starts as a. The low 64 bits of each result go to z[i] and the
        // high 64 bits (plus any overflow from adding the carry) become the next
        // carry; the last carry is returned in c. z_len words are processed.
        //
        // Register roles:
        //   R4  = m, R5 = running carry (initially a)
        //   R6  = number of 4-word groups (z_len >> 2)
        //   R7  = x pointer, R8 = z pointer (advanced as we go)
        //   R9  = number of leftover single words (z_len & 3); reused as data in loop4
        //   R28 = 1 when adding the carry to the low product word wrapped
   301  TEXT ·mulAddVWW(SB), NOSPLIT, $0
   302  	MOVV m+48(FP), R4
   303  	MOVV a+56(FP), R5
   304  	MOVV z_len+8(FP), R6
   305  	MOVV x_base+24(FP), R7
   306  	MOVV z_base+0(FP), R8
   307  	// compute unrolled loop lengths: R9 = z_len & 3, R6 = z_len >> 2
   308  	AND $3, R6, R9
   309  	SRLV $2, R6
   310  loop1:
   311  	BEQ R9, loop1done
   312  loop1cont:
   313  	// unroll 1X: handle the z_len & 3 leading words one at a time
   314  	MOVV 0(R7), R10
   315  	// synthetic carry, one column at a time
        	// R12:R11 = m * x[i] (MULV = low 64 bits, MULHVU = unsigned high 64 bits)
   316  	MULV R4, R10, R11
   317  	MULHVU R4, R10, R12
   318  	ADDVU R5, R11, R10	// ADDS R5, R11, R10 (cr=R28)
   319  	SGTU R5, R10, R28	// ... R28 = 1 if low + carry wrapped (carry > sum)
   320  	ADDVU R28, R12, R5	// ADC $0, R12, R5: next carry = high + R28
   321  	MOVV R10, 0(R8)
   322  	ADDVU $8, R7
   323  	ADDVU $8, R8
   324  	SUBVU $1, R9
   325  	BNE R9, loop1cont
   326  loop1done:
   327  loop4:
   328  	BEQ R6, loop4done
   329  loop4cont:
   330  	// unroll 4X: same multiply-add step on four words per iteration; the
        	// four loads are issued before the four stores
   331  	MOVV 0(R7), R9
   332  	MOVV 8(R7), R10
   333  	MOVV 16(R7), R11
   334  	MOVV 24(R7), R12
   335  	// synthetic carry, one column at a time
   336  	MULV R4, R9, R13
   337  	MULHVU R4, R9, R14
   338  	ADDVU R5, R13, R9	// ADDS R5, R13, R9 (cr=R28)
   339  	SGTU R5, R9, R28	// ...
   340  	ADDVU R28, R14, R5	// ADC $0, R14, R5
   341  	MULV R4, R10, R13
   342  	MULHVU R4, R10, R14
   343  	ADDVU R5, R13, R10	// ADDS R5, R13, R10 (cr=R28)
   344  	SGTU R5, R10, R28	// ...
   345  	ADDVU R28, R14, R5	// ADC $0, R14, R5
   346  	MULV R4, R11, R13
   347  	MULHVU R4, R11, R14
   348  	ADDVU R5, R13, R11	// ADDS R5, R13, R11 (cr=R28)
   349  	SGTU R5, R11, R28	// ...
   350  	ADDVU R28, R14, R5	// ADC $0, R14, R5
   351  	MULV R4, R12, R13
   352  	MULHVU R4, R12, R14
   353  	ADDVU R5, R13, R12	// ADDS R5, R13, R12 (cr=R28)
   354  	SGTU R5, R12, R28	// ...
   355  	ADDVU R28, R14, R5	// ADC $0, R14, R5
   356  	MOVV R9, 0(R8)
   357  	MOVV R10, 8(R8)
   358  	MOVV R11, 16(R8)
   359  	MOVV R12, 24(R8)
   360  	ADDVU $32, R7
   361  	ADDVU $32, R8
   362  	SUBVU $1, R6
   363  	BNE R6, loop4cont
   364  loop4done:
   365  	MOVV R5, c+64(FP)	// return the final carry word
   366  	RET
   367  
   368  // func addMulVVWW(z, x, y []Word, m, a Word) (c Word)
        //
        // addMulVVWW computes, word by word from low to high,
        // x[i] + y[i]*m + carry, where the carry starts as a. The low 64 bits of
        // each result go to z[i]; the high product word plus the overflow of the
        // two additions becomes the next carry, and the last carry is returned in
        // c. z_len words are processed; x and y lengths are not loaded.
        //
        // Register roles:
        //   R4  = m, R5 = running carry (initially a)
        //   R6  = number of 4-word groups (z_len >> 2)
        //   R7  = x pointer, R8 = y pointer, R9 = z pointer (advanced as we go)
        //   R10 = number of leftover single words (z_len & 3); reused as data in loop4
        //   R28 = 1 when the preceding add on the low product word wrapped
   369  TEXT ·addMulVVWW(SB), NOSPLIT, $0
   370  	MOVV m+72(FP), R4
   371  	MOVV a+80(FP), R5
   372  	MOVV z_len+8(FP), R6
   373  	MOVV x_base+24(FP), R7
   374  	MOVV y_base+48(FP), R8
   375  	MOVV z_base+0(FP), R9
   376  	// compute unrolled loop lengths: R10 = z_len & 3, R6 = z_len >> 2
   377  	AND $3, R6, R10
   378  	SRLV $2, R6
   379  loop1:
   380  	BEQ R10, loop1done
   381  loop1cont:
   382  	// unroll 1X: handle the z_len & 3 leading words one at a time
   383  	MOVV 0(R7), R11
   384  	MOVV 0(R8), R12
   385  	// synthetic carry, one column at a time
        	// R14:R13 = m * y[i] (MULV = low 64 bits, MULHVU = unsigned high 64 bits)
   386  	MULV R4, R12, R13
   387  	MULHVU R4, R12, R14
   388  	ADDVU R11, R13	// ADDS R11, R13, R13 (cr=R28): low += x[i]
   389  	SGTU R11, R13, R28	// ... R28 = 1 if that add wrapped (x[i] > sum)
   390  	ADDVU R28, R14	// ADC $0, R14, R14: fold the overflow into the high word
   391  	ADDVU R5, R13, R12	// ADDS R5, R13, R12 (cr=R28): result = low + carry
   392  	SGTU R5, R12, R28	// ... R28 = 1 if that add wrapped (carry > sum)
   393  	ADDVU R28, R14, R5	// ADC $0, R14, R5: next carry = high + R28
   394  	MOVV R12, 0(R9)
   395  	ADDVU $8, R7
   396  	ADDVU $8, R8
   397  	ADDVU $8, R9
   398  	SUBVU $1, R10
   399  	BNE R10, loop1cont
   400  loop1done:
   401  loop4:
   402  	BEQ R6, loop4done
   403  loop4cont:
   404  	// unroll 4X: same add-multiply step on four word pairs per iteration;
        	// all eight loads are issued before the four stores
   405  	MOVV 0(R7), R10
   406  	MOVV 8(R7), R11
   407  	MOVV 16(R7), R12
   408  	MOVV 24(R7), R13
   409  	MOVV 0(R8), R14
   410  	MOVV 8(R8), R15
   411  	MOVV 16(R8), R16
   412  	MOVV 24(R8), R17
   413  	// synthetic carry, one column at a time
   414  	MULV R4, R14, R18
   415  	MULHVU R4, R14, R19
   416  	ADDVU R10, R18	// ADDS R10, R18, R18 (cr=R28)
   417  	SGTU R10, R18, R28	// ...
   418  	ADDVU R28, R19	// ADC $0, R19, R19
   419  	ADDVU R5, R18, R14	// ADDS R5, R18, R14 (cr=R28)
   420  	SGTU R5, R14, R28	// ...
   421  	ADDVU R28, R19, R5	// ADC $0, R19, R5
   422  	MULV R4, R15, R18
   423  	MULHVU R4, R15, R19
   424  	ADDVU R11, R18	// ADDS R11, R18, R18 (cr=R28)
   425  	SGTU R11, R18, R28	// ...
   426  	ADDVU R28, R19	// ADC $0, R19, R19
   427  	ADDVU R5, R18, R15	// ADDS R5, R18, R15 (cr=R28)
   428  	SGTU R5, R15, R28	// ...
   429  	ADDVU R28, R19, R5	// ADC $0, R19, R5
   430  	MULV R4, R16, R18
   431  	MULHVU R4, R16, R19
   432  	ADDVU R12, R18	// ADDS R12, R18, R18 (cr=R28)
   433  	SGTU R12, R18, R28	// ...
   434  	ADDVU R28, R19	// ADC $0, R19, R19
   435  	ADDVU R5, R18, R16	// ADDS R5, R18, R16 (cr=R28)
   436  	SGTU R5, R16, R28	// ...
   437  	ADDVU R28, R19, R5	// ADC $0, R19, R5
   438  	MULV R4, R17, R18
   439  	MULHVU R4, R17, R19
   440  	ADDVU R13, R18	// ADDS R13, R18, R18 (cr=R28)
   441  	SGTU R13, R18, R28	// ...
   442  	ADDVU R28, R19	// ADC $0, R19, R19
   443  	ADDVU R5, R18, R17	// ADDS R5, R18, R17 (cr=R28)
   444  	SGTU R5, R17, R28	// ...
   445  	ADDVU R28, R19, R5	// ADC $0, R19, R5
   446  	MOVV R14, 0(R9)
   447  	MOVV R15, 8(R9)
   448  	MOVV R16, 16(R9)
   449  	MOVV R17, 24(R9)
   450  	ADDVU $32, R7
   451  	ADDVU $32, R8
   452  	ADDVU $32, R9
   453  	SUBVU $1, R6
   454  	BNE R6, loop4cont
   455  loop4done:
   456  	MOVV R5, c+88(FP)	// return the final carry word
   457  	RET
   458  

View as plain text