// File: src/internal/bytealg/equal_loong64.s

     1  // Copyright 2022 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "textflag.h"
     7  
     8  #define	REGCTXT	R29
     9  
// memequal(a, b unsafe.Pointer, size uintptr) bool
//
// ABIInternal entry point: arguments arrive in registers and the
// boolean result is returned in R4 (1 = equal, 0 = not equal).
TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	// R4 = a_base
	// R5 = b_base
	// R6 = size
	JMP	equalbody<>(SB)	// tail call; equalbody leaves the result in R4
    16  
// memequal_varlen(a, b unsafe.Pointer) bool
//
// Closure-call variant of memequal: the compiler does not pass the size
// as an argument but stores it in the closure context, addressed via
// REGCTXT (R29 on loong64). Result is returned in R4.
TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT,$0
	// R4 = a_base
	// R5 = b_base
	MOVV	8(REGCTXT), R6    // compiler stores size at offset 8 in the closure
	JMP	equalbody<>(SB)	// tail call with R4/R5/R6 set up as for memequal
    23  
// input:
//   R4 = a_base
//   R5 = b_base
//   R6 = size
// output:
//   R4 = 1 if the size bytes at a_base and b_base are identical, else 0
//
// Dispatch strategy: sizes >= 64 bytes use 256-bit LASX SIMD when the
// CPU supports it, falling back to 128-bit LSX, then to 8-byte
// general-register compares; any remaining tail (< 64 bytes) is handled
// with progressively narrower scalar loads (16/8/4/2/1 bytes).
// Throughout, R6 counts bytes remaining and R7 holds the current
// chunk-size threshold; R4/R5 are advanced in lockstep.
TEXT equalbody<>(SB),NOSPLIT|NOFRAME,$0
	// Identical pointers are trivially equal — no memory access needed.
	// a_base == b_base
	BEQ	R4, R5, equal
	// 0 bytes
	BEQ	R6, equal

	MOVV	$64, R7
	BGE	R6, R7, lasx	// large input: try the SIMD paths

	// size < 64 bytes
tail:
	MOVV	$16, R7
	BLT	R6, R7, lt_16
generic16_loop:
	// Compare 16 bytes per iteration via two 8-byte loads per side.
	ADDV	$-16, R6
	MOVV	0(R4), R8
	MOVV	8(R4), R9
	MOVV	0(R5), R10
	MOVV	8(R5), R11
	BNE	R8, R10, not_equal
	BNE	R9, R11, not_equal
	BEQ	R6, equal	// consumed exactly all remaining bytes
	ADDV	$16, R4
	ADDV	$16, R5
	BGE	R6, R7, generic16_loop

	// size < 16 bytes: at most one 8-byte compare, then narrow further
lt_16:
	MOVV	$8, R7
	BLT	R6, R7, lt_8
	ADDV	$-8, R6
	MOVV	0(R4), R8
	MOVV	0(R5), R9
	BNE	R8, R9, not_equal
	BEQ	R6, equal
	ADDV	$8, R4
	ADDV	$8, R5

	// size < 8 bytes: at most one 4-byte compare
lt_8:
	MOVV	$4, R7
	BLT	R6, R7, lt_4
	ADDV	$-4, R6
	MOVW	0(R4), R8
	MOVW	0(R5), R9
	BNE	R8, R9, not_equal
	BEQ	R6, equal
	ADDV	$4, R4
	ADDV	$4, R5

	// size < 4 bytes: at most one 2-byte compare
lt_4:
	MOVV	$2, R7
	BLT	R6, R7, lt_2
	ADDV	$-2, R6
	MOVH	0(R4), R8
	MOVH	0(R5), R9
	BNE	R8, R9, not_equal
	BEQ	R6, equal
	ADDV	$2, R4
	ADDV	$2, R5

	// size < 2 bytes: exactly one byte remains at this point
lt_2:
	MOVB	0(R4), R8
	MOVB	0(R5), R9
	BNE	R8, R9, not_equal

equal:
	MOVV	$1, R4	// return true
	RET

not_equal:
	MOVV	R0, R4	// return false (R0 is the hardwired zero register)
	RET

	// Implemented using 256-bit SIMD instructions
lasx:
	// Runtime CPU-feature check; no LASX -> try the 128-bit LSX path.
	MOVBU   internal∕cpu·Loong64+const_offsetLOONG64HasLASX(SB), R7
	BEQ	R7, lsx

lasx256:
	MOVV	$256, R7
	BLT	R6, R7, lasx64
lasx256_loop:
	// Compare 256 bytes per iteration: 8 x 32-byte vectors per side.
	ADDV	$-256, R6
	XVMOVQ	0(R4), X0
	XVMOVQ	32(R4), X1
	XVMOVQ	64(R4), X2
	XVMOVQ	96(R4), X3
	XVMOVQ	128(R4), X4
	XVMOVQ	160(R4), X5
	XVMOVQ	192(R4), X6
	XVMOVQ	224(R4), X7
	XVMOVQ	0(R5), X8
	XVMOVQ	32(R5), X9
	XVMOVQ	64(R5), X10
	XVMOVQ	96(R5), X11
	XVMOVQ	128(R5), X12
	XVMOVQ	160(R5), X13
	XVMOVQ	192(R5), X14
	XVMOVQ	224(R5), X15
	// XVSEQV leaves all-ones lanes where the inputs match and zero
	// lanes where they differ; AND-reduce the 8 masks down into X0.
	XVSEQV	X0, X8, X0
	XVSEQV	X1, X9, X1
	XVSEQV	X2, X10, X2
	XVSEQV	X3, X11, X3
	XVSEQV	X4, X12, X4
	XVSEQV	X5, X13, X5
	XVSEQV	X6, X14, X6
	XVSEQV	X7, X15, X7
	XVANDV	X0, X1, X0
	XVANDV	X2, X3, X2
	XVANDV	X4, X5, X4
	XVANDV	X6, X7, X6
	XVANDV	X0, X2, X0
	XVANDV	X4, X6, X4
	XVANDV	X0, X4, X0
	XVSETALLNEV	X0, FCC0	// FCC0 = all lanes of X0 non-zero (all bytes matched)
	BFPF	not_equal	// FCC0 false: some lane was zero -> mismatch
	BEQ	R6, equal
	ADDV	$256, R4
	ADDV	$256, R5
	BGE	R6, R7, lasx256_loop

	// 64 <= remaining < 256: one 64-byte LASX compare per iteration
lasx64:
	MOVV	$64, R7
	BLT	R6, R7, tail
lasx64_loop:
	ADDV	$-64, R6
	XVMOVQ	0(R4), X0
	XVMOVQ	32(R4), X1
	XVMOVQ	0(R5), X2
	XVMOVQ	32(R5), X3
	XVSEQV	X0, X2, X0
	XVSEQV	X1, X3, X1
	XVANDV	X0, X1, X0
	XVSETALLNEV	X0, FCC0
	BFPF	not_equal
	BEQ	R6, equal
	ADDV	$64, R4
	ADDV	$64, R5
	BGE	R6, R7, lasx64_loop
	JMP	tail

	// Implemented using 128-bit SIMD instructions
lsx:
	// No LSX either -> use the general-register 64-byte loop.
	MOVBU	internal∕cpu·Loong64+const_offsetLOONG64HasLSX(SB), R7
	BEQ	R7, generic64_loop

lsx128:
	MOVV	$128, R7
	BLT	R6, R7, lsx32
lsx128_loop:
	// Compare 128 bytes per iteration: 8 x 16-byte vectors per side.
	ADDV	$-128, R6
	VMOVQ	0(R4), V0
	VMOVQ	16(R4), V1
	VMOVQ	32(R4), V2
	VMOVQ	48(R4), V3
	VMOVQ	64(R4), V4
	VMOVQ	80(R4), V5
	VMOVQ	96(R4), V6
	VMOVQ	112(R4), V7
	VMOVQ	0(R5), V8
	VMOVQ	16(R5), V9
	VMOVQ	32(R5), V10
	VMOVQ	48(R5), V11
	VMOVQ	64(R5), V12
	VMOVQ	80(R5), V13
	VMOVQ	96(R5), V14
	VMOVQ	112(R5), V15
	// Same mask-and-reduce scheme as the LASX path, 128 bits wide.
	VSEQV	V0, V8, V0
	VSEQV	V1, V9, V1
	VSEQV	V2, V10, V2
	VSEQV	V3, V11, V3
	VSEQV	V4, V12, V4
	VSEQV	V5, V13, V5
	VSEQV	V6, V14, V6
	VSEQV	V7, V15, V7
	VANDV	V0, V1, V0
	VANDV	V2, V3, V2
	VANDV	V4, V5, V4
	VANDV	V6, V7, V6
	VANDV	V0, V2, V0
	VANDV	V4, V6, V4
	VANDV	V0, V4, V0
	VSETALLNEV	V0, FCC0	// FCC0 = all lanes non-zero (all bytes matched)
	BFPF	not_equal
	BEQ	R6, equal

	ADDV	$128, R4
	ADDV	$128, R5
	BGE	R6, R7, lsx128_loop

	// 32 <= remaining < 128: one 32-byte LSX compare per iteration
lsx32:
	MOVV	$32, R7
	BLT	R6, R7, tail
lsx32_loop:
	ADDV	$-32, R6
	VMOVQ	0(R4), V0
	VMOVQ	16(R4), V1
	VMOVQ	0(R5), V2
	VMOVQ	16(R5), V3
	VSEQV	V0, V2, V0
	VSEQV	V1, V3, V1
	VANDV	V0, V1, V0
	VSETALLNEV	V0, FCC0
	BFPF	not_equal
	BEQ	R6, equal
	ADDV	$32, R4
	ADDV	$32, R5
	BGE	R6, R7, lsx32_loop
	JMP tail

	// Implemented using general instructions
	// (no SIMD available): compare 64 bytes per iteration with eight
	// 8-byte loads per side, split into two 32-byte halves so the
	// first half's mismatches bail out before loading the second.
generic64_loop:
	ADDV	$-64, R6
	MOVV	0(R4), R7
	MOVV	8(R4), R8
	MOVV	16(R4), R9
	MOVV	24(R4), R10
	MOVV	0(R5), R15
	MOVV	8(R5), R16
	MOVV	16(R5), R17
	MOVV	24(R5), R18
	BNE	R7, R15, not_equal
	BNE	R8, R16, not_equal
	BNE	R9, R17, not_equal
	BNE	R10, R18, not_equal
	MOVV	32(R4), R11
	MOVV	40(R4), R12
	MOVV	48(R4), R13
	MOVV	56(R4), R14
	MOVV	32(R5), R19
	MOVV	40(R5), R20
	MOVV	48(R5), R21
	// NOTE(review): R22 is skipped (it holds g on loong64), hence R23
	// for the last load — confirm against the Go loong64 register plan.
	MOVV	56(R5), R23
	BNE	R11, R19, not_equal
	BNE	R12, R20, not_equal
	BNE	R13, R21, not_equal
	BNE	R14, R23, not_equal
	BEQ	R6, equal
	ADDV	$64, R4
	ADDV	$64, R5
	MOVV	$64, R7
	BGE	R6, R7, generic64_loop
	JMP tail
   274  
