Text file src/internal/bytealg/indexbyte_loong64.s

     1  // Copyright 2022 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "textflag.h"
     7  
     8  // IndexByte entry: normalizes the needle, then continues in indexbytebody.
     9  //   R4 = b_base
    10  //   R5 = b_len
    11  //   R6 = b_cap (ignored)
    12  //   R7 = byte to find
    13  TEXT ·IndexByte<ABIInternal>(SB),NOSPLIT,$0-40
    14  	MOVBU	R7, R7			// zero-extend the low byte: R7 &= 0xff
    15  	JMP	indexbytebody<>(SB)	// tail jump: the body leaves the result in R4 and returns
    16  
    17  // IndexByteString entry: moves the needle into R7, then continues in indexbytebody.
    18  //   R4 = s_base
    19  //   R5 = s_len
    20  //   R6 = byte to find
    21  TEXT ·IndexByteString<ABIInternal>(SB),NOSPLIT,$0-32
    22  	MOVBU	R6, R7			// R7 = R6 & 0xff, the register the body compares against
    23  	JMP	indexbytebody<>(SB)	// tail jump: the body leaves the result in R4 and returns
    24  
    25  // indexbytebody sets R4 to the index of the first byte equal to R7, or to -1.
    26  //   R4: b_base              R5: len (bytes)
    27  //   R7: byte to find, already masked to 8 bits by both entry points
    28  //   scratch: R6, R8-R11, FCC0, and X0-X4 (LASX path) or V0-V4 (LSX path)
    29  TEXT indexbytebody<>(SB),NOSPLIT,$0
    30  	BEQ	R5, notfound	// len == 0
    31  
    32  	MOVV	R4, R6		// keep the base; R4 becomes the moving cursor
    33  	ADDV	R4, R5, R8	// R8 = one past the last byte
    34  
    35  	MOVV	$32, R9
    36  	BGE	R5, R9, lasx	// len >= 32: try the vector paths first
    37  tail:				// scalar path: R4 = cursor, R5 = bytes left
    38  	MOVV	$8, R9
    39  	BLT	R5, R9, lt_8
    40  generic8_loop:
    41  	MOVV	(R4), R10	// 8 bytes at once; the lowest-order byte is treated as offset 0
    42  
    43  	AND	$0xff, R10, R11
    44  	BEQ	R7, R11, found	// offset 0: R4 already points at the match
    45  
    46  	BSTRPICKV	$15, R10, $8, R11	// bits 15..8 = byte at offset 1
    47  	BEQ	R7, R11, byte_1th
    48  
    49  	BSTRPICKV	$23, R10, $16, R11
    50  	BEQ	R7, R11, byte_2th
    51  
    52  	BSTRPICKV	$31, R10, $24, R11
    53  	BEQ	R7, R11, byte_3th
    54  
    55  	BSTRPICKV	$39, R10, $32, R11
    56  	BEQ	R7, R11, byte_4th
    57  
    58  	BSTRPICKV	$47, R10, $40, R11
    59  	BEQ	R7, R11, byte_5th
    60  
    61  	BSTRPICKV	$55, R10, $48, R11
    62  	BEQ	R7, R11, byte_6th
    63  
    64  	BSTRPICKV	$63, R10, $56, R11
    65  	BEQ	R7, R11, byte_7th
    66  
    67  	ADDV	$8, R4
    68  	ADDV	$-8, R5
    69  	BGE	R5, R9, generic8_loop	// R9 is still 8 here
    70  
    71  lt_8:				// byte-at-a-time until the cursor reaches the end
    72  	BEQ	R4, R8, notfound
    73  	MOVBU	(R4), R10
    74  	BEQ	R7, R10, found
    75  	ADDV	$1, R4
    76  	JMP	lt_8
    77  
    78  byte_1th:			// byte_Nth: advance the cursor to the match, then index = cursor - base
    79  	ADDV	$1, R4
    80  	SUBV	R6, R4
    81  	RET
    82  
    83  byte_2th:
    84  	ADDV	$2, R4
    85  	SUBV	R6, R4
    86  	RET
    87  
    88  byte_3th:
    89  	ADDV	$3, R4
    90  	SUBV	R6, R4
    91  	RET
    92  
    93  byte_4th:
    94  	ADDV	$4, R4
    95  	SUBV	R6, R4
    96  	RET
    97  
    98  byte_5th:
    99  	ADDV	$5, R4
   100  	SUBV	R6, R4
   101  	RET
   102  
   103  byte_6th:
   104  	ADDV	$6, R4
   105  	SUBV	R6, R4
   106  	RET
   107  
   108  byte_7th:
   109  	ADDV	$7, R4		// falls through into found
   110  
   111  found:				// R4 = address of the match
   112  	SUBV	R6, R4		// convert the address to an index relative to the saved base
   113  	RET
   114  
   115  notfound:
   116  	MOVV	$-1, R4
   117  	RET
   118  
   119  lasx:				// reached only with len >= 32
   120  	MOVBU   internal∕cpu·Loong64+const_offsetLOONG64HasLASX(SB), R9	// HasLASX flag byte
   121  	BEQ     R9, lsx		// flag is zero: try LSX instead
   122  	XVMOVQ	R7, X0.B32	// X0 = the needle replicated into all 32 byte lanes
   123  
   124  	MOVV	$128, R9
   125  	BLT	R5, R9, lasx32	// len < 128: skip the unrolled loop but still reload R9 = 32
   126  lasx128_loop:			// 4 x 32 bytes per iteration
   127  	XVMOVQ	0(R4), X1
   128  	XVMOVQ	32(R4), X2
   129  	XVMOVQ	64(R4), X3
   130  	XVMOVQ	96(R4), X4
   131  
   132  	XVSEQB	X1, X0, X1	// X1 = per-byte match mask
   133  	XVSETNEV	X1, FCC0	// FCC0 = mask is not all zero
   134  	BFPT	lasx_found_add_0
   135  
   136  	XVSEQB	X2, X0, X1	// the mask always lands in X1, which lasx_index_cal reads
   137  	XVSETNEV	X1, FCC0
   138  	BFPT	lasx_found_add_32
   139  
   140  	XVSEQB	X3, X0, X1
   141  	XVSETNEV	X1, FCC0
   142  	BFPT	lasx_found_add_64
   143  
   144  	XVSEQB	X4, X0, X1
   145  	XVSETNEV	X1, FCC0
   146  	BFPT	lasx_found_add_96
   147  
   148  	ADDV	$128, R4
   149  	ADDV	$-128, R5
   150  	BGE	R5, R9, lasx128_loop
   151  
   152  	BEQ	R5, notfound
   153  lasx32:				// also the target of the len < 128 shortcut, where R9 is still 128
   154  	MOVV	$32, R9		// loop bound for lasx32_loop
   155  	BLT	R5, R9, tail
   156  lasx32_loop:			// 32 bytes per iteration while R5 >= 32
   157  	XVMOVQ	0(R4), X1
   158  
   159  	XVSEQB	X1, X0, X1
   160  	XVSETNEV	X1, FCC0
   161  	BFPT	lasx_found_add_0
   162  
   163  	ADDV	$32, R4
   164  	ADDV	$-32, R5
   165  	BGE	R5, R9, lasx32_loop
   166  
   167  	BEQ	R5, notfound
   168  
   169  	JMP	tail		// fewer than 32 bytes left: finish on the scalar path
   170  
   171  lasx_found_add_0:		// R11 = offset from R4 of the vector holding the match
   172  	MOVV	R0, R11
   173  	JMP	lasx_index_cal
   174  
   175  lasx_found_add_32:
   176  	MOVV	$32, R11
   177  	JMP	lasx_index_cal
   178  
   179  lasx_found_add_64:
   180  	MOVV	$64, R11
   181  	JMP	lasx_index_cal
   182  
   183  lasx_found_add_96:
   184  	MOVV	$96, R11
   185  	JMP	lasx_index_cal
   186  
   187  lasx_index_cal:			// scan the mask in X1 one 64-bit element at a time
   188  	MOVV	$64, R9		// a trailing-zero count of 64 is treated as "no match in this element"
   189  	XVMOVQ	X1.V[0], R10
   190  	CTZV	R10, R10	// bit position of the first set mask bit
   191  	BNE	R10, R9, index_cal
   192  	ADDV	$8, R11		// no match here: the next element starts 8 bytes later
   193  
   194  	XVMOVQ	X1.V[1], R10
   195  	CTZV	R10, R10
   196  	BNE	R10, R9, index_cal
   197  	ADDV	$8, R11
   198  
   199  	XVMOVQ	X1.V[2], R10
   200  	CTZV	R10, R10
   201  	BNE	R10, R9, index_cal
   202  	ADDV	$8, R11
   203  
   204  	XVMOVQ	X1.V[3], R10	// the mask was nonzero, so the match must be in this last element
   205  	CTZV	R10, R10
   206  	JMP	index_cal
   207  
   208  lsx:				// reached only with len >= 32
   209  	MOVBU   internal∕cpu·Loong64+const_offsetLOONG64HasLSX(SB), R9	// HasLSX flag byte
   210  	BEQ     R9, tail	// flag is zero: scalar path only
   211  	VMOVQ	R7, V0.B16	// V0 = the needle replicated into all 16 byte lanes
   212  
   213  	MOVV	$64, R9
   214  	BLT	R5, R9, lsx16	// len < 64: skip the unrolled loop but still reload R9 = 16
   215  lsx64_loop:			// 4 x 16 bytes per iteration
   216  	VMOVQ	0(R4), V1
   217  	VMOVQ	16(R4), V2
   218  	VMOVQ	32(R4), V3
   219  	VMOVQ	48(R4), V4
   220  
   221  	VSEQB	V1, V0, V1	// V1 = per-byte match mask
   222  	VSETNEV	V1, FCC0	// FCC0 = mask is not all zero
   223  	BFPT	lsx_found_add_0
   224  
   225  	VSEQB	V2, V0, V1	// the mask always lands in V1, which lsx_index_cal reads
   226  	VSETNEV	V1, FCC0
   227  	BFPT	lsx_found_add_16
   228  
   229  	VSEQB	V3, V0, V1
   230  	VSETNEV	V1, FCC0
   231  	BFPT	lsx_found_add_32
   232  
   233  	VSEQB	V4, V0, V1
   234  	VSETNEV	V1, FCC0
   235  	BFPT	lsx_found_add_48
   236  
   237  	ADDV	$64, R4
   238  	ADDV	$-64, R5
   239  	BGE	R5, R9, lsx64_loop
   240  
   241  	BEQ	R5, notfound
   242  lsx16:				// also the target of the len < 64 shortcut, where R9 is still 64
   243  	MOVV	$16, R9		// loop bound for lsx16_loop
   244  	BLT	R5, R9, tail
   245  lsx16_loop:			// 16 bytes per iteration while R5 >= 16
   246  	VMOVQ	0(R4), V1
   247  
   248  	VSEQB	V1, V0, V1
   249  	VSETNEV	V1, FCC0
   250  	BFPT	lsx_found_add_0
   251  
   252  	ADDV	$16, R4
   253  	ADDV	$-16, R5
   254  	BGE	R5, R9, lsx16_loop
   255  
   256  	BEQ	R5, notfound
   257  
   258  	JMP	tail		// fewer than 16 bytes left: finish on the scalar path
   259  
   260  lsx_found_add_0:		// R11 = offset from R4 of the vector holding the match
   261  	MOVV	R0, R11
   262  	JMP	lsx_index_cal
   263  
   264  lsx_found_add_16:
   265  	MOVV	$16, R11
   266  	JMP	lsx_index_cal
   267  
   268  lsx_found_add_32:
   269  	MOVV	$32, R11
   270  	JMP	lsx_index_cal
   271  
   272  lsx_found_add_48:
   273  	MOVV	$48, R11
   274  	JMP	lsx_index_cal
   275  
   276  lsx_index_cal:			// scan the mask in V1 one 64-bit element at a time
   277  	MOVV	$64, R9		// a trailing-zero count of 64 is treated as "no match in this element"
   278  
   279  	VMOVQ	V1.V[0], R10
   280  	CTZV	R10, R10	// bit position of the first set mask bit
   281  	BNE	R10, R9, index_cal
   282  	ADDV	$8, R11		// no match in the low half: the high half starts 8 bytes later
   283  
   284  	VMOVQ	V1.V[1], R10	// the mask was nonzero, so the match must be in the high half
   285  	CTZV	R10, R10
   286  	JMP	index_cal
   287  
   288  index_cal:			// R10 = bit position within the element, R11 = byte offset of the element
   289  	SRLV	$3, R10		// bit position -> byte position (8 mask bits per byte lane)
   290  	ADDV	R11, R10
   291  	ADDV	R10, R4		// R4 = address of the matching byte
   292  	JMP	found
   293  

View as plain text