1 // Copyright 2022 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "go_asm.h"
6 #include "textflag.h"
7
// IndexByte is the register-ABI entry point for searching a byte slice.
// It only normalizes the search byte and then tail-jumps to
// indexbytebody, which leaves the result in R4.
//
// input:
//   R4 = b_base
//   R5 = b_len
//   R6 = b_cap (unused)
//   R7 = byte to find
TEXT ·IndexByte<ABIInternal>(SB),NOSPLIT,$0-40
	AND	$0xff, R7, R7	// keep only the low 8 bits of the byte to find
	JMP	indexbytebody<>(SB)
16
// IndexByteString is the register-ABI entry point for searching a string.
// The search byte arrives in R6; it is masked to 8 bits and placed in R7,
// the register indexbytebody expects, before tail-jumping there.
//
// input:
//   R4 = s_base
//   R5 = s_len
//   R6 = byte to find
TEXT ·IndexByteString<ABIInternal>(SB),NOSPLIT,$0-32
	AND	$0xff, R6, R7	// R7 = byte to find, low 8 bits only
	JMP	indexbytebody<>(SB)
24
// indexbytebody scans R5 bytes starting at R4 for the byte held in R7.
//
// input:
//   R4: b_base
//   R5: len
//   R7: byte to find (callers have already masked it to 8 bits)
// output:
//   R4: offset of the first matching byte from b_base, or -1 if absent
// registers written:
//   R6 (copy of b_base), R8 (end pointer), R9, R10, R11,
//   X0-X4 on the LASX path, V0-V4 on the LSX path, FCC0
//
// Strategy: inputs of 32 bytes or more try LASX (32-byte vectors), then
// LSX (16-byte vectors), depending on the CPU feature flags; whatever is
// left over, and all shorter inputs, go through the scalar "tail" code.
TEXT indexbytebody<>(SB),NOSPLIT,$0
	BEQ	R5, notfound	// len == 0

	MOVV	R4, R6		// store base for later
	ADDV	R4, R5, R8	// end

	MOVV	$32, R9
	BGE	R5, R9, lasx	// len >= 32: try the vector paths

	// Scalar path. On entry R4 points at the next unread byte and R5 is
	// the number of bytes still to examine.
tail:
	MOVV	$8, R9
	BLT	R5, R9, lt_8
generic8_loop:
	// Load 8 bytes at once and test each byte lane, lowest address first.
	MOVV	(R4), R10

	AND	$0xff, R10, R11			// bits 7:0   -> offset 0
	BEQ	R7, R11, found

	BSTRPICKV	$15, R10, $8, R11	// bits 15:8  -> offset 1
	BEQ	R7, R11, byte_1th

	BSTRPICKV	$23, R10, $16, R11	// bits 23:16 -> offset 2
	BEQ	R7, R11, byte_2th

	BSTRPICKV	$31, R10, $24, R11	// bits 31:24 -> offset 3
	BEQ	R7, R11, byte_3th

	BSTRPICKV	$39, R10, $32, R11	// bits 39:32 -> offset 4
	BEQ	R7, R11, byte_4th

	BSTRPICKV	$47, R10, $40, R11	// bits 47:40 -> offset 5
	BEQ	R7, R11, byte_5th

	BSTRPICKV	$55, R10, $48, R11	// bits 55:48 -> offset 6
	BEQ	R7, R11, byte_6th

	BSTRPICKV	$63, R10, $56, R11	// bits 63:56 -> offset 7
	BEQ	R7, R11, byte_7th

	ADDV	$8, R4
	ADDV	$-8, R5
	BGE	R5, R9, generic8_loop

	// Fewer than 8 bytes left: byte-at-a-time until R4 reaches the end
	// pointer in R8.
lt_8:
	BEQ	R4, R8, notfound
	MOVBU	(R4), R10
	BEQ	R7, R10, found
	ADDV	$1, R4
	JMP	lt_8

	// byte_Nth: the match is N bytes past R4. Advance R4 to the match,
	// then convert the pointer into an index by subtracting the base.
byte_1th:
	ADDV	$1, R4
	SUBV	R6, R4
	RET

byte_2th:
	ADDV	$2, R4
	SUBV	R6, R4
	RET

byte_3th:
	ADDV	$3, R4
	SUBV	R6, R4
	RET

byte_4th:
	ADDV	$4, R4
	SUBV	R6, R4
	RET

byte_5th:
	ADDV	$5, R4
	SUBV	R6, R4
	RET

byte_6th:
	ADDV	$6, R4
	SUBV	R6, R4
	RET

byte_7th:
	ADDV	$7, R4
	// fall through to found

	// found: R4 points at the matching byte; return R4 - base.
found:
	SUBV	R6, R4
	RET

notfound:
	MOVV	$-1, R4
	RET

	// LASX path (32-byte vectors). Reached only when len >= 32.
lasx:
	MOVBU	internal∕cpu·Loong64+const_offsetLOONG64HasLASX(SB), R9
	BEQ	R9, lsx			// no LASX: try LSX instead
	XVMOVQ	R7, X0.B32		// X0 = byte to find in all 32 byte lanes

	MOVV	$128, R9
	BLT	R5, R9, lasx32		// too short for the 128-byte loop
lasx128_loop:
	XVMOVQ	0(R4), X1
	XVMOVQ	32(R4), X2
	XVMOVQ	64(R4), X3
	XVMOVQ	96(R4), X4

	// Compare each 32-byte vector with X0. The comparison result always
	// lands in X1 because lasx_index_cal reads the match mask from X1.
	XVSEQB	X1, X0, X1
	XVSETNEV	X1, FCC0	// FCC0 = (X1 != 0), i.e. some lane matched
	BFPT	lasx_found_add_0

	XVSEQB	X2, X0, X1
	XVSETNEV	X1, FCC0
	BFPT	lasx_found_add_32

	XVSEQB	X3, X0, X1
	XVSETNEV	X1, FCC0
	BFPT	lasx_found_add_64

	XVSEQB	X4, X0, X1
	XVSETNEV	X1, FCC0
	BFPT	lasx_found_add_96

	ADDV	$128, R4
	ADDV	$-128, R5
	BGE	R5, R9, lasx128_loop

	BEQ	R5, notfound

	// Entering here (rather than directly at lasx32_loop) guarantees R9
	// holds the 32-byte threshold, so the loop below keeps using vectors
	// for every remaining full 32-byte chunk instead of stopping after
	// one iteration when R9 still held 128.
lasx32:
	MOVV	$32, R9
	BLT	R5, R9, tail
lasx32_loop:
	XVMOVQ	0(R4), X1

	XVSEQB	X1, X0, X1
	XVSETNEV	X1, FCC0
	BFPT	lasx_found_add_0

	ADDV	$32, R4
	ADDV	$-32, R5
	BGE	R5, R9, lasx32_loop

	BEQ	R5, notfound

	JMP	tail

	// lasx_found_add_N: the match is in the vector loaded from N(R4).
	// R11 accumulates the byte offset of the match relative to R4.
lasx_found_add_0:
	MOVV	R0, R11
	JMP	lasx_index_cal

lasx_found_add_32:
	MOVV	$32, R11
	JMP	lasx_index_cal

lasx_found_add_64:
	MOVV	$64, R11
	JMP	lasx_index_cal

lasx_found_add_96:
	MOVV	$96, R11
	JMP	lasx_index_cal

	// Walk the four 64-bit elements of the match mask in X1. A trailing
	// zero count of 64 means the element is all zero (no match in those
	// 8 bytes), so skip it and add 8 to the offset. The last element is
	// not checked because the mask is known to be non-zero.
lasx_index_cal:
	MOVV	$64, R9
	XVMOVQ	X1.V[0], R10
	CTZV	R10, R10
	BNE	R10, R9, index_cal
	ADDV	$8, R11

	XVMOVQ	X1.V[1], R10
	CTZV	R10, R10
	BNE	R10, R9, index_cal
	ADDV	$8, R11

	XVMOVQ	X1.V[2], R10
	CTZV	R10, R10
	BNE	R10, R9, index_cal
	ADDV	$8, R11

	XVMOVQ	X1.V[3], R10
	CTZV	R10, R10
	JMP	index_cal

	// LSX path (16-byte vectors). Reached only from lasx, so len >= 32.
lsx:
	MOVBU	internal∕cpu·Loong64+const_offsetLOONG64HasLSX(SB), R9
	BEQ	R9, tail		// no LSX either: scalar code only
	VMOVQ	R7, V0.B16		// V0 = byte to find in all 16 byte lanes

	MOVV	$64, R9
	BLT	R5, R9, lsx16		// too short for the 64-byte loop
lsx64_loop:
	VMOVQ	0(R4), V1
	VMOVQ	16(R4), V2
	VMOVQ	32(R4), V3
	VMOVQ	48(R4), V4

	// As on the LASX path, every comparison result is written to V1
	// because lsx_index_cal reads the match mask from V1.
	VSEQB	V1, V0, V1
	VSETNEV	V1, FCC0
	BFPT	lsx_found_add_0

	VSEQB	V2, V0, V1
	VSETNEV	V1, FCC0
	BFPT	lsx_found_add_16

	VSEQB	V3, V0, V1
	VSETNEV	V1, FCC0
	BFPT	lsx_found_add_32

	VSEQB	V4, V0, V1
	VSETNEV	V1, FCC0
	BFPT	lsx_found_add_48

	ADDV	$64, R4
	ADDV	$-64, R5
	BGE	R5, R9, lsx64_loop

	BEQ	R5, notfound

	// Entering here (rather than directly at lsx16_loop) guarantees R9
	// holds the 16-byte threshold, so every remaining full 16-byte chunk
	// is handled with vectors even when the 64-byte loop was skipped.
lsx16:
	MOVV	$16, R9
	BLT	R5, R9, tail
lsx16_loop:
	VMOVQ	0(R4), V1

	VSEQB	V1, V0, V1
	VSETNEV	V1, FCC0
	BFPT	lsx_found_add_0

	ADDV	$16, R4
	ADDV	$-16, R5
	BGE	R5, R9, lsx16_loop

	BEQ	R5, notfound

	JMP	tail

	// lsx_found_add_N: the match is in the vector loaded from N(R4).
lsx_found_add_0:
	MOVV	R0, R11
	JMP	lsx_index_cal

lsx_found_add_16:
	MOVV	$16, R11
	JMP	lsx_index_cal

lsx_found_add_32:
	MOVV	$32, R11
	JMP	lsx_index_cal

lsx_found_add_48:
	MOVV	$48, R11
	JMP	lsx_index_cal

	// Same scheme as lasx_index_cal, for the two 64-bit elements of V1.
lsx_index_cal:
	MOVV	$64, R9

	VMOVQ	V1.V[0], R10
	CTZV	R10, R10
	BNE	R10, R9, index_cal
	ADDV	$8, R11

	VMOVQ	V1.V[1], R10
	CTZV	R10, R10
	JMP	index_cal

	// R10 = bit position of the first set mask bit within its 64-bit
	// element, R11 = byte offset of that element from R4.
	// Matching lanes are whole bytes, so bit position / 8 = byte index.
index_cal:
	SRLV	$3, R10
	ADDV	R11, R10
	ADDV	R10, R4
	JMP	found
293
View as plain text