// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "textflag.h"

#define	REGCTXT	R29

// memequal(a, b unsafe.Pointer, size uintptr) bool
TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25
	// R4 = a_base
	// R5 = b_base
	// R6 = size
	JMP	equalbody<>(SB)

// memequal_varlen(a, b unsafe.Pointer) bool
TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT,$0-17
	// R4 = a_base
	// R5 = b_base
	MOVV	8(REGCTXT), R6	// compiler stores size at offset 8 in the closure
	JMP	equalbody<>(SB)

// input:
// R4 = a_base
// R5 = b_base
// R6 = size
TEXT equalbody<>(SB),NOSPLIT|NOFRAME,$0
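	// Strategy: buffers of 64 bytes or more are compared with 256-bit
	// LASX or 128-bit LSX vector loops when the CPU supports those
	// extensions, falling back to a generic 8-byte-at-a-time loop
	// otherwise. Anything under 64 bytes, including the leftovers of
	// the vector loops, goes through the scalar tail ladder below.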
	// a_base == b_base
	BEQ	R4, R5, equal
	// 0 bytes
	BEQ	R6, equal

	MOVV	$64, R7
	BGE	R6, R7, lasx

// size < 64 bytes
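// The ladder halves the chunk width at each step: 16-byte chunks in a
// loop, then at most one 8-, 4-, 2- and 1-byte compare. Each stage
// subtracts the chunk size before comparing, so a zero remainder exits
// through 'equal' without advancing the pointers again.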
tail:
	MOVV	$16, R7
	BLT	R6, R7, lt_16
generic16_loop:
	ADDV	$-16, R6
	MOVV	0(R4), R8
	MOVV	8(R4), R9
	MOVV	0(R5), R10
	MOVV	8(R5), R11
	BNE	R8, R10, not_equal
	BNE	R9, R11, not_equal
	BEQ	R6, equal
	ADDV	$16, R4
	ADDV	$16, R5
	BGE	R6, R7, generic16_loop

// size < 16 bytes
lt_16:
	MOVV	$8, R7
	BLT	R6, R7, lt_8
	ADDV	$-8, R6
	MOVV	0(R4), R8
	MOVV	0(R5), R9
	BNE	R8, R9, not_equal
	BEQ	R6, equal
	ADDV	$8, R4
	ADDV	$8, R5

// size < 8 bytes
lt_8:
	MOVV	$4, R7
	BLT	R6, R7, lt_4
	ADDV	$-4, R6
	MOVW	0(R4), R8
	MOVW	0(R5), R9
	BNE	R8, R9, not_equal
	BEQ	R6, equal
	ADDV	$4, R4
	ADDV	$4, R5

// size < 4 bytes
lt_4:
	MOVV	$2, R7
	BLT	R6, R7, lt_2
	ADDV	$-2, R6
	MOVH	0(R4), R8
	MOVH	0(R5), R9
	BNE	R8, R9, not_equal
	BEQ	R6, equal
	ADDV	$2, R4
	ADDV	$2, R5

// size < 2 bytes
lt_2:
	MOVB	0(R4), R8
	MOVB	0(R5), R9
	BNE	R8, R9, not_equal

equal:
	MOVV	$1, R4
	RET

not_equal:
	MOVV	R0, R4
	RET

// Implemented using 256-bit SIMD instructions
lasx:
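	// internal/cpu exports its feature flags as byte-sized variables;
	// the '∕' in the symbol name is the assembler's package-path
	// separator. Without LASX, retry with the narrower LSX extension.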
	MOVBU	internal∕cpu·Loong64+const_offsetLOONG64HasLASX(SB), R7
	BEQ	R7, lsx

lasx256:
	MOVV	$256, R7
	BLT	R6, R7, lasx64
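	// Each iteration compares 256 bytes from both buffers: XVSEQV sets
	// each lane to all ones where the inputs match and to zero where
	// they differ, the XVANDV tree folds the eight results into one
	// register, and XVSETALLNEV sets FCC0 only if every lane is still
	// nonzero, i.e. everything matched. BFPF then branches to
	// not_equal when FCC0 is false.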
lasx256_loop:
	ADDV	$-256, R6
	XVMOVQ	0(R4), X0
	XVMOVQ	32(R4), X1
	XVMOVQ	64(R4), X2
	XVMOVQ	96(R4), X3
	XVMOVQ	128(R4), X4
	XVMOVQ	160(R4), X5
	XVMOVQ	192(R4), X6
	XVMOVQ	224(R4), X7
	XVMOVQ	0(R5), X8
	XVMOVQ	32(R5), X9
	XVMOVQ	64(R5), X10
	XVMOVQ	96(R5), X11
	XVMOVQ	128(R5), X12
	XVMOVQ	160(R5), X13
	XVMOVQ	192(R5), X14
	XVMOVQ	224(R5), X15
	XVSEQV	X0, X8, X0
	XVSEQV	X1, X9, X1
	XVSEQV	X2, X10, X2
	XVSEQV	X3, X11, X3
	XVSEQV	X4, X12, X4
	XVSEQV	X5, X13, X5
	XVSEQV	X6, X14, X6
	XVSEQV	X7, X15, X7
	XVANDV	X0, X1, X0
	XVANDV	X2, X3, X2
	XVANDV	X4, X5, X4
	XVANDV	X6, X7, X6
	XVANDV	X0, X2, X0
	XVANDV	X4, X6, X4
	XVANDV	X0, X4, X0
	XVSETALLNEV	X0, FCC0
	BFPF	not_equal
	BEQ	R6, equal
	ADDV	$256, R4
	ADDV	$256, R5
	BGE	R6, R7, lasx256_loop

lasx64:
	MOVV	$64, R7
	BLT	R6, R7, tail
lasx64_loop:
	ADDV	$-64, R6
	XVMOVQ	0(R4), X0
	XVMOVQ	32(R4), X1
	XVMOVQ	0(R5), X2
	XVMOVQ	32(R5), X3
	XVSEQV	X0, X2, X0
	XVSEQV	X1, X3, X1
	XVANDV	X0, X1, X0
	XVSETALLNEV	X0, FCC0
	BFPF	not_equal
	BEQ	R6, equal
	ADDV	$64, R4
	ADDV	$64, R5
	BGE	R6, R7, lasx64_loop
	JMP	tail

// Implemented using 128-bit SIMD instructions
lsx:
	MOVBU	internal∕cpu·Loong64+const_offsetLOONG64HasLSX(SB), R7
	BEQ	R7, generic64_loop

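// The LSX path mirrors the LASX loops with 128-bit V registers:
// 128-byte chunks while they last, then 32-byte chunks, then the
// scalar tail.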
lsx128:
	MOVV	$128, R7
	BLT	R6, R7, lsx32
lsx128_loop:
	ADDV	$-128, R6
	VMOVQ	0(R4), V0
	VMOVQ	16(R4), V1
	VMOVQ	32(R4), V2
	VMOVQ	48(R4), V3
	VMOVQ	64(R4), V4
	VMOVQ	80(R4), V5
	VMOVQ	96(R4), V6
	VMOVQ	112(R4), V7
	VMOVQ	0(R5), V8
	VMOVQ	16(R5), V9
	VMOVQ	32(R5), V10
	VMOVQ	48(R5), V11
	VMOVQ	64(R5), V12
	VMOVQ	80(R5), V13
	VMOVQ	96(R5), V14
	VMOVQ	112(R5), V15
	VSEQV	V0, V8, V0
	VSEQV	V1, V9, V1
	VSEQV	V2, V10, V2
	VSEQV	V3, V11, V3
	VSEQV	V4, V12, V4
	VSEQV	V5, V13, V5
	VSEQV	V6, V14, V6
	VSEQV	V7, V15, V7
	VANDV	V0, V1, V0
	VANDV	V2, V3, V2
	VANDV	V4, V5, V4
	VANDV	V6, V7, V6
	VANDV	V0, V2, V0
	VANDV	V4, V6, V4
	VANDV	V0, V4, V0
	VSETALLNEV	V0, FCC0
	BFPF	not_equal
	BEQ	R6, equal

	ADDV	$128, R4
	ADDV	$128, R5
	BGE	R6, R7, lsx128_loop

lsx32:
	MOVV	$32, R7
	BLT	R6, R7, tail
lsx32_loop:
	ADDV	$-32, R6
	VMOVQ	0(R4), V0
	VMOVQ	16(R4), V1
	VMOVQ	0(R5), V2
	VMOVQ	16(R5), V3
	VSEQV	V0, V2, V0
	VSEQV	V1, V3, V1
	VANDV	V0, V1, V0
	VSETALLNEV	V0, FCC0
	BFPF	not_equal
	BEQ	R6, equal
	ADDV	$32, R4
	ADDV	$32, R5
	BGE	R6, R7, lsx32_loop
	JMP	tail

// Implemented using general instructions
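// Note: the scalar loads skip R22, which is reserved as the goroutine
// pointer (g) on loong64, hence the jump from R21 to R23.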
generic64_loop:
	ADDV	$-64, R6
	MOVV	0(R4), R7
	MOVV	8(R4), R8
	MOVV	16(R4), R9
	MOVV	24(R4), R10
	MOVV	0(R5), R15
	MOVV	8(R5), R16
	MOVV	16(R5), R17
	MOVV	24(R5), R18
	BNE	R7, R15, not_equal
	BNE	R8, R16, not_equal
	BNE	R9, R17, not_equal
	BNE	R10, R18, not_equal
	MOVV	32(R4), R11
	MOVV	40(R4), R12
	MOVV	48(R4), R13
	MOVV	56(R4), R14
	MOVV	32(R5), R19
	MOVV	40(R5), R20
	MOVV	48(R5), R21
	MOVV	56(R5), R23
	BNE	R11, R19, not_equal
	BNE	R12, R20, not_equal
	BNE	R13, R21, not_equal
	BNE	R14, R23, not_equal
	BEQ	R6, equal
	ADDV	$64, R4
	ADDV	$64, R5
	MOVV	$64, R7
	BGE	R6, R7, generic64_loop
	JMP	tail