1 // Copyright 2025 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // Lowering of mul, div, and mod operations.
6 // Runs after prove, so that prove can analyze div and mod ops
7 // directly instead of these obscured expansions,
8 // but before decompose builtin, so that 32-bit systems
9 // can still lower 64-bit ops to 32-bit ones.
10 //
11 // See ../magic.go for a detailed description of these algorithms.
12 // See test/codegen/divmod.go for tests.
13
14 // Unsigned div and mod by power of 2 handled in generic.rules.
15 // (The equivalent unsigned right shift and mask are simple enough for prove to analyze.)
16
17 // Signed divide by power of 2.
18 // n / c = n >> log(c) if n >= 0
19 // = (n+c-1) >> log(c) if n < 0
20 // We conditionally add c-1 by adding n>>63>>(64-log(c)) (first shift signed, second shift unsigned).
21 (Div8 <t> n (Const8 [c])) && isPowerOfTwo(c) =>
22 (Rsh8x64
23 (Add8 <t> n (Rsh8Ux64 <t> (Rsh8x64 <t> n (Const64 <typ.UInt64> [ 7])) (Const64 <typ.UInt64> [int64( 8-log8(c))])))
24 (Const64 <typ.UInt64> [int64(log8(c))]))
25 (Div16 <t> n (Const16 [c])) && isPowerOfTwo(c) =>
26 (Rsh16x64
27 (Add16 <t> n (Rsh16Ux64 <t> (Rsh16x64 <t> n (Const64 <typ.UInt64> [15])) (Const64 <typ.UInt64> [int64(16-log16(c))])))
28 (Const64 <typ.UInt64> [int64(log16(c))]))
29 (Div32 <t> n (Const32 [c])) && isPowerOfTwo(c) =>
30 (Rsh32x64
31 (Add32 <t> n (Rsh32Ux64 <t> (Rsh32x64 <t> n (Const64 <typ.UInt64> [31])) (Const64 <typ.UInt64> [int64(32-log32(c))])))
32 (Const64 <typ.UInt64> [int64(log32(c))]))
33 (Div64 <t> n (Const64 [c])) && isPowerOfTwo(c) =>
34 (Rsh64x64
35 (Add64 <t> n (Rsh64Ux64 <t> (Rsh64x64 <t> n (Const64 <typ.UInt64> [63])) (Const64 <typ.UInt64> [int64(64-log64(c))])))
36 (Const64 <typ.UInt64> [int64(log64(c))]))
37
38 // Divide, not a power of 2, by strength reduction to double-width multiply and shift.
39 //
40 // umagicN(c) computes m, s such that N-bit unsigned divide
41 // x/c = (x*((1<<N)+m))>>N>>s = ((x*m)>>N+x)>>s
42 // where the multiplies are unsigned.
43 // Note that the returned m is always N+1 bits; umagicN omits the high 1<<N bit.
44 // The difficult part is implementing the 2N+1-bit multiply,
45 // since in general we have only a 2N-bit multiply available.
46 //
47 // smagic(c) computes m, s such that N-bit signed divide
48 // x/c = (x*m)>>N>>s - bool2int(x < 0).
49 // Here m is an unsigned N-bit number but x is signed.
50 //
51 // In general the division cases are:
52 //
53 // 1. A signed divide where 2N ≤ the register size.
54 // This form can use the signed algorithm directly.
55 //
56 // 2. A signed divide where m is even.
57 // This form can use a signed double-width multiply with m/2,
58 // shifting by s-1.
59 //
60 // 3. A signed divide where m is odd.
61 // This form can use x*m = ((x*(m-2^N))>>N+x) with a signed multiply.
62 // Since intN(m) is m-2^N < 0, the product and x have different signs,
63 // so there can be no overflow on the addition.
64 //
65 // 4. An unsigned divide where we know x < 1<<(N-1).
66 // This form can use the signed algorithm without the bool2int fixup,
67 // and since we know the product is only 2N-1 bits, we can use an
68 // unsigned multiply to obtain the high N bits directly, regardless
69 // of whether m is odd or even.
70 //
71 // 5. An unsigned divide where 2N+1 ≤ the register size.
72 // This form uses the unsigned algorithm with an explicit (1<<N)+m.
73 //
74 // 6. An unsigned divide where the N+1-bit m is even.
75 // This form can use an N-bit m/2 instead and shift one less bit.
76 //
77 // 7. An unsigned divide where m is odd but c is even.
78 // This form can shift once and then divide by (c/2) instead.
79 // The magic number m for c is ⌈2^k/c⌉, so we can use
80 // (m+1)/2 = ⌈2^k/(c/2)⌉ instead.
81 //
82 // 8. A general unsigned divide using an avg instruction.
83 // We noted above that (x*((1<<N)+m))>>N>>s = ((x*m)>>N+x)>>s.
84 // Let hi = (x*m)>>N, so we want (hi+x) >> s = avg(hi, x) >> (s-1).
85
86 // Case 1. Signed divides where 2N ≤ register size.
87 (Div8 <t> x (Const8 [c])) && smagicOK8(c) =>
88 (Sub8 <t>
89 (Rsh32x64 <t>
90 (Mul32 <typ.UInt32> (SignExt8to32 x) (Const32 <typ.UInt32> [int32(smagic8(c).m)]))
91 (Const64 <typ.UInt64> [8 + smagic8(c).s]))
92 (Rsh32x64 <t> (SignExt8to32 x) (Const64 <typ.UInt64> [31])))
93 (Div16 <t> x (Const16 [c])) && smagicOK16(c) =>
94 (Sub16 <t>
95 (Rsh32x64 <t>
96 (Mul32 <typ.UInt32> (SignExt16to32 x) (Const32 <typ.UInt32> [int32(smagic16(c).m)]))
97 (Const64 <typ.UInt64> [16 + smagic16(c).s]))
98 (Rsh32x64 <t> (SignExt16to32 x) (Const64 <typ.UInt64> [31])))
99 (Div32 <t> x (Const32 [c])) && smagicOK32(c) && config.RegSize == 8 =>
100 (Sub32 <t>
101 (Rsh64x64 <t>
102 (Mul64 <typ.UInt64> (SignExt32to64 x) (Const64 <typ.UInt64> [int64(smagic32(c).m)]))
103 (Const64 <typ.UInt64> [32 + smagic32(c).s]))
104 (Rsh64x64 <t> (SignExt32to64 x) (Const64 <typ.UInt64> [63])))
105
106 // Case 2. Signed divides where m is even.
107 (Div32 <t> x (Const32 [c])) && smagicOK32(c) && config.RegSize == 4 && smagic32(c).m&1 == 0 =>
108 (Sub32 <t>
109 (Rsh32x64 <t>
110 (Hmul32 <t> x (Const32 <typ.UInt32> [int32(smagic32(c).m/2)]))
111 (Const64 <typ.UInt64> [smagic32(c).s - 1]))
112 (Rsh32x64 <t> x (Const64 <typ.UInt64> [31])))
113 (Div64 <t> x (Const64 [c])) && smagicOK64(c) && smagic64(c).m&1 == 0 =>
114 (Sub64 <t>
115 (Rsh64x64 <t>
116 (Hmul64 <t> x (Const64 <typ.UInt64> [int64(smagic64(c).m/2)]))
117 (Const64 <typ.UInt64> [smagic64(c).s - 1]))
118 (Rsh64x64 <t> x (Const64 <typ.UInt64> [63])))
119
120 // Case 3. Signed divides where m is odd.
121 (Div32 <t> x (Const32 [c])) && smagicOK32(c) && config.RegSize == 4 && smagic32(c).m&1 != 0 =>
122 (Sub32 <t>
123 (Rsh32x64 <t>
124 (Add32 <t> x (Hmul32 <t> x (Const32 <typ.UInt32> [int32(smagic32(c).m)])))
125 (Const64 <typ.UInt64> [smagic32(c).s]))
126 (Rsh32x64 <t> x (Const64 <typ.UInt64> [31])))
127 (Div64 <t> x (Const64 [c])) && smagicOK64(c) && smagic64(c).m&1 != 0 =>
128 (Sub64 <t>
129 (Rsh64x64 <t>
130 (Add64 <t> x (Hmul64 <t> x (Const64 <typ.UInt64> [int64(smagic64(c).m)])))
131 (Const64 <typ.UInt64> [smagic64(c).s]))
132 (Rsh64x64 <t> x (Const64 <typ.UInt64> [63])))
133
134 // Case 4. Unsigned divide where x < 1<<(N-1).
135 // Skip Div8u since case 5's handling is just as good.
136 (Div16u <t> x (Const16 [c])) && t.IsSigned() && smagicOK16(c) =>
137 (Rsh32Ux64 <t>
138 (Mul32 <typ.UInt32> (SignExt16to32 x) (Const32 <typ.UInt32> [int32(smagic16(c).m)]))
139 (Const64 <typ.UInt64> [16 + smagic16(c).s]))
140 (Div32u <t> x (Const32 [c])) && t.IsSigned() && smagicOK32(c) && config.RegSize == 8 =>
141 (Rsh64Ux64 <t>
142 (Mul64 <typ.UInt64> (SignExt32to64 x) (Const64 <typ.UInt64> [int64(smagic32(c).m)]))
143 (Const64 <typ.UInt64> [32 + smagic32(c).s]))
144 (Div32u <t> x (Const32 [c])) && t.IsSigned() && smagicOK32(c) && config.RegSize == 4 =>
145 (Rsh32Ux64 <t>
146 (Hmul32u <typ.UInt32> x (Const32 <typ.UInt32> [int32(smagic32(c).m)]))
147 (Const64 <typ.UInt64> [smagic32(c).s]))
148 (Div64u <t> x (Const64 [c])) && t.IsSigned() && smagicOK64(c) =>
149 (Rsh64Ux64 <t>
150 (Hmul64u <typ.UInt64> x (Const64 <typ.UInt64> [int64(smagic64(c).m)]))
151 (Const64 <typ.UInt64> [smagic64(c).s]))
152
153 // Case 5. Unsigned divide where 2N+1 ≤ register size.
154 (Div8u <t> x (Const8 [c])) && umagicOK8(c) =>
155 (Trunc32to8 <t>
156 (Rsh32Ux64 <typ.UInt32>
157 (Mul32 <typ.UInt32> (ZeroExt8to32 x) (Const32 <typ.UInt32> [int32(1<<8 + umagic8(c).m)]))
158 (Const64 <typ.UInt64> [8 + umagic8(c).s])))
159 (Div16u <t> x (Const16 [c])) && umagicOK16(c) && config.RegSize == 8 =>
160 (Trunc64to16 <t>
161 (Rsh64Ux64 <typ.UInt64>
162 (Mul64 <typ.UInt64> (ZeroExt16to64 x) (Const64 <typ.UInt64> [int64(1<<16 + umagic16(c).m)]))
163 (Const64 <typ.UInt64> [16 + umagic16(c).s])))
164
165 // Case 6. Unsigned divide where m is even.
166 (Div16u <t> x (Const16 [c])) && umagicOK16(c) && umagic16(c).m&1 == 0 =>
167 (Trunc32to16 <t>
168 (Rsh32Ux64 <typ.UInt32>
169 (Mul32 <typ.UInt32> (ZeroExt16to32 x) (Const32 <typ.UInt32> [int32(1<<15 + umagic16(c).m/2)]))
170 (Const64 <typ.UInt64> [16 + umagic16(c).s - 1])))
171 (Div32u <t> x (Const32 [c])) && umagicOK32(c) && umagic32(c).m&1 == 0 && config.RegSize == 8 =>
172 (Trunc64to32 <t>
173 (Rsh64Ux64 <typ.UInt64>
174 (Mul64 <typ.UInt64> (ZeroExt32to64 x) (Const64 <typ.UInt64> [int64(1<<31 + umagic32(c).m/2)]))
175 (Const64 <typ.UInt64> [32 + umagic32(c).s - 1])))
176 (Div32u <t> x (Const32 [c])) && umagicOK32(c) && umagic32(c).m&1 == 0 && config.RegSize == 4 =>
177 (Rsh32Ux64 <t>
178 (Hmul32u <typ.UInt32> x (Const32 <typ.UInt32> [int32(1<<31 + umagic32(c).m/2)]))
179 (Const64 <typ.UInt64> [umagic32(c).s - 1]))
180 (Div64u <t> x (Const64 [c])) && umagicOK64(c) && umagic64(c).m&1 == 0 =>
181 (Rsh64Ux64 <t>
182 (Hmul64u <typ.UInt64> x (Const64 <typ.UInt64> [int64(1<<63 + umagic64(c).m/2)]))
183 (Const64 <typ.UInt64> [umagic64(c).s - 1]))
184
185 // Case 7. Unsigned divide where c is even.
186 (Div16u <t> x (Const16 [c])) && umagicOK16(c) && config.RegSize == 4 && c&1 == 0 =>
187 (Trunc32to16 <t>
188 (Rsh32Ux64 <typ.UInt32>
189 (Mul32 <typ.UInt32>
190 (Rsh32Ux64 <typ.UInt32> (ZeroExt16to32 x) (Const64 <typ.UInt64> [1]))
191 (Const32 <typ.UInt32> [int32(1<<15 + (umagic16(c).m+1)/2)]))
192 (Const64 <typ.UInt64> [16 + umagic16(c).s - 2])))
193 (Div32u <t> x (Const32 [c])) && umagicOK32(c) && config.RegSize == 8 && c&1 == 0 =>
194 (Trunc64to32 <t>
195 (Rsh64Ux64 <typ.UInt64>
196 (Mul64 <typ.UInt64>
197 (Rsh64Ux64 <typ.UInt64> (ZeroExt32to64 x) (Const64 <typ.UInt64> [1]))
198 (Const64 <typ.UInt64> [int64(1<<31 + (umagic32(c).m+1)/2)]))
199 (Const64 <typ.UInt64> [32 + umagic32(c).s - 2])))
200 (Div32u <t> x (Const32 [c])) && umagicOK32(c) && config.RegSize == 4 && c&1 == 0 =>
201 (Rsh32Ux64 <t>
202 (Hmul32u <typ.UInt32>
203 (Rsh32Ux64 <typ.UInt32> x (Const64 <typ.UInt64> [1]))
204 (Const32 <typ.UInt32> [int32(1<<31 + (umagic32(c).m+1)/2)]))
205 (Const64 <typ.UInt64> [umagic32(c).s - 2]))
206 (Div64u <t> x (Const64 [c])) && umagicOK64(c) && c&1 == 0 =>
207 (Rsh64Ux64 <t>
208 (Hmul64u <typ.UInt64>
209 (Rsh64Ux64 <typ.UInt64> x (Const64 <typ.UInt64> [1]))
210 (Const64 <typ.UInt64> [int64(1<<63 + (umagic64(c).m+1)/2)]))
211 (Const64 <typ.UInt64> [umagic64(c).s - 2]))
212
213 // Case 8. Unsigned divide using avg.
214 (Div16u <t> x (Const16 [c])) && umagicOK16(c) && config.RegSize == 4 =>
215 (Trunc32to16 <t>
216 (Rsh32Ux64 <typ.UInt32>
217 (Avg32u
218 (Lsh32x64 <typ.UInt32> (ZeroExt16to32 x) (Const64 <typ.UInt64> [16]))
219 (Mul32 <typ.UInt32> (ZeroExt16to32 x) (Const32 <typ.UInt32> [int32(umagic16(c).m)])))
220 (Const64 <typ.UInt64> [16 + umagic16(c).s - 1])))
221 (Div32u <t> x (Const32 [c])) && umagicOK32(c) && config.RegSize == 8 =>
222 (Trunc64to32 <t>
223 (Rsh64Ux64 <typ.UInt64>
224 (Avg64u
225 (Lsh64x64 <typ.UInt64> (ZeroExt32to64 x) (Const64 <typ.UInt64> [32]))
226 (Mul64 <typ.UInt64> (ZeroExt32to64 x) (Const64 <typ.UInt32> [int64(umagic32(c).m)])))
227 (Const64 <typ.UInt64> [32 + umagic32(c).s - 1])))
228 (Div32u <t> x (Const32 [c])) && umagicOK32(c) && config.RegSize == 4 =>
229 (Rsh32Ux64 <t>
230 (Avg32u x (Hmul32u <typ.UInt32> x (Const32 <typ.UInt32> [int32(umagic32(c).m)])))
231 (Const64 <typ.UInt64> [umagic32(c).s - 1]))
232 (Div64u <t> x (Const64 [c])) && umagicOK64(c) =>
233 (Rsh64Ux64 <t>
234 (Avg64u x (Hmul64u <typ.UInt64> x (Const64 <typ.UInt64> [int64(umagic64(c).m)])))
235 (Const64 <typ.UInt64> [umagic64(c).s - 1]))
236
View as plain text