Source file src/cmd/compile/internal/ssa/rewrite.go

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import (
     8  	"cmd/compile/internal/base"
     9  	"cmd/compile/internal/ir"
    10  	"cmd/compile/internal/logopt"
    11  	"cmd/compile/internal/reflectdata"
    12  	"cmd/compile/internal/rttype"
    13  	"cmd/compile/internal/typecheck"
    14  	"cmd/compile/internal/types"
    15  	"cmd/internal/obj"
    16  	"cmd/internal/obj/s390x"
    17  	"cmd/internal/objabi"
    18  	"cmd/internal/src"
    19  	"encoding/binary"
    20  	"fmt"
    21  	"internal/buildcfg"
    22  	"io"
    23  	"math"
    24  	"math/bits"
    25  	"os"
    26  	"path/filepath"
    27  	"strings"
    28  )
    29  
    30  type deadValueChoice bool
    31  
    32  const (
    33  	leaveDeadValues  deadValueChoice = false
    34  	removeDeadValues                 = true
    35  
    36  	repZeroThreshold = 1408 // size beyond which we use REP STOS for zeroing
    37  	repMoveThreshold = 1408 // size beyond which we use REP MOVS for copying
    38  )
    39  
    40  // deadcode indicates whether rewrite should try to remove any values that become dead.
    41  func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
    42  	// repeat rewrites until we find no more rewrites
    43  	pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
    44  	pendingLines.clear()
    45  	debug := f.pass.debug
    46  	if debug > 1 {
    47  		fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
    48  	}
    49  	// if the number of rewrite iterations reaches itersLimit we will
    50  	// at that point turn on cycle detection. Instead of a fixed limit,
    51  	// size the limit according to func size to allow for cases such
    52  	// as the one in issue #66773.
    53  	itersLimit := f.NumBlocks()
    54  	if itersLimit < 20 {
    55  		itersLimit = 20
    56  	}
    57  	var iters int
    58  	var states map[string]bool
    59  	for {
    60  		if debug > 1 {
    61  			fmt.Printf("%s: iter %d\n", f.pass.name, iters)
    62  		}
    63  		change := false
    64  		deadChange := false
    65  		for _, b := range f.Blocks {
    66  			var b0 *Block
    67  			if debug > 1 {
    68  				fmt.Printf("%s: start block\n", f.pass.name)
    69  				b0 = new(Block)
    70  				*b0 = *b
    71  				b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
    72  			}
    73  			for i, c := range b.ControlValues() {
    74  				for c.Op == OpCopy {
    75  					c = c.Args[0]
    76  					b.ReplaceControl(i, c)
    77  				}
    78  			}
    79  			if rb(b) {
    80  				change = true
    81  				if debug > 1 {
    82  					fmt.Printf("rewriting %s  ->  %s\n", b0.LongString(), b.LongString())
    83  				}
    84  			}
    85  			for j, v := range b.Values {
    86  				if debug > 1 {
    87  					fmt.Printf("%s: consider %v\n", f.pass.name, v.LongString())
    88  				}
    89  				var v0 *Value
    90  				if debug > 1 {
    91  					v0 = new(Value)
    92  					*v0 = *v
    93  					v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
    94  				}
    95  				if v.Uses == 0 && v.removeable() {
    96  					if v.Op != OpInvalid && deadcode == removeDeadValues {
    97  						// Reset any values that are now unused, so that we decrement
    98  						// the use count of all of its arguments.
    99  						// Not quite a deadcode pass, because it does not handle cycles.
   100  						// But it should help Uses==1 rules to fire.
   101  						v.reset(OpInvalid)
   102  						deadChange = true
   103  					}
   104  					// No point rewriting values which aren't used.
   105  					continue
   106  				}
   107  
   108  				vchange := phielimValue(v)
   109  				if vchange && debug > 1 {
   110  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   111  				}
   112  
   113  				// Eliminate copy inputs.
   114  				// If any copy input becomes unused, mark it
   115  				// as invalid and discard its argument. Repeat
   116  				// recursively on the discarded argument.
   117  				// This phase helps remove phantom "dead copy" uses
   118  				// of a value so that a x.Uses==1 rule condition
   119  				// fires reliably.
   120  				for i, a := range v.Args {
   121  					if a.Op != OpCopy {
   122  						continue
   123  					}
   124  					aa := copySource(a)
   125  					v.SetArg(i, aa)
   126  					// If a, a copy, has a line boundary indicator, attempt to find a new value
   127  					// to hold it.  The first candidate is the value that will replace a (aa),
   128  					// if it shares the same block and line and is eligible.
   129  					// The second option is v, which has a as an input.  Because aa is earlier in
   130  					// the data flow, it is the better choice.
   131  					if a.Pos.IsStmt() == src.PosIsStmt {
   132  						if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
   133  							aa.Pos = aa.Pos.WithIsStmt()
   134  						} else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
   135  							v.Pos = v.Pos.WithIsStmt()
   136  						} else {
   137  							// Record the lost line and look for a new home after all rewrites are complete.
   138  							// TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
   139  							// line to appear in more than one block, but only one block is stored, so if both end
   140  							// up here, then one will be lost.
   141  							pendingLines.set(a.Pos, int32(a.Block.ID))
   142  						}
   143  						a.Pos = a.Pos.WithNotStmt()
   144  					}
   145  					vchange = true
   146  					for a.Uses == 0 {
   147  						b := a.Args[0]
   148  						a.reset(OpInvalid)
   149  						a = b
   150  					}
   151  				}
   152  				if vchange && debug > 1 {
   153  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   154  				}
   155  
   156  				// apply rewrite function
   157  				if rv(v) {
   158  					vchange = true
   159  					// If value changed to a poor choice for a statement boundary, move the boundary
   160  					if v.Pos.IsStmt() == src.PosIsStmt {
   161  						if k := nextGoodStatementIndex(v, j, b); k != j {
   162  							v.Pos = v.Pos.WithNotStmt()
   163  							b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
   164  						}
   165  					}
   166  				}
   167  
   168  				change = change || vchange
   169  				if vchange && debug > 1 {
   170  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   171  				}
   172  			}
   173  		}
   174  		if !change && !deadChange {
   175  			break
   176  		}
   177  		iters++
   178  		if (iters > itersLimit || debug >= 2) && change {
   179  			// We've done a suspiciously large number of rewrites (or we're in debug mode).
   180  			// As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
   181  			// and the maximum value encountered during make.bash is 12.
   182  			// Start checking for cycles. (This is too expensive to do routinely.)
   183  			// Note: we avoid this path for deadChange-only iterations, to fix #51639.
   184  			if states == nil {
   185  				states = make(map[string]bool)
   186  			}
   187  			h := f.rewriteHash()
   188  			if _, ok := states[h]; ok {
   189  				// We've found a cycle.
   190  				// To diagnose it, set debug to 2 and start again,
   191  				// so that we'll print all rules applied until we complete another cycle.
   192  				// If debug is already >= 2, we've already done that, so it's time to crash.
   193  				if debug < 2 {
   194  					debug = 2
   195  					states = make(map[string]bool)
   196  				} else {
   197  					f.Fatalf("rewrite cycle detected")
   198  				}
   199  			}
   200  			states[h] = true
   201  		}
   202  	}
   203  	// remove clobbered values
   204  	for _, b := range f.Blocks {
   205  		j := 0
   206  		for i, v := range b.Values {
   207  			vl := v.Pos
   208  			if v.Op == OpInvalid {
   209  				if v.Pos.IsStmt() == src.PosIsStmt {
   210  					pendingLines.set(vl, int32(b.ID))
   211  				}
   212  				f.freeValue(v)
   213  				continue
   214  			}
   215  			if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) {
   216  				if pl, ok := pendingLines.get(vl); ok && pl == int32(b.ID) {
   217  					pendingLines.remove(vl)
   218  					v.Pos = v.Pos.WithIsStmt()
   219  				}
   220  			}
   221  			if i != j {
   222  				b.Values[j] = v
   223  			}
   224  			j++
   225  		}
   226  		if pl, ok := pendingLines.get(b.Pos); ok && pl == int32(b.ID) {
   227  			b.Pos = b.Pos.WithIsStmt()
   228  			pendingLines.remove(b.Pos)
   229  		}
   230  		b.truncateValues(j)
   231  	}
   232  }
   233  
   234  // Common functions called from rewriting rules
   235  
   236  func is64BitFloat(t *types.Type) bool {
   237  	return t.Size() == 8 && t.IsFloat()
   238  }
   239  
   240  func is32BitFloat(t *types.Type) bool {
   241  	return t.Size() == 4 && t.IsFloat()
   242  }
   243  
   244  func is64BitInt(t *types.Type) bool {
   245  	return t.Size() == 8 && t.IsInteger()
   246  }
   247  
   248  func is32BitInt(t *types.Type) bool {
   249  	return t.Size() == 4 && t.IsInteger()
   250  }
   251  
   252  func is16BitInt(t *types.Type) bool {
   253  	return t.Size() == 2 && t.IsInteger()
   254  }
   255  
   256  func is8BitInt(t *types.Type) bool {
   257  	return t.Size() == 1 && t.IsInteger()
   258  }
   259  
   260  func isPtr(t *types.Type) bool {
   261  	return t.IsPtrShaped()
   262  }
   263  
   264  func copyCompatibleType(t1, t2 *types.Type) bool {
   265  	if t1.Size() != t2.Size() {
   266  		return false
   267  	}
   268  	if t1.IsInteger() {
   269  		return t2.IsInteger()
   270  	}
   271  	if isPtr(t1) {
   272  		return isPtr(t2)
   273  	}
   274  	return t1.Compare(t2) == types.CMPeq
   275  }
   276  
   277  // mergeSym merges two symbolic offsets. There is no real merging of
   278  // offsets, we just pick the non-nil one.
   279  func mergeSym(x, y Sym) Sym {
   280  	if x == nil {
   281  		return y
   282  	}
   283  	if y == nil {
   284  		return x
   285  	}
   286  	panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
   287  }
   288  
   289  func canMergeSym(x, y Sym) bool {
   290  	return x == nil || y == nil
   291  }
   292  
   293  // canMergeLoadClobber reports whether the load can be merged into target without
   294  // invalidating the schedule.
   295  // It also checks that the other non-load argument x is something we
   296  // are ok with clobbering.
   297  func canMergeLoadClobber(target, load, x *Value) bool {
   298  	// The register containing x is going to get clobbered.
   299  	// Don't merge if we still need the value of x.
   300  	// We don't have liveness information here, but we can
   301  	// approximate x dying with:
   302  	//  1) target is x's only use.
   303  	//  2) target is not in a deeper loop than x.
   304  	switch {
   305  	case x.Uses == 2 && x.Op == OpPhi && len(x.Args) == 2 && (x.Args[0] == target || x.Args[1] == target) && target.Uses == 1:
   306  		// This is a simple detector to determine that x is probably
   307  		// not live after target. (It does not need to be perfect,
   308  		// regalloc will issue a reg-reg move to save it if we are wrong.)
   309  		// We have:
   310  		//   x = Phi(?, target)
   311  		//   target = Op(load, x)
   312  		// Because target has only one use as a Phi argument, we can schedule it
   313  		// very late. Hopefully, later than the other use of x. (The other use died
   314  		// between x and target, or exists on another branch entirely).
   315  	case x.Uses > 1:
   316  		return false
   317  	}
   318  	loopnest := x.Block.Func.loopnest()
   319  	if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
   320  		return false
   321  	}
   322  	return canMergeLoad(target, load)
   323  }
   324  
   325  // canMergeLoad reports whether the load can be merged into target without
   326  // invalidating the schedule.
   327  func canMergeLoad(target, load *Value) bool {
   328  	if target.Block.ID != load.Block.ID {
   329  		// If the load is in a different block do not merge it.
   330  		return false
   331  	}
   332  
   333  	// We can't merge the load into the target if the load
   334  	// has more than one use.
   335  	if load.Uses != 1 {
   336  		return false
   337  	}
   338  
   339  	mem := load.MemoryArg()
   340  
   341  	// We need the load's memory arg to still be alive at target. That
   342  	// can't be the case if one of target's args depends on a memory
   343  	// state that is a successor of load's memory arg.
   344  	//
   345  	// For example, it would be invalid to merge load into target in
   346  	// the following situation because newmem has killed oldmem
   347  	// before target is reached:
   348  	//     load = read ... oldmem
   349  	//   newmem = write ... oldmem
   350  	//     arg0 = read ... newmem
   351  	//   target = add arg0 load
   352  	//
   353  	// If the argument comes from a different block then we can exclude
   354  	// it immediately because it must dominate load (which is in the
   355  	// same block as target).
   356  	var args []*Value
   357  	for _, a := range target.Args {
   358  		if a != load && a.Block.ID == target.Block.ID {
   359  			args = append(args, a)
   360  		}
   361  	}
   362  
   363  	// memPreds contains memory states known to be predecessors of load's
   364  	// memory state. It is lazily initialized.
   365  	var memPreds map[*Value]bool
   366  	for i := 0; len(args) > 0; i++ {
   367  		const limit = 100
   368  		if i >= limit {
   369  			// Give up if we have done a lot of iterations.
   370  			return false
   371  		}
   372  		v := args[len(args)-1]
   373  		args = args[:len(args)-1]
   374  		if target.Block.ID != v.Block.ID {
   375  			// Since target and load are in the same block
   376  			// we can stop searching when we leave the block.
   377  			continue
   378  		}
   379  		if v.Op == OpPhi {
   380  			// A Phi implies we have reached the top of the block.
   381  			// The memory phi, if it exists, is always
   382  			// the first logical store in the block.
   383  			continue
   384  		}
   385  		if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
   386  			// We could handle this situation however it is likely
   387  			// to be very rare.
   388  			return false
   389  		}
   390  		if v.Op.SymEffect()&SymAddr != 0 {
   391  			// This case prevents an operation that calculates the
   392  			// address of a local variable from being forced to schedule
   393  			// before its corresponding VarDef.
   394  			// See issue 28445.
   395  			//   v1 = LOAD ...
   396  			//   v2 = VARDEF
   397  			//   v3 = LEAQ
   398  			//   v4 = CMPQ v1 v3
   399  			// We don't want to combine the CMPQ with the load, because
   400  			// that would force the CMPQ to schedule before the VARDEF, which
   401  			// in turn requires the LEAQ to schedule before the VARDEF.
   402  			return false
   403  		}
   404  		if v.Type.IsMemory() {
   405  			if memPreds == nil {
   406  				// Initialise a map containing memory states
   407  				// known to be predecessors of load's memory
   408  				// state.
   409  				memPreds = make(map[*Value]bool)
   410  				m := mem
   411  				const limit = 50
   412  				for i := 0; i < limit; i++ {
   413  					if m.Op == OpPhi {
   414  						// The memory phi, if it exists, is always
   415  						// the first logical store in the block.
   416  						break
   417  					}
   418  					if m.Block.ID != target.Block.ID {
   419  						break
   420  					}
   421  					if !m.Type.IsMemory() {
   422  						break
   423  					}
   424  					memPreds[m] = true
   425  					if len(m.Args) == 0 {
   426  						break
   427  					}
   428  					m = m.MemoryArg()
   429  				}
   430  			}
   431  
   432  			// We can merge if v is a predecessor of mem.
   433  			//
   434  			// For example, we can merge load into target in the
   435  			// following scenario:
   436  			//      x = read ... v
   437  			//    mem = write ... v
   438  			//   load = read ... mem
   439  			// target = add x load
   440  			if memPreds[v] {
   441  				continue
   442  			}
   443  			return false
   444  		}
   445  		if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
   446  			// If v takes mem as an input then we know mem
   447  			// is valid at this point.
   448  			continue
   449  		}
   450  		for _, a := range v.Args {
   451  			if target.Block.ID == a.Block.ID {
   452  				args = append(args, a)
   453  			}
   454  		}
   455  	}
   456  
   457  	return true
   458  }
   459  
   460  // isSameCall reports whether aux is the same as the given named symbol.
   461  func isSameCall(aux Aux, name string) bool {
   462  	fn := aux.(*AuxCall).Fn
   463  	return fn != nil && fn.String() == name
   464  }
   465  
   466  func isMalloc(aux Aux) bool {
   467  	return isNewObject(aux) || isSpecializedMalloc(aux)
   468  }
   469  
   470  func isNewObject(aux Aux) bool {
   471  	fn := aux.(*AuxCall).Fn
   472  	return fn != nil && fn.String() == "runtime.newobject"
   473  }
   474  
   475  func isSpecializedMalloc(aux Aux) bool {
   476  	fn := aux.(*AuxCall).Fn
   477  	if fn == nil {
   478  		return false
   479  	}
   480  	name := fn.String()
   481  	return strings.HasPrefix(name, "runtime.mallocgcSmallNoScanSC") ||
   482  		strings.HasPrefix(name, "runtime.mallocgcSmallScanNoHeaderSC") ||
   483  		strings.HasPrefix(name, "runtime.mallocgcTinySC")
   484  }
   485  
   486  // canLoadUnaligned reports if the architecture supports unaligned load operations.
   487  func canLoadUnaligned(c *Config) bool {
   488  	return c.ctxt.Arch.Alignment == 1
   489  }
   490  
   491  // nlzX returns the number of leading zeros.
   492  func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
   493  func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
   494  func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
   495  func nlz8(x int8) int   { return bits.LeadingZeros8(uint8(x)) }
   496  
   497  // ntzX returns the number of trailing zeros.
   498  func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
   499  func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
   500  func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
   501  func ntz8(x int8) int   { return bits.TrailingZeros8(uint8(x)) }
   502  
   503  // oneBit reports whether x contains exactly one set bit.
   504  func oneBit[T int8 | int16 | int32 | int64](x T) bool {
   505  	return x&(x-1) == 0 && x != 0
   506  }
   507  
   508  // nto returns the number of trailing ones.
   509  func nto(x int64) int64 {
   510  	return int64(ntz64(^x))
   511  }
   512  
   513  // logX returns logarithm of n base 2.
   514  // n must be a positive power of 2 (isPowerOfTwoX returns true).
   515  func log8(n int8) int64   { return log8u(uint8(n)) }
   516  func log16(n int16) int64 { return log16u(uint16(n)) }
   517  func log32(n int32) int64 { return log32u(uint32(n)) }
   518  func log64(n int64) int64 { return log64u(uint64(n)) }
   519  
   520  // logXu returns the logarithm of n base 2.
   521  // n must be a power of 2 (isPowerOfTwo returns true)
   522  func log8u(n uint8) int64   { return int64(bits.Len8(n)) - 1 }
   523  func log16u(n uint16) int64 { return int64(bits.Len16(n)) - 1 }
   524  func log32u(n uint32) int64 { return int64(bits.Len32(n)) - 1 }
   525  func log64u(n uint64) int64 { return int64(bits.Len64(n)) - 1 }
   526  
   527  // isPowerOfTwoX functions report whether n is a power of 2.
   528  func isPowerOfTwo[T int8 | int16 | int32 | int64 | uint8 | uint16 | uint32 | uint64](n T) bool {
   529  	return n > 0 && n&(n-1) == 0
   530  }
   531  
   532  // is32Bit reports whether n can be represented as a signed 32 bit integer.
   533  func is32Bit(n int64) bool {
   534  	return n == int64(int32(n))
   535  }
   536  
   537  // is16Bit reports whether n can be represented as a signed 16 bit integer.
   538  func is16Bit(n int64) bool {
   539  	return n == int64(int16(n))
   540  }
   541  
   542  // is8Bit reports whether n can be represented as a signed 8 bit integer.
   543  func is8Bit(n int64) bool {
   544  	return n == int64(int8(n))
   545  }
   546  
   547  // isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
   548  func isU8Bit(n int64) bool {
   549  	return n == int64(uint8(n))
   550  }
   551  
   552  // is12Bit reports whether n can be represented as a signed 12 bit integer.
   553  func is12Bit(n int64) bool {
   554  	return -(1<<11) <= n && n < (1<<11)
   555  }
   556  
   557  // isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
   558  func isU12Bit(n int64) bool {
   559  	return 0 <= n && n < (1<<12)
   560  }
   561  
   562  // isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
   563  func isU16Bit(n int64) bool {
   564  	return n == int64(uint16(n))
   565  }
   566  
   567  // isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
   568  func isU32Bit(n int64) bool {
   569  	return n == int64(uint32(n))
   570  }
   571  
   572  // is20Bit reports whether n can be represented as a signed 20 bit integer.
   573  func is20Bit(n int64) bool {
   574  	return -(1<<19) <= n && n < (1<<19)
   575  }
   576  
   577  // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
   578  func b2i(b bool) int64 {
   579  	if b {
   580  		return 1
   581  	}
   582  	return 0
   583  }
   584  
   585  // b2i32 translates a boolean value to 0 or 1.
   586  func b2i32(b bool) int32 {
   587  	if b {
   588  		return 1
   589  	}
   590  	return 0
   591  }
   592  
   593  func canMulStrengthReduce(config *Config, x int64) bool {
   594  	_, ok := config.mulRecipes[x]
   595  	return ok
   596  }
   597  func canMulStrengthReduce32(config *Config, x int32) bool {
   598  	_, ok := config.mulRecipes[int64(x)]
   599  	return ok
   600  }
   601  
   602  // mulStrengthReduce returns v*x evaluated at the location
   603  // (block and source position) of m.
   604  // canMulStrengthReduce must have returned true.
   605  func mulStrengthReduce(m *Value, v *Value, x int64) *Value {
   606  	return v.Block.Func.Config.mulRecipes[x].build(m, v)
   607  }
   608  
   609  // mulStrengthReduce32 returns v*x evaluated at the location
   610  // (block and source position) of m.
   611  // canMulStrengthReduce32 must have returned true.
   612  // The upper 32 bits of m might be set to junk.
   613  func mulStrengthReduce32(m *Value, v *Value, x int32) *Value {
   614  	return v.Block.Func.Config.mulRecipes[int64(x)].build(m, v)
   615  }
   616  
   617  // shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
   618  // A shift is bounded if it is shifting by less than the width of the shifted value.
   619  func shiftIsBounded(v *Value) bool {
   620  	return v.AuxInt != 0
   621  }
   622  
   623  // canonLessThan returns whether x is "ordered" less than y, for purposes of normalizing
   624  // generated code as much as possible.
   625  func canonLessThan(x, y *Value) bool {
   626  	if x.Op != y.Op {
   627  		return x.Op < y.Op
   628  	}
   629  	if !x.Pos.SameFileAndLine(y.Pos) {
   630  		return x.Pos.Before(y.Pos)
   631  	}
   632  	return x.ID < y.ID
   633  }
   634  
   635  // truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
   636  // of the mantissa. It will panic if the truncation results in lost information.
   637  func truncate64Fto32F(f float64) float32 {
   638  	if !isExactFloat32(f) {
   639  		panic("truncate64Fto32F: truncation is not exact")
   640  	}
   641  	if !math.IsNaN(f) {
   642  		return float32(f)
   643  	}
   644  	// NaN bit patterns aren't necessarily preserved across conversion
   645  	// instructions so we need to do the conversion manually.
   646  	b := math.Float64bits(f)
   647  	m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
   648  	//          | sign                  | exponent   | mantissa       |
   649  	r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
   650  	return math.Float32frombits(r)
   651  }
   652  
   653  // DivisionNeedsFixUp reports whether the division needs fix-up code.
   654  func DivisionNeedsFixUp(v *Value) bool {
   655  	return v.AuxInt == 0
   656  }
   657  
   658  // auxTo32F decodes a float32 from the AuxInt value provided.
   659  func auxTo32F(i int64) float32 {
   660  	return truncate64Fto32F(math.Float64frombits(uint64(i)))
   661  }
   662  
   663  func auxIntToBool(i int64) bool {
   664  	if i == 0 {
   665  		return false
   666  	}
   667  	return true
   668  }
   669  func auxIntToInt8(i int64) int8 {
   670  	return int8(i)
   671  }
   672  func auxIntToInt16(i int64) int16 {
   673  	return int16(i)
   674  }
   675  func auxIntToInt32(i int64) int32 {
   676  	return int32(i)
   677  }
   678  func auxIntToInt64(i int64) int64 {
   679  	return i
   680  }
   681  func auxIntToUint8(i int64) uint8 {
   682  	return uint8(i)
   683  }
   684  func auxIntToUint64(i int64) uint64 {
   685  	return uint64(i)
   686  }
   687  func auxIntToFloat32(i int64) float32 {
   688  	return float32(math.Float64frombits(uint64(i)))
   689  }
   690  func auxIntToFloat64(i int64) float64 {
   691  	return math.Float64frombits(uint64(i))
   692  }
   693  func auxIntToValAndOff(i int64) ValAndOff {
   694  	return ValAndOff(i)
   695  }
   696  func auxIntToArm64BitField(i int64) arm64BitField {
   697  	return arm64BitField(i)
   698  }
   699  func auxIntToArm64ConditionalParams(i int64) arm64ConditionalParams {
   700  	var params arm64ConditionalParams
   701  	params.cond = Op(i & 0xffff)
   702  	i >>= 16
   703  	params.nzcv = uint8(i & 0x0f)
   704  	i >>= 4
   705  	params.constValue = uint8(i & 0x1f)
   706  	i >>= 5
   707  	params.ind = i == 1
   708  	return params
   709  }
   710  func auxIntToFlagConstant(x int64) flagConstant {
   711  	return flagConstant(x)
   712  }
   713  
   714  func auxIntToOp(cc int64) Op {
   715  	return Op(cc)
   716  }
   717  
   718  func boolToAuxInt(b bool) int64 {
   719  	if b {
   720  		return 1
   721  	}
   722  	return 0
   723  }
   724  func int8ToAuxInt(i int8) int64 {
   725  	return int64(i)
   726  }
   727  func int16ToAuxInt(i int16) int64 {
   728  	return int64(i)
   729  }
   730  func int32ToAuxInt(i int32) int64 {
   731  	return int64(i)
   732  }
   733  func int64ToAuxInt(i int64) int64 {
   734  	return i
   735  }
   736  func uint8ToAuxInt(i uint8) int64 {
   737  	return int64(int8(i))
   738  }
   739  func uint64ToAuxInt(i uint64) int64 {
   740  	return int64(i)
   741  }
   742  func float32ToAuxInt(f float32) int64 {
   743  	return int64(math.Float64bits(float64(f)))
   744  }
   745  func float64ToAuxInt(f float64) int64 {
   746  	return int64(math.Float64bits(f))
   747  }
   748  func valAndOffToAuxInt(v ValAndOff) int64 {
   749  	return int64(v)
   750  }
   751  func arm64BitFieldToAuxInt(v arm64BitField) int64 {
   752  	return int64(v)
   753  }
   754  func arm64ConditionalParamsToAuxInt(v arm64ConditionalParams) int64 {
   755  	if v.cond&^0xffff != 0 {
   756  		panic("condition value exceeds 16 bits")
   757  	}
   758  
   759  	var i int64
   760  	if v.ind {
   761  		i = 1 << 25
   762  	}
   763  	i |= int64(v.constValue) << 20
   764  	i |= int64(v.nzcv) << 16
   765  	i |= int64(v.cond)
   766  	return i
   767  }
   768  
   769  func flagConstantToAuxInt(x flagConstant) int64 {
   770  	return int64(x)
   771  }
   772  
   773  func opToAuxInt(o Op) int64 {
   774  	return int64(o)
   775  }
   776  
   777  // Aux is an interface to hold miscellaneous data in Blocks and Values.
   778  type Aux interface {
   779  	CanBeAnSSAAux()
   780  }
   781  
   782  // for now only used to mark moves that need to avoid clobbering flags
   783  type auxMark bool
   784  
   785  func (auxMark) CanBeAnSSAAux() {}
   786  
   787  var AuxMark auxMark
   788  
   789  // stringAux wraps string values for use in Aux.
   790  type stringAux string
   791  
   792  func (stringAux) CanBeAnSSAAux() {}
   793  
   794  func auxToString(i Aux) string {
   795  	return string(i.(stringAux))
   796  }
   797  func auxToSym(i Aux) Sym {
   798  	// TODO: kind of a hack - allows nil interface through
   799  	s, _ := i.(Sym)
   800  	return s
   801  }
   802  func auxToType(i Aux) *types.Type {
   803  	return i.(*types.Type)
   804  }
   805  func auxToCall(i Aux) *AuxCall {
   806  	return i.(*AuxCall)
   807  }
   808  func auxToS390xCCMask(i Aux) s390x.CCMask {
   809  	return i.(s390x.CCMask)
   810  }
   811  func auxToS390xRotateParams(i Aux) s390x.RotateParams {
   812  	return i.(s390x.RotateParams)
   813  }
   814  
   815  func StringToAux(s string) Aux {
   816  	return stringAux(s)
   817  }
   818  func symToAux(s Sym) Aux {
   819  	return s
   820  }
   821  func callToAux(s *AuxCall) Aux {
   822  	return s
   823  }
   824  func typeToAux(t *types.Type) Aux {
   825  	return t
   826  }
   827  func s390xCCMaskToAux(c s390x.CCMask) Aux {
   828  	return c
   829  }
   830  func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
   831  	return r
   832  }
   833  
   834  // uaddOvf reports whether unsigned a+b would overflow.
   835  func uaddOvf(a, b int64) bool {
   836  	return uint64(a)+uint64(b) < uint64(a)
   837  }
   838  
   839  func devirtLECall(v *Value, sym *obj.LSym) *Value {
   840  	v.Op = OpStaticLECall
   841  	auxcall := v.Aux.(*AuxCall)
   842  	auxcall.Fn = sym
   843  	// Remove first arg
   844  	v.Args[0].Uses--
   845  	copy(v.Args[0:], v.Args[1:])
   846  	v.Args[len(v.Args)-1] = nil // aid GC
   847  	v.Args = v.Args[:len(v.Args)-1]
   848  	if f := v.Block.Func; f.pass.debug > 0 {
   849  		f.Warnl(v.Pos, "de-virtualizing call")
   850  	}
   851  	return v
   852  }
   853  
   854  // isSamePtr reports whether p1 and p2 point to the same address.
   855  func isSamePtr(p1, p2 *Value) bool {
   856  	if p1 == p2 {
   857  		return true
   858  	}
   859  	if p1.Op != p2.Op {
   860  		for p1.Op == OpOffPtr && p1.AuxInt == 0 {
   861  			p1 = p1.Args[0]
   862  		}
   863  		for p2.Op == OpOffPtr && p2.AuxInt == 0 {
   864  			p2 = p2.Args[0]
   865  		}
   866  		if p1 == p2 {
   867  			return true
   868  		}
   869  		if p1.Op != p2.Op {
   870  			return false
   871  		}
   872  	}
   873  	switch p1.Op {
   874  	case OpOffPtr:
   875  		return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
   876  	case OpAddr, OpLocalAddr:
   877  		return p1.Aux == p2.Aux
   878  	case OpAddPtr:
   879  		return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
   880  	}
   881  	return false
   882  }
   883  
   884  func isStackPtr(v *Value) bool {
   885  	for v.Op == OpOffPtr || v.Op == OpAddPtr {
   886  		v = v.Args[0]
   887  	}
   888  	return v.Op == OpSP || v.Op == OpLocalAddr
   889  }
   890  
   891  // disjoint reports whether the memory region specified by [p1:p1+n1)
   892  // does not overlap with [p2:p2+n2).
   893  // A return value of false does not imply the regions overlap.
   894  func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
   895  	if n1 == 0 || n2 == 0 {
   896  		return true
   897  	}
   898  	if p1 == p2 {
   899  		return false
   900  	}
   901  	baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
   902  		base, offset = ptr, 0
   903  		for base.Op == OpOffPtr {
   904  			offset += base.AuxInt
   905  			base = base.Args[0]
   906  		}
   907  		if opcodeTable[base.Op].nilCheck {
   908  			base = base.Args[0]
   909  		}
   910  		return base, offset
   911  	}
   912  
   913  	// Run types-based analysis
   914  	if disjointTypes(p1.Type, p2.Type) {
   915  		return true
   916  	}
   917  
   918  	p1, off1 := baseAndOffset(p1)
   919  	p2, off2 := baseAndOffset(p2)
   920  	if isSamePtr(p1, p2) {
   921  		return !overlap(off1, n1, off2, n2)
   922  	}
   923  	// p1 and p2 are not the same, so if they are both OpAddrs then
   924  	// they point to different variables.
   925  	// If one pointer is on the stack and the other is an argument
   926  	// then they can't overlap.
   927  	switch p1.Op {
   928  	case OpAddr, OpLocalAddr:
   929  		if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
   930  			return true
   931  		}
   932  		return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
   933  	case OpArg, OpArgIntReg:
   934  		if p2.Op == OpSP || p2.Op == OpLocalAddr {
   935  			return true
   936  		}
   937  	case OpSP:
   938  		return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
   939  	}
   940  	return false
   941  }
   942  
   943  // disjointTypes reports whether a memory region pointed to by a pointer of type
   944  // t1 does not overlap with a memory region pointed to by a pointer of type t2 --
   945  // based on type aliasing rules.
   946  func disjointTypes(t1 *types.Type, t2 *types.Type) bool {
   947  	// Unsafe pointer can alias with anything.
   948  	if t1.IsUnsafePtr() || t2.IsUnsafePtr() {
   949  		return false
   950  	}
   951  
   952  	if !t1.IsPtr() || !t2.IsPtr() {
   953  		// Treat non-pointer types (such as TFUNC, TMAP, uintptr) conservatively.
   954  		return false
   955  	}
   956  
   957  	t1 = t1.Elem()
   958  	t2 = t2.Elem()
   959  
   960  	// Not-in-heap types are not supported -- they are rare and non-important; also,
   961  	// type.HasPointers check doesn't work for them correctly.
   962  	if t1.NotInHeap() || t2.NotInHeap() {
   963  		return false
   964  	}
   965  
   966  	isPtrShaped := func(t *types.Type) bool { return int(t.Size()) == types.PtrSize && t.HasPointers() }
   967  
   968  	// Pointers and non-pointers are disjoint (https://pkg.go.dev/unsafe#Pointer).
   969  	if (isPtrShaped(t1) && !t2.HasPointers()) ||
   970  		(isPtrShaped(t2) && !t1.HasPointers()) {
   971  		return true
   972  	}
   973  
   974  	return false
   975  }
   976  
   977  // moveSize returns the number of bytes an aligned MOV instruction moves.
   978  func moveSize(align int64, c *Config) int64 {
   979  	switch {
   980  	case align%8 == 0 && c.PtrSize == 8:
   981  		return 8
   982  	case align%4 == 0:
   983  		return 4
   984  	case align%2 == 0:
   985  		return 2
   986  	}
   987  	return 1
   988  }
   989  
   990  // mergePoint finds a block among a's blocks which dominates b and is itself
   991  // dominated by all of a's blocks. Returns nil if it can't find one.
   992  // Might return nil even if one does exist.
   993  func mergePoint(b *Block, a ...*Value) *Block {
   994  	// Walk backward from b looking for one of the a's blocks.
   995  
   996  	// Max distance
   997  	d := 100
   998  
   999  	for d > 0 {
  1000  		for _, x := range a {
  1001  			if b == x.Block {
  1002  				goto found
  1003  			}
  1004  		}
  1005  		if len(b.Preds) > 1 {
  1006  			// Don't know which way to go back. Abort.
  1007  			return nil
  1008  		}
  1009  		b = b.Preds[0].b
  1010  		d--
  1011  	}
  1012  	return nil // too far away
  1013  found:
  1014  	// At this point, r is the first value in a that we find by walking backwards.
  1015  	// if we return anything, r will be it.
  1016  	r := b
  1017  
  1018  	// Keep going, counting the other a's that we find. They must all dominate r.
  1019  	na := 0
  1020  	for d > 0 {
  1021  		for _, x := range a {
  1022  			if b == x.Block {
  1023  				na++
  1024  			}
  1025  		}
  1026  		if na == len(a) {
  1027  			// Found all of a in a backwards walk. We can return r.
  1028  			return r
  1029  		}
  1030  		if len(b.Preds) > 1 {
  1031  			return nil
  1032  		}
  1033  		b = b.Preds[0].b
  1034  		d--
  1035  
  1036  	}
  1037  	return nil // too far away
  1038  }
  1039  
  1040  // clobber invalidates values. Returns true.
  1041  // clobber is used by rewrite rules to:
  1042  //
  1043  //	A) make sure the values are really dead and never used again.
  1044  //	B) decrement use counts of the values' args.
  1045  func clobber(vv ...*Value) bool {
  1046  	for _, v := range vv {
  1047  		v.reset(OpInvalid)
  1048  		// Note: leave v.Block intact.  The Block field is used after clobber.
  1049  	}
  1050  	return true
  1051  }
  1052  
  1053  // resetCopy resets v to be a copy of arg.
  1054  // Always returns true.
  1055  func resetCopy(v *Value, arg *Value) bool {
  1056  	v.reset(OpCopy)
  1057  	v.AddArg(arg)
  1058  	return true
  1059  }
  1060  
  1061  // clobberIfDead resets v when use count is 1. Returns true.
  1062  // clobberIfDead is used by rewrite rules to decrement
  1063  // use counts of v's args when v is dead and never used.
  1064  func clobberIfDead(v *Value) bool {
  1065  	if v.Uses == 1 {
  1066  		v.reset(OpInvalid)
  1067  	}
  1068  	// Note: leave v.Block intact.  The Block field is used after clobberIfDead.
  1069  	return true
  1070  }
  1071  
  1072  // noteRule is an easy way to track if a rule is matched when writing
  1073  // new ones.  Make the rule of interest also conditional on
  1074  //
  1075  //	noteRule("note to self: rule of interest matched")
  1076  //
  1077  // and that message will print when the rule matches.
  1078  func noteRule(s string) bool {
  1079  	fmt.Println(s)
  1080  	return true
  1081  }
  1082  
  1083  // countRule increments Func.ruleMatches[key].
  1084  // If Func.ruleMatches is non-nil at the end
  1085  // of compilation, it will be printed to stdout.
  1086  // This is intended to make it easier to find which functions
  1087  // which contain lots of rules matches when developing new rules.
  1088  func countRule(v *Value, key string) bool {
  1089  	f := v.Block.Func
  1090  	if f.ruleMatches == nil {
  1091  		f.ruleMatches = make(map[string]int)
  1092  	}
  1093  	f.ruleMatches[key]++
  1094  	return true
  1095  }
  1096  
  1097  // warnRule generates compiler debug output with string s when
  1098  // v is not in autogenerated code, cond is true and the rule has fired.
  1099  func warnRule(cond bool, v *Value, s string) bool {
  1100  	if pos := v.Pos; pos.Line() > 1 && cond {
  1101  		v.Block.Func.Warnl(pos, s)
  1102  	}
  1103  	return true
  1104  }
  1105  
  1106  // for a pseudo-op like (LessThan x), extract x.
  1107  func flagArg(v *Value) *Value {
  1108  	if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
  1109  		return nil
  1110  	}
  1111  	return v.Args[0]
  1112  }
  1113  
  1114  // amd64CapAVXShift caps an AMD64 AVX vector shift amount c so that over-shifts
  1115  // always result in 0.
  1116  //
  1117  // These instructions have room for an 8-bit immediate and any value larger than
  1118  // the element width will result in 0 or -1 (for an arithmetic right shift).
  1119  // Thus, we simply cap this at 255.
  1120  func amd64CapAVXShift(auxInt int64) uint8 {
  1121  	u := auxIntToUint64(auxInt)
  1122  	if u > 255 {
  1123  		return 255
  1124  	}
  1125  	return uint8(u)
  1126  }
  1127  
  1128  // arm64Negate finds the complement to an ARM64 condition code,
  1129  // for example !Equal -> NotEqual or !LessThan -> GreaterEqual
  1130  //
  1131  // For floating point, it's more subtle because NaN is unordered. We do
  1132  // !LessThanF -> NotLessThanF, the latter takes care of NaNs.
  1133  func arm64Negate(op Op) Op {
  1134  	switch op {
  1135  	case OpARM64LessThan:
  1136  		return OpARM64GreaterEqual
  1137  	case OpARM64LessThanU:
  1138  		return OpARM64GreaterEqualU
  1139  	case OpARM64GreaterThan:
  1140  		return OpARM64LessEqual
  1141  	case OpARM64GreaterThanU:
  1142  		return OpARM64LessEqualU
  1143  	case OpARM64LessEqual:
  1144  		return OpARM64GreaterThan
  1145  	case OpARM64LessEqualU:
  1146  		return OpARM64GreaterThanU
  1147  	case OpARM64GreaterEqual:
  1148  		return OpARM64LessThan
  1149  	case OpARM64GreaterEqualU:
  1150  		return OpARM64LessThanU
  1151  	case OpARM64Equal:
  1152  		return OpARM64NotEqual
  1153  	case OpARM64NotEqual:
  1154  		return OpARM64Equal
  1155  	case OpARM64LessThanF:
  1156  		return OpARM64NotLessThanF
  1157  	case OpARM64NotLessThanF:
  1158  		return OpARM64LessThanF
  1159  	case OpARM64LessEqualF:
  1160  		return OpARM64NotLessEqualF
  1161  	case OpARM64NotLessEqualF:
  1162  		return OpARM64LessEqualF
  1163  	case OpARM64GreaterThanF:
  1164  		return OpARM64NotGreaterThanF
  1165  	case OpARM64NotGreaterThanF:
  1166  		return OpARM64GreaterThanF
  1167  	case OpARM64GreaterEqualF:
  1168  		return OpARM64NotGreaterEqualF
  1169  	case OpARM64NotGreaterEqualF:
  1170  		return OpARM64GreaterEqualF
  1171  	default:
  1172  		panic("unreachable")
  1173  	}
  1174  }
  1175  
  1176  // arm64Invert evaluates (InvertFlags op), which
  1177  // is the same as altering the condition codes such
  1178  // that the same result would be produced if the arguments
  1179  // to the flag-generating instruction were reversed, e.g.
  1180  // (InvertFlags (CMP x y)) -> (CMP y x)
  1181  func arm64Invert(op Op) Op {
  1182  	switch op {
  1183  	case OpARM64LessThan:
  1184  		return OpARM64GreaterThan
  1185  	case OpARM64LessThanU:
  1186  		return OpARM64GreaterThanU
  1187  	case OpARM64GreaterThan:
  1188  		return OpARM64LessThan
  1189  	case OpARM64GreaterThanU:
  1190  		return OpARM64LessThanU
  1191  	case OpARM64LessEqual:
  1192  		return OpARM64GreaterEqual
  1193  	case OpARM64LessEqualU:
  1194  		return OpARM64GreaterEqualU
  1195  	case OpARM64GreaterEqual:
  1196  		return OpARM64LessEqual
  1197  	case OpARM64GreaterEqualU:
  1198  		return OpARM64LessEqualU
  1199  	case OpARM64Equal, OpARM64NotEqual:
  1200  		return op
  1201  	case OpARM64LessThanF:
  1202  		return OpARM64GreaterThanF
  1203  	case OpARM64GreaterThanF:
  1204  		return OpARM64LessThanF
  1205  	case OpARM64LessEqualF:
  1206  		return OpARM64GreaterEqualF
  1207  	case OpARM64GreaterEqualF:
  1208  		return OpARM64LessEqualF
  1209  	case OpARM64NotLessThanF:
  1210  		return OpARM64NotGreaterThanF
  1211  	case OpARM64NotGreaterThanF:
  1212  		return OpARM64NotLessThanF
  1213  	case OpARM64NotLessEqualF:
  1214  		return OpARM64NotGreaterEqualF
  1215  	case OpARM64NotGreaterEqualF:
  1216  		return OpARM64NotLessEqualF
  1217  	default:
  1218  		panic("unreachable")
  1219  	}
  1220  }
  1221  
  1222  // evaluate an ARM64 op against a flags value
  1223  // that is potentially constant; return 1 for true,
  1224  // -1 for false, and 0 for not constant.
  1225  func ccARM64Eval(op Op, flags *Value) int {
  1226  	fop := flags.Op
  1227  	if fop == OpARM64InvertFlags {
  1228  		return -ccARM64Eval(op, flags.Args[0])
  1229  	}
  1230  	if fop != OpARM64FlagConstant {
  1231  		return 0
  1232  	}
  1233  	fc := flagConstant(flags.AuxInt)
  1234  	b2i := func(b bool) int {
  1235  		if b {
  1236  			return 1
  1237  		}
  1238  		return -1
  1239  	}
  1240  	switch op {
  1241  	case OpARM64Equal:
  1242  		return b2i(fc.eq())
  1243  	case OpARM64NotEqual:
  1244  		return b2i(fc.ne())
  1245  	case OpARM64LessThan:
  1246  		return b2i(fc.lt())
  1247  	case OpARM64LessThanU:
  1248  		return b2i(fc.ult())
  1249  	case OpARM64GreaterThan:
  1250  		return b2i(fc.gt())
  1251  	case OpARM64GreaterThanU:
  1252  		return b2i(fc.ugt())
  1253  	case OpARM64LessEqual:
  1254  		return b2i(fc.le())
  1255  	case OpARM64LessEqualU:
  1256  		return b2i(fc.ule())
  1257  	case OpARM64GreaterEqual:
  1258  		return b2i(fc.ge())
  1259  	case OpARM64GreaterEqualU:
  1260  		return b2i(fc.uge())
  1261  	}
  1262  	return 0
  1263  }
  1264  
  1265  // logRule logs the use of the rule s. This will only be enabled if
  1266  // rewrite rules were generated with the -log option, see _gen/rulegen.go.
  1267  func logRule(s string) {
  1268  	if ruleFile == nil {
  1269  		// Open a log file to write log to. We open in append
  1270  		// mode because all.bash runs the compiler lots of times,
  1271  		// and we want the concatenation of all of those logs.
  1272  		// This means, of course, that users need to rm the old log
  1273  		// to get fresh data.
  1274  		// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
  1275  		w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
  1276  			os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
  1277  		if err != nil {
  1278  			panic(err)
  1279  		}
  1280  		ruleFile = w
  1281  	}
  1282  	// Ignore errors in case of multiple processes fighting over the file.
  1283  	fmt.Fprintln(ruleFile, s)
  1284  }
  1285  
  1286  var ruleFile io.Writer
  1287  
  1288  func isConstZero(v *Value) bool {
  1289  	switch v.Op {
  1290  	case OpConstNil:
  1291  		return true
  1292  	case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
  1293  		return v.AuxInt == 0
  1294  	case OpStringMake, OpIMake, OpComplexMake:
  1295  		return isConstZero(v.Args[0]) && isConstZero(v.Args[1])
  1296  	case OpSliceMake:
  1297  		return isConstZero(v.Args[0]) && isConstZero(v.Args[1]) && isConstZero(v.Args[2])
  1298  	case OpStringPtr, OpStringLen, OpSlicePtr, OpSliceLen, OpSliceCap, OpITab, OpIData, OpComplexReal, OpComplexImag:
  1299  		return isConstZero(v.Args[0])
  1300  	}
  1301  	return false
  1302  }
  1303  
  1304  // reciprocalExact64 reports whether 1/c is exactly representable.
  1305  func reciprocalExact64(c float64) bool {
  1306  	b := math.Float64bits(c)
  1307  	man := b & (1<<52 - 1)
  1308  	if man != 0 {
  1309  		return false // not a power of 2, denormal, or NaN
  1310  	}
  1311  	exp := b >> 52 & (1<<11 - 1)
  1312  	// exponent bias is 0x3ff.  So taking the reciprocal of a number
  1313  	// changes the exponent to 0x7fe-exp.
  1314  	switch exp {
  1315  	case 0:
  1316  		return false // ±0
  1317  	case 0x7ff:
  1318  		return false // ±inf
  1319  	case 0x7fe:
  1320  		return false // exponent is not representable
  1321  	default:
  1322  		return true
  1323  	}
  1324  }
  1325  
  1326  // reciprocalExact32 reports whether 1/c is exactly representable.
  1327  func reciprocalExact32(c float32) bool {
  1328  	b := math.Float32bits(c)
  1329  	man := b & (1<<23 - 1)
  1330  	if man != 0 {
  1331  		return false // not a power of 2, denormal, or NaN
  1332  	}
  1333  	exp := b >> 23 & (1<<8 - 1)
  1334  	// exponent bias is 0x7f.  So taking the reciprocal of a number
  1335  	// changes the exponent to 0xfe-exp.
  1336  	switch exp {
  1337  	case 0:
  1338  		return false // ±0
  1339  	case 0xff:
  1340  		return false // ±inf
  1341  	case 0xfe:
  1342  		return false // exponent is not representable
  1343  	default:
  1344  		return true
  1345  	}
  1346  }
  1347  
  1348  // check if an immediate can be directly encoded into an ARM's instruction.
  1349  func isARMImmRot(v uint32) bool {
  1350  	for i := 0; i < 16; i++ {
  1351  		if v&^0xff == 0 {
  1352  			return true
  1353  		}
  1354  		v = v<<2 | v>>30
  1355  	}
  1356  
  1357  	return false
  1358  }
  1359  
  1360  // overlap reports whether the ranges given by the given offset and
  1361  // size pairs overlap.
  1362  func overlap(offset1, size1, offset2, size2 int64) bool {
  1363  	if offset1 >= offset2 && offset2+size2 > offset1 {
  1364  		return true
  1365  	}
  1366  	if offset2 >= offset1 && offset1+size1 > offset2 {
  1367  		return true
  1368  	}
  1369  	return false
  1370  }
  1371  
  1372  // check if value zeroes out upper 32-bit of 64-bit register.
  1373  // depth limits recursion depth. In AMD64.rules 3 is used as limit,
  1374  // because it catches same amount of cases as 4.
  1375  func ZeroUpper32Bits(x *Value, depth int) bool {
  1376  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1377  		// If the value is signed, it might get re-sign-extended
  1378  		// during spill and restore. See issue 68227.
  1379  		return false
  1380  	}
  1381  	switch x.Op {
  1382  	case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
  1383  		OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
  1384  		OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
  1385  		OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
  1386  		OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
  1387  		OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
  1388  		OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
  1389  		OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
  1390  		OpAMD64SHLL, OpAMD64SHLLconst:
  1391  		return true
  1392  	case OpAMD64MOVQconst:
  1393  		return uint64(uint32(x.AuxInt)) == uint64(x.AuxInt)
  1394  	case OpARM64REV16W, OpARM64REVW, OpARM64RBITW, OpARM64CLZW, OpARM64EXTRWconst,
  1395  		OpARM64MULW, OpARM64MNEGW, OpARM64UDIVW, OpARM64DIVW, OpARM64UMODW,
  1396  		OpARM64MADDW, OpARM64MSUBW, OpARM64RORW, OpARM64RORWconst:
  1397  		return true
  1398  	case OpArg: // note: but not ArgIntReg
  1399  		// amd64 always loads args from the stack unsigned.
  1400  		// most other architectures load them sign/zero extended based on the type.
  1401  		return x.Type.Size() == 4 && x.Block.Func.Config.arch == "amd64"
  1402  	case OpPhi, OpSelect0, OpSelect1:
  1403  		// Phis can use each-other as an arguments, instead of tracking visited values,
  1404  		// just limit recursion depth.
  1405  		if depth <= 0 {
  1406  			return false
  1407  		}
  1408  		for i := range x.Args {
  1409  			if !ZeroUpper32Bits(x.Args[i], depth-1) {
  1410  				return false
  1411  			}
  1412  		}
  1413  		return true
  1414  
  1415  	}
  1416  	return false
  1417  }
  1418  
  1419  // ZeroUpper48Bits is similar to ZeroUpper32Bits, but for upper 48 bits.
  1420  func ZeroUpper48Bits(x *Value, depth int) bool {
  1421  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1422  		return false
  1423  	}
  1424  	switch x.Op {
  1425  	case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
  1426  		return true
  1427  	case OpAMD64MOVQconst, OpAMD64MOVLconst:
  1428  		return uint64(uint16(x.AuxInt)) == uint64(x.AuxInt)
  1429  	case OpArg: // note: but not ArgIntReg
  1430  		return x.Type.Size() == 2 && x.Block.Func.Config.arch == "amd64"
  1431  	case OpPhi, OpSelect0, OpSelect1:
  1432  		// Phis can use each-other as an arguments, instead of tracking visited values,
  1433  		// just limit recursion depth.
  1434  		if depth <= 0 {
  1435  			return false
  1436  		}
  1437  		for i := range x.Args {
  1438  			if !ZeroUpper48Bits(x.Args[i], depth-1) {
  1439  				return false
  1440  			}
  1441  		}
  1442  		return true
  1443  
  1444  	}
  1445  	return false
  1446  }
  1447  
  1448  // ZeroUpper56Bits is similar to ZeroUpper32Bits, but for upper 56 bits.
  1449  func ZeroUpper56Bits(x *Value, depth int) bool {
  1450  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1451  		return false
  1452  	}
  1453  	switch x.Op {
  1454  	case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
  1455  		return true
  1456  	case OpAMD64MOVQconst, OpAMD64MOVLconst:
  1457  		return uint64(uint8(x.AuxInt)) == uint64(x.AuxInt)
  1458  	case OpArg: // note: but not ArgIntReg
  1459  		return x.Type.Size() == 1 && x.Block.Func.Config.arch == "amd64"
  1460  	case OpPhi, OpSelect0, OpSelect1:
  1461  		// Phis can use each-other as an arguments, instead of tracking visited values,
  1462  		// just limit recursion depth.
  1463  		if depth <= 0 {
  1464  			return false
  1465  		}
  1466  		for i := range x.Args {
  1467  			if !ZeroUpper56Bits(x.Args[i], depth-1) {
  1468  				return false
  1469  			}
  1470  		}
  1471  		return true
  1472  
  1473  	}
  1474  	return false
  1475  }
  1476  
  1477  func isInlinableMemclr(c *Config, sz int64) bool {
  1478  	if sz < 0 {
  1479  		return false
  1480  	}
  1481  	// TODO: expand this check to allow other architectures
  1482  	// see CL 454255 and issue 56997
  1483  	switch c.arch {
  1484  	case "amd64", "arm64":
  1485  		return true
  1486  	case "ppc64le", "ppc64", "loong64":
  1487  		return sz < 512
  1488  	}
  1489  	return false
  1490  }
  1491  
  1492  // isInlinableMemmove reports whether the given arch performs a Move of the given size
  1493  // faster than memmove. It will only return true if replacing the memmove with a Move is
  1494  // safe, either because Move will do all of its loads before any of its stores, or
  1495  // because the arguments are known to be disjoint.
  1496  // This is used as a check for replacing memmove with Move ops.
  1497  func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
  1498  	// It is always safe to convert memmove into Move when its arguments are disjoint.
  1499  	// Move ops may or may not be faster for large sizes depending on how the platform
  1500  	// lowers them, so we only perform this optimization on platforms that we know to
  1501  	// have fast Move ops.
  1502  	switch c.arch {
  1503  	case "amd64":
  1504  		return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
  1505  	case "arm64":
  1506  		return sz <= 64 || (sz <= 1024 && disjoint(dst, sz, src, sz))
  1507  	case "loong64":
  1508  		return sz <= 16 || (sz <= 64 && disjoint(dst, sz, src, sz))
  1509  	case "386":
  1510  		return sz <= 8
  1511  	case "s390x", "ppc64", "ppc64le":
  1512  		return sz <= 8 || disjoint(dst, sz, src, sz)
  1513  	case "arm", "mips", "mips64", "mipsle", "mips64le":
  1514  		return sz <= 4
  1515  	}
  1516  	return false
  1517  }
  1518  func IsInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
  1519  	return isInlinableMemmove(dst, src, sz, c)
  1520  }
  1521  
  1522  // logLargeCopy logs the occurrence of a large copy.
  1523  // The best place to do this is in the rewrite rules where the size of the move is easy to find.
  1524  // "Large" is arbitrarily chosen to be 128 bytes; this may change.
  1525  func logLargeCopy(v *Value, s int64) bool {
  1526  	if s < 128 {
  1527  		return true
  1528  	}
  1529  	if logopt.Enabled() {
  1530  		logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
  1531  	}
  1532  	return true
  1533  }
  1534  func LogLargeCopy(funcName string, pos src.XPos, s int64) {
  1535  	if s < 128 {
  1536  		return
  1537  	}
  1538  	if logopt.Enabled() {
  1539  		logopt.LogOpt(pos, "copy", "lower", funcName, fmt.Sprintf("%d bytes", s))
  1540  	}
  1541  }
  1542  
  1543  // hasSmallRotate reports whether the architecture has rotate instructions
  1544  // for sizes < 32-bit.  This is used to decide whether to promote some rotations.
  1545  func hasSmallRotate(c *Config) bool {
  1546  	switch c.arch {
  1547  	case "amd64", "386":
  1548  		return true
  1549  	default:
  1550  		return false
  1551  	}
  1552  }
  1553  
  1554  func supportsPPC64PCRel() bool {
  1555  	// PCRel is currently supported for >= power10, linux only
  1556  	// Internal and external linking supports this on ppc64le; internal linking on ppc64.
  1557  	return buildcfg.GOPPC64 >= 10 && buildcfg.GOOS == "linux"
  1558  }
  1559  
  1560  func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
  1561  	if sh < 0 || sh >= sz {
  1562  		panic("PPC64 shift arg sh out of range")
  1563  	}
  1564  	if mb < 0 || mb >= sz {
  1565  		panic("PPC64 shift arg mb out of range")
  1566  	}
  1567  	if me < 0 || me >= sz {
  1568  		panic("PPC64 shift arg me out of range")
  1569  	}
  1570  	return int32(sh<<16 | mb<<8 | me)
  1571  }
  1572  
  1573  func GetPPC64Shiftsh(auxint int64) int64 {
  1574  	return int64(int8(auxint >> 16))
  1575  }
  1576  
  1577  func GetPPC64Shiftmb(auxint int64) int64 {
  1578  	return int64(int8(auxint >> 8))
  1579  }
  1580  
  1581  // Test if this value can encoded as a mask for a rlwinm like
  1582  // operation.  Masks can also extend from the msb and wrap to
  1583  // the lsb too.  That is, the valid masks are 32 bit strings
  1584  // of the form: 0..01..10..0 or 1..10..01..1 or 1...1
  1585  //
  1586  // Note: This ignores the upper 32 bits of the input. When a
  1587  // zero extended result is desired (e.g a 64 bit result), the
  1588  // user must verify the upper 32 bits are 0 and the mask is
  1589  // contiguous (that is, non-wrapping).
  1590  func isPPC64WordRotateMask(v64 int64) bool {
  1591  	// Isolate rightmost 1 (if none 0) and add.
  1592  	v := uint32(v64)
  1593  	vp := (v & -v) + v
  1594  	// Likewise, for the wrapping case.
  1595  	vn := ^v
  1596  	vpn := (vn & -vn) + vn
  1597  	return (v&vp == 0 || vn&vpn == 0) && v != 0
  1598  }
  1599  
  1600  // Test if this mask is a valid, contiguous bitmask which can be
  1601  // represented by a RLWNM mask and also clears the upper 32 bits
  1602  // of the register.
  1603  func isPPC64WordRotateMaskNonWrapping(v64 int64) bool {
  1604  	// Isolate rightmost 1 (if none 0) and add.
  1605  	v := uint32(v64)
  1606  	vp := (v & -v) + v
  1607  	return (v&vp == 0) && v != 0 && uint64(uint32(v64)) == uint64(v64)
  1608  }
  1609  
  1610  // Compress mask and shift into single value of the form
  1611  // me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can
  1612  // be used to regenerate the input mask.
  1613  func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
  1614  	var mb, me, mbn, men int
  1615  
  1616  	// Determine boundaries and then decode them
  1617  	if mask == 0 || ^mask == 0 || rotate >= nbits {
  1618  		panic(fmt.Sprintf("invalid PPC64 rotate mask: %x %d %d", uint64(mask), rotate, nbits))
  1619  	} else if nbits == 32 {
  1620  		mb = bits.LeadingZeros32(uint32(mask))
  1621  		me = 32 - bits.TrailingZeros32(uint32(mask))
  1622  		mbn = bits.LeadingZeros32(^uint32(mask))
  1623  		men = 32 - bits.TrailingZeros32(^uint32(mask))
  1624  	} else {
  1625  		mb = bits.LeadingZeros64(uint64(mask))
  1626  		me = 64 - bits.TrailingZeros64(uint64(mask))
  1627  		mbn = bits.LeadingZeros64(^uint64(mask))
  1628  		men = 64 - bits.TrailingZeros64(^uint64(mask))
  1629  	}
  1630  	// Check for a wrapping mask (e.g bits at 0 and 63)
  1631  	if mb == 0 && me == int(nbits) {
  1632  		// swap the inverted values
  1633  		mb, me = men, mbn
  1634  	}
  1635  
  1636  	return int64(me) | int64(mb<<8) | rotate<<16 | nbits<<24
  1637  }
  1638  
  1639  // Merge (RLDICL [encoded] (SRDconst [s] x)) into (RLDICL [new_encoded] x)
  1640  // SRDconst on PPC64 is an extended mnemonic of RLDICL. If the input to an
  1641  // RLDICL is an SRDconst, and the RLDICL does not rotate its value, the two
  1642  // operations can be combined. This functions assumes the two opcodes can
  1643  // be merged, and returns an encoded rotate+mask value of the combined RLDICL.
  1644  func mergePPC64RLDICLandSRDconst(encoded, s int64) int64 {
  1645  	mb := s
  1646  	r := 64 - s
  1647  	// A larger mb is a smaller mask.
  1648  	if (encoded>>8)&0xFF < mb {
  1649  		encoded = (encoded &^ 0xFF00) | mb<<8
  1650  	}
  1651  	// The rotate is expected to be 0.
  1652  	if (encoded & 0xFF0000) != 0 {
  1653  		panic("non-zero rotate")
  1654  	}
  1655  	return encoded | r<<16
  1656  }
  1657  
  1658  // DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask.  The values returned as
  1659  // mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
  1660  func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
  1661  	auxint := uint64(sauxint)
  1662  	rotate = int64((auxint >> 16) & 0xFF)
  1663  	mb = int64((auxint >> 8) & 0xFF)
  1664  	me = int64((auxint >> 0) & 0xFF)
  1665  	nbits := int64((auxint >> 24) & 0xFF)
  1666  	mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
  1667  	if mb > me {
  1668  		mask = ^mask
  1669  	}
  1670  	if nbits == 32 {
  1671  		mask = uint64(uint32(mask))
  1672  	}
  1673  
  1674  	// Fixup ME to match ISA definition.  The second argument to MASK(..,me)
  1675  	// is inclusive.
  1676  	me = (me - 1) & (nbits - 1)
  1677  	return
  1678  }
  1679  
  1680  // This verifies that the mask is a set of
  1681  // consecutive bits including the least
  1682  // significant bit.
  1683  func isPPC64ValidShiftMask(v int64) bool {
  1684  	if (v != 0) && ((v+1)&v) == 0 {
  1685  		return true
  1686  	}
  1687  	return false
  1688  }
  1689  
  1690  func getPPC64ShiftMaskLength(v int64) int64 {
  1691  	return int64(bits.Len64(uint64(v)))
  1692  }
  1693  
  1694  // Decompose a shift right into an equivalent rotate/mask,
  1695  // and return mask & m.
  1696  func mergePPC64RShiftMask(m, s, nbits int64) int64 {
  1697  	smask := uint64((1<<uint(nbits))-1) >> uint(s)
  1698  	return m & int64(smask)
  1699  }
  1700  
  1701  // Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
  1702  func mergePPC64AndSrwi(m, s int64) int64 {
  1703  	mask := mergePPC64RShiftMask(m, s, 32)
  1704  	if !isPPC64WordRotateMask(mask) {
  1705  		return 0
  1706  	}
  1707  	return encodePPC64RotateMask((32-s)&31, mask, 32)
  1708  }
  1709  
  1710  // Combine (ANDconst [m] (SRDconst [s])) into (RLWINM [y]) or return 0
  1711  func mergePPC64AndSrdi(m, s int64) int64 {
  1712  	mask := mergePPC64RShiftMask(m, s, 64)
  1713  
  1714  	// Verify the rotate and mask result only uses the lower 32 bits.
  1715  	rv := bits.RotateLeft64(0xFFFFFFFF00000000, -int(s))
  1716  	if rv&uint64(mask) != 0 {
  1717  		return 0
  1718  	}
  1719  	if !isPPC64WordRotateMaskNonWrapping(mask) {
  1720  		return 0
  1721  	}
  1722  	return encodePPC64RotateMask((32-s)&31, mask, 32)
  1723  }
  1724  
  1725  // Combine (ANDconst [m] (SLDconst [s])) into (RLWINM [y]) or return 0
  1726  func mergePPC64AndSldi(m, s int64) int64 {
  1727  	mask := -1 << s & m
  1728  
  1729  	// Verify the rotate and mask result only uses the lower 32 bits.
  1730  	rv := bits.RotateLeft64(0xFFFFFFFF00000000, int(s))
  1731  	if rv&uint64(mask) != 0 {
  1732  		return 0
  1733  	}
  1734  	if !isPPC64WordRotateMaskNonWrapping(mask) {
  1735  		return 0
  1736  	}
  1737  	return encodePPC64RotateMask(s&31, mask, 32)
  1738  }
  1739  
  1740  // Test if a word shift right feeding into a CLRLSLDI can be merged into RLWINM.
  1741  // Return the encoded RLWINM constant, or 0 if they cannot be merged.
  1742  func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
  1743  	mask_1 := uint64(0xFFFFFFFF >> uint(srw))
  1744  	// for CLRLSLDI, it's more convenient to think of it as a mask left bits then rotate left.
  1745  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(sld))
  1746  
  1747  	// Rewrite mask to apply after the final left shift.
  1748  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
  1749  
  1750  	r_1 := 32 - srw
  1751  	r_2 := GetPPC64Shiftsh(sld)
  1752  	r_3 := (r_1 + r_2) & 31 // This can wrap.
  1753  
  1754  	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
  1755  		return 0
  1756  	}
  1757  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1758  }
  1759  
  1760  // Test if a doubleword shift right feeding into a CLRLSLDI can be merged into RLWINM.
  1761  // Return the encoded RLWINM constant, or 0 if they cannot be merged.
  1762  func mergePPC64ClrlsldiSrd(sld, srd int64) int64 {
  1763  	mask_1 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(srd)
  1764  	// for CLRLSLDI, it's more convenient to think of it as a mask left bits then rotate left.
  1765  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(sld))
  1766  
  1767  	// Rewrite mask to apply after the final left shift.
  1768  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
  1769  
  1770  	r_1 := 64 - srd
  1771  	r_2 := GetPPC64Shiftsh(sld)
  1772  	r_3 := (r_1 + r_2) & 63 // This can wrap.
  1773  
  1774  	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
  1775  		return 0
  1776  	}
  1777  	// This combine only works when selecting and shifting the lower 32 bits.
  1778  	v1 := bits.RotateLeft64(0xFFFFFFFF00000000, int(r_3))
  1779  	if v1&mask_3 != 0 {
  1780  		return 0
  1781  	}
  1782  	return encodePPC64RotateMask(r_3&31, int64(mask_3), 32)
  1783  }
  1784  
  1785  // Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM.  Return
  1786  // the encoded RLWINM constant, or 0 if they cannot be merged.
  1787  func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
  1788  	r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
  1789  	// for CLRLSLDI, it's more convenient to think of it as a mask left bits then rotate left.
  1790  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1791  
  1792  	// combine the masks, and adjust for the final left shift.
  1793  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
  1794  	r_2 := GetPPC64Shiftsh(int64(sld))
  1795  	r_3 := (r_1 + r_2) & 31 // This can wrap.
  1796  
  1797  	// Verify the result is still a valid bitmask of <= 32 bits.
  1798  	if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
  1799  		return 0
  1800  	}
  1801  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1802  }
  1803  
  1804  // Test if RLWINM feeding into an ANDconst can be merged. Return the encoded RLWINM constant,
  1805  // or 0 if they cannot be merged.
  1806  func mergePPC64AndRlwinm(mask uint32, rlw int64) int64 {
  1807  	r, _, _, mask_rlw := DecodePPC64RotateMask(rlw)
  1808  	mask_out := (mask_rlw & uint64(mask))
  1809  
  1810  	// Verify the result is still a valid bitmask of <= 32 bits.
  1811  	if !isPPC64WordRotateMask(int64(mask_out)) {
  1812  		return 0
  1813  	}
  1814  	return encodePPC64RotateMask(r, int64(mask_out), 32)
  1815  }
  1816  
  1817  // Test if RLWINM opcode rlw clears the upper 32 bits of the
  1818  // result. Return rlw if it does, 0 otherwise.
  1819  func mergePPC64MovwzregRlwinm(rlw int64) int64 {
  1820  	_, mb, me, _ := DecodePPC64RotateMask(rlw)
  1821  	if mb > me {
  1822  		return 0
  1823  	}
  1824  	return rlw
  1825  }
  1826  
  1827  // Test if AND feeding into an ANDconst can be merged. Return the encoded RLWINM constant,
  1828  // or 0 if they cannot be merged.
  1829  func mergePPC64RlwinmAnd(rlw int64, mask uint32) int64 {
  1830  	r, _, _, mask_rlw := DecodePPC64RotateMask(rlw)
  1831  
  1832  	// Rotate the input mask, combine with the rlwnm mask, and test if it is still a valid rlwinm mask.
  1833  	r_mask := bits.RotateLeft32(mask, int(r))
  1834  
  1835  	mask_out := (mask_rlw & uint64(r_mask))
  1836  
  1837  	// Verify the result is still a valid bitmask of <= 32 bits.
  1838  	if !isPPC64WordRotateMask(int64(mask_out)) {
  1839  		return 0
  1840  	}
  1841  	return encodePPC64RotateMask(r, int64(mask_out), 32)
  1842  }
  1843  
  1844  // Test if RLWINM feeding into SRDconst can be merged. Return the encoded RLIWNM constant,
  1845  // or 0 if they cannot be merged.
  1846  func mergePPC64SldiRlwinm(sldi, rlw int64) int64 {
  1847  	r_1, mb, me, mask_1 := DecodePPC64RotateMask(rlw)
  1848  	if mb > me || mb < sldi {
  1849  		// Wrapping masks cannot be merged as the upper 32 bits are effectively undefined in this case.
  1850  		// Likewise, if mb is less than the shift amount, it cannot be merged.
  1851  		return 0
  1852  	}
  1853  	// combine the masks, and adjust for the final left shift.
  1854  	mask_3 := mask_1 << sldi
  1855  	r_3 := (r_1 + sldi) & 31 // This can wrap.
  1856  
  1857  	// Verify the result is still a valid bitmask of <= 32 bits.
  1858  	if uint64(uint32(mask_3)) != mask_3 {
  1859  		return 0
  1860  	}
  1861  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1862  }
  1863  
  1864  // Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
  1865  // or return 0 if they cannot be combined.
  1866  func mergePPC64SldiSrw(sld, srw int64) int64 {
  1867  	if sld > srw || srw >= 32 {
  1868  		return 0
  1869  	}
  1870  	mask_r := uint32(0xFFFFFFFF) >> uint(srw)
  1871  	mask_l := uint32(0xFFFFFFFF) >> uint(sld)
  1872  	mask := (mask_r & mask_l) << uint(sld)
  1873  	return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
  1874  }
  1875  
  1876  // Convert a PPC64 opcode from the Op to OpCC form. This converts (op x y)
  1877  // to (Select0 (opCC x y)) without having to explicitly fixup every user
  1878  // of op.
  1879  //
  1880  // E.g consider the case:
  1881  // a = (ADD x y)
  1882  // b = (CMPconst [0] a)
  1883  // c = (OR a z)
  1884  //
  1885  // A rule like (CMPconst [0] (ADD x y)) => (CMPconst [0] (Select0 (ADDCC x y)))
  1886  // would produce:
  1887  // a  = (ADD x y)
  1888  // a' = (ADDCC x y)
  1889  // a” = (Select0 a')
  1890  // b  = (CMPconst [0] a”)
  1891  // c  = (OR a z)
  1892  //
  1893  // which makes it impossible to rewrite the second user. Instead the result
  1894  // of this conversion is:
  1895  // a' = (ADDCC x y)
  1896  // a  = (Select0 a')
  1897  // b  = (CMPconst [0] a)
  1898  // c  = (OR a z)
  1899  //
  1900  // Which makes it trivial to rewrite b using a lowering rule.
  1901  func convertPPC64OpToOpCC(op *Value) *Value {
  1902  	ccOpMap := map[Op]Op{
  1903  		OpPPC64ADD:      OpPPC64ADDCC,
  1904  		OpPPC64ADDconst: OpPPC64ADDCCconst,
  1905  		OpPPC64AND:      OpPPC64ANDCC,
  1906  		OpPPC64ANDN:     OpPPC64ANDNCC,
  1907  		OpPPC64ANDconst: OpPPC64ANDCCconst,
  1908  		OpPPC64CNTLZD:   OpPPC64CNTLZDCC,
  1909  		OpPPC64MULHDU:   OpPPC64MULHDUCC,
  1910  		OpPPC64NEG:      OpPPC64NEGCC,
  1911  		OpPPC64NOR:      OpPPC64NORCC,
  1912  		OpPPC64OR:       OpPPC64ORCC,
  1913  		OpPPC64RLDICL:   OpPPC64RLDICLCC,
  1914  		OpPPC64SUB:      OpPPC64SUBCC,
  1915  		OpPPC64XOR:      OpPPC64XORCC,
  1916  	}
  1917  	b := op.Block
  1918  	opCC := b.NewValue0I(op.Pos, ccOpMap[op.Op], types.NewTuple(op.Type, types.TypeFlags), op.AuxInt)
  1919  	opCC.AddArgs(op.Args...)
  1920  	op.reset(OpSelect0)
  1921  	op.AddArgs(opCC)
  1922  	return op
  1923  }
  1924  
  1925  // Try converting a RLDICL to ANDCC. If successful, return the mask otherwise 0.
  1926  func convertPPC64RldiclAndccconst(sauxint int64) int64 {
  1927  	r, _, _, mask := DecodePPC64RotateMask(sauxint)
  1928  	if r != 0 || mask&0xFFFF != mask {
  1929  		return 0
  1930  	}
  1931  	return int64(mask)
  1932  }
  1933  
  1934  // Convenience function to rotate a 32 bit constant value by another constant.
  1935  func rotateLeft32(v, rotate int64) int64 {
  1936  	return int64(bits.RotateLeft32(uint32(v), int(rotate)))
  1937  }
  1938  
  1939  func rotateRight64(v, rotate int64) int64 {
  1940  	return int64(bits.RotateLeft64(uint64(v), int(-rotate)))
  1941  }
  1942  
  1943  // encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
  1944  func armBFAuxInt(lsb, width int64) arm64BitField {
  1945  	if lsb < 0 || lsb > 63 {
  1946  		panic("ARM(64) bit field lsb constant out of range")
  1947  	}
  1948  	if width < 1 || lsb+width > 64 {
  1949  		panic("ARM(64) bit field width constant out of range")
  1950  	}
  1951  	return arm64BitField(width | lsb<<8)
  1952  }
  1953  
  1954  // returns the lsb part of the auxInt field of arm64 bitfield ops.
  1955  func (bfc arm64BitField) lsb() int64 {
  1956  	return int64(uint64(bfc) >> 8)
  1957  }
  1958  
  1959  // returns the width part of the auxInt field of arm64 bitfield ops.
  1960  func (bfc arm64BitField) width() int64 {
  1961  	return int64(bfc) & 0xff
  1962  }
  1963  
  1964  // checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
  1965  func isARM64BFMask(lsb, mask, rshift int64) bool {
  1966  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1967  	return shiftedMask != 0 && isPowerOfTwo(shiftedMask+1) && nto(shiftedMask)+lsb < 64
  1968  }
  1969  
  1970  // returns the bitfield width of mask >> rshift for arm64 bitfield ops.
  1971  func arm64BFWidth(mask, rshift int64) int64 {
  1972  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1973  	if shiftedMask == 0 {
  1974  		panic("ARM64 BF mask is zero")
  1975  	}
  1976  	return nto(shiftedMask)
  1977  }
  1978  
  1979  // encodes condition code and NZCV flags into result.
  1980  func arm64ConditionalParamsAuxInt(cond Op, nzcv uint8) arm64ConditionalParams {
  1981  	if cond < OpARM64Equal || cond > OpARM64GreaterEqualU {
  1982  		panic("Wrong conditional operation")
  1983  	}
  1984  	if nzcv&0x0f != nzcv {
  1985  		panic("Wrong value of NZCV flag")
  1986  	}
  1987  	return arm64ConditionalParams{cond, nzcv, 0, false}
  1988  }
  1989  
  1990  // encodes condition code, NZCV flags and constant value into auxint.
  1991  func arm64ConditionalParamsAuxIntWithValue(cond Op, nzcv uint8, value uint8) arm64ConditionalParams {
  1992  	if value&0x1f != value {
  1993  		panic("Wrong value of constant")
  1994  	}
  1995  	params := arm64ConditionalParamsAuxInt(cond, nzcv)
  1996  	params.constValue = value
  1997  	params.ind = true
  1998  	return params
  1999  }
  2000  
  2001  // extracts condition code from auxint.
  2002  func (condParams arm64ConditionalParams) Cond() Op {
  2003  	return condParams.cond
  2004  }
  2005  
  2006  // extracts NZCV flags from auxint.
  2007  func (condParams arm64ConditionalParams) Nzcv() int64 {
  2008  	return int64(condParams.nzcv)
  2009  }
  2010  
  2011  // extracts constant value from auxint if present.
  2012  func (condParams arm64ConditionalParams) ConstValue() (int64, bool) {
  2013  	return int64(condParams.constValue), condParams.ind
  2014  }
  2015  
  2016  // registerizable reports whether t is a primitive type that fits in
  2017  // a register. It assumes float64 values will always fit into registers
  2018  // even if that isn't strictly true.
  2019  func registerizable(b *Block, typ *types.Type) bool {
  2020  	if typ.IsPtrShaped() || typ.IsFloat() || typ.IsBoolean() {
  2021  		return true
  2022  	}
  2023  	if typ.IsInteger() {
  2024  		return typ.Size() <= b.Func.Config.RegSize
  2025  	}
  2026  	return false
  2027  }
  2028  
  2029  // needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
  2030  func needRaceCleanup(sym *AuxCall, v *Value) bool {
  2031  	f := v.Block.Func
  2032  	if !f.Config.Race {
  2033  		return false
  2034  	}
  2035  	if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
  2036  		return false
  2037  	}
  2038  	for _, b := range f.Blocks {
  2039  		for _, v := range b.Values {
  2040  			switch v.Op {
  2041  			case OpStaticCall, OpStaticLECall:
  2042  				// Check for racefuncenter will encounter racefuncexit and vice versa.
  2043  				// Allow calls to panic*
  2044  				s := v.Aux.(*AuxCall).Fn.String()
  2045  				switch s {
  2046  				case "runtime.racefuncenter", "runtime.racefuncexit",
  2047  					"runtime.panicdivide", "runtime.panicwrap",
  2048  					"runtime.panicshift":
  2049  					continue
  2050  				}
  2051  				// If we encountered any call, we need to keep racefunc*,
  2052  				// for accurate stacktraces.
  2053  				return false
  2054  			case OpPanicBounds, OpPanicExtend:
  2055  				// Note: these are panic generators that are ok (like the static calls above).
  2056  			case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
  2057  				// We must keep the race functions if there are any other call types.
  2058  				return false
  2059  			}
  2060  		}
  2061  	}
  2062  	if isSameCall(sym, "runtime.racefuncenter") {
  2063  		// TODO REGISTER ABI this needs to be cleaned up.
  2064  		// If we're removing racefuncenter, remove its argument as well.
  2065  		if v.Args[0].Op != OpStore {
  2066  			if v.Op == OpStaticLECall {
  2067  				// there is no store, yet.
  2068  				return true
  2069  			}
  2070  			return false
  2071  		}
  2072  		mem := v.Args[0].Args[2]
  2073  		v.Args[0].reset(OpCopy)
  2074  		v.Args[0].AddArg(mem)
  2075  	}
  2076  	return true
  2077  }
  2078  
  2079  // symIsRO reports whether sym is a read-only global.
  2080  func symIsRO(sym Sym) bool {
  2081  	lsym := sym.(*obj.LSym)
  2082  	return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
  2083  }
  2084  
  2085  // symIsROZero reports whether sym is a read-only global whose data contains all zeros.
  2086  func symIsROZero(sym Sym) bool {
  2087  	lsym := sym.(*obj.LSym)
  2088  	if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
  2089  		return false
  2090  	}
  2091  	for _, b := range lsym.P {
  2092  		if b != 0 {
  2093  			return false
  2094  		}
  2095  	}
  2096  	return true
  2097  }
  2098  
  2099  // isFixedLoad returns true if the load can be resolved to fixed address or constant,
  2100  // and can be rewritten by rewriteFixedLoad.
  2101  func isFixedLoad(v *Value, sym Sym, off int64) bool {
  2102  	lsym := sym.(*obj.LSym)
  2103  	if (v.Type.IsPtrShaped() || v.Type.IsUintptr()) && lsym.Type == objabi.SRODATA {
  2104  		for _, r := range lsym.R {
  2105  			if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off && r.Add == 0 {
  2106  				return true
  2107  			}
  2108  		}
  2109  		return false
  2110  	}
  2111  
  2112  	if ti := lsym.TypeInfo(); ti != nil {
  2113  		// Type symbols do not contain information about their fields, unlike the cases above.
  2114  		// Hand-implement field accesses.
  2115  		// TODO: can this be replaced with reflectdata.writeType and just use the code above?
  2116  
  2117  		t := ti.Type.(*types.Type)
  2118  
  2119  		for _, f := range rttype.Type.Fields() {
  2120  			if f.Offset == off && copyCompatibleType(v.Type, f.Type) {
  2121  				switch f.Sym.Name {
  2122  				case "Size_", "PtrBytes", "Hash", "Kind_", "GCData":
  2123  					return true
  2124  				default:
  2125  					// fmt.Println("unknown field", f.Sym.Name)
  2126  					return false
  2127  				}
  2128  			}
  2129  		}
  2130  
  2131  		if t.IsPtr() && off == rttype.PtrType.OffsetOf("Elem") {
  2132  			return true
  2133  		}
  2134  
  2135  		return false
  2136  	}
  2137  
  2138  	return false
  2139  }
  2140  
  2141  // rewriteFixedLoad rewrites a load to a fixed address or constant, if isFixedLoad returns true.
  2142  func rewriteFixedLoad(v *Value, sym Sym, sb *Value, off int64) *Value {
  2143  	b := v.Block
  2144  	f := b.Func
  2145  
  2146  	lsym := sym.(*obj.LSym)
  2147  	if (v.Type.IsPtrShaped() || v.Type.IsUintptr()) && lsym.Type == objabi.SRODATA {
  2148  		for _, r := range lsym.R {
  2149  			if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off && r.Add == 0 {
  2150  				if strings.HasPrefix(r.Sym.Name, "type:") {
  2151  					// In case we're loading a type out of a dictionary, we need to record
  2152  					// that the containing function might put that type in an interface.
  2153  					// That information is currently recorded in relocations in the dictionary,
  2154  					// but if we perform this load at compile time then the dictionary
  2155  					// might be dead.
  2156  					reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
  2157  				} else if strings.HasPrefix(r.Sym.Name, "go:itab") {
  2158  					// Same, but if we're using an itab we need to record that the
  2159  					// itab._type might be put in an interface.
  2160  					reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
  2161  				}
  2162  				v.reset(OpAddr)
  2163  				v.Aux = symToAux(r.Sym)
  2164  				v.AddArg(sb)
  2165  				return v
  2166  			}
  2167  		}
  2168  		base.Fatalf("fixedLoad data not known for %s:%d", sym, off)
  2169  	}
  2170  
  2171  	if ti := lsym.TypeInfo(); ti != nil {
  2172  		// Type symbols do not contain information about their fields, unlike the cases above.
  2173  		// Hand-implement field accesses.
  2174  		// TODO: can this be replaced with reflectdata.writeType and just use the code above?
  2175  
  2176  		t := ti.Type.(*types.Type)
  2177  
  2178  		ptrSizedOpConst := OpConst64
  2179  		if f.Config.PtrSize == 4 {
  2180  			ptrSizedOpConst = OpConst32
  2181  		}
  2182  
  2183  		for _, f := range rttype.Type.Fields() {
  2184  			if f.Offset == off && copyCompatibleType(v.Type, f.Type) {
  2185  				switch f.Sym.Name {
  2186  				case "Size_":
  2187  					v.reset(ptrSizedOpConst)
  2188  					v.AuxInt = t.Size()
  2189  					return v
  2190  				case "PtrBytes":
  2191  					v.reset(ptrSizedOpConst)
  2192  					v.AuxInt = types.PtrDataSize(t)
  2193  					return v
  2194  				case "Hash":
  2195  					v.reset(OpConst32)
  2196  					v.AuxInt = int64(int32(types.TypeHash(t)))
  2197  					return v
  2198  				case "Kind_":
  2199  					v.reset(OpConst8)
  2200  					v.AuxInt = int64(int8(reflectdata.ABIKindOfType(t)))
  2201  					return v
  2202  				case "GCData":
  2203  					gcdata, _ := reflectdata.GCSym(t, true)
  2204  					v.reset(OpAddr)
  2205  					v.Aux = symToAux(gcdata)
  2206  					v.AddArg(sb)
  2207  					return v
  2208  				default:
  2209  					base.Fatalf("unknown field %s for fixedLoad of %s at offset %d", f.Sym.Name, lsym.Name, off)
  2210  				}
  2211  			}
  2212  		}
  2213  
  2214  		if t.IsPtr() && off == rttype.PtrType.OffsetOf("Elem") {
  2215  			elemSym := reflectdata.TypeLinksym(t.Elem())
  2216  			reflectdata.MarkTypeSymUsedInInterface(elemSym, f.fe.Func().Linksym())
  2217  			v.reset(OpAddr)
  2218  			v.Aux = symToAux(elemSym)
  2219  			v.AddArg(sb)
  2220  			return v
  2221  		}
  2222  
  2223  		base.Fatalf("fixedLoad data not known for %s:%d", sym, off)
  2224  	}
  2225  
  2226  	base.Fatalf("fixedLoad data not known for %s:%d", sym, off)
  2227  	return nil
  2228  }
  2229  
  2230  // read8 reads one byte from the read-only global sym at offset off.
  2231  func read8(sym Sym, off int64) uint8 {
  2232  	lsym := sym.(*obj.LSym)
  2233  	if off >= int64(len(lsym.P)) || off < 0 {
  2234  		// Invalid index into the global sym.
  2235  		// This can happen in dead code, so we don't want to panic.
  2236  		// Just return any value, it will eventually get ignored.
  2237  		// See issue 29215.
  2238  		return 0
  2239  	}
  2240  	return lsym.P[off]
  2241  }
  2242  
  2243  // read16 reads two bytes from the read-only global sym at offset off.
  2244  func read16(sym Sym, off int64, byteorder binary.ByteOrder) uint16 {
  2245  	lsym := sym.(*obj.LSym)
  2246  	// lsym.P is written lazily.
  2247  	// Bytes requested after the end of lsym.P are 0.
  2248  	var src []byte
  2249  	if 0 <= off && off < int64(len(lsym.P)) {
  2250  		src = lsym.P[off:]
  2251  	}
  2252  	buf := make([]byte, 2)
  2253  	copy(buf, src)
  2254  	return byteorder.Uint16(buf)
  2255  }
  2256  
  2257  // read32 reads four bytes from the read-only global sym at offset off.
  2258  func read32(sym Sym, off int64, byteorder binary.ByteOrder) uint32 {
  2259  	lsym := sym.(*obj.LSym)
  2260  	var src []byte
  2261  	if 0 <= off && off < int64(len(lsym.P)) {
  2262  		src = lsym.P[off:]
  2263  	}
  2264  	buf := make([]byte, 4)
  2265  	copy(buf, src)
  2266  	return byteorder.Uint32(buf)
  2267  }
  2268  
  2269  // read64 reads eight bytes from the read-only global sym at offset off.
  2270  func read64(sym Sym, off int64, byteorder binary.ByteOrder) uint64 {
  2271  	lsym := sym.(*obj.LSym)
  2272  	var src []byte
  2273  	if 0 <= off && off < int64(len(lsym.P)) {
  2274  		src = lsym.P[off:]
  2275  	}
  2276  	buf := make([]byte, 8)
  2277  	copy(buf, src)
  2278  	return byteorder.Uint64(buf)
  2279  }
  2280  
  2281  // sequentialAddresses reports true if it can prove that x + n == y
  2282  func sequentialAddresses(x, y *Value, n int64) bool {
  2283  	if x == y && n == 0 {
  2284  		return true
  2285  	}
  2286  	if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
  2287  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2288  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2289  		return true
  2290  	}
  2291  	if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
  2292  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2293  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2294  		return true
  2295  	}
  2296  	if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
  2297  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2298  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2299  		return true
  2300  	}
  2301  	if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
  2302  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2303  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2304  		return true
  2305  	}
  2306  	return false
  2307  }
  2308  
  2309  // flagConstant represents the result of a compile-time comparison.
  2310  // The sense of these flags does not necessarily represent the hardware's notion
  2311  // of a flags register - these are just a compile-time construct.
  2312  // We happen to match the semantics to those of arm/arm64.
  2313  // Note that these semantics differ from x86: the carry flag has the opposite
  2314  // sense on a subtraction!
  2315  //
  2316  //	On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
  2317  //	On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
  2318  //	 (because it does x + ^y + C).
  2319  //
  2320  // See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
  2321  type flagConstant uint8
  2322  
  2323  // N reports whether the result of an operation is negative (high bit set).
  2324  func (fc flagConstant) N() bool {
  2325  	return fc&1 != 0
  2326  }
  2327  
  2328  // Z reports whether the result of an operation is 0.
  2329  func (fc flagConstant) Z() bool {
  2330  	return fc&2 != 0
  2331  }
  2332  
  2333  // C reports whether an unsigned add overflowed (carry), or an
  2334  // unsigned subtract did not underflow (borrow).
  2335  func (fc flagConstant) C() bool {
  2336  	return fc&4 != 0
  2337  }
  2338  
  2339  // V reports whether a signed operation overflowed or underflowed.
  2340  func (fc flagConstant) V() bool {
  2341  	return fc&8 != 0
  2342  }
  2343  
  2344  func (fc flagConstant) eq() bool {
  2345  	return fc.Z()
  2346  }
  2347  func (fc flagConstant) ne() bool {
  2348  	return !fc.Z()
  2349  }
  2350  func (fc flagConstant) lt() bool {
  2351  	return fc.N() != fc.V()
  2352  }
  2353  func (fc flagConstant) le() bool {
  2354  	return fc.Z() || fc.lt()
  2355  }
  2356  func (fc flagConstant) gt() bool {
  2357  	return !fc.Z() && fc.ge()
  2358  }
  2359  func (fc flagConstant) ge() bool {
  2360  	return fc.N() == fc.V()
  2361  }
  2362  func (fc flagConstant) ult() bool {
  2363  	return !fc.C()
  2364  }
  2365  func (fc flagConstant) ule() bool {
  2366  	return fc.Z() || fc.ult()
  2367  }
  2368  func (fc flagConstant) ugt() bool {
  2369  	return !fc.Z() && fc.uge()
  2370  }
  2371  func (fc flagConstant) uge() bool {
  2372  	return fc.C()
  2373  }
  2374  
  2375  func (fc flagConstant) ltNoov() bool {
  2376  	return fc.lt() && !fc.V()
  2377  }
  2378  func (fc flagConstant) leNoov() bool {
  2379  	return fc.le() && !fc.V()
  2380  }
  2381  func (fc flagConstant) gtNoov() bool {
  2382  	return fc.gt() && !fc.V()
  2383  }
  2384  func (fc flagConstant) geNoov() bool {
  2385  	return fc.ge() && !fc.V()
  2386  }
  2387  
  2388  func (fc flagConstant) String() string {
  2389  	return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
  2390  }
  2391  
  2392  type flagConstantBuilder struct {
  2393  	N bool
  2394  	Z bool
  2395  	C bool
  2396  	V bool
  2397  }
  2398  
  2399  func (fcs flagConstantBuilder) encode() flagConstant {
  2400  	var fc flagConstant
  2401  	if fcs.N {
  2402  		fc |= 1
  2403  	}
  2404  	if fcs.Z {
  2405  		fc |= 2
  2406  	}
  2407  	if fcs.C {
  2408  		fc |= 4
  2409  	}
  2410  	if fcs.V {
  2411  		fc |= 8
  2412  	}
  2413  	return fc
  2414  }
  2415  
  2416  // Note: addFlags(x,y) != subFlags(x,-y) in some situations:
  2417  //  - the results of the C flag are different
  2418  //  - the results of the V flag when y==minint are different
  2419  
  2420  // addFlags64 returns the flags that would be set from computing x+y.
  2421  func addFlags64(x, y int64) flagConstant {
  2422  	var fcb flagConstantBuilder
  2423  	fcb.Z = x+y == 0
  2424  	fcb.N = x+y < 0
  2425  	fcb.C = uint64(x+y) < uint64(x)
  2426  	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
  2427  	return fcb.encode()
  2428  }
  2429  
  2430  // subFlags64 returns the flags that would be set from computing x-y.
  2431  func subFlags64(x, y int64) flagConstant {
  2432  	var fcb flagConstantBuilder
  2433  	fcb.Z = x-y == 0
  2434  	fcb.N = x-y < 0
  2435  	fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
  2436  	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
  2437  	return fcb.encode()
  2438  }
  2439  
  2440  // addFlags32 returns the flags that would be set from computing x+y.
  2441  func addFlags32(x, y int32) flagConstant {
  2442  	var fcb flagConstantBuilder
  2443  	fcb.Z = x+y == 0
  2444  	fcb.N = x+y < 0
  2445  	fcb.C = uint32(x+y) < uint32(x)
  2446  	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
  2447  	return fcb.encode()
  2448  }
  2449  
  2450  // subFlags32 returns the flags that would be set from computing x-y.
  2451  func subFlags32(x, y int32) flagConstant {
  2452  	var fcb flagConstantBuilder
  2453  	fcb.Z = x-y == 0
  2454  	fcb.N = x-y < 0
  2455  	fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
  2456  	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
  2457  	return fcb.encode()
  2458  }
  2459  
  2460  // logicFlags64 returns flags set to the sign/zeroness of x.
  2461  // C and V are set to false.
  2462  func logicFlags64(x int64) flagConstant {
  2463  	var fcb flagConstantBuilder
  2464  	fcb.Z = x == 0
  2465  	fcb.N = x < 0
  2466  	return fcb.encode()
  2467  }
  2468  
  2469  // logicFlags32 returns flags set to the sign/zeroness of x.
  2470  // C and V are set to false.
  2471  func logicFlags32(x int32) flagConstant {
  2472  	var fcb flagConstantBuilder
  2473  	fcb.Z = x == 0
  2474  	fcb.N = x < 0
  2475  	return fcb.encode()
  2476  }
  2477  
  2478  func makeJumpTableSym(b *Block) *obj.LSym {
  2479  	s := base.Ctxt.Lookup(fmt.Sprintf("%s.jump%d", b.Func.fe.Func().LSym.Name, b.ID))
  2480  	// The jump table symbol is accessed only from the function symbol.
  2481  	s.Set(obj.AttrStatic, true)
  2482  	return s
  2483  }
  2484  
  2485  // canRotate reports whether the architecture supports
  2486  // rotates of integer registers with the given number of bits.
  2487  func canRotate(c *Config, bits int64) bool {
  2488  	if bits > c.PtrSize*8 {
  2489  		// Don't rewrite to rotates bigger than the machine word.
  2490  		return false
  2491  	}
  2492  	switch c.arch {
  2493  	case "386", "amd64", "arm64", "loong64", "riscv64":
  2494  		return true
  2495  	case "arm", "s390x", "ppc64", "ppc64le", "wasm":
  2496  		return bits >= 32
  2497  	default:
  2498  		return false
  2499  	}
  2500  }
  2501  
  2502  // isARM64bitcon reports whether a constant can be encoded into a logical instruction.
  2503  func isARM64bitcon(x uint64) bool {
  2504  	if x == 1<<64-1 || x == 0 {
  2505  		return false
  2506  	}
  2507  	// determine the period and sign-extend a unit to 64 bits
  2508  	switch {
  2509  	case x != x>>32|x<<32:
  2510  		// period is 64
  2511  		// nothing to do
  2512  	case x != x>>16|x<<48:
  2513  		// period is 32
  2514  		x = uint64(int64(int32(x)))
  2515  	case x != x>>8|x<<56:
  2516  		// period is 16
  2517  		x = uint64(int64(int16(x)))
  2518  	case x != x>>4|x<<60:
  2519  		// period is 8
  2520  		x = uint64(int64(int8(x)))
  2521  	default:
  2522  		// period is 4 or 2, always true
  2523  		// 0001, 0010, 0100, 1000 -- 0001 rotate
  2524  		// 0011, 0110, 1100, 1001 -- 0011 rotate
  2525  		// 0111, 1011, 1101, 1110 -- 0111 rotate
  2526  		// 0101, 1010             -- 01   rotate, repeat
  2527  		return true
  2528  	}
  2529  	return sequenceOfOnes(x) || sequenceOfOnes(^x)
  2530  }
  2531  
  2532  // sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros.
  2533  func sequenceOfOnes(x uint64) bool {
  2534  	y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2
  2535  	y += x
  2536  	return (y-1)&y == 0
  2537  }
  2538  
  2539  // isARM64addcon reports whether x can be encoded as the immediate value in an ADD or SUB instruction.
  2540  func isARM64addcon(v int64) bool {
  2541  	/* uimm12 or uimm24? */
  2542  	if v < 0 {
  2543  		return false
  2544  	}
  2545  	if (v & 0xFFF) == 0 {
  2546  		v >>= 12
  2547  	}
  2548  	return v <= 0xFFF
  2549  }
  2550  
  2551  // setPos sets the position of v to pos, then returns true.
  2552  // Useful for setting the result of a rewrite's position to
  2553  // something other than the default.
  2554  func setPos(v *Value, pos src.XPos) bool {
  2555  	v.Pos = pos
  2556  	return true
  2557  }
  2558  
  2559  // isNonNegative reports whether v is known to be greater or equal to zero.
  2560  // Note that this is pretty simplistic. The prove pass generates more detailed
  2561  // nonnegative information about values.
  2562  func isNonNegative(v *Value) bool {
  2563  	if !v.Type.IsInteger() {
  2564  		v.Fatalf("isNonNegative bad type: %v", v.Type)
  2565  	}
  2566  	// TODO: return true if !v.Type.IsSigned()
  2567  	// SSA isn't type-safe enough to do that now (issue 37753).
  2568  	// The checks below depend only on the pattern of bits.
  2569  
  2570  	switch v.Op {
  2571  	case OpConst64:
  2572  		return v.AuxInt >= 0
  2573  
  2574  	case OpConst32:
  2575  		return int32(v.AuxInt) >= 0
  2576  
  2577  	case OpConst16:
  2578  		return int16(v.AuxInt) >= 0
  2579  
  2580  	case OpConst8:
  2581  		return int8(v.AuxInt) >= 0
  2582  
  2583  	case OpStringLen, OpSliceLen, OpSliceCap,
  2584  		OpZeroExt8to64, OpZeroExt16to64, OpZeroExt32to64,
  2585  		OpZeroExt8to32, OpZeroExt16to32, OpZeroExt8to16,
  2586  		OpCtz64, OpCtz32, OpCtz16, OpCtz8,
  2587  		OpCtz64NonZero, OpCtz32NonZero, OpCtz16NonZero, OpCtz8NonZero,
  2588  		OpBitLen64, OpBitLen32, OpBitLen16, OpBitLen8:
  2589  		return true
  2590  
  2591  	case OpRsh64Ux64, OpRsh32Ux64:
  2592  		by := v.Args[1]
  2593  		return by.Op == OpConst64 && by.AuxInt > 0
  2594  
  2595  	case OpRsh64x64, OpRsh32x64, OpRsh8x64, OpRsh16x64, OpRsh32x32, OpRsh64x32,
  2596  		OpSignExt32to64, OpSignExt16to64, OpSignExt8to64, OpSignExt16to32, OpSignExt8to32:
  2597  		return isNonNegative(v.Args[0])
  2598  
  2599  	case OpAnd64, OpAnd32, OpAnd16, OpAnd8:
  2600  		return isNonNegative(v.Args[0]) || isNonNegative(v.Args[1])
  2601  
  2602  	case OpMod64, OpMod32, OpMod16, OpMod8,
  2603  		OpDiv64, OpDiv32, OpDiv16, OpDiv8,
  2604  		OpOr64, OpOr32, OpOr16, OpOr8,
  2605  		OpXor64, OpXor32, OpXor16, OpXor8:
  2606  		return isNonNegative(v.Args[0]) && isNonNegative(v.Args[1])
  2607  
  2608  		// We could handle OpPhi here, but the improvements from doing
  2609  		// so are very minor, and it is neither simple nor cheap.
  2610  	}
  2611  	return false
  2612  }
  2613  
  2614  func rewriteStructLoad(v *Value) *Value {
  2615  	b := v.Block
  2616  	ptr := v.Args[0]
  2617  	mem := v.Args[1]
  2618  
  2619  	t := v.Type
  2620  	args := make([]*Value, t.NumFields())
  2621  	for i := range args {
  2622  		ft := t.FieldType(i)
  2623  		addr := b.NewValue1I(v.Pos, OpOffPtr, ft.PtrTo(), t.FieldOff(i), ptr)
  2624  		args[i] = b.NewValue2(v.Pos, OpLoad, ft, addr, mem)
  2625  	}
  2626  
  2627  	v.reset(OpStructMake)
  2628  	v.AddArgs(args...)
  2629  	return v
  2630  }
  2631  
  2632  func rewriteStructStore(v *Value) *Value {
  2633  	b := v.Block
  2634  	dst := v.Args[0]
  2635  	x := v.Args[1]
  2636  	if x.Op != OpStructMake {
  2637  		base.Fatalf("invalid struct store: %v", x)
  2638  	}
  2639  	mem := v.Args[2]
  2640  
  2641  	t := x.Type
  2642  	for i, arg := range x.Args {
  2643  		ft := t.FieldType(i)
  2644  
  2645  		addr := b.NewValue1I(v.Pos, OpOffPtr, ft.PtrTo(), t.FieldOff(i), dst)
  2646  		mem = b.NewValue3A(v.Pos, OpStore, types.TypeMem, typeToAux(ft), addr, arg, mem)
  2647  	}
  2648  
  2649  	return mem
  2650  }
  2651  
  2652  // isDirectAndComparableType reports whether v represents a type
  2653  // (a *runtime._type) whose value is stored directly in an
  2654  // interface (i.e., is pointer or pointer-like) and is comparable.
  2655  func isDirectAndComparableType(v *Value) bool {
  2656  	return isDirectAndComparableType1(v)
  2657  }
  2658  
  2659  // v is a type
  2660  func isDirectAndComparableType1(v *Value) bool {
  2661  	switch v.Op {
  2662  	case OpITab:
  2663  		return isDirectAndComparableType2(v.Args[0])
  2664  	case OpAddr:
  2665  		lsym := v.Aux.(*obj.LSym)
  2666  		if ti := lsym.TypeInfo(); ti != nil {
  2667  			t := ti.Type.(*types.Type)
  2668  			return types.IsDirectIface(t) && types.IsComparable(t)
  2669  		}
  2670  	}
  2671  	return false
  2672  }
  2673  
  2674  // v is an empty interface
  2675  func isDirectAndComparableType2(v *Value) bool {
  2676  	switch v.Op {
  2677  	case OpIMake:
  2678  		return isDirectAndComparableType1(v.Args[0])
  2679  	}
  2680  	return false
  2681  }
  2682  
  2683  // isDirectAndComparableIface reports whether v represents an itab
  2684  // (a *runtime._itab) for a type whose value is stored directly
  2685  // in an interface (i.e., is pointer or pointer-like) and is comparable.
  2686  func isDirectAndComparableIface(v *Value) bool {
  2687  	return isDirectAndComparableIface1(v, 9)
  2688  }
  2689  
  2690  // v is an itab
  2691  func isDirectAndComparableIface1(v *Value, depth int) bool {
  2692  	if depth == 0 {
  2693  		return false
  2694  	}
  2695  	switch v.Op {
  2696  	case OpITab:
  2697  		return isDirectAndComparableIface2(v.Args[0], depth-1)
  2698  	case OpAddr:
  2699  		lsym := v.Aux.(*obj.LSym)
  2700  		if ii := lsym.ItabInfo(); ii != nil {
  2701  			t := ii.Type.(*types.Type)
  2702  			return types.IsDirectIface(t) && types.IsComparable(t)
  2703  		}
  2704  	case OpConstNil:
  2705  		// We can treat this as direct, because if the itab is
  2706  		// nil, the data field must be nil also.
  2707  		return true
  2708  	}
  2709  	return false
  2710  }
  2711  
  2712  // v is an interface
  2713  func isDirectAndComparableIface2(v *Value, depth int) bool {
  2714  	if depth == 0 {
  2715  		return false
  2716  	}
  2717  	switch v.Op {
  2718  	case OpIMake:
  2719  		return isDirectAndComparableIface1(v.Args[0], depth-1)
  2720  	case OpPhi:
  2721  		for _, a := range v.Args {
  2722  			if !isDirectAndComparableIface2(a, depth-1) {
  2723  				return false
  2724  			}
  2725  		}
  2726  		return true
  2727  	}
  2728  	return false
  2729  }
  2730  
  2731  func bitsAdd64(x, y, carry int64) (r struct{ sum, carry int64 }) {
  2732  	s, c := bits.Add64(uint64(x), uint64(y), uint64(carry))
  2733  	r.sum, r.carry = int64(s), int64(c)
  2734  	return
  2735  }
  2736  
  2737  func bitsMulU64(x, y int64) (r struct{ hi, lo int64 }) {
  2738  	hi, lo := bits.Mul64(uint64(x), uint64(y))
  2739  	r.hi, r.lo = int64(hi), int64(lo)
  2740  	return
  2741  }
  2742  func bitsMulU32(x, y int32) (r struct{ hi, lo int32 }) {
  2743  	hi, lo := bits.Mul32(uint32(x), uint32(y))
  2744  	r.hi, r.lo = int32(hi), int32(lo)
  2745  	return
  2746  }
  2747  
  2748  // flagify rewrites v which is (X ...) to (Select0 (Xflags ...)).
  2749  func flagify(v *Value) bool {
  2750  	var flagVersion Op
  2751  	switch v.Op {
  2752  	case OpAMD64ADDQconst:
  2753  		flagVersion = OpAMD64ADDQconstflags
  2754  	case OpAMD64ADDLconst:
  2755  		flagVersion = OpAMD64ADDLconstflags
  2756  	default:
  2757  		base.Fatalf("can't flagify op %s", v.Op)
  2758  	}
  2759  	inner := v.copyInto(v.Block)
  2760  	inner.Op = flagVersion
  2761  	inner.Type = types.NewTuple(v.Type, types.TypeFlags)
  2762  	v.reset(OpSelect0)
  2763  	v.AddArg(inner)
  2764  	return true
  2765  }
  2766  
  2767  // PanicBoundsC contains a constant for a bounds failure.
  2768  type PanicBoundsC struct {
  2769  	C int64
  2770  }
  2771  
  2772  // PanicBoundsCC contains 2 constants for a bounds failure.
  2773  type PanicBoundsCC struct {
  2774  	Cx int64
  2775  	Cy int64
  2776  }
  2777  
  2778  func (p PanicBoundsC) CanBeAnSSAAux() {
  2779  }
  2780  func (p PanicBoundsCC) CanBeAnSSAAux() {
  2781  }
  2782  
  2783  func auxToPanicBoundsC(i Aux) PanicBoundsC {
  2784  	return i.(PanicBoundsC)
  2785  }
  2786  func auxToPanicBoundsCC(i Aux) PanicBoundsCC {
  2787  	return i.(PanicBoundsCC)
  2788  }
  2789  func panicBoundsCToAux(p PanicBoundsC) Aux {
  2790  	return p
  2791  }
  2792  func panicBoundsCCToAux(p PanicBoundsCC) Aux {
  2793  	return p
  2794  }
  2795  
  2796  func isDictArgSym(sym Sym) bool {
  2797  	return sym.(*ir.Name).Sym().Name == typecheck.LocalDictName
  2798  }
  2799  
  2800  // When v is (IMake typ (StructMake ...)), convert to
  2801  // (IMake typ arg) where arg is the pointer-y argument to
  2802  // the StructMake (there must be exactly one).
  2803  func imakeOfStructMake(v *Value) *Value {
  2804  	var arg *Value
  2805  	for _, a := range v.Args[1].Args {
  2806  		if a.Type.Size() > 0 {
  2807  			arg = a
  2808  			break
  2809  		}
  2810  	}
  2811  	return v.Block.NewValue2(v.Pos, OpIMake, v.Type, v.Args[0], arg)
  2812  }
  2813  
  2814  // bool2int converts bool to int: true to 1, false to 0
  2815  func bool2int(x bool) int {
  2816  	var b int
  2817  	if x {
  2818  		b = 1
  2819  	}
  2820  	return b
  2821  }
  2822  
  2823  // rewriteCondSelectIntoMath reports whether x OP (y * constant) should be used instead of a CondSelect.
  2824  // x arbitrary, y in [0,1]
  2825  func rewriteCondSelectIntoMath(config *Config, op Op, constant int64) bool {
  2826  	switch config.arch {
  2827  	case "amd64":
  2828  		if constant == 1 {
  2829  			return true
  2830  		}
  2831  		switch op {
  2832  		case OpAdd64, OpAdd32, OpAdd16, OpAdd8:
  2833  			switch constant {
  2834  			case 2, 4, 8:
  2835  				// Implemented with LEA a + b * displacement form
  2836  				return true
  2837  			}
  2838  		}
  2839  	case "arm64":
  2840  		switch op {
  2841  		case OpAdd64, OpAdd32, OpAdd16, OpAdd8:
  2842  			if constant == 1 {
  2843  				return false // better done as CSINC
  2844  			}
  2845  			fallthrough
  2846  		case OpSub64, OpSub32, OpSub16, OpSub8,
  2847  			OpAnd64, OpAnd32, OpAnd16, OpAnd8,
  2848  			OpOr64, OpOr32, OpOr16, OpOr8,
  2849  			OpXor64, OpXor32, OpXor16, OpXor8:
  2850  			// Implemented using an inline LSL
  2851  			return isPowerOfTwo(uint64(constant))
  2852  		default:
  2853  			if constant == 1 {
  2854  				return true
  2855  			}
  2856  		}
  2857  	default:
  2858  		// TODO: fine tune for other architectures.
  2859  		return constant == 1
  2860  	}
  2861  	return false
  2862  }
  2863  
  2864  func addToSub(op Op) Op {
  2865  	switch op {
  2866  	case OpAdd64:
  2867  		return OpSub64
  2868  	case OpAdd32:
  2869  		return OpSub32
  2870  	case OpAdd16:
  2871  		return OpSub16
  2872  	case OpAdd8:
  2873  		return OpSub8
  2874  	default:
  2875  		panic(fmt.Sprintf("unexpected op %v", op))
  2876  	}
  2877  }
  2878  
  2879  func modularMultiplicativeInverse(x uint64) (y uint64) {
  2880  	if x%2 != 1 {
  2881  		panic("even numbers in a power-of-two modulus do not have a multiplicative inverse")
  2882  	}
  2883  	// we start with 3 bits of precision because each odd number is its own multiplicative inverse mod 8
  2884  	y = x // 3 bits
  2885  
  2886  	// now use the Newton-Raphson method to double the number of correct bits in each iteration.
  2887  	y *= 2 - x*y // 6 bits
  2888  	y *= 2 - x*y // 12 bits
  2889  	y *= 2 - x*y // 24 bits
  2890  	y *= 2 - x*y // 48 bits
  2891  	y *= 2 - x*y // 96 bits; good enough
  2892  	return
  2893  }
  2894  

View as plain text