Source file src/cmd/compile/internal/ssa/rewrite.go

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import (
     8  	"cmd/compile/internal/base"
     9  	"cmd/compile/internal/ir"
    10  	"cmd/compile/internal/logopt"
    11  	"cmd/compile/internal/reflectdata"
    12  	"cmd/compile/internal/rttype"
    13  	"cmd/compile/internal/typecheck"
    14  	"cmd/compile/internal/types"
    15  	"cmd/internal/obj"
    16  	"cmd/internal/obj/s390x"
    17  	"cmd/internal/objabi"
    18  	"cmd/internal/src"
    19  	"encoding/binary"
    20  	"fmt"
    21  	"internal/buildcfg"
    22  	"io"
    23  	"math"
    24  	"math/bits"
    25  	"os"
    26  	"path/filepath"
    27  	"strings"
    28  )
    29  
    30  type deadValueChoice bool
    31  
    32  const (
    33  	leaveDeadValues  deadValueChoice = false
    34  	removeDeadValues                 = true
    35  
    36  	repZeroThreshold = 1408 // size beyond which we use REP STOS for zeroing
    37  	repMoveThreshold = 1408 // size beyond which we use REP MOVS for copying
    38  )
    39  
    40  // deadcode indicates whether rewrite should try to remove any values that become dead.
    41  func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
    42  	// repeat rewrites until we find no more rewrites
    43  	pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
    44  	pendingLines.clear()
    45  	debug := f.pass.debug
    46  	if debug > 1 {
    47  		fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
    48  	}
    49  	// If the number of rewrite iterations reaches itersLimit, we turn on
    50  	// cycle detection at that point. Instead of a fixed limit, size the
    51  	// limit according to the size of the function to allow for cases such
    52  	// as the one in issue #66773.
    53  	itersLimit := f.NumBlocks()
    54  	if itersLimit < 20 {
    55  		itersLimit = 20
    56  	}
    57  	var iters int
    58  	var states map[string]bool
    59  	for {
    60  		change := false
    61  		deadChange := false
    62  		for _, b := range f.Blocks {
    63  			var b0 *Block
    64  			if debug > 1 {
    65  				b0 = new(Block)
    66  				*b0 = *b
    67  				b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
    68  			}
    69  			for i, c := range b.ControlValues() {
    70  				for c.Op == OpCopy {
    71  					c = c.Args[0]
    72  					b.ReplaceControl(i, c)
    73  				}
    74  			}
    75  			if rb(b) {
    76  				change = true
    77  				if debug > 1 {
    78  					fmt.Printf("rewriting %s  ->  %s\n", b0.LongString(), b.LongString())
    79  				}
    80  			}
    81  			for j, v := range b.Values {
    82  				var v0 *Value
    83  				if debug > 1 {
    84  					v0 = new(Value)
    85  					*v0 = *v
    86  					v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
    87  				}
    88  				if v.Uses == 0 && v.removeable() {
    89  					if v.Op != OpInvalid && deadcode == removeDeadValues {
    90  					// Reset any values that are now unused, so that we decrement
    91  					// the use count of all of their arguments.
    92  						// Not quite a deadcode pass, because it does not handle cycles.
    93  						// But it should help Uses==1 rules to fire.
    94  						v.reset(OpInvalid)
    95  						deadChange = true
    96  					}
    97  					// No point rewriting values which aren't used.
    98  					continue
    99  				}
   100  
   101  				vchange := phielimValue(v)
   102  				if vchange && debug > 1 {
   103  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   104  				}
   105  
   106  				// Eliminate copy inputs.
   107  				// If any copy input becomes unused, mark it
   108  				// as invalid and discard its argument. Repeat
   109  				// recursively on the discarded argument.
   110  				// This phase helps remove phantom "dead copy" uses
   111  				// of a value so that a x.Uses==1 rule condition
   112  				// of a value so that an x.Uses==1 rule condition
   113  				for i, a := range v.Args {
   114  					if a.Op != OpCopy {
   115  						continue
   116  					}
   117  					aa := copySource(a)
   118  					v.SetArg(i, aa)
   119  					// If a, a copy, has a line boundary indicator, attempt to find a new value
   120  					// to hold it.  The first candidate is the value that will replace a (aa),
   121  					// if it shares the same block and line and is eligible.
   122  					// The second option is v, which has a as an input.  Because aa is earlier in
   123  					// the data flow, it is the better choice.
   124  					if a.Pos.IsStmt() == src.PosIsStmt {
   125  						if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
   126  							aa.Pos = aa.Pos.WithIsStmt()
   127  						} else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
   128  							v.Pos = v.Pos.WithIsStmt()
   129  						} else {
   130  							// Record the lost line and look for a new home after all rewrites are complete.
   131  							// TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
   132  							// line to appear in more than one block, but only one block is stored, so if both end
   133  							// up here, then one will be lost.
   134  							pendingLines.set(a.Pos, int32(a.Block.ID))
   135  						}
   136  						a.Pos = a.Pos.WithNotStmt()
   137  					}
   138  					vchange = true
   139  					for a.Uses == 0 {
   140  						b := a.Args[0]
   141  						a.reset(OpInvalid)
   142  						a = b
   143  					}
   144  				}
   145  				if vchange && debug > 1 {
   146  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   147  				}
   148  
   149  				// apply rewrite function
   150  				if rv(v) {
   151  					vchange = true
   152  					// If value changed to a poor choice for a statement boundary, move the boundary
   153  					if v.Pos.IsStmt() == src.PosIsStmt {
   154  						if k := nextGoodStatementIndex(v, j, b); k != j {
   155  							v.Pos = v.Pos.WithNotStmt()
   156  							b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
   157  						}
   158  					}
   159  				}
   160  
   161  				change = change || vchange
   162  				if vchange && debug > 1 {
   163  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   164  				}
   165  			}
   166  		}
   167  		if !change && !deadChange {
   168  			break
   169  		}
   170  		iters++
   171  		if (iters > itersLimit || debug >= 2) && change {
   172  			// We've done a suspiciously large number of rewrites (or we're in debug mode).
   173  			// As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
   174  			// and the maximum value encountered during make.bash is 12.
   175  			// Start checking for cycles. (This is too expensive to do routinely.)
   176  			// Note: we avoid this path for deadChange-only iterations, to fix #51639.
   177  			if states == nil {
   178  				states = make(map[string]bool)
   179  			}
   180  			h := f.rewriteHash()
   181  			if _, ok := states[h]; ok {
   182  				// We've found a cycle.
   183  				// To diagnose it, set debug to 2 and start again,
   184  				// so that we'll print all rules applied until we complete another cycle.
   185  				// If debug is already >= 2, we've already done that, so it's time to crash.
   186  				if debug < 2 {
   187  					debug = 2
   188  					states = make(map[string]bool)
   189  				} else {
   190  					f.Fatalf("rewrite cycle detected")
   191  				}
   192  			}
   193  			states[h] = true
   194  		}
   195  	}
   196  	// remove clobbered values
   197  	for _, b := range f.Blocks {
   198  		j := 0
   199  		for i, v := range b.Values {
   200  			vl := v.Pos
   201  			if v.Op == OpInvalid {
   202  				if v.Pos.IsStmt() == src.PosIsStmt {
   203  					pendingLines.set(vl, int32(b.ID))
   204  				}
   205  				f.freeValue(v)
   206  				continue
   207  			}
   208  			if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) {
   209  				if pl, ok := pendingLines.get(vl); ok && pl == int32(b.ID) {
   210  					pendingLines.remove(vl)
   211  					v.Pos = v.Pos.WithIsStmt()
   212  				}
   213  			}
   214  			if i != j {
   215  				b.Values[j] = v
   216  			}
   217  			j++
   218  		}
   219  		if pl, ok := pendingLines.get(b.Pos); ok && pl == int32(b.ID) {
   220  			b.Pos = b.Pos.WithIsStmt()
   221  			pendingLines.remove(b.Pos)
   222  		}
   223  		b.truncateValues(j)
   224  	}
   225  }
   226  
   227  // Common functions called from rewriting rules
   228  
   229  func is64BitFloat(t *types.Type) bool {
   230  	return t.Size() == 8 && t.IsFloat()
   231  }
   232  
   233  func is32BitFloat(t *types.Type) bool {
   234  	return t.Size() == 4 && t.IsFloat()
   235  }
   236  
   237  func is64BitInt(t *types.Type) bool {
   238  	return t.Size() == 8 && t.IsInteger()
   239  }
   240  
   241  func is32BitInt(t *types.Type) bool {
   242  	return t.Size() == 4 && t.IsInteger()
   243  }
   244  
   245  func is16BitInt(t *types.Type) bool {
   246  	return t.Size() == 2 && t.IsInteger()
   247  }
   248  
   249  func is8BitInt(t *types.Type) bool {
   250  	return t.Size() == 1 && t.IsInteger()
   251  }
   252  
   253  func isPtr(t *types.Type) bool {
   254  	return t.IsPtrShaped()
   255  }
   256  
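        // copyCompatibleType reports whether values of types t1 and t2 can be
        // treated as interchangeable for copying: the sizes must match, integers
        // only pair with integers, pointer-shaped types only pair with
        // pointer-shaped types, and any other types must compare identical.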
   257  func copyCompatibleType(t1, t2 *types.Type) bool {
   258  	if t1.Size() != t2.Size() {
   259  		return false
   260  	}
   261  	if t1.IsInteger() {
   262  		return t2.IsInteger()
   263  	}
   264  	if isPtr(t1) {
   265  		return isPtr(t2)
   266  	}
   267  	return t1.Compare(t2) == types.CMPeq
   268  }
   269  
   270  // mergeSym merges two symbolic offsets. There is no real merging of
   271  // offsets; we just pick the non-nil one.
   272  func mergeSym(x, y Sym) Sym {
   273  	if x == nil {
   274  		return y
   275  	}
   276  	if y == nil {
   277  		return x
   278  	}
   279  	panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
   280  }
   281  
   282  func canMergeSym(x, y Sym) bool {
   283  	return x == nil || y == nil
   284  }
   285  
   286  // canMergeLoadClobber reports whether the load can be merged into target without
   287  // invalidating the schedule.
   288  // It also checks that the other non-load argument x is something we
   289  // are ok with clobbering.
   290  func canMergeLoadClobber(target, load, x *Value) bool {
   291  	// The register containing x is going to get clobbered.
   292  	// Don't merge if we still need the value of x.
   293  	// We don't have liveness information here, but we can
   294  	// approximate x dying with:
   295  	//  1) target is x's only use.
   296  	//  2) target is not in a deeper loop than x.
   297  	switch {
   298  	case x.Uses == 2 && x.Op == OpPhi && len(x.Args) == 2 && (x.Args[0] == target || x.Args[1] == target) && target.Uses == 1:
   299  		// This is a simple detector to determine that x is probably
   300  		// not live after target. (It does not need to be perfect,
   301  		// regalloc will issue a reg-reg move to save it if we are wrong.)
   302  		// We have:
   303  		//   x = Phi(?, target)
   304  		//   target = Op(load, x)
   305  		// Because target has only one use as a Phi argument, we can schedule it
   306  		// very late. Hopefully, later than the other use of x. (The other use died
   307  		// between x and target, or exists on another branch entirely).
   308  	case x.Uses > 1:
   309  		return false
   310  	}
   311  	loopnest := x.Block.Func.loopnest()
   312  	if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
   313  		return false
   314  	}
   315  	return canMergeLoad(target, load)
   316  }
   317  
   318  // canMergeLoad reports whether the load can be merged into target without
   319  // invalidating the schedule.
   320  func canMergeLoad(target, load *Value) bool {
   321  	if target.Block.ID != load.Block.ID {
   322  		// If the load is in a different block do not merge it.
   323  		return false
   324  	}
   325  
   326  	// We can't merge the load into the target if the load
   327  	// has more than one use.
   328  	if load.Uses != 1 {
   329  		return false
   330  	}
   331  
   332  	mem := load.MemoryArg()
   333  
   334  	// We need the load's memory arg to still be alive at target. That
   335  	// can't be the case if one of target's args depends on a memory
   336  	// state that is a successor of load's memory arg.
   337  	//
   338  	// For example, it would be invalid to merge load into target in
   339  	// the following situation because newmem has killed oldmem
   340  	// before target is reached:
   341  	//     load = read ... oldmem
   342  	//   newmem = write ... oldmem
   343  	//     arg0 = read ... newmem
   344  	//   target = add arg0 load
   345  	//
   346  	// If the argument comes from a different block then we can exclude
   347  	// it immediately because it must dominate load (which is in the
   348  	// same block as target).
   349  	var args []*Value
   350  	for _, a := range target.Args {
   351  		if a != load && a.Block.ID == target.Block.ID {
   352  			args = append(args, a)
   353  		}
   354  	}
   355  
   356  	// memPreds contains memory states known to be predecessors of load's
   357  	// memory state. It is lazily initialized.
   358  	var memPreds map[*Value]bool
   359  	for i := 0; len(args) > 0; i++ {
   360  		const limit = 100
   361  		if i >= limit {
   362  			// Give up if we have done a lot of iterations.
   363  			return false
   364  		}
   365  		v := args[len(args)-1]
   366  		args = args[:len(args)-1]
   367  		if target.Block.ID != v.Block.ID {
   368  			// Since target and load are in the same block
   369  			// we can stop searching when we leave the block.
   370  			continue
   371  		}
   372  		if v.Op == OpPhi {
   373  			// A Phi implies we have reached the top of the block.
   374  			// The memory phi, if it exists, is always
   375  			// the first logical store in the block.
   376  			continue
   377  		}
   378  		if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
   379  			// We could handle this situation, but it is likely
   380  			// to be very rare.
   381  			return false
   382  		}
   383  		if v.Op.SymEffect()&SymAddr != 0 {
   384  			// This case prevents an operation that calculates the
   385  			// address of a local variable from being forced to schedule
   386  			// before its corresponding VarDef.
   387  			// See issue 28445.
   388  			//   v1 = LOAD ...
   389  			//   v2 = VARDEF
   390  			//   v3 = LEAQ
   391  			//   v4 = CMPQ v1 v3
   392  			// We don't want to combine the CMPQ with the load, because
   393  			// that would force the CMPQ to schedule before the VARDEF, which
   394  			// in turn requires the LEAQ to schedule before the VARDEF.
   395  			return false
   396  		}
   397  		if v.Type.IsMemory() {
   398  			if memPreds == nil {
   399  				// Initialise a map containing memory states
   400  				// known to be predecessors of load's memory
   401  				// state.
   402  				memPreds = make(map[*Value]bool)
   403  				m := mem
   404  				const limit = 50
   405  				for i := 0; i < limit; i++ {
   406  					if m.Op == OpPhi {
   407  						// The memory phi, if it exists, is always
   408  						// the first logical store in the block.
   409  						break
   410  					}
   411  					if m.Block.ID != target.Block.ID {
   412  						break
   413  					}
   414  					if !m.Type.IsMemory() {
   415  						break
   416  					}
   417  					memPreds[m] = true
   418  					if len(m.Args) == 0 {
   419  						break
   420  					}
   421  					m = m.MemoryArg()
   422  				}
   423  			}
   424  
   425  			// We can merge if v is a predecessor of mem.
   426  			//
   427  			// For example, we can merge load into target in the
   428  			// following scenario:
   429  			//      x = read ... v
   430  			//    mem = write ... v
   431  			//   load = read ... mem
   432  			// target = add x load
   433  			if memPreds[v] {
   434  				continue
   435  			}
   436  			return false
   437  		}
   438  		if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
   439  			// If v takes mem as an input then we know mem
   440  			// is valid at this point.
   441  			continue
   442  		}
   443  		for _, a := range v.Args {
   444  			if target.Block.ID == a.Block.ID {
   445  				args = append(args, a)
   446  			}
   447  		}
   448  	}
   449  
   450  	return true
   451  }
   452  
   453  // isSameCall reports whether aux describes a call to the function with the given name.
   454  func isSameCall(aux Aux, name string) bool {
   455  	fn := aux.(*AuxCall).Fn
   456  	return fn != nil && fn.String() == name
   457  }
   458  
   459  func isMalloc(aux Aux) bool {
   460  	return isNewObject(aux) || isSpecializedMalloc(aux)
   461  }
   462  
   463  func isNewObject(aux Aux) bool {
   464  	fn := aux.(*AuxCall).Fn
   465  	return fn != nil && fn.String() == "runtime.newobject"
   466  }
   467  
   468  func isSpecializedMalloc(aux Aux) bool {
   469  	fn := aux.(*AuxCall).Fn
   470  	if fn == nil {
   471  		return false
   472  	}
   473  	name := fn.String()
   474  	return strings.HasPrefix(name, "runtime.mallocgcSmallNoScanSC") ||
   475  		strings.HasPrefix(name, "runtime.mallocgcSmallScanNoHeaderSC") ||
   476  		strings.HasPrefix(name, "runtime.mallocTiny")
   477  }
   478  
   479  // canLoadUnaligned reports whether the architecture supports unaligned load operations.
   480  func canLoadUnaligned(c *Config) bool {
   481  	return c.ctxt.Arch.Alignment == 1
   482  }
   483  
   484  // nlzX returns the number of leading zeros.
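        // For example, nlz32(0x0000ff00) == 16 and nlz8(1) == 7.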
   485  func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
   486  func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
   487  func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
   488  func nlz8(x int8) int   { return bits.LeadingZeros8(uint8(x)) }
   489  
   490  // ntzX returns the number of trailing zeros.
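        // For example, ntz32(0x0000ff00) == 8 and ntz64(0) == 64.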
   491  func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
   492  func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
   493  func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
   494  func ntz8(x int8) int   { return bits.TrailingZeros8(uint8(x)) }
   495  
   496  // oneBit reports whether x contains exactly one set bit.
   497  func oneBit[T int8 | int16 | int32 | int64](x T) bool {
   498  	return x&(x-1) == 0 && x != 0
   499  }
   500  
   501  // nto returns the number of trailing ones.
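        // For example, nto(0b0111) == 3 and nto(-1) == 64.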
   502  func nto(x int64) int64 {
   503  	return int64(ntz64(^x))
   504  }
   505  
   506  // logX returns the base-2 logarithm of n.
   507  // n must be a positive power of 2 (isPowerOfTwo returns true).
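        // For example, log32(8) == 3 and log64(1) == 0.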
   508  func log8(n int8) int64   { return log8u(uint8(n)) }
   509  func log16(n int16) int64 { return log16u(uint16(n)) }
   510  func log32(n int32) int64 { return log32u(uint32(n)) }
   511  func log64(n int64) int64 { return log64u(uint64(n)) }
   512  
   513  // logXu returns the base-2 logarithm of n.
   514  // n must be a power of 2 (isUnsignedPowerOfTwo returns true).
   515  func log8u(n uint8) int64   { return int64(bits.Len8(n)) - 1 }
   516  func log16u(n uint16) int64 { return int64(bits.Len16(n)) - 1 }
   517  func log32u(n uint32) int64 { return int64(bits.Len32(n)) - 1 }
   518  func log64u(n uint64) int64 { return int64(bits.Len64(n)) - 1 }
   519  
   520  // isPowerOfTwo reports whether n is a positive power of 2.
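        // For example, isPowerOfTwo(int64(8)) is true, while isPowerOfTwo(int64(0))
        // and isPowerOfTwo(int64(-8)) are false.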
   521  func isPowerOfTwo[T int8 | int16 | int32 | int64](n T) bool {
   522  	return n > 0 && n&(n-1) == 0
   523  }
   524  
   525  // isUnsignedPowerOfTwo reports whether n is an unsigned power of 2.
   526  func isUnsignedPowerOfTwo[T uint8 | uint16 | uint32 | uint64](n T) bool {
   527  	return n != 0 && n&(n-1) == 0
   528  }
   529  
   530  // is32Bit reports whether n can be represented as a signed 32 bit integer.
   531  func is32Bit(n int64) bool {
   532  	return n == int64(int32(n))
   533  }
   534  
   535  // is16Bit reports whether n can be represented as a signed 16 bit integer.
   536  func is16Bit(n int64) bool {
   537  	return n == int64(int16(n))
   538  }
   539  
   540  // is8Bit reports whether n can be represented as a signed 8 bit integer.
   541  func is8Bit(n int64) bool {
   542  	return n == int64(int8(n))
   543  }
   544  
   545  // isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
   546  func isU8Bit(n int64) bool {
   547  	return n == int64(uint8(n))
   548  }
   549  
   550  // is12Bit reports whether n can be represented as a signed 12 bit integer.
   551  func is12Bit(n int64) bool {
   552  	return -(1<<11) <= n && n < (1<<11)
   553  }
   554  
   555  // isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
   556  func isU12Bit(n int64) bool {
   557  	return 0 <= n && n < (1<<12)
   558  }
   559  
   560  // isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
   561  func isU16Bit(n int64) bool {
   562  	return n == int64(uint16(n))
   563  }
   564  
   565  // isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
   566  func isU32Bit(n int64) bool {
   567  	return n == int64(uint32(n))
   568  }
   569  
   570  // is20Bit reports whether n can be represented as a signed 20 bit integer.
   571  func is20Bit(n int64) bool {
   572  	return -(1<<19) <= n && n < (1<<19)
   573  }
   574  
   575  // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
   576  func b2i(b bool) int64 {
   577  	if b {
   578  		return 1
   579  	}
   580  	return 0
   581  }
   582  
   583  // b2i32 translates a boolean value to 0 or 1.
   584  func b2i32(b bool) int32 {
   585  	if b {
   586  		return 1
   587  	}
   588  	return 0
   589  }
   590  
   591  func canMulStrengthReduce(config *Config, x int64) bool {
   592  	_, ok := config.mulRecipes[x]
   593  	return ok
   594  }
   595  func canMulStrengthReduce32(config *Config, x int32) bool {
   596  	_, ok := config.mulRecipes[int64(x)]
   597  	return ok
   598  }
   599  
   600  // mulStrengthReduce returns v*x evaluated at the location
   601  // (block and source position) of m.
   602  // canMulStrengthReduce must have returned true.
   603  func mulStrengthReduce(m *Value, v *Value, x int64) *Value {
   604  	return v.Block.Func.Config.mulRecipes[x].build(m, v)
   605  }
   606  
   607  // mulStrengthReduce32 returns v*x evaluated at the location
   608  // (block and source position) of m.
   609  // canMulStrengthReduce32 must have returned true.
   610  // The upper 32 bits of m might be set to junk.
   611  func mulStrengthReduce32(m *Value, v *Value, x int32) *Value {
   612  	return v.Block.Func.Config.mulRecipes[int64(x)].build(m, v)
   613  }
   614  
   615  // shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
   616  // A shift is bounded if it is shifting by less than the width of the shifted value.
   617  func shiftIsBounded(v *Value) bool {
   618  	return v.AuxInt != 0
   619  }
   620  
   621  // canonLessThan reports whether x is "ordered" less than y, for purposes of normalizing
   622  // generated code as much as possible.
   623  func canonLessThan(x, y *Value) bool {
   624  	if x.Op != y.Op {
   625  		return x.Op < y.Op
   626  	}
   627  	if !x.Pos.SameFileAndLine(y.Pos) {
   628  		return x.Pos.Before(y.Pos)
   629  	}
   630  	return x.ID < y.ID
   631  }
   632  
   633  // truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
   634  // of the mantissa. It will panic if the truncation results in lost information.
   635  func truncate64Fto32F(f float64) float32 {
   636  	if !isExactFloat32(f) {
   637  		panic("truncate64Fto32F: truncation is not exact")
   638  	}
   639  	if !math.IsNaN(f) {
   640  		return float32(f)
   641  	}
   642  	// NaN bit patterns aren't necessarily preserved across conversion
   643  	// instructions so we need to do the conversion manually.
   644  	b := math.Float64bits(f)
   645  	m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
   646  	//          | sign                  | exponent   | mantissa       |
   647  	r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
   648  	return math.Float32frombits(r)
   649  }
   650  
   651  // DivisionNeedsFixUp reports whether the division needs fix-up code.
   652  func DivisionNeedsFixUp(v *Value) bool {
   653  	return v.AuxInt == 0
   654  }
   655  
   656  // auxTo32F decodes a float32 from the AuxInt value provided.
   657  func auxTo32F(i int64) float32 {
   658  	return truncate64Fto32F(math.Float64frombits(uint64(i)))
   659  }
   660  
   661  func auxIntToBool(i int64) bool {
   662  	if i == 0 {
   663  		return false
   664  	}
   665  	return true
   666  }
   667  func auxIntToInt8(i int64) int8 {
   668  	return int8(i)
   669  }
   670  func auxIntToInt16(i int64) int16 {
   671  	return int16(i)
   672  }
   673  func auxIntToInt32(i int64) int32 {
   674  	return int32(i)
   675  }
   676  func auxIntToInt64(i int64) int64 {
   677  	return i
   678  }
   679  func auxIntToUint8(i int64) uint8 {
   680  	return uint8(i)
   681  }
   682  func auxIntToFloat32(i int64) float32 {
   683  	return float32(math.Float64frombits(uint64(i)))
   684  }
   685  func auxIntToFloat64(i int64) float64 {
   686  	return math.Float64frombits(uint64(i))
   687  }
   688  func auxIntToValAndOff(i int64) ValAndOff {
   689  	return ValAndOff(i)
   690  }
   691  func auxIntToArm64BitField(i int64) arm64BitField {
   692  	return arm64BitField(i)
   693  }
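        // auxIntToArm64ConditionalParams unpacks the encoding produced by
        // arm64ConditionalParamsToAuxInt: bits 0-15 hold the condition Op,
        // bits 16-19 the nzcv flags, bits 20-24 the constant value, and bit 25
        // the ind flag.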
   694  func auxIntToArm64ConditionalParams(i int64) arm64ConditionalParams {
   695  	var params arm64ConditionalParams
   696  	params.cond = Op(i & 0xffff)
   697  	i >>= 16
   698  	params.nzcv = uint8(i & 0x0f)
   699  	i >>= 4
   700  	params.constValue = uint8(i & 0x1f)
   701  	i >>= 5
   702  	params.ind = i == 1
   703  	return params
   704  }
   705  func auxIntToFlagConstant(x int64) flagConstant {
   706  	return flagConstant(x)
   707  }
   708  
   709  func auxIntToOp(cc int64) Op {
   710  	return Op(cc)
   711  }
   712  
   713  func boolToAuxInt(b bool) int64 {
   714  	if b {
   715  		return 1
   716  	}
   717  	return 0
   718  }
   719  func int8ToAuxInt(i int8) int64 {
   720  	return int64(i)
   721  }
   722  func int16ToAuxInt(i int16) int64 {
   723  	return int64(i)
   724  }
   725  func int32ToAuxInt(i int32) int64 {
   726  	return int64(i)
   727  }
   728  func int64ToAuxInt(i int64) int64 {
   729  	return i
   730  }
   731  func uint8ToAuxInt(i uint8) int64 {
   732  	return int64(int8(i))
   733  }
   734  func float32ToAuxInt(f float32) int64 {
   735  	return int64(math.Float64bits(float64(f)))
   736  }
   737  func float64ToAuxInt(f float64) int64 {
   738  	return int64(math.Float64bits(f))
   739  }
   740  func valAndOffToAuxInt(v ValAndOff) int64 {
   741  	return int64(v)
   742  }
   743  func arm64BitFieldToAuxInt(v arm64BitField) int64 {
   744  	return int64(v)
   745  }
   746  func arm64ConditionalParamsToAuxInt(v arm64ConditionalParams) int64 {
   747  	if v.cond&^0xffff != 0 {
   748  		panic("condition value exceeds 16 bits")
   749  	}
   750  
   751  	var i int64
   752  	if v.ind {
   753  		i = 1 << 25
   754  	}
   755  	i |= int64(v.constValue) << 20
   756  	i |= int64(v.nzcv) << 16
   757  	i |= int64(v.cond)
   758  	return i
   759  }
   760  func flagConstantToAuxInt(x flagConstant) int64 {
   761  	return int64(x)
   762  }
   763  
   764  func opToAuxInt(o Op) int64 {
   765  	return int64(o)
   766  }
   767  
   768  // Aux is an interface to hold miscellaneous data in Blocks and Values.
   769  type Aux interface {
   770  	CanBeAnSSAAux()
   771  }
   772  
   773  // for now only used to mark moves that need to avoid clobbering flags
   774  type auxMark bool
   775  
   776  func (auxMark) CanBeAnSSAAux() {}
   777  
   778  var AuxMark auxMark
   779  
   780  // stringAux wraps string values for use in Aux.
   781  type stringAux string
   782  
   783  func (stringAux) CanBeAnSSAAux() {}
   784  
   785  func auxToString(i Aux) string {
   786  	return string(i.(stringAux))
   787  }
   788  func auxToSym(i Aux) Sym {
   789  	// TODO: kind of a hack - allows nil interface through
   790  	s, _ := i.(Sym)
   791  	return s
   792  }
   793  func auxToType(i Aux) *types.Type {
   794  	return i.(*types.Type)
   795  }
   796  func auxToCall(i Aux) *AuxCall {
   797  	return i.(*AuxCall)
   798  }
   799  func auxToS390xCCMask(i Aux) s390x.CCMask {
   800  	return i.(s390x.CCMask)
   801  }
   802  func auxToS390xRotateParams(i Aux) s390x.RotateParams {
   803  	return i.(s390x.RotateParams)
   804  }
   805  
   806  func StringToAux(s string) Aux {
   807  	return stringAux(s)
   808  }
   809  func symToAux(s Sym) Aux {
   810  	return s
   811  }
   812  func callToAux(s *AuxCall) Aux {
   813  	return s
   814  }
   815  func typeToAux(t *types.Type) Aux {
   816  	return t
   817  }
   818  func s390xCCMaskToAux(c s390x.CCMask) Aux {
   819  	return c
   820  }
   821  func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
   822  	return r
   823  }
   824  
   825  // uaddOvf reports whether unsigned a+b would overflow.
   826  func uaddOvf(a, b int64) bool {
   827  	return uint64(a)+uint64(b) < uint64(a)
   828  }
   829  
   830  func devirtLECall(v *Value, sym *obj.LSym) *Value {
   831  	v.Op = OpStaticLECall
   832  	auxcall := v.Aux.(*AuxCall)
   833  	auxcall.Fn = sym
   834  	// Remove first arg
   835  	v.Args[0].Uses--
   836  	copy(v.Args[0:], v.Args[1:])
   837  	v.Args[len(v.Args)-1] = nil // aid GC
   838  	v.Args = v.Args[:len(v.Args)-1]
   839  	if f := v.Block.Func; f.pass.debug > 0 {
   840  		f.Warnl(v.Pos, "de-virtualizing call")
   841  	}
   842  	return v
   843  }
   844  
   845  // isSamePtr reports whether p1 and p2 point to the same address.
   846  func isSamePtr(p1, p2 *Value) bool {
   847  	if p1 == p2 {
   848  		return true
   849  	}
   850  	if p1.Op != p2.Op {
   851  		for p1.Op == OpOffPtr && p1.AuxInt == 0 {
   852  			p1 = p1.Args[0]
   853  		}
   854  		for p2.Op == OpOffPtr && p2.AuxInt == 0 {
   855  			p2 = p2.Args[0]
   856  		}
   857  		if p1 == p2 {
   858  			return true
   859  		}
   860  		if p1.Op != p2.Op {
   861  			return false
   862  		}
   863  	}
   864  	switch p1.Op {
   865  	case OpOffPtr:
   866  		return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
   867  	case OpAddr, OpLocalAddr:
   868  		return p1.Aux == p2.Aux
   869  	case OpAddPtr:
   870  		return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
   871  	}
   872  	return false
   873  }
   874  
   875  func isStackPtr(v *Value) bool {
   876  	for v.Op == OpOffPtr || v.Op == OpAddPtr {
   877  		v = v.Args[0]
   878  	}
   879  	return v.Op == OpSP || v.Op == OpLocalAddr
   880  }
   881  
   882  // disjoint reports whether the memory region specified by [p1:p1+n1)
   883  // does not overlap with [p2:p2+n2).
   884  // A return value of false does not imply the regions overlap.
   885  func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
   886  	if n1 == 0 || n2 == 0 {
   887  		return true
   888  	}
   889  	if p1 == p2 {
   890  		return false
   891  	}
   892  	baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
   893  		base, offset = ptr, 0
   894  		for base.Op == OpOffPtr {
   895  			offset += base.AuxInt
   896  			base = base.Args[0]
   897  		}
   898  		if opcodeTable[base.Op].nilCheck {
   899  			base = base.Args[0]
   900  		}
   901  		return base, offset
   902  	}
   903  
   904  	// Run types-based analysis
   905  	if disjointTypes(p1.Type, p2.Type) {
   906  		return true
   907  	}
   908  
   909  	p1, off1 := baseAndOffset(p1)
   910  	p2, off2 := baseAndOffset(p2)
   911  	if isSamePtr(p1, p2) {
   912  		return !overlap(off1, n1, off2, n2)
   913  	}
   914  	// p1 and p2 are not the same, so if they are both OpAddrs then
   915  	// they point to different variables.
   916  	// If one pointer is on the stack and the other is an argument
   917  	// then they can't overlap.
   918  	switch p1.Op {
   919  	case OpAddr, OpLocalAddr:
   920  		if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
   921  			return true
   922  		}
   923  		return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
   924  	case OpArg, OpArgIntReg:
   925  		if p2.Op == OpSP || p2.Op == OpLocalAddr {
   926  			return true
   927  		}
   928  	case OpSP:
   929  		return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
   930  	}
   931  	return false
   932  }
   933  
   934  // disjointTypes reports whether a memory region pointed to by a pointer of type
   935  // t1 does not overlap with a memory region pointed to by a pointer of type t2 --
   936  // based on type aliasing rules.
   937  func disjointTypes(t1 *types.Type, t2 *types.Type) bool {
   938  	// Unsafe pointer can alias with anything.
   939  	if t1.IsUnsafePtr() || t2.IsUnsafePtr() {
   940  		return false
   941  	}
   942  
   943  	if !t1.IsPtr() || !t2.IsPtr() {
   944  		panic("disjointTypes: one of arguments is not a pointer")
   945  	}
   946  
   947  	t1 = t1.Elem()
   948  	t2 = t2.Elem()
   949  
   950  	// Not-in-heap types are not supported -- they are rare and unimportant; also,
   951  	// the type.HasPointers check doesn't work correctly for them.
   952  	if t1.NotInHeap() || t2.NotInHeap() {
   953  		return false
   954  	}
   955  
   956  	isPtrShaped := func(t *types.Type) bool { return int(t.Size()) == types.PtrSize && t.HasPointers() }
   957  
   958  	// Pointers and non-pointers are disjoint (https://pkg.go.dev/unsafe#Pointer).
   959  	if (isPtrShaped(t1) && !t2.HasPointers()) ||
   960  		(isPtrShaped(t2) && !t1.HasPointers()) {
   961  		return true
   962  	}
   963  
   964  	return false
   965  }
   966  
   967  // moveSize returns the number of bytes an aligned MOV instruction moves.
   968  func moveSize(align int64, c *Config) int64 {
   969  	switch {
   970  	case align%8 == 0 && c.PtrSize == 8:
   971  		return 8
   972  	case align%4 == 0:
   973  		return 4
   974  	case align%2 == 0:
   975  		return 2
   976  	}
   977  	return 1
   978  }
   979  
   980  // mergePoint finds a block among a's blocks which dominates b and is itself
   981  // dominated by all of a's blocks. Returns nil if it can't find one.
   982  // Might return nil even if one does exist.
   983  func mergePoint(b *Block, a ...*Value) *Block {
   984  	// Walk backward from b looking for one of the a's blocks.
   985  
   986  	// Max distance
   987  	d := 100
   988  
   989  	for d > 0 {
   990  		for _, x := range a {
   991  			if b == x.Block {
   992  				goto found
   993  			}
   994  		}
   995  		if len(b.Preds) > 1 {
   996  			// Don't know which way to go back. Abort.
   997  			return nil
   998  		}
   999  		b = b.Preds[0].b
  1000  		d--
  1001  	}
  1002  	return nil // too far away
  1003  found:
  1004  	// At this point, r is the first value in a that we find by walking backwards.
  1005  	// if we return anything, r will be it.
  1006  	r := b
  1007  
  1008  	// Keep going, counting the other a's that we find. They must all dominate r.
  1009  	na := 0
  1010  	for d > 0 {
  1011  		for _, x := range a {
  1012  			if b == x.Block {
  1013  				na++
  1014  			}
  1015  		}
  1016  		if na == len(a) {
  1017  			// Found all of a in a backwards walk. We can return r.
  1018  			return r
  1019  		}
  1020  		if len(b.Preds) > 1 {
  1021  			return nil
  1022  		}
  1023  		b = b.Preds[0].b
  1024  		d--
  1025  
  1026  	}
  1027  	return nil // too far away
  1028  }
  1029  
  1030  // clobber invalidates values. Returns true.
  1031  // clobber is used by rewrite rules to:
  1032  //
  1033  //	A) make sure the values are really dead and never used again.
  1034  //	B) decrement use counts of the values' args.
  1035  func clobber(vv ...*Value) bool {
  1036  	for _, v := range vv {
  1037  		v.reset(OpInvalid)
  1038  		// Note: leave v.Block intact.  The Block field is used after clobber.
  1039  	}
  1040  	return true
  1041  }
  1042  
  1043  // resetCopy resets v to be a copy of arg.
  1044  // Always returns true.
  1045  func resetCopy(v *Value, arg *Value) bool {
  1046  	v.reset(OpCopy)
  1047  	v.AddArg(arg)
  1048  	return true
  1049  }
  1050  
  1051  // clobberIfDead resets v when use count is 1. Returns true.
  1052  // clobberIfDead is used by rewrite rules to decrement
  1053  // use counts of v's args when v is dead and never used.
  1054  func clobberIfDead(v *Value) bool {
  1055  	if v.Uses == 1 {
  1056  		v.reset(OpInvalid)
  1057  	}
  1058  	// Note: leave v.Block intact.  The Block field is used after clobberIfDead.
  1059  	return true
  1060  }
  1061  
  1062  // noteRule is an easy way to track if a rule is matched when writing
  1063  // new ones.  Make the rule of interest also conditional on
  1064  //
  1065  //	noteRule("note to self: rule of interest matched")
  1066  //
  1067  // and that message will print when the rule matches.
  1068  func noteRule(s string) bool {
  1069  	fmt.Println(s)
  1070  	return true
  1071  }
  1072  
  1073  // countRule increments Func.ruleMatches[key].
  1074  // If Func.ruleMatches is non-nil at the end
  1075  // of compilation, it will be printed to stdout.
  1076  // This is intended to make it easier to find functions that
  1077  // contain lots of rule matches when developing new rules.
  1078  func countRule(v *Value, key string) bool {
  1079  	f := v.Block.Func
  1080  	if f.ruleMatches == nil {
  1081  		f.ruleMatches = make(map[string]int)
  1082  	}
  1083  	f.ruleMatches[key]++
  1084  	return true
  1085  }
  1086  
  1087  // warnRule generates compiler debug output with string s when
  1088  // v is not in autogenerated code, cond is true and the rule has fired.
  1089  func warnRule(cond bool, v *Value, s string) bool {
  1090  	if pos := v.Pos; pos.Line() > 1 && cond {
  1091  		v.Block.Func.Warnl(pos, s)
  1092  	}
  1093  	return true
  1094  }
  1095  
  1096  // for a pseudo-op like (LessThan x), extract x.
  1097  func flagArg(v *Value) *Value {
  1098  	if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
  1099  		return nil
  1100  	}
  1101  	return v.Args[0]
  1102  }
  1103  
  1104  // arm64Negate finds the complement to an ARM64 condition code,
  1105  // for example !Equal -> NotEqual or !LessThan -> GreaterEqual
  1106  //
  1107  // For floating point, it's more subtle because NaN is unordered. We do
  1108  // !LessThanF -> NotLessThanF, the latter takes care of NaNs.
  1109  func arm64Negate(op Op) Op {
  1110  	switch op {
  1111  	case OpARM64LessThan:
  1112  		return OpARM64GreaterEqual
  1113  	case OpARM64LessThanU:
  1114  		return OpARM64GreaterEqualU
  1115  	case OpARM64GreaterThan:
  1116  		return OpARM64LessEqual
  1117  	case OpARM64GreaterThanU:
  1118  		return OpARM64LessEqualU
  1119  	case OpARM64LessEqual:
  1120  		return OpARM64GreaterThan
  1121  	case OpARM64LessEqualU:
  1122  		return OpARM64GreaterThanU
  1123  	case OpARM64GreaterEqual:
  1124  		return OpARM64LessThan
  1125  	case OpARM64GreaterEqualU:
  1126  		return OpARM64LessThanU
  1127  	case OpARM64Equal:
  1128  		return OpARM64NotEqual
  1129  	case OpARM64NotEqual:
  1130  		return OpARM64Equal
  1131  	case OpARM64LessThanF:
  1132  		return OpARM64NotLessThanF
  1133  	case OpARM64NotLessThanF:
  1134  		return OpARM64LessThanF
  1135  	case OpARM64LessEqualF:
  1136  		return OpARM64NotLessEqualF
  1137  	case OpARM64NotLessEqualF:
  1138  		return OpARM64LessEqualF
  1139  	case OpARM64GreaterThanF:
  1140  		return OpARM64NotGreaterThanF
  1141  	case OpARM64NotGreaterThanF:
  1142  		return OpARM64GreaterThanF
  1143  	case OpARM64GreaterEqualF:
  1144  		return OpARM64NotGreaterEqualF
  1145  	case OpARM64NotGreaterEqualF:
  1146  		return OpARM64GreaterEqualF
  1147  	default:
  1148  		panic("unreachable")
  1149  	}
  1150  }
  1151  
  1152  // arm64Invert evaluates (InvertFlags op), which
  1153  // is the same as altering the condition codes such
  1154  // that the same result would be produced if the arguments
  1155  // to the flag-generating instruction were reversed, e.g.
  1156  // (InvertFlags (CMP x y)) -> (CMP y x)
  1157  func arm64Invert(op Op) Op {
  1158  	switch op {
  1159  	case OpARM64LessThan:
  1160  		return OpARM64GreaterThan
  1161  	case OpARM64LessThanU:
  1162  		return OpARM64GreaterThanU
  1163  	case OpARM64GreaterThan:
  1164  		return OpARM64LessThan
  1165  	case OpARM64GreaterThanU:
  1166  		return OpARM64LessThanU
  1167  	case OpARM64LessEqual:
  1168  		return OpARM64GreaterEqual
  1169  	case OpARM64LessEqualU:
  1170  		return OpARM64GreaterEqualU
  1171  	case OpARM64GreaterEqual:
  1172  		return OpARM64LessEqual
  1173  	case OpARM64GreaterEqualU:
  1174  		return OpARM64LessEqualU
  1175  	case OpARM64Equal, OpARM64NotEqual:
  1176  		return op
  1177  	case OpARM64LessThanF:
  1178  		return OpARM64GreaterThanF
  1179  	case OpARM64GreaterThanF:
  1180  		return OpARM64LessThanF
  1181  	case OpARM64LessEqualF:
  1182  		return OpARM64GreaterEqualF
  1183  	case OpARM64GreaterEqualF:
  1184  		return OpARM64LessEqualF
  1185  	case OpARM64NotLessThanF:
  1186  		return OpARM64NotGreaterThanF
  1187  	case OpARM64NotGreaterThanF:
  1188  		return OpARM64NotLessThanF
  1189  	case OpARM64NotLessEqualF:
  1190  		return OpARM64NotGreaterEqualF
  1191  	case OpARM64NotGreaterEqualF:
  1192  		return OpARM64NotLessEqualF
  1193  	default:
  1194  		panic("unreachable")
  1195  	}
  1196  }
  1197  
  1198  // evaluate an ARM64 op against a flags value
  1199  // that is potentially constant; return 1 for true,
  1200  // -1 for false, and 0 for not constant.
  1201  func ccARM64Eval(op Op, flags *Value) int {
  1202  	fop := flags.Op
  1203  	if fop == OpARM64InvertFlags {
  1204  		return -ccARM64Eval(op, flags.Args[0])
  1205  	}
  1206  	if fop != OpARM64FlagConstant {
  1207  		return 0
  1208  	}
  1209  	fc := flagConstant(flags.AuxInt)
  1210  	b2i := func(b bool) int {
  1211  		if b {
  1212  			return 1
  1213  		}
  1214  		return -1
  1215  	}
  1216  	switch op {
  1217  	case OpARM64Equal:
  1218  		return b2i(fc.eq())
  1219  	case OpARM64NotEqual:
  1220  		return b2i(fc.ne())
  1221  	case OpARM64LessThan:
  1222  		return b2i(fc.lt())
  1223  	case OpARM64LessThanU:
  1224  		return b2i(fc.ult())
  1225  	case OpARM64GreaterThan:
  1226  		return b2i(fc.gt())
  1227  	case OpARM64GreaterThanU:
  1228  		return b2i(fc.ugt())
  1229  	case OpARM64LessEqual:
  1230  		return b2i(fc.le())
  1231  	case OpARM64LessEqualU:
  1232  		return b2i(fc.ule())
  1233  	case OpARM64GreaterEqual:
  1234  		return b2i(fc.ge())
  1235  	case OpARM64GreaterEqualU:
  1236  		return b2i(fc.uge())
  1237  	}
  1238  	return 0
  1239  }
  1240  
  1241  // logRule logs the use of the rule s. This will only be enabled if
  1242  // rewrite rules were generated with the -log option, see _gen/rulegen.go.
  1243  func logRule(s string) {
  1244  	if ruleFile == nil {
  1245  		// Open a log file to write log to. We open in append
  1246  		// mode because all.bash runs the compiler lots of times,
  1247  		// and we want the concatenation of all of those logs.
  1248  		// This means, of course, that users need to rm the old log
  1249  		// to get fresh data.
  1250  		// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
  1251  		w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
  1252  			os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
  1253  		if err != nil {
  1254  			panic(err)
  1255  		}
  1256  		ruleFile = w
  1257  	}
  1258  	_, err := fmt.Fprintln(ruleFile, s)
  1259  	if err != nil {
  1260  		panic(err)
  1261  	}
  1262  }
  1263  
  1264  var ruleFile io.Writer
  1265  
  1266  func isConstZero(v *Value) bool {
  1267  	switch v.Op {
  1268  	case OpConstNil:
  1269  		return true
  1270  	case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
  1271  		return v.AuxInt == 0
  1272  	case OpStringMake, OpIMake, OpComplexMake:
  1273  		return isConstZero(v.Args[0]) && isConstZero(v.Args[1])
  1274  	case OpSliceMake:
  1275  		return isConstZero(v.Args[0]) && isConstZero(v.Args[1]) && isConstZero(v.Args[2])
  1276  	case OpStringPtr, OpStringLen, OpSlicePtr, OpSliceLen, OpSliceCap, OpITab, OpIData, OpComplexReal, OpComplexImag:
  1277  		return isConstZero(v.Args[0])
  1278  	}
  1279  	return false
  1280  }
  1281  
  1282  // reciprocalExact64 reports whether 1/c is exactly representable.
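        // For example, reciprocalExact64(4) is true (1/4 == 0.25 exactly), while
        // reciprocalExact64(3) is false.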
  1283  func reciprocalExact64(c float64) bool {
  1284  	b := math.Float64bits(c)
  1285  	man := b & (1<<52 - 1)
  1286  	if man != 0 {
  1287  		return false // not a power of 2, denormal, or NaN
  1288  	}
  1289  	exp := b >> 52 & (1<<11 - 1)
  1290  	// exponent bias is 0x3ff.  So taking the reciprocal of a number
  1291  	// changes the exponent to 0x7fe-exp.
  1292  	switch exp {
  1293  	case 0:
  1294  		return false // ±0
  1295  	case 0x7ff:
  1296  		return false // ±inf
  1297  	case 0x7fe:
  1298  		return false // exponent is not representable
  1299  	default:
  1300  		return true
  1301  	}
  1302  }
  1303  
  1304  // reciprocalExact32 reports whether 1/c is exactly representable.
  1305  func reciprocalExact32(c float32) bool {
  1306  	b := math.Float32bits(c)
  1307  	man := b & (1<<23 - 1)
  1308  	if man != 0 {
  1309  		return false // not a power of 2, denormal, or NaN
  1310  	}
  1311  	exp := b >> 23 & (1<<8 - 1)
  1312  	// exponent bias is 0x7f.  So taking the reciprocal of a number
  1313  	// changes the exponent to 0xfe-exp.
  1314  	switch exp {
  1315  	case 0:
  1316  		return false // ±0
  1317  	case 0xff:
  1318  		return false // ±inf
  1319  	case 0xfe:
  1320  		return false // exponent is not representable
  1321  	default:
  1322  		return true
  1323  	}
  1324  }
  1325  
  1326  // check if an immediate can be directly encoded into an ARM instruction.
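        // An encodable immediate is an 8-bit value rotated right by an even number
        // of bits; for example, 0xff, 0x3fc, and 0xf000000f are encodable, while
        // 0x101 is not.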
  1327  func isARMImmRot(v uint32) bool {
  1328  	for i := 0; i < 16; i++ {
  1329  		if v&^0xff == 0 {
  1330  			return true
  1331  		}
  1332  		v = v<<2 | v>>30
  1333  	}
  1334  
  1335  	return false
  1336  }
  1337  
  1338  // overlap reports whether the ranges specified by the given offset and
  1339  // size pairs overlap.
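        // For example, overlap(0, 8, 4, 4) is true, while overlap(0, 4, 4, 4) is false.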
  1340  func overlap(offset1, size1, offset2, size2 int64) bool {
  1341  	if offset1 >= offset2 && offset2+size2 > offset1 {
  1342  		return true
  1343  	}
  1344  	if offset2 >= offset1 && offset1+size1 > offset2 {
  1345  		return true
  1346  	}
  1347  	return false
  1348  }
  1349  
  1350  // check if value zeroes out the upper 32 bits of a 64-bit register.
  1351  // depth limits recursion depth. In AMD64.rules 3 is used as the limit,
  1352  // because it catches the same number of cases as 4.
  1353  func zeroUpper32Bits(x *Value, depth int) bool {
  1354  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1355  		// If the value is signed, it might get re-sign-extended
  1356  		// during spill and restore. See issue 68227.
  1357  		return false
  1358  	}
  1359  	switch x.Op {
  1360  	case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
  1361  		OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
  1362  		OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
  1363  		OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
  1364  		OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
  1365  		OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
  1366  		OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
  1367  		OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
  1368  		OpAMD64SHLL, OpAMD64SHLLconst:
  1369  		return true
  1370  	case OpARM64REV16W, OpARM64REVW, OpARM64RBITW, OpARM64CLZW, OpARM64EXTRWconst,
  1371  		OpARM64MULW, OpARM64MNEGW, OpARM64UDIVW, OpARM64DIVW, OpARM64UMODW,
  1372  		OpARM64MADDW, OpARM64MSUBW, OpARM64RORW, OpARM64RORWconst:
  1373  		return true
  1374  	case OpArg: // note: but not ArgIntReg
  1375  		// amd64 always loads args from the stack unsigned.
  1376  		// most other architectures load them sign/zero extended based on the type.
  1377  		return x.Type.Size() == 4 && x.Block.Func.Config.arch == "amd64"
  1378  	case OpPhi, OpSelect0, OpSelect1:
  1379  		// Phis can use each other as arguments; instead of tracking visited values,
  1380  		// just limit the recursion depth.
  1381  		if depth <= 0 {
  1382  			return false
  1383  		}
  1384  		for i := range x.Args {
  1385  			if !zeroUpper32Bits(x.Args[i], depth-1) {
  1386  				return false
  1387  			}
  1388  		}
  1389  		return true
  1390  
  1391  	}
  1392  	return false
  1393  }
  1394  
  1395  // zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits.
  1396  func zeroUpper48Bits(x *Value, depth int) bool {
  1397  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1398  		return false
  1399  	}
  1400  	switch x.Op {
  1401  	case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
  1402  		return true
  1403  	case OpArg: // note: but not ArgIntReg
  1404  		return x.Type.Size() == 2 && x.Block.Func.Config.arch == "amd64"
  1405  	case OpPhi, OpSelect0, OpSelect1:
  1406  		// Phis can use each other as arguments; instead of tracking visited values,
  1407  		// just limit the recursion depth.
  1408  		if depth <= 0 {
  1409  			return false
  1410  		}
  1411  		for i := range x.Args {
  1412  			if !zeroUpper48Bits(x.Args[i], depth-1) {
  1413  				return false
  1414  			}
  1415  		}
  1416  		return true
  1417  
  1418  	}
  1419  	return false
  1420  }
  1421  
  1422  // zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits.
  1423  func zeroUpper56Bits(x *Value, depth int) bool {
  1424  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1425  		return false
  1426  	}
  1427  	switch x.Op {
  1428  	case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
  1429  		return true
  1430  	case OpArg: // note: but not ArgIntReg
  1431  		return x.Type.Size() == 1 && x.Block.Func.Config.arch == "amd64"
  1432  	case OpPhi, OpSelect0, OpSelect1:
  1433  		// Phis can use each other as arguments; instead of tracking visited values,
  1434  		// just limit the recursion depth.
  1435  		if depth <= 0 {
  1436  			return false
  1437  		}
  1438  		for i := range x.Args {
  1439  			if !zeroUpper56Bits(x.Args[i], depth-1) {
  1440  				return false
  1441  			}
  1442  		}
  1443  		return true
  1444  
  1445  	}
  1446  	return false
  1447  }
  1448  
  1449  func isInlinableMemclr(c *Config, sz int64) bool {
  1450  	if sz < 0 {
  1451  		return false
  1452  	}
  1453  	// TODO: expand this check to allow other architectures
  1454  	// see CL 454255 and issue 56997
  1455  	switch c.arch {
  1456  	case "amd64", "arm64":
  1457  		return true
  1458  	case "ppc64le", "ppc64", "loong64":
  1459  		return sz < 512
  1460  	}
  1461  	return false
  1462  }
  1463  
  1464  // isInlinableMemmove reports whether the given arch performs a Move of the given size
  1465  // faster than memmove. It will only return true if replacing the memmove with a Move is
  1466  // safe, either because Move will do all of its loads before any of its stores, or
  1467  // because the arguments are known to be disjoint.
  1468  // This is used as a check for replacing memmove with Move ops.
  1469  func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
  1470  	// It is always safe to convert memmove into Move when its arguments are disjoint.
  1471  	// Move ops may or may not be faster for large sizes depending on how the platform
  1472  	// lowers them, so we only perform this optimization on platforms that we know to
  1473  	// have fast Move ops.
  1474  	switch c.arch {
  1475  	case "amd64":
  1476  		return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
  1477  	case "arm64":
  1478  		return sz <= 64 || (sz <= 1024 && disjoint(dst, sz, src, sz))
  1479  	case "386":
  1480  		return sz <= 8
  1481  	case "s390x", "ppc64", "ppc64le":
  1482  		return sz <= 8 || disjoint(dst, sz, src, sz)
  1483  	case "arm", "loong64", "mips", "mips64", "mipsle", "mips64le":
  1484  		return sz <= 4
  1485  	}
  1486  	return false
  1487  }
  1488  func IsInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
  1489  	return isInlinableMemmove(dst, src, sz, c)
  1490  }
  1491  
  1492  // logLargeCopy logs the occurrence of a large copy.
  1493  // The best place to do this is in the rewrite rules where the size of the move is easy to find.
  1494  // "Large" is arbitrarily chosen to be 128 bytes; this may change.
  1495  func logLargeCopy(v *Value, s int64) bool {
  1496  	if s < 128 {
  1497  		return true
  1498  	}
  1499  	if logopt.Enabled() {
  1500  		logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
  1501  	}
  1502  	return true
  1503  }
  1504  func LogLargeCopy(funcName string, pos src.XPos, s int64) {
  1505  	if s < 128 {
  1506  		return
  1507  	}
  1508  	if logopt.Enabled() {
  1509  		logopt.LogOpt(pos, "copy", "lower", funcName, fmt.Sprintf("%d bytes", s))
  1510  	}
  1511  }
  1512  
  1513  // hasSmallRotate reports whether the architecture has rotate instructions
  1514  // for sizes < 32-bit.  This is used to decide whether to promote some rotations.
  1515  func hasSmallRotate(c *Config) bool {
  1516  	switch c.arch {
  1517  	case "amd64", "386":
  1518  		return true
  1519  	default:
  1520  		return false
  1521  	}
  1522  }
  1523  
  1524  func supportsPPC64PCRel() bool {
  1525  	// PCRel is currently supported for >= power10, linux only
  1526  	// Internal and external linking supports this on ppc64le; internal linking on ppc64.
  1527  	return buildcfg.GOPPC64 >= 10 && buildcfg.GOOS == "linux"
  1528  }
  1529  
  1530  func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
  1531  	if sh < 0 || sh >= sz {
  1532  		panic("PPC64 shift arg sh out of range")
  1533  	}
  1534  	if mb < 0 || mb >= sz {
  1535  		panic("PPC64 shift arg mb out of range")
  1536  	}
  1537  	if me < 0 || me >= sz {
  1538  		panic("PPC64 shift arg me out of range")
  1539  	}
  1540  	return int32(sh<<16 | mb<<8 | me)
  1541  }
  1542  
  1543  func GetPPC64Shiftsh(auxint int64) int64 {
  1544  	return int64(int8(auxint >> 16))
  1545  }
  1546  
  1547  func GetPPC64Shiftmb(auxint int64) int64 {
  1548  	return int64(int8(auxint >> 8))
  1549  }
  1550  
  1551  // Test if this value can be encoded as a mask for an rlwinm-like
  1552  // operation.  Masks can also extend from the msb and wrap to
  1553  // the lsb.  That is, the valid masks are 32 bit strings
  1554  // of the form: 0..01..10..0 or 1..10..01..1 or 1...1
  1555  //
  1556  // Note: This ignores the upper 32 bits of the input. When a
  1557  // zero extended result is desired (e.g. a 64-bit result), the
  1558  // user must verify the upper 32 bits are 0 and the mask is
  1559  // contiguous (that is, non-wrapping).
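        //
        // For example, 0x00ff0000 and 0xff0000ff are accepted, while 0x0ff00ff0 is not.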
  1560  func isPPC64WordRotateMask(v64 int64) bool {
  1561  	// Isolate rightmost 1 (if none 0) and add.
  1562  	v := uint32(v64)
  1563  	vp := (v & -v) + v
  1564  	// Likewise, for the wrapping case.
  1565  	vn := ^v
  1566  	vpn := (vn & -vn) + vn
  1567  	return (v&vp == 0 || vn&vpn == 0) && v != 0
  1568  }
  1569  
  1570  // Test if this mask is a valid, contiguous bitmask which can be
  1571  // represented by a RLWNM mask and also clears the upper 32 bits
  1572  // of the register.
  1573  func isPPC64WordRotateMaskNonWrapping(v64 int64) bool {
  1574  	// Isolate rightmost 1 (if none 0) and add.
  1575  	v := uint32(v64)
  1576  	vp := (v & -v) + v
  1577  	return (v&vp == 0) && v != 0 && uint64(uint32(v64)) == uint64(v64)
  1578  }
  1579  
  1580  // Compress mask and shift into single value of the form
  1581  // me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can
  1582  // be used to regenerate the input mask.
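        // For example, encodePPC64RotateMask(0, 0x00ff0000, 32) yields 0x20000810
        // (me=16, mb=8, rotate=0, nbits=32).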
  1583  func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
  1584  	var mb, me, mbn, men int
  1585  
  1586  	// Determine boundaries and then decode them
  1587  	if mask == 0 || ^mask == 0 || rotate >= nbits {
  1588  		panic(fmt.Sprintf("invalid PPC64 rotate mask: %x %d %d", uint64(mask), rotate, nbits))
  1589  	} else if nbits == 32 {
  1590  		mb = bits.LeadingZeros32(uint32(mask))
  1591  		me = 32 - bits.TrailingZeros32(uint32(mask))
  1592  		mbn = bits.LeadingZeros32(^uint32(mask))
  1593  		men = 32 - bits.TrailingZeros32(^uint32(mask))
  1594  	} else {
  1595  		mb = bits.LeadingZeros64(uint64(mask))
  1596  		me = 64 - bits.TrailingZeros64(uint64(mask))
  1597  		mbn = bits.LeadingZeros64(^uint64(mask))
  1598  		men = 64 - bits.TrailingZeros64(^uint64(mask))
  1599  	}
  1600  	// Check for a wrapping mask (e.g. bits at 0 and 63)
  1601  	if mb == 0 && me == int(nbits) {
  1602  		// swap the inverted values
  1603  		mb, me = men, mbn
  1604  	}
  1605  
  1606  	return int64(me) | int64(mb<<8) | rotate<<16 | nbits<<24
  1607  }
  1608  
  1609  // Merge (RLDICL [encoded] (SRDconst [s] x)) into (RLDICL [new_encoded] x)
  1610  // SRDconst on PPC64 is an extended mnemonic of RLDICL. If the input to an
  1611  // RLDICL is an SRDconst, and the RLDICL does not rotate its value, the two
  1612  // operations can be combined. This function assumes the two opcodes can
  1613  // be merged, and returns an encoded rotate+mask value of the combined RLDICL.
  1614  func mergePPC64RLDICLandSRDconst(encoded, s int64) int64 {
  1615  	mb := s
  1616  	r := 64 - s
  1617  	// A larger mb is a smaller mask.
  1618  	if (encoded>>8)&0xFF < mb {
  1619  		encoded = (encoded &^ 0xFF00) | mb<<8
  1620  	}
  1621  	// The rotate is expected to be 0.
  1622  	if (encoded & 0xFF0000) != 0 {
  1623  		panic("non-zero rotate")
  1624  	}
  1625  	return encoded | r<<16
  1626  }
  1627  
  1628  // DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask.  The values returned as
  1629  // mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
  1630  func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
  1631  	auxint := uint64(sauxint)
  1632  	rotate = int64((auxint >> 16) & 0xFF)
  1633  	mb = int64((auxint >> 8) & 0xFF)
  1634  	me = int64((auxint >> 0) & 0xFF)
  1635  	nbits := int64((auxint >> 24) & 0xFF)
  1636  	mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
  1637  	if mb > me {
  1638  		mask = ^mask
  1639  	}
  1640  	if nbits == 32 {
  1641  		mask = uint64(uint32(mask))
  1642  	}
  1643  
  1644  	// Fixup ME to match ISA definition.  The second argument to MASK(..,me)
  1645  	// is inclusive.
  1646  	me = (me - 1) & (nbits - 1)
  1647  	return
  1648  }
  1649  
  1650  // This verifies that the mask is a set of
  1651  // consecutive bits including the least
  1652  // significant bit.
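        //
        // Illustrative example: 0x0000FFFF is a valid shift mask
        // (getPPC64ShiftMaskLength returns 16), while 0x0000FF00 is not.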
  1653  func isPPC64ValidShiftMask(v int64) bool {
  1654  	if (v != 0) && ((v+1)&v) == 0 {
  1655  		return true
  1656  	}
  1657  	return false
  1658  }
  1659  
  1660  func getPPC64ShiftMaskLength(v int64) int64 {
  1661  	return int64(bits.Len64(uint64(v)))
  1662  }
  1663  
  1664  // Decompose a shift right into an equivalent rotate/mask,
  1665  // and return mask & m.
  1666  func mergePPC64RShiftMask(m, s, nbits int64) int64 {
  1667  	smask := uint64((1<<uint(nbits))-1) >> uint(s)
  1668  	return m & int64(smask)
  1669  }
  1670  
  1671  // Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
  1672  func mergePPC64AndSrwi(m, s int64) int64 {
  1673  	mask := mergePPC64RShiftMask(m, s, 32)
  1674  	if !isPPC64WordRotateMask(mask) {
  1675  		return 0
  1676  	}
  1677  	return encodePPC64RotateMask((32-s)&31, mask, 32)
  1678  }
  1679  
  1680  // Combine (ANDconst [m] (SRDconst [s])) into (RLWINM [y]) or return 0
  1681  func mergePPC64AndSrdi(m, s int64) int64 {
  1682  	mask := mergePPC64RShiftMask(m, s, 64)
  1683  
  1684  	// Verify the rotate and mask result only uses the lower 32 bits.
  1685  	rv := bits.RotateLeft64(0xFFFFFFFF00000000, -int(s))
  1686  	if rv&uint64(mask) != 0 {
  1687  		return 0
  1688  	}
  1689  	if !isPPC64WordRotateMaskNonWrapping(mask) {
  1690  		return 0
  1691  	}
  1692  	return encodePPC64RotateMask((32-s)&31, mask, 32)
  1693  }
  1694  
  1695  // Combine (ANDconst [m] (SLDconst [s])) into (RLWINM [y]) or return 0
  1696  func mergePPC64AndSldi(m, s int64) int64 {
  1697  	mask := -1 << s & m
  1698  
  1699  	// Verify the rotate and mask result only uses the lower 32 bits.
  1700  	rv := bits.RotateLeft64(0xFFFFFFFF00000000, int(s))
  1701  	if rv&uint64(mask) != 0 {
  1702  		return 0
  1703  	}
  1704  	if !isPPC64WordRotateMaskNonWrapping(mask) {
  1705  		return 0
  1706  	}
  1707  	return encodePPC64RotateMask(s&31, mask, 32)
  1708  }
  1709  
  1710  // Test if a word shift right feeding into a CLRLSLDI can be merged into RLWINM.
  1711  // Return the encoded RLWINM constant, or 0 if they cannot be merged.
  1712  func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
  1713  	mask_1 := uint64(0xFFFFFFFF >> uint(srw))
  1714  	// For CLRLSLDI, it's more convenient to think of it as masking the leftmost bits, then rotating left.
  1715  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(sld))
  1716  
  1717  	// Rewrite mask to apply after the final left shift.
  1718  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
  1719  
  1720  	r_1 := 32 - srw
  1721  	r_2 := GetPPC64Shiftsh(sld)
  1722  	r_3 := (r_1 + r_2) & 31 // This can wrap.
  1723  
  1724  	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
  1725  		return 0
  1726  	}
  1727  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1728  }
  1729  
  1730  // Test if a doubleword shift right feeding into a CLRLSLDI can be merged into RLWINM.
  1731  // Return the encoded RLWINM constant, or 0 if they cannot be merged.
  1732  func mergePPC64ClrlsldiSrd(sld, srd int64) int64 {
  1733  	mask_1 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(srd)
  1734  	// For CLRLSLDI, it's more convenient to think of it as masking the leftmost bits, then rotating left.
  1735  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(sld))
  1736  
  1737  	// Rewrite mask to apply after the final left shift.
  1738  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
  1739  
  1740  	r_1 := 64 - srd
  1741  	r_2 := GetPPC64Shiftsh(sld)
  1742  	r_3 := (r_1 + r_2) & 63 // This can wrap.
  1743  
  1744  	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
  1745  		return 0
  1746  	}
  1747  	// This combine only works when selecting and shifting the lower 32 bits.
  1748  	v1 := bits.RotateLeft64(0xFFFFFFFF00000000, int(r_3))
  1749  	if v1&mask_3 != 0 {
  1750  		return 0
  1751  	}
  1752  	return encodePPC64RotateMask(r_3&31, int64(mask_3), 32)
  1753  }
  1754  
  1755  // Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM.  Return
  1756  // the encoded RLWINM constant, or 0 if they cannot be merged.
  1757  func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
  1758  	r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
  1759  	// For CLRLSLDI, it's more convenient to think of it as masking the leftmost bits, then rotating left.
  1760  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1761  
  1762  	// combine the masks, and adjust for the final left shift.
  1763  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
  1764  	r_2 := GetPPC64Shiftsh(int64(sld))
  1765  	r_3 := (r_1 + r_2) & 31 // This can wrap.
  1766  
  1767  	// Verify the result is still a valid bitmask of <= 32 bits.
  1768  	if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
  1769  		return 0
  1770  	}
  1771  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1772  }
  1773  
  1774  // Test if RLWINM feeding into an ANDconst can be merged. Return the encoded RLWINM constant,
  1775  // or 0 if they cannot be merged.
  1776  func mergePPC64AndRlwinm(mask uint32, rlw int64) int64 {
  1777  	r, _, _, mask_rlw := DecodePPC64RotateMask(rlw)
  1778  	mask_out := (mask_rlw & uint64(mask))
  1779  
  1780  	// Verify the result is still a valid bitmask of <= 32 bits.
  1781  	if !isPPC64WordRotateMask(int64(mask_out)) {
  1782  		return 0
  1783  	}
  1784  	return encodePPC64RotateMask(r, int64(mask_out), 32)
  1785  }
  1786  
  1787  // Test if RLWINM opcode rlw clears the upper 32 bits of the
  1788  // result. Return rlw if it does, 0 otherwise.
  1789  func mergePPC64MovwzregRlwinm(rlw int64) int64 {
  1790  	_, mb, me, _ := DecodePPC64RotateMask(rlw)
  1791  	if mb > me {
  1792  		return 0
  1793  	}
  1794  	return rlw
  1795  }
  1796  
  1797  // Test if AND feeding into an RLWINM can be merged. Return the encoded RLWINM constant,
  1798  // or 0 if they cannot be merged.
  1799  func mergePPC64RlwinmAnd(rlw int64, mask uint32) int64 {
  1800  	r, _, _, mask_rlw := DecodePPC64RotateMask(rlw)
  1801  
  1802  	// Rotate the input mask, combine with the rlwnm mask, and test if it is still a valid rlwinm mask.
  1803  	r_mask := bits.RotateLeft32(mask, int(r))
  1804  
  1805  	mask_out := (mask_rlw & uint64(r_mask))
  1806  
  1807  	// Verify the result is still a valid bitmask of <= 32 bits.
  1808  	if !isPPC64WordRotateMask(int64(mask_out)) {
  1809  		return 0
  1810  	}
  1811  	return encodePPC64RotateMask(r, int64(mask_out), 32)
  1812  }
  1813  
  1814  // Test if RLWINM feeding into SLDconst can be merged. Return the encoded RLWINM constant,
  1815  // or 0 if they cannot be merged.
  1816  func mergePPC64SldiRlwinm(sldi, rlw int64) int64 {
  1817  	r_1, mb, me, mask_1 := DecodePPC64RotateMask(rlw)
  1818  	if mb > me || mb < sldi {
  1819  		// Wrapping masks cannot be merged as the upper 32 bits are effectively undefined in this case.
  1820  		// Likewise, if mb is less than the shift amount, it cannot be merged.
  1821  		return 0
  1822  	}
  1823  	// combine the masks, and adjust for the final left shift.
  1824  	mask_3 := mask_1 << sldi
  1825  	r_3 := (r_1 + sldi) & 31 // This can wrap.
  1826  
  1827  	// Verify the result is still a valid bitmask of <= 32 bits.
  1828  	if uint64(uint32(mask_3)) != mask_3 {
  1829  		return 0
  1830  	}
  1831  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1832  }
  1833  
  1834  // Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
  1835  // or return 0 if they cannot be combined.
  1836  func mergePPC64SldiSrw(sld, srw int64) int64 {
  1837  	if sld > srw || srw >= 32 {
  1838  		return 0
  1839  	}
  1840  	mask_r := uint32(0xFFFFFFFF) >> uint(srw)
  1841  	mask_l := uint32(0xFFFFFFFF) >> uint(sld)
  1842  	mask := (mask_r & mask_l) << uint(sld)
  1843  	return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
  1844  }
  1845  
  1846  // Convert a PPC64 opcode from the Op to OpCC form. This converts (op x y)
  1847  // to (Select0 (opCC x y)) without having to explicitly fixup every user
  1848  // of op.
  1849  //
  1850  // E.g. consider the case:
  1851  // a = (ADD x y)
  1852  // b = (CMPconst [0] a)
  1853  // c = (OR a z)
  1854  //
  1855  // A rule like (CMPconst [0] (ADD x y)) => (CMPconst [0] (Select0 (ADDCC x y)))
  1856  // would produce:
  1857  // a  = (ADD x y)
  1858  // a' = (ADDCC x y)
  1859  // a” = (Select0 a')
  1860  // b  = (CMPconst [0] a”)
  1861  // c  = (OR a z)
  1862  //
  1863  // which makes it impossible to rewrite the second user. Instead the result
  1864  // of this conversion is:
  1865  // a' = (ADDCC x y)
  1866  // a  = (Select0 a')
  1867  // b  = (CMPconst [0] a)
  1868  // c  = (OR a z)
  1869  //
  1870  // Which makes it trivial to rewrite b using a lowering rule.
  1871  func convertPPC64OpToOpCC(op *Value) *Value {
  1872  	ccOpMap := map[Op]Op{
  1873  		OpPPC64ADD:      OpPPC64ADDCC,
  1874  		OpPPC64ADDconst: OpPPC64ADDCCconst,
  1875  		OpPPC64AND:      OpPPC64ANDCC,
  1876  		OpPPC64ANDN:     OpPPC64ANDNCC,
  1877  		OpPPC64ANDconst: OpPPC64ANDCCconst,
  1878  		OpPPC64CNTLZD:   OpPPC64CNTLZDCC,
  1879  		OpPPC64MULHDU:   OpPPC64MULHDUCC,
  1880  		OpPPC64NEG:      OpPPC64NEGCC,
  1881  		OpPPC64NOR:      OpPPC64NORCC,
  1882  		OpPPC64OR:       OpPPC64ORCC,
  1883  		OpPPC64RLDICL:   OpPPC64RLDICLCC,
  1884  		OpPPC64SUB:      OpPPC64SUBCC,
  1885  		OpPPC64XOR:      OpPPC64XORCC,
  1886  	}
  1887  	b := op.Block
  1888  	opCC := b.NewValue0I(op.Pos, ccOpMap[op.Op], types.NewTuple(op.Type, types.TypeFlags), op.AuxInt)
  1889  	opCC.AddArgs(op.Args...)
  1890  	op.reset(OpSelect0)
  1891  	op.AddArgs(opCC)
  1892  	return op
  1893  }
  1894  
  1895  // Try converting an RLDICL to ANDCC. If successful, return the mask, otherwise 0.
  1896  func convertPPC64RldiclAndccconst(sauxint int64) int64 {
  1897  	r, _, _, mask := DecodePPC64RotateMask(sauxint)
  1898  	if r != 0 || mask&0xFFFF != mask {
  1899  		return 0
  1900  	}
  1901  	return int64(mask)
  1902  }
  1903  
  1904  // Convenience function to rotate a 32 bit constant value by another constant.
  1905  func rotateLeft32(v, rotate int64) int64 {
  1906  	return int64(bits.RotateLeft32(uint32(v), int(rotate)))
  1907  }
  1908  
  1909  func rotateRight64(v, rotate int64) int64 {
  1910  	return int64(bits.RotateLeft64(uint64(v), int(-rotate)))
  1911  }
  1912  
  1913  // encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
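        // Illustrative example: armBFAuxInt(8, 16) returns 0x810; lsb() and
        // width() recover 8 and 16 from it.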
  1914  func armBFAuxInt(lsb, width int64) arm64BitField {
  1915  	if lsb < 0 || lsb > 63 {
  1916  		panic("ARM(64) bit field lsb constant out of range")
  1917  	}
  1918  	if width < 1 || lsb+width > 64 {
  1919  		panic("ARM(64) bit field width constant out of range")
  1920  	}
  1921  	return arm64BitField(width | lsb<<8)
  1922  }
  1923  
  1924  // returns the lsb part of the auxInt field of arm64 bitfield ops.
  1925  func (bfc arm64BitField) lsb() int64 {
  1926  	return int64(uint64(bfc) >> 8)
  1927  }
  1928  
  1929  // returns the width part of the auxInt field of arm64 bitfield ops.
  1930  func (bfc arm64BitField) width() int64 {
  1931  	return int64(bfc) & 0xff
  1932  }
  1933  
  1934  // checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
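        // Illustrative example: isARM64BFMask(4, 0xFF0, 4) is true, since
        // 0xFF0>>4 = 0xFF is a contiguous mask starting at bit 0, and placing
        // it at lsb 4 still stays below bit 64.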
  1935  func isARM64BFMask(lsb, mask, rshift int64) bool {
  1936  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1937  	return shiftedMask != 0 && isPowerOfTwo(shiftedMask+1) && nto(shiftedMask)+lsb < 64
  1938  }
  1939  
  1940  // returns the bitfield width of mask >> rshift for arm64 bitfield ops.
  1941  func arm64BFWidth(mask, rshift int64) int64 {
  1942  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1943  	if shiftedMask == 0 {
  1944  		panic("ARM64 BF mask is zero")
  1945  	}
  1946  	return nto(shiftedMask)
  1947  }
  1948  
  1949  // encodes condition code and NZCV flags into auxint.
  1950  func arm64ConditionalParamsAuxInt(cond Op, nzcv uint8) arm64ConditionalParams {
  1951  	if cond < OpARM64Equal || cond > OpARM64GreaterEqualU {
  1952  		panic("Wrong conditional operation")
  1953  	}
  1954  	if nzcv&0x0f != nzcv {
  1955  		panic("Wrong value of NZCV flag")
  1956  	}
  1957  	return arm64ConditionalParams{cond, nzcv, 0, false}
  1958  }
  1959  
  1960  // encodes condition code, NZCV flags and constant value into auxint.
  1961  func arm64ConditionalParamsAuxIntWithValue(cond Op, nzcv uint8, value uint8) arm64ConditionalParams {
  1962  	if value&0x1f != value {
  1963  		panic("Wrong value of constant")
  1964  	}
  1965  	params := arm64ConditionalParamsAuxInt(cond, nzcv)
  1966  	params.constValue = value
  1967  	params.ind = true
  1968  	return params
  1969  }
  1970  
  1971  // extracts condition code from auxint.
  1972  func (condParams arm64ConditionalParams) Cond() Op {
  1973  	return condParams.cond
  1974  }
  1975  
  1976  // extracts NZCV flags from auxint.
  1977  func (condParams arm64ConditionalParams) Nzcv() int64 {
  1978  	return int64(condParams.nzcv)
  1979  }
  1980  
  1981  // extracts constant value from auxint if present.
  1982  func (condParams arm64ConditionalParams) ConstValue() (int64, bool) {
  1983  	return int64(condParams.constValue), condParams.ind
  1984  }
  1985  
  1986  // registerizable reports whether typ is a primitive type that fits in
  1987  // a register. It assumes float64 values will always fit into registers
  1988  // even if that isn't strictly true.
  1989  func registerizable(b *Block, typ *types.Type) bool {
  1990  	if typ.IsPtrShaped() || typ.IsFloat() || typ.IsBoolean() {
  1991  		return true
  1992  	}
  1993  	if typ.IsInteger() {
  1994  		return typ.Size() <= b.Func.Config.RegSize
  1995  	}
  1996  	return false
  1997  }
  1998  
  1999  // needRaceCleanup reports whether this call to racefuncenter/exit can be removed.
  2000  func needRaceCleanup(sym *AuxCall, v *Value) bool {
  2001  	f := v.Block.Func
  2002  	if !f.Config.Race {
  2003  		return false
  2004  	}
  2005  	if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
  2006  		return false
  2007  	}
  2008  	for _, b := range f.Blocks {
  2009  		for _, v := range b.Values {
  2010  			switch v.Op {
  2011  			case OpStaticCall, OpStaticLECall:
  2012  				// The check for racefuncenter will encounter racefuncexit and vice versa.
  2013  				// Allow those, along with calls to panic*.
  2014  				s := v.Aux.(*AuxCall).Fn.String()
  2015  				switch s {
  2016  				case "runtime.racefuncenter", "runtime.racefuncexit",
  2017  					"runtime.panicdivide", "runtime.panicwrap",
  2018  					"runtime.panicshift":
  2019  					continue
  2020  				}
  2021  				// If we encountered any call, we need to keep racefunc*,
  2022  				// for accurate stacktraces.
  2023  				return false
  2024  			case OpPanicBounds, OpPanicExtend:
  2025  				// Note: these are panic generators that are ok (like the static calls above).
  2026  			case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
  2027  				// We must keep the race functions if there are any other call types.
  2028  				return false
  2029  			}
  2030  		}
  2031  	}
  2032  	if isSameCall(sym, "runtime.racefuncenter") {
  2033  		// TODO REGISTER ABI this needs to be cleaned up.
  2034  		// If we're removing racefuncenter, remove its argument as well.
  2035  		if v.Args[0].Op != OpStore {
  2036  			if v.Op == OpStaticLECall {
  2037  				// there is no store, yet.
  2038  				return true
  2039  			}
  2040  			return false
  2041  		}
  2042  		mem := v.Args[0].Args[2]
  2043  		v.Args[0].reset(OpCopy)
  2044  		v.Args[0].AddArg(mem)
  2045  	}
  2046  	return true
  2047  }
  2048  
  2049  // symIsRO reports whether sym is a read-only global.
  2050  func symIsRO(sym Sym) bool {
  2051  	lsym := sym.(*obj.LSym)
  2052  	return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
  2053  }
  2054  
  2055  // symIsROZero reports whether sym is a read-only global whose data contains all zeros.
  2056  func symIsROZero(sym Sym) bool {
  2057  	lsym := sym.(*obj.LSym)
  2058  	if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
  2059  		return false
  2060  	}
  2061  	for _, b := range lsym.P {
  2062  		if b != 0 {
  2063  			return false
  2064  		}
  2065  	}
  2066  	return true
  2067  }
  2068  
  2069  // isFixedLoad reports whether the load can be resolved to a fixed address or constant,
  2070  // and can be rewritten by rewriteFixedLoad.
  2071  func isFixedLoad(v *Value, sym Sym, off int64) bool {
  2072  	lsym := sym.(*obj.LSym)
  2073  	if (v.Type.IsPtrShaped() || v.Type.IsUintptr()) && lsym.Type == objabi.SRODATA {
  2074  		for _, r := range lsym.R {
  2075  			if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off && r.Add == 0 {
  2076  				return true
  2077  			}
  2078  		}
  2079  		return false
  2080  	}
  2081  
  2082  	if ti := lsym.TypeInfo(); ti != nil {
  2083  		// Type symbols do not contain information about their fields, unlike the cases above.
  2084  		// Hand-implement field accesses.
  2085  		// TODO: can this be replaced with reflectdata.writeType and just use the code above?
  2086  
  2087  		t := ti.Type.(*types.Type)
  2088  
  2089  		for _, f := range rttype.Type.Fields() {
  2090  			if f.Offset == off && copyCompatibleType(v.Type, f.Type) {
  2091  				switch f.Sym.Name {
  2092  				case "Size_", "PtrBytes", "Hash", "Kind_", "GCData":
  2093  					return true
  2094  				default:
  2095  					// fmt.Println("unknown field", f.Sym.Name)
  2096  					return false
  2097  				}
  2098  			}
  2099  		}
  2100  
  2101  		if t.IsPtr() && off == rttype.PtrType.OffsetOf("Elem") {
  2102  			return true
  2103  		}
  2104  
  2105  		return false
  2106  	}
  2107  
  2108  	return false
  2109  }
  2110  
  2111  // rewriteFixedLoad rewrites a load to a fixed address or constant, if isFixedLoad returns true.
  2112  func rewriteFixedLoad(v *Value, sym Sym, sb *Value, off int64) *Value {
  2113  	b := v.Block
  2114  	f := b.Func
  2115  
  2116  	lsym := sym.(*obj.LSym)
  2117  	if (v.Type.IsPtrShaped() || v.Type.IsUintptr()) && lsym.Type == objabi.SRODATA {
  2118  		for _, r := range lsym.R {
  2119  			if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off && r.Add == 0 {
  2120  				if strings.HasPrefix(r.Sym.Name, "type:") {
  2121  					// In case we're loading a type out of a dictionary, we need to record
  2122  					// that the containing function might put that type in an interface.
  2123  					// That information is currently recorded in relocations in the dictionary,
  2124  					// but if we perform this load at compile time then the dictionary
  2125  					// might be dead.
  2126  					reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
  2127  				} else if strings.HasPrefix(r.Sym.Name, "go:itab") {
  2128  					// Same, but if we're using an itab we need to record that the
  2129  					// itab._type might be put in an interface.
  2130  					reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
  2131  				}
  2132  				v.reset(OpAddr)
  2133  				v.Aux = symToAux(r.Sym)
  2134  				v.AddArg(sb)
  2135  				return v
  2136  			}
  2137  		}
  2138  		base.Fatalf("fixedLoad data not known for %s:%d", sym, off)
  2139  	}
  2140  
  2141  	if ti := lsym.TypeInfo(); ti != nil {
  2142  		// Type symbols do not contain information about their fields, unlike the cases above.
  2143  		// Hand-implement field accesses.
  2144  		// TODO: can this be replaced with reflectdata.writeType and just use the code above?
  2145  
  2146  		t := ti.Type.(*types.Type)
  2147  
  2148  		ptrSizedOpConst := OpConst64
  2149  		if f.Config.PtrSize == 4 {
  2150  			ptrSizedOpConst = OpConst32
  2151  		}
  2152  
  2153  		for _, f := range rttype.Type.Fields() {
  2154  			if f.Offset == off && copyCompatibleType(v.Type, f.Type) {
  2155  				switch f.Sym.Name {
  2156  				case "Size_":
  2157  					v.reset(ptrSizedOpConst)
  2158  					v.AuxInt = t.Size()
  2159  					return v
  2160  				case "PtrBytes":
  2161  					v.reset(ptrSizedOpConst)
  2162  					v.AuxInt = types.PtrDataSize(t)
  2163  					return v
  2164  				case "Hash":
  2165  					v.reset(OpConst32)
  2166  					v.AuxInt = int64(types.TypeHash(t))
  2167  					return v
  2168  				case "Kind_":
  2169  					v.reset(OpConst8)
  2170  					v.AuxInt = int64(reflectdata.ABIKindOfType(t))
  2171  					return v
  2172  				case "GCData":
  2173  					gcdata, _ := reflectdata.GCSym(t, true)
  2174  					v.reset(OpAddr)
  2175  					v.Aux = symToAux(gcdata)
  2176  					v.AddArg(sb)
  2177  					return v
  2178  				default:
  2179  					base.Fatalf("unknown field %s for fixedLoad of %s at offset %d", f.Sym.Name, lsym.Name, off)
  2180  				}
  2181  			}
  2182  		}
  2183  
  2184  		if t.IsPtr() && off == rttype.PtrType.OffsetOf("Elem") {
  2185  			elemSym := reflectdata.TypeLinksym(t.Elem())
  2186  			reflectdata.MarkTypeSymUsedInInterface(elemSym, f.fe.Func().Linksym())
  2187  			v.reset(OpAddr)
  2188  			v.Aux = symToAux(elemSym)
  2189  			v.AddArg(sb)
  2190  			return v
  2191  		}
  2192  
  2193  		base.Fatalf("fixedLoad data not known for %s:%d", sym, off)
  2194  	}
  2195  
  2196  	base.Fatalf("fixedLoad data not known for %s:%d", sym, off)
  2197  	return nil
  2198  }
  2199  
  2200  // read8 reads one byte from the read-only global sym at offset off.
  2201  func read8(sym Sym, off int64) uint8 {
  2202  	lsym := sym.(*obj.LSym)
  2203  	if off >= int64(len(lsym.P)) || off < 0 {
  2204  		// Invalid index into the global sym.
  2205  		// This can happen in dead code, so we don't want to panic.
  2206  		// Just return any value, it will eventually get ignored.
  2207  		// See issue 29215.
  2208  		return 0
  2209  	}
  2210  	return lsym.P[off]
  2211  }
  2212  
  2213  // read16 reads two bytes from the read-only global sym at offset off.
  2214  func read16(sym Sym, off int64, byteorder binary.ByteOrder) uint16 {
  2215  	lsym := sym.(*obj.LSym)
  2216  	// lsym.P is written lazily.
  2217  	// Bytes requested after the end of lsym.P are 0.
  2218  	var src []byte
  2219  	if 0 <= off && off < int64(len(lsym.P)) {
  2220  		src = lsym.P[off:]
  2221  	}
  2222  	buf := make([]byte, 2)
  2223  	copy(buf, src)
  2224  	return byteorder.Uint16(buf)
  2225  }
  2226  
  2227  // read32 reads four bytes from the read-only global sym at offset off.
  2228  func read32(sym Sym, off int64, byteorder binary.ByteOrder) uint32 {
  2229  	lsym := sym.(*obj.LSym)
  2230  	var src []byte
  2231  	if 0 <= off && off < int64(len(lsym.P)) {
  2232  		src = lsym.P[off:]
  2233  	}
  2234  	buf := make([]byte, 4)
  2235  	copy(buf, src)
  2236  	return byteorder.Uint32(buf)
  2237  }
  2238  
  2239  // read64 reads eight bytes from the read-only global sym at offset off.
  2240  func read64(sym Sym, off int64, byteorder binary.ByteOrder) uint64 {
  2241  	lsym := sym.(*obj.LSym)
  2242  	var src []byte
  2243  	if 0 <= off && off < int64(len(lsym.P)) {
  2244  		src = lsym.P[off:]
  2245  	}
  2246  	buf := make([]byte, 8)
  2247  	copy(buf, src)
  2248  	return byteorder.Uint64(buf)
  2249  }
  2250  
  2251  // sequentialAddresses reports whether it can prove that x + n == y.
  2252  func sequentialAddresses(x, y *Value, n int64) bool {
  2253  	if x == y && n == 0 {
  2254  		return true
  2255  	}
  2256  	if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
  2257  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2258  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2259  		return true
  2260  	}
  2261  	if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
  2262  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2263  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2264  		return true
  2265  	}
  2266  	if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
  2267  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2268  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2269  		return true
  2270  	}
  2271  	if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
  2272  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2273  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2274  		return true
  2275  	}
  2276  	return false
  2277  }
  2278  
  2279  // flagConstant represents the result of a compile-time comparison.
  2280  // The sense of these flags does not necessarily represent the hardware's notion
  2281  // of a flags register - these are just a compile-time construct.
  2282  // We happen to match the semantics to those of arm/arm64.
  2283  // Note that these semantics differ from x86: the carry flag has the opposite
  2284  // sense on a subtraction!
  2285  //
  2286  //	On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
  2287  //	On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
  2288  //	 (because it does x + ^y + C).
  2289  //
  2290  // See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
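        //
        // The low four bits hold the flags: bit 0 is N, bit 1 is Z, bit 2 is C,
        // and bit 3 is V (see the accessors and flagConstantBuilder.encode below).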
  2291  type flagConstant uint8
  2292  
  2293  // N reports whether the result of an operation is negative (high bit set).
  2294  func (fc flagConstant) N() bool {
  2295  	return fc&1 != 0
  2296  }
  2297  
  2298  // Z reports whether the result of an operation is 0.
  2299  func (fc flagConstant) Z() bool {
  2300  	return fc&2 != 0
  2301  }
  2302  
  2303  // C reports whether an unsigned add overflowed (carry), or an
  2304  // unsigned subtract did not underflow (borrow).
  2305  func (fc flagConstant) C() bool {
  2306  	return fc&4 != 0
  2307  }
  2308  
  2309  // V reports whether a signed operation overflowed or underflowed.
  2310  func (fc flagConstant) V() bool {
  2311  	return fc&8 != 0
  2312  }
  2313  
  2314  func (fc flagConstant) eq() bool {
  2315  	return fc.Z()
  2316  }
  2317  func (fc flagConstant) ne() bool {
  2318  	return !fc.Z()
  2319  }
  2320  func (fc flagConstant) lt() bool {
  2321  	return fc.N() != fc.V()
  2322  }
  2323  func (fc flagConstant) le() bool {
  2324  	return fc.Z() || fc.lt()
  2325  }
  2326  func (fc flagConstant) gt() bool {
  2327  	return !fc.Z() && fc.ge()
  2328  }
  2329  func (fc flagConstant) ge() bool {
  2330  	return fc.N() == fc.V()
  2331  }
  2332  func (fc flagConstant) ult() bool {
  2333  	return !fc.C()
  2334  }
  2335  func (fc flagConstant) ule() bool {
  2336  	return fc.Z() || fc.ult()
  2337  }
  2338  func (fc flagConstant) ugt() bool {
  2339  	return !fc.Z() && fc.uge()
  2340  }
  2341  func (fc flagConstant) uge() bool {
  2342  	return fc.C()
  2343  }
  2344  
  2345  func (fc flagConstant) ltNoov() bool {
  2346  	return fc.lt() && !fc.V()
  2347  }
  2348  func (fc flagConstant) leNoov() bool {
  2349  	return fc.le() && !fc.V()
  2350  }
  2351  func (fc flagConstant) gtNoov() bool {
  2352  	return fc.gt() && !fc.V()
  2353  }
  2354  func (fc flagConstant) geNoov() bool {
  2355  	return fc.ge() && !fc.V()
  2356  }
  2357  
  2358  func (fc flagConstant) String() string {
  2359  	return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
  2360  }
  2361  
  2362  type flagConstantBuilder struct {
  2363  	N bool
  2364  	Z bool
  2365  	C bool
  2366  	V bool
  2367  }
  2368  
  2369  func (fcs flagConstantBuilder) encode() flagConstant {
  2370  	var fc flagConstant
  2371  	if fcs.N {
  2372  		fc |= 1
  2373  	}
  2374  	if fcs.Z {
  2375  		fc |= 2
  2376  	}
  2377  	if fcs.C {
  2378  		fc |= 4
  2379  	}
  2380  	if fcs.V {
  2381  		fc |= 8
  2382  	}
  2383  	return fc
  2384  }
  2385  
  2386  // Note: addFlags(x,y) != subFlags(x,-y) in some situations:
  2387  //  - the results of the C flag are different
  2388  //  - the results of the V flag when y==minint are different
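        //
        // For example, addFlags64(0, 0) has C unset while subFlags64(0, 0) has C set,
        // and addFlags64(0, math.MinInt64) has V unset while
        // subFlags64(0, math.MinInt64) has V set.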
  2389  
  2390  // addFlags64 returns the flags that would be set from computing x+y.
  2391  func addFlags64(x, y int64) flagConstant {
  2392  	var fcb flagConstantBuilder
  2393  	fcb.Z = x+y == 0
  2394  	fcb.N = x+y < 0
  2395  	fcb.C = uint64(x+y) < uint64(x)
  2396  	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
  2397  	return fcb.encode()
  2398  }
  2399  
  2400  // subFlags64 returns the flags that would be set from computing x-y.
  2401  func subFlags64(x, y int64) flagConstant {
  2402  	var fcb flagConstantBuilder
  2403  	fcb.Z = x-y == 0
  2404  	fcb.N = x-y < 0
  2405  	fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
  2406  	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
  2407  	return fcb.encode()
  2408  }
  2409  
  2410  // addFlags32 returns the flags that would be set from computing x+y.
  2411  func addFlags32(x, y int32) flagConstant {
  2412  	var fcb flagConstantBuilder
  2413  	fcb.Z = x+y == 0
  2414  	fcb.N = x+y < 0
  2415  	fcb.C = uint32(x+y) < uint32(x)
  2416  	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
  2417  	return fcb.encode()
  2418  }
  2419  
  2420  // subFlags32 returns the flags that would be set from computing x-y.
  2421  func subFlags32(x, y int32) flagConstant {
  2422  	var fcb flagConstantBuilder
  2423  	fcb.Z = x-y == 0
  2424  	fcb.N = x-y < 0
  2425  	fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
  2426  	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
  2427  	return fcb.encode()
  2428  }
  2429  
  2430  // logicFlags64 returns flags set to the sign/zeroness of x.
  2431  // C and V are set to false.
  2432  func logicFlags64(x int64) flagConstant {
  2433  	var fcb flagConstantBuilder
  2434  	fcb.Z = x == 0
  2435  	fcb.N = x < 0
  2436  	return fcb.encode()
  2437  }
  2438  
  2439  // logicFlags32 returns flags set to the sign/zeroness of x.
  2440  // C and V are set to false.
  2441  func logicFlags32(x int32) flagConstant {
  2442  	var fcb flagConstantBuilder
  2443  	fcb.Z = x == 0
  2444  	fcb.N = x < 0
  2445  	return fcb.encode()
  2446  }
  2447  
  2448  func makeJumpTableSym(b *Block) *obj.LSym {
  2449  	s := base.Ctxt.Lookup(fmt.Sprintf("%s.jump%d", b.Func.fe.Func().LSym.Name, b.ID))
  2450  	// The jump table symbol is accessed only from the function symbol.
  2451  	s.Set(obj.AttrStatic, true)
  2452  	return s
  2453  }
  2454  
  2455  // canRotate reports whether the architecture supports
  2456  // rotates of integer registers with the given number of bits.
  2457  func canRotate(c *Config, bits int64) bool {
  2458  	if bits > c.PtrSize*8 {
  2459  		// Don't rewrite to rotates bigger than the machine word.
  2460  		return false
  2461  	}
  2462  	switch c.arch {
  2463  	case "386", "amd64", "arm64", "loong64", "riscv64":
  2464  		return true
  2465  	case "arm", "s390x", "ppc64", "ppc64le", "wasm":
  2466  		return bits >= 32
  2467  	default:
  2468  		return false
  2469  	}
  2470  }
  2471  
  2472  // isARM64bitcon reports whether a constant can be encoded into a logical instruction.
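        // Illustrative examples: 0x00FF00FF00FF00FF (a repeating unit that is a
        // rotated run of ones) is encodable, while 0x5 is not.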
  2473  func isARM64bitcon(x uint64) bool {
  2474  	if x == 1<<64-1 || x == 0 {
  2475  		return false
  2476  	}
  2477  	// determine the period and sign-extend a unit to 64 bits
  2478  	switch {
  2479  	case x != x>>32|x<<32:
  2480  		// period is 64
  2481  		// nothing to do
  2482  	case x != x>>16|x<<48:
  2483  		// period is 32
  2484  		x = uint64(int64(int32(x)))
  2485  	case x != x>>8|x<<56:
  2486  		// period is 16
  2487  		x = uint64(int64(int16(x)))
  2488  	case x != x>>4|x<<60:
  2489  		// period is 8
  2490  		x = uint64(int64(int8(x)))
  2491  	default:
  2492  		// period is 4 or 2, always true
  2493  		// 0001, 0010, 0100, 1000 -- 0001 rotate
  2494  		// 0011, 0110, 1100, 1001 -- 0011 rotate
  2495  		// 0111, 1011, 1101, 1110 -- 0111 rotate
  2496  		// 0101, 1010             -- 01   rotate, repeat
  2497  		return true
  2498  	}
  2499  	return sequenceOfOnes(x) || sequenceOfOnes(^x)
  2500  }
  2501  
  2502  // sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros.
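        // Illustrative example: sequenceOfOnes(0x0070) is true and
        // sequenceOfOnes(0x0101) is false.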
  2503  func sequenceOfOnes(x uint64) bool {
  2504  	y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2
  2505  	y += x
  2506  	return (y-1)&y == 0
  2507  }
  2508  
  2509  // isARM64addcon reports whether x can be encoded as the immediate value in an ADD or SUB instruction.
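        // Illustrative examples: 0xFFF and 0xABC000 are encodable (a uimm12,
        // optionally shifted left by 12 bits); 0x1001 is not.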
  2510  func isARM64addcon(v int64) bool {
  2511  	/* uimm12 or uimm24? */
  2512  	if v < 0 {
  2513  		return false
  2514  	}
  2515  	if (v & 0xFFF) == 0 {
  2516  		v >>= 12
  2517  	}
  2518  	return v <= 0xFFF
  2519  }
  2520  
  2521  // setPos sets the position of v to pos, then returns true.
  2522  // Useful for setting the result of a rewrite's position to
  2523  // something other than the default.
  2524  func setPos(v *Value, pos src.XPos) bool {
  2525  	v.Pos = pos
  2526  	return true
  2527  }
  2528  
  2529  // isNonNegative reports whether v is known to be greater or equal to zero.
  2530  // Note that this is pretty simplistic. The prove pass generates more detailed
  2531  // nonnegative information about values.
  2532  func isNonNegative(v *Value) bool {
  2533  	if !v.Type.IsInteger() {
  2534  		v.Fatalf("isNonNegative bad type: %v", v.Type)
  2535  	}
  2536  	// TODO: return true if !v.Type.IsSigned()
  2537  	// SSA isn't type-safe enough to do that now (issue 37753).
  2538  	// The checks below depend only on the pattern of bits.
  2539  
  2540  	switch v.Op {
  2541  	case OpConst64:
  2542  		return v.AuxInt >= 0
  2543  
  2544  	case OpConst32:
  2545  		return int32(v.AuxInt) >= 0
  2546  
  2547  	case OpConst16:
  2548  		return int16(v.AuxInt) >= 0
  2549  
  2550  	case OpConst8:
  2551  		return int8(v.AuxInt) >= 0
  2552  
  2553  	case OpStringLen, OpSliceLen, OpSliceCap,
  2554  		OpZeroExt8to64, OpZeroExt16to64, OpZeroExt32to64,
  2555  		OpZeroExt8to32, OpZeroExt16to32, OpZeroExt8to16,
  2556  		OpCtz64, OpCtz32, OpCtz16, OpCtz8,
  2557  		OpCtz64NonZero, OpCtz32NonZero, OpCtz16NonZero, OpCtz8NonZero,
  2558  		OpBitLen64, OpBitLen32, OpBitLen16, OpBitLen8:
  2559  		return true
  2560  
  2561  	case OpRsh64Ux64, OpRsh32Ux64:
  2562  		by := v.Args[1]
  2563  		return by.Op == OpConst64 && by.AuxInt > 0
  2564  
  2565  	case OpRsh64x64, OpRsh32x64, OpRsh8x64, OpRsh16x64, OpRsh32x32, OpRsh64x32,
  2566  		OpSignExt32to64, OpSignExt16to64, OpSignExt8to64, OpSignExt16to32, OpSignExt8to32:
  2567  		return isNonNegative(v.Args[0])
  2568  
  2569  	case OpAnd64, OpAnd32, OpAnd16, OpAnd8:
  2570  		return isNonNegative(v.Args[0]) || isNonNegative(v.Args[1])
  2571  
  2572  	case OpMod64, OpMod32, OpMod16, OpMod8,
  2573  		OpDiv64, OpDiv32, OpDiv16, OpDiv8,
  2574  		OpOr64, OpOr32, OpOr16, OpOr8,
  2575  		OpXor64, OpXor32, OpXor16, OpXor8:
  2576  		return isNonNegative(v.Args[0]) && isNonNegative(v.Args[1])
  2577  
  2578  		// We could handle OpPhi here, but the improvements from doing
  2579  		// so are very minor, and it is neither simple nor cheap.
  2580  	}
  2581  	return false
  2582  }
  2583  
  2584  func rewriteStructLoad(v *Value) *Value {
  2585  	b := v.Block
  2586  	ptr := v.Args[0]
  2587  	mem := v.Args[1]
  2588  
  2589  	t := v.Type
  2590  	args := make([]*Value, t.NumFields())
  2591  	for i := range args {
  2592  		ft := t.FieldType(i)
  2593  		addr := b.NewValue1I(v.Pos, OpOffPtr, ft.PtrTo(), t.FieldOff(i), ptr)
  2594  		args[i] = b.NewValue2(v.Pos, OpLoad, ft, addr, mem)
  2595  	}
  2596  
  2597  	v.reset(OpStructMake)
  2598  	v.AddArgs(args...)
  2599  	return v
  2600  }
  2601  
  2602  func rewriteStructStore(v *Value) *Value {
  2603  	b := v.Block
  2604  	dst := v.Args[0]
  2605  	x := v.Args[1]
  2606  	if x.Op != OpStructMake {
  2607  		base.Fatalf("invalid struct store: %v", x)
  2608  	}
  2609  	mem := v.Args[2]
  2610  
  2611  	t := x.Type
  2612  	for i, arg := range x.Args {
  2613  		ft := t.FieldType(i)
  2614  
  2615  		addr := b.NewValue1I(v.Pos, OpOffPtr, ft.PtrTo(), t.FieldOff(i), dst)
  2616  		mem = b.NewValue3A(v.Pos, OpStore, types.TypeMem, typeToAux(ft), addr, arg, mem)
  2617  	}
  2618  
  2619  	return mem
  2620  }
  2621  
  2622  // isDirectType reports whether v represents a type
  2623  // (a *runtime._type) whose value is stored directly in an
  2624  // interface (i.e., is pointer or pointer-like).
  2625  func isDirectType(v *Value) bool {
  2626  	return isDirectType1(v)
  2627  }
  2628  
  2629  // v is a type
  2630  func isDirectType1(v *Value) bool {
  2631  	switch v.Op {
  2632  	case OpITab:
  2633  		return isDirectType2(v.Args[0])
  2634  	case OpAddr:
  2635  		lsym := v.Aux.(*obj.LSym)
  2636  		if ti := lsym.TypeInfo(); ti != nil {
  2637  			return types.IsDirectIface(ti.Type.(*types.Type))
  2638  		}
  2639  	}
  2640  	return false
  2641  }
  2642  
  2643  // v is an empty interface
  2644  func isDirectType2(v *Value) bool {
  2645  	switch v.Op {
  2646  	case OpIMake:
  2647  		return isDirectType1(v.Args[0])
  2648  	}
  2649  	return false
  2650  }
  2651  
  2652  // isDirectIface reports whether v represents an itab
  2653  // (a *runtime._itab) for a type whose value is stored directly
  2654  // in an interface (i.e., is pointer or pointer-like).
  2655  func isDirectIface(v *Value) bool {
  2656  	return isDirectIface1(v, 9)
  2657  }
  2658  
  2659  // v is an itab
  2660  func isDirectIface1(v *Value, depth int) bool {
  2661  	if depth == 0 {
  2662  		return false
  2663  	}
  2664  	switch v.Op {
  2665  	case OpITab:
  2666  		return isDirectIface2(v.Args[0], depth-1)
  2667  	case OpAddr:
  2668  		lsym := v.Aux.(*obj.LSym)
  2669  		if ii := lsym.ItabInfo(); ii != nil {
  2670  			return types.IsDirectIface(ii.Type.(*types.Type))
  2671  		}
  2672  	case OpConstNil:
  2673  		// We can treat this as direct, because if the itab is
  2674  		// nil, the data field must be nil also.
  2675  		return true
  2676  	}
  2677  	return false
  2678  }
  2679  
  2680  // v is an interface
  2681  func isDirectIface2(v *Value, depth int) bool {
  2682  	if depth == 0 {
  2683  		return false
  2684  	}
  2685  	switch v.Op {
  2686  	case OpIMake:
  2687  		return isDirectIface1(v.Args[0], depth-1)
  2688  	case OpPhi:
  2689  		for _, a := range v.Args {
  2690  			if !isDirectIface2(a, depth-1) {
  2691  				return false
  2692  			}
  2693  		}
  2694  		return true
  2695  	}
  2696  	return false
  2697  }
  2698  
  2699  func bitsAdd64(x, y, carry int64) (r struct{ sum, carry int64 }) {
  2700  	s, c := bits.Add64(uint64(x), uint64(y), uint64(carry))
  2701  	r.sum, r.carry = int64(s), int64(c)
  2702  	return
  2703  }
  2704  
  2705  func bitsMulU64(x, y int64) (r struct{ hi, lo int64 }) {
  2706  	hi, lo := bits.Mul64(uint64(x), uint64(y))
  2707  	r.hi, r.lo = int64(hi), int64(lo)
  2708  	return
  2709  }
  2710  func bitsMulU32(x, y int32) (r struct{ hi, lo int32 }) {
  2711  	hi, lo := bits.Mul32(uint32(x), uint32(y))
  2712  	r.hi, r.lo = int32(hi), int32(lo)
  2713  	return
  2714  }
  2715  
  2716  // flagify rewrites v which is (X ...) to (Select0 (Xflags ...)).
  2717  func flagify(v *Value) bool {
  2718  	var flagVersion Op
  2719  	switch v.Op {
  2720  	case OpAMD64ADDQconst:
  2721  		flagVersion = OpAMD64ADDQconstflags
  2722  	case OpAMD64ADDLconst:
  2723  		flagVersion = OpAMD64ADDLconstflags
  2724  	default:
  2725  		base.Fatalf("can't flagify op %s", v.Op)
  2726  	}
  2727  	inner := v.copyInto(v.Block)
  2728  	inner.Op = flagVersion
  2729  	inner.Type = types.NewTuple(v.Type, types.TypeFlags)
  2730  	v.reset(OpSelect0)
  2731  	v.AddArg(inner)
  2732  	return true
  2733  }
  2734  
  2735  // PanicBoundsC contains a constant for a bounds failure.
  2736  type PanicBoundsC struct {
  2737  	C int64
  2738  }
  2739  
  2740  // PanicBoundsCC contains 2 constants for a bounds failure.
  2741  type PanicBoundsCC struct {
  2742  	Cx int64
  2743  	Cy int64
  2744  }
  2745  
  2746  func (p PanicBoundsC) CanBeAnSSAAux() {
  2747  }
  2748  func (p PanicBoundsCC) CanBeAnSSAAux() {
  2749  }
  2750  
  2751  func auxToPanicBoundsC(i Aux) PanicBoundsC {
  2752  	return i.(PanicBoundsC)
  2753  }
  2754  func auxToPanicBoundsCC(i Aux) PanicBoundsCC {
  2755  	return i.(PanicBoundsCC)
  2756  }
  2757  func panicBoundsCToAux(p PanicBoundsC) Aux {
  2758  	return p
  2759  }
  2760  func panicBoundsCCToAux(p PanicBoundsCC) Aux {
  2761  	return p
  2762  }
  2763  
  2764  func isDictArgSym(sym Sym) bool {
  2765  	return sym.(*ir.Name).Sym().Name == typecheck.LocalDictName
  2766  }
  2767  
