Source file src/cmd/compile/internal/ssa/rewrite.go

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import (
     8  	"cmd/compile/internal/base"
     9  	"cmd/compile/internal/ir"
    10  	"cmd/compile/internal/logopt"
    11  	"cmd/compile/internal/reflectdata"
    12  	"cmd/compile/internal/rttype"
    13  	"cmd/compile/internal/typecheck"
    14  	"cmd/compile/internal/types"
    15  	"cmd/internal/obj"
    16  	"cmd/internal/obj/s390x"
    17  	"cmd/internal/objabi"
    18  	"cmd/internal/src"
    19  	"encoding/binary"
    20  	"fmt"
    21  	"internal/buildcfg"
    22  	"io"
    23  	"math"
    24  	"math/bits"
    25  	"os"
    26  	"path/filepath"
    27  	"strings"
    28  )
    29  
    30  type deadValueChoice bool
    31  
    32  const (
    33  	leaveDeadValues  deadValueChoice = false
    34  	removeDeadValues                 = true
    35  
    36  	repZeroThreshold = 1408 // size beyond which we use REP STOS for zeroing
    37  	repMoveThreshold = 1408 // size beyond which we use REP MOVS for copying
    38  )
    39  
     40  // applyRewrite repeatedly applies rb and rv to f; deadcode indicates whether it should also remove any values that become dead.
    41  func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
    42  	// repeat rewrites until we find no more rewrites
    43  	pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
    44  	pendingLines.clear()
    45  	debug := f.pass.debug
    46  	if debug > 1 {
    47  		fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
    48  	}
    49  	// if the number of rewrite iterations reaches itersLimit we will
    50  	// at that point turn on cycle detection. Instead of a fixed limit,
    51  	// size the limit according to func size to allow for cases such
    52  	// as the one in issue #66773.
    53  	itersLimit := f.NumBlocks()
    54  	if itersLimit < 20 {
    55  		itersLimit = 20
    56  	}
    57  	var iters int
    58  	var states map[string]bool
    59  	for {
    60  		if debug > 1 {
    61  			fmt.Printf("%s: iter %d\n", f.pass.name, iters)
    62  		}
    63  		change := false
    64  		deadChange := false
    65  		for _, b := range f.Blocks {
    66  			var b0 *Block
    67  			if debug > 1 {
    68  				fmt.Printf("%s: start block\n", f.pass.name)
    69  				b0 = new(Block)
    70  				*b0 = *b
    71  				b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
    72  			}
    73  			for i, c := range b.ControlValues() {
    74  				for c.Op == OpCopy {
    75  					c = c.Args[0]
    76  					b.ReplaceControl(i, c)
    77  				}
    78  			}
    79  			if rb(b) {
    80  				change = true
    81  				if debug > 1 {
    82  					fmt.Printf("rewriting %s  ->  %s\n", b0.LongString(), b.LongString())
    83  				}
    84  			}
    85  			for j, v := range b.Values {
    86  				if debug > 1 {
    87  					fmt.Printf("%s: consider %v\n", f.pass.name, v.LongString())
    88  				}
    89  				var v0 *Value
    90  				if debug > 1 {
    91  					v0 = new(Value)
    92  					*v0 = *v
    93  					v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
    94  				}
    95  				if v.Uses == 0 && v.removeable() {
    96  					if v.Op != OpInvalid && deadcode == removeDeadValues {
    97  						// Reset any values that are now unused, so that we decrement
    98  						// the use count of all of its arguments.
    99  						// Not quite a deadcode pass, because it does not handle cycles.
   100  						// But it should help Uses==1 rules to fire.
   101  						v.reset(OpInvalid)
   102  						deadChange = true
   103  					}
   104  					// No point rewriting values which aren't used.
   105  					continue
   106  				}
   107  
   108  				vchange := phielimValue(v)
   109  				if vchange && debug > 1 {
   110  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   111  				}
   112  
   113  				// Eliminate copy inputs.
   114  				// If any copy input becomes unused, mark it
   115  				// as invalid and discard its argument. Repeat
   116  				// recursively on the discarded argument.
   117  				// This phase helps remove phantom "dead copy" uses
   118  				// of a value so that a x.Uses==1 rule condition
   119  				// fires reliably.
   120  				for i, a := range v.Args {
   121  					if a.Op != OpCopy {
   122  						continue
   123  					}
   124  					aa := copySource(a)
   125  					v.SetArg(i, aa)
   126  					// If a, a copy, has a line boundary indicator, attempt to find a new value
   127  					// to hold it.  The first candidate is the value that will replace a (aa),
   128  					// if it shares the same block and line and is eligible.
   129  					// The second option is v, which has a as an input.  Because aa is earlier in
   130  					// the data flow, it is the better choice.
   131  					if a.Pos.IsStmt() == src.PosIsStmt {
   132  						if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
   133  							aa.Pos = aa.Pos.WithIsStmt()
   134  						} else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
   135  							v.Pos = v.Pos.WithIsStmt()
   136  						} else {
   137  							// Record the lost line and look for a new home after all rewrites are complete.
   138  							// TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
   139  							// line to appear in more than one block, but only one block is stored, so if both end
   140  							// up here, then one will be lost.
   141  							pendingLines.set(a.Pos, int32(a.Block.ID))
   142  						}
   143  						a.Pos = a.Pos.WithNotStmt()
   144  					}
   145  					vchange = true
   146  					for a.Uses == 0 {
   147  						b := a.Args[0]
   148  						a.reset(OpInvalid)
   149  						a = b
   150  					}
   151  				}
   152  				if vchange && debug > 1 {
   153  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   154  				}
   155  
   156  				// apply rewrite function
   157  				if rv(v) {
   158  					vchange = true
   159  					// If value changed to a poor choice for a statement boundary, move the boundary
   160  					if v.Pos.IsStmt() == src.PosIsStmt {
   161  						if k := nextGoodStatementIndex(v, j, b); k != j {
   162  							v.Pos = v.Pos.WithNotStmt()
   163  							b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
   164  						}
   165  					}
   166  				}
   167  
   168  				change = change || vchange
   169  				if vchange && debug > 1 {
   170  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   171  				}
   172  			}
   173  		}
   174  		if !change && !deadChange {
   175  			break
   176  		}
   177  		iters++
   178  		if (iters > itersLimit || debug >= 2) && change {
   179  			// We've done a suspiciously large number of rewrites (or we're in debug mode).
   180  			// As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
   181  			// and the maximum value encountered during make.bash is 12.
   182  			// Start checking for cycles. (This is too expensive to do routinely.)
   183  			// Note: we avoid this path for deadChange-only iterations, to fix #51639.
   184  			if states == nil {
   185  				states = make(map[string]bool)
   186  			}
   187  			h := f.rewriteHash()
   188  			if _, ok := states[h]; ok {
   189  				// We've found a cycle.
   190  				// To diagnose it, set debug to 2 and start again,
   191  				// so that we'll print all rules applied until we complete another cycle.
   192  				// If debug is already >= 2, we've already done that, so it's time to crash.
   193  				if debug < 2 {
   194  					debug = 2
   195  					states = make(map[string]bool)
   196  				} else {
   197  					f.Fatalf("rewrite cycle detected")
   198  				}
   199  			}
   200  			states[h] = true
   201  		}
   202  	}
   203  	// remove clobbered values
   204  	for _, b := range f.Blocks {
   205  		j := 0
   206  		for i, v := range b.Values {
   207  			vl := v.Pos
   208  			if v.Op == OpInvalid {
   209  				if v.Pos.IsStmt() == src.PosIsStmt {
   210  					pendingLines.set(vl, int32(b.ID))
   211  				}
   212  				f.freeValue(v)
   213  				continue
   214  			}
   215  			if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) {
   216  				if pl, ok := pendingLines.get(vl); ok && pl == int32(b.ID) {
   217  					pendingLines.remove(vl)
   218  					v.Pos = v.Pos.WithIsStmt()
   219  				}
   220  			}
   221  			if i != j {
   222  				b.Values[j] = v
   223  			}
   224  			j++
   225  		}
   226  		if pl, ok := pendingLines.get(b.Pos); ok && pl == int32(b.ID) {
   227  			b.Pos = b.Pos.WithIsStmt()
   228  			pendingLines.remove(b.Pos)
   229  		}
   230  		b.truncateValues(j)
   231  	}
   232  }
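
         // For illustration: callers are expected to pass the machine-generated
         // rewriters to applyRewrite, roughly as in
         //
         //	applyRewrite(f, f.Config.lowerBlock, f.Config.lowerValue, removeDeadValues)
         //
         // where lowerBlock and lowerValue stand for the per-architecture functions
         // produced by _gen/rulegen.go (the exact call site may differ).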
   233  
   234  // Common functions called from rewriting rules
   235  
   236  func is64BitFloat(t *types.Type) bool {
   237  	return t.Size() == 8 && t.IsFloat()
   238  }
   239  
   240  func is32BitFloat(t *types.Type) bool {
   241  	return t.Size() == 4 && t.IsFloat()
   242  }
   243  
   244  func is64BitInt(t *types.Type) bool {
   245  	return t.Size() == 8 && t.IsInteger()
   246  }
   247  
   248  func is32BitInt(t *types.Type) bool {
   249  	return t.Size() == 4 && t.IsInteger()
   250  }
   251  
   252  func is16BitInt(t *types.Type) bool {
   253  	return t.Size() == 2 && t.IsInteger()
   254  }
   255  
   256  func is8BitInt(t *types.Type) bool {
   257  	return t.Size() == 1 && t.IsInteger()
   258  }
   259  
   260  func isPtr(t *types.Type) bool {
   261  	return t.IsPtrShaped()
   262  }
   263  
   264  func copyCompatibleType(t1, t2 *types.Type) bool {
   265  	if t1.Size() != t2.Size() {
   266  		return false
   267  	}
   268  	if t1.IsInteger() {
   269  		return t2.IsInteger()
   270  	}
   271  	if isPtr(t1) {
   272  		return isPtr(t2)
   273  	}
   274  	return t1.Compare(t2) == types.CMPeq
   275  }
   276  
   277  // mergeSym merges two symbolic offsets. There is no real merging of
   278  // offsets, we just pick the non-nil one.
   279  func mergeSym(x, y Sym) Sym {
   280  	if x == nil {
   281  		return y
   282  	}
   283  	if y == nil {
   284  		return x
   285  	}
   286  	panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
   287  }
   288  
   289  func canMergeSym(x, y Sym) bool {
   290  	return x == nil || y == nil
   291  }
   292  
   293  // canMergeLoadClobber reports whether the load can be merged into target without
   294  // invalidating the schedule.
   295  // It also checks that the other non-load argument x is something we
   296  // are ok with clobbering.
   297  func canMergeLoadClobber(target, load, x *Value) bool {
   298  	// The register containing x is going to get clobbered.
   299  	// Don't merge if we still need the value of x.
   300  	// We don't have liveness information here, but we can
   301  	// approximate x dying with:
   302  	//  1) target is x's only use.
   303  	//  2) target is not in a deeper loop than x.
   304  	switch {
   305  	case x.Uses == 2 && x.Op == OpPhi && len(x.Args) == 2 && (x.Args[0] == target || x.Args[1] == target) && target.Uses == 1:
   306  		// This is a simple detector to determine that x is probably
   307  		// not live after target. (It does not need to be perfect,
   308  		// regalloc will issue a reg-reg move to save it if we are wrong.)
   309  		// We have:
   310  		//   x = Phi(?, target)
   311  		//   target = Op(load, x)
   312  		// Because target has only one use as a Phi argument, we can schedule it
   313  		// very late. Hopefully, later than the other use of x. (The other use died
   314  		// between x and target, or exists on another branch entirely).
   315  	case x.Uses > 1:
   316  		return false
   317  	}
   318  	loopnest := x.Block.Func.loopnest()
   319  	if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
   320  		return false
   321  	}
   322  	return canMergeLoad(target, load)
   323  }
   324  
   325  // canMergeLoad reports whether the load can be merged into target without
   326  // invalidating the schedule.
   327  func canMergeLoad(target, load *Value) bool {
   328  	if target.Block.ID != load.Block.ID {
   329  		// If the load is in a different block do not merge it.
   330  		return false
   331  	}
   332  
   333  	// We can't merge the load into the target if the load
   334  	// has more than one use.
   335  	if load.Uses != 1 {
   336  		return false
   337  	}
   338  
   339  	mem := load.MemoryArg()
   340  
   341  	// We need the load's memory arg to still be alive at target. That
   342  	// can't be the case if one of target's args depends on a memory
   343  	// state that is a successor of load's memory arg.
   344  	//
   345  	// For example, it would be invalid to merge load into target in
   346  	// the following situation because newmem has killed oldmem
   347  	// before target is reached:
   348  	//     load = read ... oldmem
   349  	//   newmem = write ... oldmem
   350  	//     arg0 = read ... newmem
   351  	//   target = add arg0 load
   352  	//
   353  	// If the argument comes from a different block then we can exclude
   354  	// it immediately because it must dominate load (which is in the
   355  	// same block as target).
   356  	var args []*Value
   357  	for _, a := range target.Args {
   358  		if a != load && a.Block.ID == target.Block.ID {
   359  			args = append(args, a)
   360  		}
   361  	}
   362  
   363  	// memPreds contains memory states known to be predecessors of load's
   364  	// memory state. It is lazily initialized.
   365  	var memPreds map[*Value]bool
   366  	for i := 0; len(args) > 0; i++ {
   367  		const limit = 100
   368  		if i >= limit {
   369  			// Give up if we have done a lot of iterations.
   370  			return false
   371  		}
   372  		v := args[len(args)-1]
   373  		args = args[:len(args)-1]
   374  		if target.Block.ID != v.Block.ID {
   375  			// Since target and load are in the same block
   376  			// we can stop searching when we leave the block.
   377  			continue
   378  		}
   379  		if v.Op == OpPhi {
   380  			// A Phi implies we have reached the top of the block.
   381  			// The memory phi, if it exists, is always
   382  			// the first logical store in the block.
   383  			continue
   384  		}
   385  		if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
   386  			// We could handle this situation however it is likely
   387  			// to be very rare.
   388  			return false
   389  		}
   390  		if v.Op.SymEffect()&SymAddr != 0 {
   391  			// This case prevents an operation that calculates the
   392  			// address of a local variable from being forced to schedule
   393  			// before its corresponding VarDef.
   394  			// See issue 28445.
   395  			//   v1 = LOAD ...
   396  			//   v2 = VARDEF
   397  			//   v3 = LEAQ
   398  			//   v4 = CMPQ v1 v3
   399  			// We don't want to combine the CMPQ with the load, because
   400  			// that would force the CMPQ to schedule before the VARDEF, which
   401  			// in turn requires the LEAQ to schedule before the VARDEF.
   402  			return false
   403  		}
   404  		if v.Type.IsMemory() {
   405  			if memPreds == nil {
   406  				// Initialise a map containing memory states
   407  				// known to be predecessors of load's memory
   408  				// state.
   409  				memPreds = make(map[*Value]bool)
   410  				m := mem
   411  				const limit = 50
   412  				for i := 0; i < limit; i++ {
   413  					if m.Op == OpPhi {
   414  						// The memory phi, if it exists, is always
   415  						// the first logical store in the block.
   416  						break
   417  					}
   418  					if m.Block.ID != target.Block.ID {
   419  						break
   420  					}
   421  					if !m.Type.IsMemory() {
   422  						break
   423  					}
   424  					memPreds[m] = true
   425  					if len(m.Args) == 0 {
   426  						break
   427  					}
   428  					m = m.MemoryArg()
   429  				}
   430  			}
   431  
   432  			// We can merge if v is a predecessor of mem.
   433  			//
   434  			// For example, we can merge load into target in the
   435  			// following scenario:
   436  			//      x = read ... v
   437  			//    mem = write ... v
   438  			//   load = read ... mem
   439  			// target = add x load
   440  			if memPreds[v] {
   441  				continue
   442  			}
   443  			return false
   444  		}
   445  		if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
   446  			// If v takes mem as an input then we know mem
   447  			// is valid at this point.
   448  			continue
   449  		}
   450  		for _, a := range v.Args {
   451  			if target.Block.ID == a.Block.ID {
   452  				args = append(args, a)
   453  			}
   454  		}
   455  	}
   456  
   457  	return true
   458  }
   459  
   460  // isSameCall reports whether aux is the same as the given named symbol.
   461  func isSameCall(aux Aux, name string) bool {
   462  	fn := aux.(*AuxCall).Fn
   463  	return fn != nil && fn.String() == name
   464  }
   465  
   466  func isMalloc(aux Aux) bool {
   467  	return isNewObject(aux) || isSpecializedMalloc(aux)
   468  }
   469  
   470  func isNewObject(aux Aux) bool {
   471  	fn := aux.(*AuxCall).Fn
   472  	return fn != nil && fn.String() == "runtime.newobject"
   473  }
   474  
   475  func isSpecializedMalloc(aux Aux) bool {
   476  	fn := aux.(*AuxCall).Fn
   477  	if fn == nil {
   478  		return false
   479  	}
   480  	name := fn.String()
   481  	return strings.HasPrefix(name, "runtime.mallocgcSmallNoScanSC") ||
   482  		strings.HasPrefix(name, "runtime.mallocgcSmallScanNoHeaderSC") ||
   483  		strings.HasPrefix(name, "runtime.mallocTiny")
   484  }
   485  
    486  // canLoadUnaligned reports whether the architecture supports unaligned load operations.
   487  func canLoadUnaligned(c *Config) bool {
   488  	return c.ctxt.Arch.Alignment == 1
   489  }
   490  
   491  // nlzX returns the number of leading zeros.
   492  func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
   493  func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
   494  func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
   495  func nlz8(x int8) int   { return bits.LeadingZeros8(uint8(x)) }
   496  
   497  // ntzX returns the number of trailing zeros.
   498  func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
   499  func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
   500  func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
   501  func ntz8(x int8) int   { return bits.TrailingZeros8(uint8(x)) }
   502  
   503  // oneBit reports whether x contains exactly one set bit.
   504  func oneBit[T int8 | int16 | int32 | int64](x T) bool {
   505  	return x&(x-1) == 0 && x != 0
   506  }
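
         // For example, oneBit(int64(8)) and oneBit(int8(1)) report true, while
         // oneBit(int64(0)) and oneBit(int64(6)) report false.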
   507  
   508  // nto returns the number of trailing ones.
   509  func nto(x int64) int64 {
   510  	return int64(ntz64(^x))
   511  }
   512  
    513  // logX returns the logarithm of n base 2.
    514  // n must be a positive power of 2 (isPowerOfTwo returns true).
   515  func log8(n int8) int64   { return log8u(uint8(n)) }
   516  func log16(n int16) int64 { return log16u(uint16(n)) }
   517  func log32(n int32) int64 { return log32u(uint32(n)) }
   518  func log64(n int64) int64 { return log64u(uint64(n)) }
   519  
   520  // logXu returns the logarithm of n base 2.
   521  // n must be a power of 2 (isUnsignedPowerOfTwo returns true)
   522  func log8u(n uint8) int64   { return int64(bits.Len8(n)) - 1 }
   523  func log16u(n uint16) int64 { return int64(bits.Len16(n)) - 1 }
   524  func log32u(n uint32) int64 { return int64(bits.Len32(n)) - 1 }
   525  func log64u(n uint64) int64 { return int64(bits.Len64(n)) - 1 }
   526  
    527  // isPowerOfTwo reports whether n is a positive power of 2.
   528  func isPowerOfTwo[T int8 | int16 | int32 | int64](n T) bool {
   529  	return n > 0 && n&(n-1) == 0
   530  }
   531  
   532  // isUnsignedPowerOfTwo reports whether n is an unsigned power of 2.
   533  func isUnsignedPowerOfTwo[T uint8 | uint16 | uint32 | uint64](n T) bool {
   534  	return n != 0 && n&(n-1) == 0
   535  }
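
         // For example, isPowerOfTwo(int64(64)) is true and log64(64) == 6, while
         // isPowerOfTwo(int8(-128)) is false (not positive) even though
         // isUnsignedPowerOfTwo(uint8(128)) is true, with log8u(128) == 7.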
   536  
   537  // is32Bit reports whether n can be represented as a signed 32 bit integer.
   538  func is32Bit(n int64) bool {
   539  	return n == int64(int32(n))
   540  }
   541  
   542  // is16Bit reports whether n can be represented as a signed 16 bit integer.
   543  func is16Bit(n int64) bool {
   544  	return n == int64(int16(n))
   545  }
   546  
   547  // is8Bit reports whether n can be represented as a signed 8 bit integer.
   548  func is8Bit(n int64) bool {
   549  	return n == int64(int8(n))
   550  }
   551  
   552  // isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
   553  func isU8Bit(n int64) bool {
   554  	return n == int64(uint8(n))
   555  }
   556  
   557  // is12Bit reports whether n can be represented as a signed 12 bit integer.
   558  func is12Bit(n int64) bool {
   559  	return -(1<<11) <= n && n < (1<<11)
   560  }
   561  
   562  // isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
   563  func isU12Bit(n int64) bool {
   564  	return 0 <= n && n < (1<<12)
   565  }
   566  
   567  // isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
   568  func isU16Bit(n int64) bool {
   569  	return n == int64(uint16(n))
   570  }
   571  
   572  // isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
   573  func isU32Bit(n int64) bool {
   574  	return n == int64(uint32(n))
   575  }
   576  
   577  // is20Bit reports whether n can be represented as a signed 20 bit integer.
   578  func is20Bit(n int64) bool {
   579  	return -(1<<19) <= n && n < (1<<19)
   580  }
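
         // For example, is12Bit accepts -2048 through 2047, so is12Bit(2047) is true
         // and is12Bit(2048) is false, while isU12Bit accepts 0 through 4095.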
   581  
   582  // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
   583  func b2i(b bool) int64 {
   584  	if b {
   585  		return 1
   586  	}
   587  	return 0
   588  }
   589  
   590  // b2i32 translates a boolean value to 0 or 1.
   591  func b2i32(b bool) int32 {
   592  	if b {
   593  		return 1
   594  	}
   595  	return 0
   596  }
   597  
   598  func canMulStrengthReduce(config *Config, x int64) bool {
   599  	_, ok := config.mulRecipes[x]
   600  	return ok
   601  }
   602  func canMulStrengthReduce32(config *Config, x int32) bool {
   603  	_, ok := config.mulRecipes[int64(x)]
   604  	return ok
   605  }
   606  
   607  // mulStrengthReduce returns v*x evaluated at the location
   608  // (block and source position) of m.
   609  // canMulStrengthReduce must have returned true.
   610  func mulStrengthReduce(m *Value, v *Value, x int64) *Value {
   611  	return v.Block.Func.Config.mulRecipes[x].build(m, v)
   612  }
   613  
   614  // mulStrengthReduce32 returns v*x evaluated at the location
   615  // (block and source position) of m.
   616  // canMulStrengthReduce32 must have returned true.
   617  // The upper 32 bits of m might be set to junk.
   618  func mulStrengthReduce32(m *Value, v *Value, x int32) *Value {
   619  	return v.Block.Func.Config.mulRecipes[int64(x)].build(m, v)
   620  }
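
         // For illustration: a rewrite rule first guards with
         // canMulStrengthReduce(config, c) and, when that reports true, replaces the
         // multiplication by the constant c with the shift/add/sub sequence recorded
         // in config.mulRecipes[c] by calling mulStrengthReduce(m, v, c).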
   621  
   622  // shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
   623  // A shift is bounded if it is shifting by less than the width of the shifted value.
   624  func shiftIsBounded(v *Value) bool {
   625  	return v.AuxInt != 0
   626  }
   627  
   628  // canonLessThan returns whether x is "ordered" less than y, for purposes of normalizing
   629  // generated code as much as possible.
   630  func canonLessThan(x, y *Value) bool {
   631  	if x.Op != y.Op {
   632  		return x.Op < y.Op
   633  	}
   634  	if !x.Pos.SameFileAndLine(y.Pos) {
   635  		return x.Pos.Before(y.Pos)
   636  	}
   637  	return x.ID < y.ID
   638  }
   639  
   640  // truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
   641  // of the mantissa. It will panic if the truncation results in lost information.
   642  func truncate64Fto32F(f float64) float32 {
   643  	if !isExactFloat32(f) {
   644  		panic("truncate64Fto32F: truncation is not exact")
   645  	}
   646  	if !math.IsNaN(f) {
   647  		return float32(f)
   648  	}
   649  	// NaN bit patterns aren't necessarily preserved across conversion
   650  	// instructions so we need to do the conversion manually.
   651  	b := math.Float64bits(f)
   652  	m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
   653  	//          | sign                  | exponent   | mantissa       |
   654  	r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
   655  	return math.Float32frombits(r)
   656  }
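
         // Worked example for the NaN branch above: for the quiet NaN with float64
         // bits 0x7ff8000000000000, m is 0x8000000000000, m>>(52-23) is 0x400000, and
         // the result bits are 0|0x7f800000|0x400000 = 0x7fc00000, a float32 quiet NaN
         // whose payload was shifted rather than recomputed.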
   657  
   658  // DivisionNeedsFixUp reports whether the division needs fix-up code.
   659  func DivisionNeedsFixUp(v *Value) bool {
   660  	return v.AuxInt == 0
   661  }
   662  
   663  // auxTo32F decodes a float32 from the AuxInt value provided.
   664  func auxTo32F(i int64) float32 {
   665  	return truncate64Fto32F(math.Float64frombits(uint64(i)))
   666  }
   667  
   668  func auxIntToBool(i int64) bool {
   669  	if i == 0 {
   670  		return false
   671  	}
   672  	return true
   673  }
   674  func auxIntToInt8(i int64) int8 {
   675  	return int8(i)
   676  }
   677  func auxIntToInt16(i int64) int16 {
   678  	return int16(i)
   679  }
   680  func auxIntToInt32(i int64) int32 {
   681  	return int32(i)
   682  }
   683  func auxIntToInt64(i int64) int64 {
   684  	return i
   685  }
   686  func auxIntToUint8(i int64) uint8 {
   687  	return uint8(i)
   688  }
   689  func auxIntToFloat32(i int64) float32 {
   690  	return float32(math.Float64frombits(uint64(i)))
   691  }
   692  func auxIntToFloat64(i int64) float64 {
   693  	return math.Float64frombits(uint64(i))
   694  }
   695  func auxIntToValAndOff(i int64) ValAndOff {
   696  	return ValAndOff(i)
   697  }
   698  func auxIntToArm64BitField(i int64) arm64BitField {
   699  	return arm64BitField(i)
   700  }
   701  func auxIntToArm64ConditionalParams(i int64) arm64ConditionalParams {
   702  	var params arm64ConditionalParams
   703  	params.cond = Op(i & 0xffff)
   704  	i >>= 16
   705  	params.nzcv = uint8(i & 0x0f)
   706  	i >>= 4
   707  	params.constValue = uint8(i & 0x1f)
   708  	i >>= 5
   709  	params.ind = i == 1
   710  	return params
   711  }
   712  func auxIntToFlagConstant(x int64) flagConstant {
   713  	return flagConstant(x)
   714  }
   715  
   716  func auxIntToOp(cc int64) Op {
   717  	return Op(cc)
   718  }
   719  
   720  func boolToAuxInt(b bool) int64 {
   721  	if b {
   722  		return 1
   723  	}
   724  	return 0
   725  }
   726  func int8ToAuxInt(i int8) int64 {
   727  	return int64(i)
   728  }
   729  func int16ToAuxInt(i int16) int64 {
   730  	return int64(i)
   731  }
   732  func int32ToAuxInt(i int32) int64 {
   733  	return int64(i)
   734  }
   735  func int64ToAuxInt(i int64) int64 {
   736  	return i
   737  }
   738  func uint8ToAuxInt(i uint8) int64 {
   739  	return int64(int8(i))
   740  }
   741  func float32ToAuxInt(f float32) int64 {
   742  	return int64(math.Float64bits(float64(f)))
   743  }
   744  func float64ToAuxInt(f float64) int64 {
   745  	return int64(math.Float64bits(f))
   746  }
   747  func valAndOffToAuxInt(v ValAndOff) int64 {
   748  	return int64(v)
   749  }
   750  func arm64BitFieldToAuxInt(v arm64BitField) int64 {
   751  	return int64(v)
   752  }
   753  func arm64ConditionalParamsToAuxInt(v arm64ConditionalParams) int64 {
   754  	if v.cond&^0xffff != 0 {
   755  		panic("condition value exceeds 16 bits")
   756  	}
   757  
   758  	var i int64
   759  	if v.ind {
   760  		i = 1 << 25
   761  	}
   762  	i |= int64(v.constValue) << 20
   763  	i |= int64(v.nzcv) << 16
   764  	i |= int64(v.cond)
   765  	return i
   766  }
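
         // For reference, the AuxInt layout shared by arm64ConditionalParamsToAuxInt
         // and auxIntToArm64ConditionalParams (as derived from the code) is:
         // bits 0-15 cond, bits 16-19 nzcv, bits 20-24 constValue, bit 25 ind.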
   767  
   768  func flagConstantToAuxInt(x flagConstant) int64 {
   769  	return int64(x)
   770  }
   771  
   772  func opToAuxInt(o Op) int64 {
   773  	return int64(o)
   774  }
   775  
   776  // Aux is an interface to hold miscellaneous data in Blocks and Values.
   777  type Aux interface {
   778  	CanBeAnSSAAux()
   779  }
   780  
   781  // for now only used to mark moves that need to avoid clobbering flags
   782  type auxMark bool
   783  
   784  func (auxMark) CanBeAnSSAAux() {}
   785  
   786  var AuxMark auxMark
   787  
   788  // stringAux wraps string values for use in Aux.
   789  type stringAux string
   790  
   791  func (stringAux) CanBeAnSSAAux() {}
   792  
   793  func auxToString(i Aux) string {
   794  	return string(i.(stringAux))
   795  }
   796  func auxToSym(i Aux) Sym {
   797  	// TODO: kind of a hack - allows nil interface through
   798  	s, _ := i.(Sym)
   799  	return s
   800  }
   801  func auxToType(i Aux) *types.Type {
   802  	return i.(*types.Type)
   803  }
   804  func auxToCall(i Aux) *AuxCall {
   805  	return i.(*AuxCall)
   806  }
   807  func auxToS390xCCMask(i Aux) s390x.CCMask {
   808  	return i.(s390x.CCMask)
   809  }
   810  func auxToS390xRotateParams(i Aux) s390x.RotateParams {
   811  	return i.(s390x.RotateParams)
   812  }
   813  
   814  func StringToAux(s string) Aux {
   815  	return stringAux(s)
   816  }
   817  func symToAux(s Sym) Aux {
   818  	return s
   819  }
   820  func callToAux(s *AuxCall) Aux {
   821  	return s
   822  }
   823  func typeToAux(t *types.Type) Aux {
   824  	return t
   825  }
   826  func s390xCCMaskToAux(c s390x.CCMask) Aux {
   827  	return c
   828  }
   829  func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
   830  	return r
   831  }
   832  
   833  // uaddOvf reports whether unsigned a+b would overflow.
   834  func uaddOvf(a, b int64) bool {
   835  	return uint64(a)+uint64(b) < uint64(a)
   836  }
   837  
   838  func devirtLECall(v *Value, sym *obj.LSym) *Value {
   839  	v.Op = OpStaticLECall
   840  	auxcall := v.Aux.(*AuxCall)
   841  	auxcall.Fn = sym
   842  	// Remove first arg
   843  	v.Args[0].Uses--
   844  	copy(v.Args[0:], v.Args[1:])
   845  	v.Args[len(v.Args)-1] = nil // aid GC
   846  	v.Args = v.Args[:len(v.Args)-1]
   847  	if f := v.Block.Func; f.pass.debug > 0 {
   848  		f.Warnl(v.Pos, "de-virtualizing call")
   849  	}
   850  	return v
   851  }
   852  
   853  // isSamePtr reports whether p1 and p2 point to the same address.
   854  func isSamePtr(p1, p2 *Value) bool {
   855  	if p1 == p2 {
   856  		return true
   857  	}
   858  	if p1.Op != p2.Op {
   859  		for p1.Op == OpOffPtr && p1.AuxInt == 0 {
   860  			p1 = p1.Args[0]
   861  		}
   862  		for p2.Op == OpOffPtr && p2.AuxInt == 0 {
   863  			p2 = p2.Args[0]
   864  		}
   865  		if p1 == p2 {
   866  			return true
   867  		}
   868  		if p1.Op != p2.Op {
   869  			return false
   870  		}
   871  	}
   872  	switch p1.Op {
   873  	case OpOffPtr:
   874  		return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
   875  	case OpAddr, OpLocalAddr:
   876  		return p1.Aux == p2.Aux
   877  	case OpAddPtr:
   878  		return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
   879  	}
   880  	return false
   881  }
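
         // For example, OffPtr[8](OffPtr[0](p)) and OffPtr[8](p), with p a local
         // address, are reported as the same pointer: the zero-offset OffPtr is
         // stripped and the remaining offset and base match.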
   882  
   883  func isStackPtr(v *Value) bool {
   884  	for v.Op == OpOffPtr || v.Op == OpAddPtr {
   885  		v = v.Args[0]
   886  	}
   887  	return v.Op == OpSP || v.Op == OpLocalAddr
   888  }
   889  
   890  // disjoint reports whether the memory region specified by [p1:p1+n1)
   891  // does not overlap with [p2:p2+n2).
   892  // A return value of false does not imply the regions overlap.
   893  func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
   894  	if n1 == 0 || n2 == 0 {
   895  		return true
   896  	}
   897  	if p1 == p2 {
   898  		return false
   899  	}
   900  	baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
   901  		base, offset = ptr, 0
   902  		for base.Op == OpOffPtr {
   903  			offset += base.AuxInt
   904  			base = base.Args[0]
   905  		}
   906  		if opcodeTable[base.Op].nilCheck {
   907  			base = base.Args[0]
   908  		}
   909  		return base, offset
   910  	}
   911  
   912  	// Run types-based analysis
   913  	if disjointTypes(p1.Type, p2.Type) {
   914  		return true
   915  	}
   916  
   917  	p1, off1 := baseAndOffset(p1)
   918  	p2, off2 := baseAndOffset(p2)
   919  	if isSamePtr(p1, p2) {
   920  		return !overlap(off1, n1, off2, n2)
   921  	}
   922  	// p1 and p2 are not the same, so if they are both OpAddrs then
   923  	// they point to different variables.
   924  	// If one pointer is on the stack and the other is an argument
   925  	// then they can't overlap.
   926  	switch p1.Op {
   927  	case OpAddr, OpLocalAddr:
   928  		if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
   929  			return true
   930  		}
   931  		return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
   932  	case OpArg, OpArgIntReg:
   933  		if p2.Op == OpSP || p2.Op == OpLocalAddr {
   934  			return true
   935  		}
   936  	case OpSP:
   937  		return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
   938  	}
   939  	return false
   940  }
   941  
   942  // disjointTypes reports whether a memory region pointed to by a pointer of type
   943  // t1 does not overlap with a memory region pointed to by a pointer of type t2 --
   944  // based on type aliasing rules.
   945  func disjointTypes(t1 *types.Type, t2 *types.Type) bool {
   946  	// Unsafe pointer can alias with anything.
   947  	if t1.IsUnsafePtr() || t2.IsUnsafePtr() {
   948  		return false
   949  	}
   950  
   951  	if !t1.IsPtr() || !t2.IsPtr() {
   952  		panic("disjointTypes: one of arguments is not a pointer")
   953  	}
   954  
   955  	t1 = t1.Elem()
   956  	t2 = t2.Elem()
   957  
   958  	// Not-in-heap types are not supported -- they are rare and non-important; also,
   959  	// type.HasPointers check doesn't work for them correctly.
   960  	if t1.NotInHeap() || t2.NotInHeap() {
   961  		return false
   962  	}
   963  
   964  	isPtrShaped := func(t *types.Type) bool { return int(t.Size()) == types.PtrSize && t.HasPointers() }
   965  
   966  	// Pointers and non-pointers are disjoint (https://pkg.go.dev/unsafe#Pointer).
   967  	if (isPtrShaped(t1) && !t2.HasPointers()) ||
   968  		(isPtrShaped(t2) && !t1.HasPointers()) {
   969  		return true
   970  	}
   971  
   972  	return false
   973  }
   974  
   975  // moveSize returns the number of bytes an aligned MOV instruction moves.
   976  func moveSize(align int64, c *Config) int64 {
   977  	switch {
   978  	case align%8 == 0 && c.PtrSize == 8:
   979  		return 8
   980  	case align%4 == 0:
   981  		return 4
   982  	case align%2 == 0:
   983  		return 2
   984  	}
   985  	return 1
   986  }
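
         // For example, with 8-byte pointers moveSize(8, c) is 8, moveSize(4, c) is 4,
         // moveSize(6, c) is 2 (a multiple of 2 but not of 4), and moveSize(1, c) is 1.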
   987  
   988  // mergePoint finds a block among a's blocks which dominates b and is itself
   989  // dominated by all of a's blocks. Returns nil if it can't find one.
   990  // Might return nil even if one does exist.
   991  func mergePoint(b *Block, a ...*Value) *Block {
   992  	// Walk backward from b looking for one of the a's blocks.
   993  
   994  	// Max distance
   995  	d := 100
   996  
   997  	for d > 0 {
   998  		for _, x := range a {
   999  			if b == x.Block {
  1000  				goto found
  1001  			}
  1002  		}
  1003  		if len(b.Preds) > 1 {
  1004  			// Don't know which way to go back. Abort.
  1005  			return nil
  1006  		}
  1007  		b = b.Preds[0].b
  1008  		d--
  1009  	}
  1010  	return nil // too far away
  1011  found:
   1012  	// At this point, b is the block in which we found the first value from a by walking backwards.
   1013  	// If we return anything, it will be this block, saved as r.
  1014  	r := b
  1015  
  1016  	// Keep going, counting the other a's that we find. They must all dominate r.
  1017  	na := 0
  1018  	for d > 0 {
  1019  		for _, x := range a {
  1020  			if b == x.Block {
  1021  				na++
  1022  			}
  1023  		}
  1024  		if na == len(a) {
  1025  			// Found all of a in a backwards walk. We can return r.
  1026  			return r
  1027  		}
  1028  		if len(b.Preds) > 1 {
  1029  			return nil
  1030  		}
  1031  		b = b.Preds[0].b
  1032  		d--
  1033  
  1034  	}
  1035  	return nil // too far away
  1036  }
  1037  
  1038  // clobber invalidates values. Returns true.
  1039  // clobber is used by rewrite rules to:
  1040  //
  1041  //	A) make sure the values are really dead and never used again.
  1042  //	B) decrement use counts of the values' args.
  1043  func clobber(vv ...*Value) bool {
  1044  	for _, v := range vv {
  1045  		v.reset(OpInvalid)
  1046  		// Note: leave v.Block intact.  The Block field is used after clobber.
  1047  	}
  1048  	return true
  1049  }
  1050  
  1051  // resetCopy resets v to be a copy of arg.
  1052  // Always returns true.
  1053  func resetCopy(v *Value, arg *Value) bool {
  1054  	v.reset(OpCopy)
  1055  	v.AddArg(arg)
  1056  	return true
  1057  }
  1058  
  1059  // clobberIfDead resets v when use count is 1. Returns true.
  1060  // clobberIfDead is used by rewrite rules to decrement
  1061  // use counts of v's args when v is dead and never used.
  1062  func clobberIfDead(v *Value) bool {
  1063  	if v.Uses == 1 {
  1064  		v.reset(OpInvalid)
  1065  	}
  1066  	// Note: leave v.Block intact.  The Block field is used after clobberIfDead.
  1067  	return true
  1068  }
  1069  
  1070  // noteRule is an easy way to track if a rule is matched when writing
  1071  // new ones.  Make the rule of interest also conditional on
  1072  //
  1073  //	noteRule("note to self: rule of interest matched")
  1074  //
  1075  // and that message will print when the rule matches.
  1076  func noteRule(s string) bool {
  1077  	fmt.Println(s)
  1078  	return true
  1079  }
  1080  
  1081  // countRule increments Func.ruleMatches[key].
  1082  // If Func.ruleMatches is non-nil at the end
  1083  // of compilation, it will be printed to stdout.
   1084  // This is intended to make it easier to find which functions
   1085  // contain lots of rule matches when developing new rules.
  1086  func countRule(v *Value, key string) bool {
  1087  	f := v.Block.Func
  1088  	if f.ruleMatches == nil {
  1089  		f.ruleMatches = make(map[string]int)
  1090  	}
  1091  	f.ruleMatches[key]++
  1092  	return true
  1093  }
  1094  
  1095  // warnRule generates compiler debug output with string s when
  1096  // v is not in autogenerated code, cond is true and the rule has fired.
  1097  func warnRule(cond bool, v *Value, s string) bool {
  1098  	if pos := v.Pos; pos.Line() > 1 && cond {
  1099  		v.Block.Func.Warnl(pos, s)
  1100  	}
  1101  	return true
  1102  }
  1103  
  1104  // for a pseudo-op like (LessThan x), extract x.
  1105  func flagArg(v *Value) *Value {
  1106  	if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
  1107  		return nil
  1108  	}
  1109  	return v.Args[0]
  1110  }
  1111  
  1112  // arm64Negate finds the complement to an ARM64 condition code,
  1113  // for example !Equal -> NotEqual or !LessThan -> GreaterEqual
  1114  //
  1115  // For floating point, it's more subtle because NaN is unordered. We do
  1116  // !LessThanF -> NotLessThanF, the latter takes care of NaNs.
  1117  func arm64Negate(op Op) Op {
  1118  	switch op {
  1119  	case OpARM64LessThan:
  1120  		return OpARM64GreaterEqual
  1121  	case OpARM64LessThanU:
  1122  		return OpARM64GreaterEqualU
  1123  	case OpARM64GreaterThan:
  1124  		return OpARM64LessEqual
  1125  	case OpARM64GreaterThanU:
  1126  		return OpARM64LessEqualU
  1127  	case OpARM64LessEqual:
  1128  		return OpARM64GreaterThan
  1129  	case OpARM64LessEqualU:
  1130  		return OpARM64GreaterThanU
  1131  	case OpARM64GreaterEqual:
  1132  		return OpARM64LessThan
  1133  	case OpARM64GreaterEqualU:
  1134  		return OpARM64LessThanU
  1135  	case OpARM64Equal:
  1136  		return OpARM64NotEqual
  1137  	case OpARM64NotEqual:
  1138  		return OpARM64Equal
  1139  	case OpARM64LessThanF:
  1140  		return OpARM64NotLessThanF
  1141  	case OpARM64NotLessThanF:
  1142  		return OpARM64LessThanF
  1143  	case OpARM64LessEqualF:
  1144  		return OpARM64NotLessEqualF
  1145  	case OpARM64NotLessEqualF:
  1146  		return OpARM64LessEqualF
  1147  	case OpARM64GreaterThanF:
  1148  		return OpARM64NotGreaterThanF
  1149  	case OpARM64NotGreaterThanF:
  1150  		return OpARM64GreaterThanF
  1151  	case OpARM64GreaterEqualF:
  1152  		return OpARM64NotGreaterEqualF
  1153  	case OpARM64NotGreaterEqualF:
  1154  		return OpARM64GreaterEqualF
  1155  	default:
  1156  		panic("unreachable")
  1157  	}
  1158  }
  1159  
  1160  // arm64Invert evaluates (InvertFlags op), which
  1161  // is the same as altering the condition codes such
  1162  // that the same result would be produced if the arguments
  1163  // to the flag-generating instruction were reversed, e.g.
  1164  // (InvertFlags (CMP x y)) -> (CMP y x)
  1165  func arm64Invert(op Op) Op {
  1166  	switch op {
  1167  	case OpARM64LessThan:
  1168  		return OpARM64GreaterThan
  1169  	case OpARM64LessThanU:
  1170  		return OpARM64GreaterThanU
  1171  	case OpARM64GreaterThan:
  1172  		return OpARM64LessThan
  1173  	case OpARM64GreaterThanU:
  1174  		return OpARM64LessThanU
  1175  	case OpARM64LessEqual:
  1176  		return OpARM64GreaterEqual
  1177  	case OpARM64LessEqualU:
  1178  		return OpARM64GreaterEqualU
  1179  	case OpARM64GreaterEqual:
  1180  		return OpARM64LessEqual
  1181  	case OpARM64GreaterEqualU:
  1182  		return OpARM64LessEqualU
  1183  	case OpARM64Equal, OpARM64NotEqual:
  1184  		return op
  1185  	case OpARM64LessThanF:
  1186  		return OpARM64GreaterThanF
  1187  	case OpARM64GreaterThanF:
  1188  		return OpARM64LessThanF
  1189  	case OpARM64LessEqualF:
  1190  		return OpARM64GreaterEqualF
  1191  	case OpARM64GreaterEqualF:
  1192  		return OpARM64LessEqualF
  1193  	case OpARM64NotLessThanF:
  1194  		return OpARM64NotGreaterThanF
  1195  	case OpARM64NotGreaterThanF:
  1196  		return OpARM64NotLessThanF
  1197  	case OpARM64NotLessEqualF:
  1198  		return OpARM64NotGreaterEqualF
  1199  	case OpARM64NotGreaterEqualF:
  1200  		return OpARM64NotLessEqualF
  1201  	default:
  1202  		panic("unreachable")
  1203  	}
  1204  }
  1205  
  1206  // evaluate an ARM64 op against a flags value
  1207  // that is potentially constant; return 1 for true,
  1208  // -1 for false, and 0 for not constant.
  1209  func ccARM64Eval(op Op, flags *Value) int {
  1210  	fop := flags.Op
  1211  	if fop == OpARM64InvertFlags {
  1212  		return -ccARM64Eval(op, flags.Args[0])
  1213  	}
  1214  	if fop != OpARM64FlagConstant {
  1215  		return 0
  1216  	}
  1217  	fc := flagConstant(flags.AuxInt)
  1218  	b2i := func(b bool) int {
  1219  		if b {
  1220  			return 1
  1221  		}
  1222  		return -1
  1223  	}
  1224  	switch op {
  1225  	case OpARM64Equal:
  1226  		return b2i(fc.eq())
  1227  	case OpARM64NotEqual:
  1228  		return b2i(fc.ne())
  1229  	case OpARM64LessThan:
  1230  		return b2i(fc.lt())
  1231  	case OpARM64LessThanU:
  1232  		return b2i(fc.ult())
  1233  	case OpARM64GreaterThan:
  1234  		return b2i(fc.gt())
  1235  	case OpARM64GreaterThanU:
  1236  		return b2i(fc.ugt())
  1237  	case OpARM64LessEqual:
  1238  		return b2i(fc.le())
  1239  	case OpARM64LessEqualU:
  1240  		return b2i(fc.ule())
  1241  	case OpARM64GreaterEqual:
  1242  		return b2i(fc.ge())
  1243  	case OpARM64GreaterEqualU:
  1244  		return b2i(fc.uge())
  1245  	}
  1246  	return 0
  1247  }
  1248  
  1249  // logRule logs the use of the rule s. This will only be enabled if
  1250  // rewrite rules were generated with the -log option, see _gen/rulegen.go.
  1251  func logRule(s string) {
  1252  	if ruleFile == nil {
  1253  		// Open a log file to write log to. We open in append
  1254  		// mode because all.bash runs the compiler lots of times,
  1255  		// and we want the concatenation of all of those logs.
  1256  		// This means, of course, that users need to rm the old log
  1257  		// to get fresh data.
  1258  		// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
  1259  		w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
  1260  			os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
  1261  		if err != nil {
  1262  			panic(err)
  1263  		}
  1264  		ruleFile = w
  1265  	}
  1266  	// Ignore errors in case of multiple processes fighting over the file.
  1267  	fmt.Fprintln(ruleFile, s)
  1268  }
  1269  
  1270  var ruleFile io.Writer
  1271  
  1272  func isConstZero(v *Value) bool {
  1273  	switch v.Op {
  1274  	case OpConstNil:
  1275  		return true
  1276  	case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
  1277  		return v.AuxInt == 0
  1278  	case OpStringMake, OpIMake, OpComplexMake:
  1279  		return isConstZero(v.Args[0]) && isConstZero(v.Args[1])
  1280  	case OpSliceMake:
  1281  		return isConstZero(v.Args[0]) && isConstZero(v.Args[1]) && isConstZero(v.Args[2])
  1282  	case OpStringPtr, OpStringLen, OpSlicePtr, OpSliceLen, OpSliceCap, OpITab, OpIData, OpComplexReal, OpComplexImag:
  1283  		return isConstZero(v.Args[0])
  1284  	}
  1285  	return false
  1286  }
  1287  
  1288  // reciprocalExact64 reports whether 1/c is exactly representable.
  1289  func reciprocalExact64(c float64) bool {
  1290  	b := math.Float64bits(c)
  1291  	man := b & (1<<52 - 1)
  1292  	if man != 0 {
  1293  		return false // not a power of 2, denormal, or NaN
  1294  	}
  1295  	exp := b >> 52 & (1<<11 - 1)
  1296  	// exponent bias is 0x3ff.  So taking the reciprocal of a number
  1297  	// changes the exponent to 0x7fe-exp.
  1298  	switch exp {
  1299  	case 0:
  1300  		return false // ±0
  1301  	case 0x7ff:
  1302  		return false // ±inf
  1303  	case 0x7fe:
  1304  		return false // exponent is not representable
  1305  	default:
  1306  		return true
  1307  	}
  1308  }
  1309  
  1310  // reciprocalExact32 reports whether 1/c is exactly representable.
  1311  func reciprocalExact32(c float32) bool {
  1312  	b := math.Float32bits(c)
  1313  	man := b & (1<<23 - 1)
  1314  	if man != 0 {
  1315  		return false // not a power of 2, denormal, or NaN
  1316  	}
  1317  	exp := b >> 23 & (1<<8 - 1)
  1318  	// exponent bias is 0x7f.  So taking the reciprocal of a number
  1319  	// changes the exponent to 0xfe-exp.
  1320  	switch exp {
  1321  	case 0:
  1322  		return false // ±0
  1323  	case 0xff:
  1324  		return false // ±inf
  1325  	case 0xfe:
  1326  		return false // exponent is not representable
  1327  	default:
  1328  		return true
  1329  	}
  1330  }
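
         // For example, reciprocalExact64(0.25) is true (1/0.25 == 4 exactly), while
         // reciprocalExact64(3) is false: 3 has a nonzero mantissa, so 1/3 is not
         // exactly representable.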
  1331  
   1332  // isARMImmRot checks whether an immediate can be directly encoded into an ARM instruction.
  1333  func isARMImmRot(v uint32) bool {
  1334  	for i := 0; i < 16; i++ {
  1335  		if v&^0xff == 0 {
  1336  			return true
  1337  		}
  1338  		v = v<<2 | v>>30
  1339  	}
  1340  
  1341  	return false
  1342  }
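
         // For example, isARMImmRot(0xff000000) is true (0xff rotated right by 8),
         // while isARMImmRot(0x00000101) is false: bits 0 and 8 span nine positions
         // and cannot fit in an 8-bit window under any even rotation.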
  1343  
  1344  // overlap reports whether the ranges given by the given offset and
  1345  // size pairs overlap.
  1346  func overlap(offset1, size1, offset2, size2 int64) bool {
  1347  	if offset1 >= offset2 && offset2+size2 > offset1 {
  1348  		return true
  1349  	}
  1350  	if offset2 >= offset1 && offset1+size1 > offset2 {
  1351  		return true
  1352  	}
  1353  	return false
  1354  }
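
         // For example, overlap(0, 8, 4, 4) is true (bytes 4 through 7 are shared),
         // while overlap(0, 4, 4, 4) is false: the ranges are half-open and only touch
         // at offset 4.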
  1355  
   1356  // zeroUpper32Bits checks whether the value zeroes out the upper 32 bits of a 64-bit register.
   1357  // depth limits the recursion depth. In AMD64.rules 3 is used as the limit,
   1358  // because it catches the same number of cases as 4.
  1359  func zeroUpper32Bits(x *Value, depth int) bool {
  1360  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1361  		// If the value is signed, it might get re-sign-extended
  1362  		// during spill and restore. See issue 68227.
  1363  		return false
  1364  	}
  1365  	switch x.Op {
  1366  	case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
  1367  		OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
  1368  		OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
  1369  		OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
  1370  		OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
  1371  		OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
  1372  		OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
  1373  		OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
  1374  		OpAMD64SHLL, OpAMD64SHLLconst:
  1375  		return true
  1376  	case OpARM64REV16W, OpARM64REVW, OpARM64RBITW, OpARM64CLZW, OpARM64EXTRWconst,
  1377  		OpARM64MULW, OpARM64MNEGW, OpARM64UDIVW, OpARM64DIVW, OpARM64UMODW,
  1378  		OpARM64MADDW, OpARM64MSUBW, OpARM64RORW, OpARM64RORWconst:
  1379  		return true
  1380  	case OpArg: // note: but not ArgIntReg
  1381  		// amd64 always loads args from the stack unsigned.
  1382  		// most other architectures load them sign/zero extended based on the type.
  1383  		return x.Type.Size() == 4 && x.Block.Func.Config.arch == "amd64"
  1384  	case OpPhi, OpSelect0, OpSelect1:
   1385  		// Phis can use each other as arguments; instead of tracking visited values,
   1386  		// just limit the recursion depth.
  1387  		if depth <= 0 {
  1388  			return false
  1389  		}
  1390  		for i := range x.Args {
  1391  			if !zeroUpper32Bits(x.Args[i], depth-1) {
  1392  				return false
  1393  			}
  1394  		}
  1395  		return true
  1396  
  1397  	}
  1398  	return false
  1399  }
  1400  
  1401  // zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits.
  1402  func zeroUpper48Bits(x *Value, depth int) bool {
  1403  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1404  		return false
  1405  	}
  1406  	switch x.Op {
  1407  	case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
  1408  		return true
  1409  	case OpArg: // note: but not ArgIntReg
  1410  		return x.Type.Size() == 2 && x.Block.Func.Config.arch == "amd64"
  1411  	case OpPhi, OpSelect0, OpSelect1:
   1412  		// Phis can use each other as arguments; instead of tracking visited values,
   1413  		// just limit the recursion depth.
  1414  		if depth <= 0 {
  1415  			return false
  1416  		}
  1417  		for i := range x.Args {
  1418  			if !zeroUpper48Bits(x.Args[i], depth-1) {
  1419  				return false
  1420  			}
  1421  		}
  1422  		return true
  1423  
  1424  	}
  1425  	return false
  1426  }
  1427  
  1428  // zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits.
  1429  func zeroUpper56Bits(x *Value, depth int) bool {
  1430  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1431  		return false
  1432  	}
  1433  	switch x.Op {
  1434  	case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
  1435  		return true
  1436  	case OpArg: // note: but not ArgIntReg
  1437  		return x.Type.Size() == 1 && x.Block.Func.Config.arch == "amd64"
  1438  	case OpPhi, OpSelect0, OpSelect1:
   1439  		// Phis can use each other as arguments; instead of tracking visited values,
   1440  		// just limit the recursion depth.
  1441  		if depth <= 0 {
  1442  			return false
  1443  		}
  1444  		for i := range x.Args {
  1445  			if !zeroUpper56Bits(x.Args[i], depth-1) {
  1446  				return false
  1447  			}
  1448  		}
  1449  		return true
  1450  
  1451  	}
  1452  	return false
  1453  }
  1454  
  1455  func isInlinableMemclr(c *Config, sz int64) bool {
  1456  	if sz < 0 {
  1457  		return false
  1458  	}
  1459  	// TODO: expand this check to allow other architectures
  1460  	// see CL 454255 and issue 56997
  1461  	switch c.arch {
  1462  	case "amd64", "arm64":
  1463  		return true
  1464  	case "ppc64le", "ppc64", "loong64":
  1465  		return sz < 512
  1466  	}
  1467  	return false
  1468  }
  1469  
  1470  // isInlinableMemmove reports whether the given arch performs a Move of the given size
  1471  // faster than memmove. It will only return true if replacing the memmove with a Move is
  1472  // safe, either because Move will do all of its loads before any of its stores, or
  1473  // because the arguments are known to be disjoint.
  1474  // This is used as a check for replacing memmove with Move ops.
  1475  func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
  1476  	// It is always safe to convert memmove into Move when its arguments are disjoint.
  1477  	// Move ops may or may not be faster for large sizes depending on how the platform
  1478  	// lowers them, so we only perform this optimization on platforms that we know to
  1479  	// have fast Move ops.
  1480  	switch c.arch {
  1481  	case "amd64":
  1482  		return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
  1483  	case "arm64":
  1484  		return sz <= 64 || (sz <= 1024 && disjoint(dst, sz, src, sz))
  1485  	case "386":
  1486  		return sz <= 8
  1487  	case "s390x", "ppc64", "ppc64le":
  1488  		return sz <= 8 || disjoint(dst, sz, src, sz)
  1489  	case "arm", "loong64", "mips", "mips64", "mipsle", "mips64le":
  1490  		return sz <= 4
  1491  	}
  1492  	return false
  1493  }
  1494  func IsInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
  1495  	return isInlinableMemmove(dst, src, sz, c)
  1496  }
  1497  
  1498  // logLargeCopy logs the occurrence of a large copy.
  1499  // The best place to do this is in the rewrite rules where the size of the move is easy to find.
  1500  // "Large" is arbitrarily chosen to be 128 bytes; this may change.
  1501  func logLargeCopy(v *Value, s int64) bool {
  1502  	if s < 128 {
  1503  		return true
  1504  	}
  1505  	if logopt.Enabled() {
  1506  		logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
  1507  	}
  1508  	return true
  1509  }
  1510  func LogLargeCopy(funcName string, pos src.XPos, s int64) {
  1511  	if s < 128 {
  1512  		return
  1513  	}
  1514  	if logopt.Enabled() {
  1515  		logopt.LogOpt(pos, "copy", "lower", funcName, fmt.Sprintf("%d bytes", s))
  1516  	}
  1517  }
  1518  
  1519  // hasSmallRotate reports whether the architecture has rotate instructions
  1520  // for sizes < 32-bit.  This is used to decide whether to promote some rotations.
  1521  func hasSmallRotate(c *Config) bool {
  1522  	switch c.arch {
  1523  	case "amd64", "386":
  1524  		return true
  1525  	default:
  1526  		return false
  1527  	}
  1528  }
  1529  
  1530  func supportsPPC64PCRel() bool {
  1531  	// PCRel is currently supported for >= power10, linux only
  1532  	// Internal and external linking supports this on ppc64le; internal linking on ppc64.
  1533  	return buildcfg.GOPPC64 >= 10 && buildcfg.GOOS == "linux"
  1534  }
  1535  
  1536  func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
  1537  	if sh < 0 || sh >= sz {
  1538  		panic("PPC64 shift arg sh out of range")
  1539  	}
  1540  	if mb < 0 || mb >= sz {
  1541  		panic("PPC64 shift arg mb out of range")
  1542  	}
  1543  	if me < 0 || me >= sz {
  1544  		panic("PPC64 shift arg me out of range")
  1545  	}
  1546  	return int32(sh<<16 | mb<<8 | me)
  1547  }
  1548  
  1549  func GetPPC64Shiftsh(auxint int64) int64 {
  1550  	return int64(int8(auxint >> 16))
  1551  }
  1552  
  1553  func GetPPC64Shiftmb(auxint int64) int64 {
  1554  	return int64(int8(auxint >> 8))
  1555  }
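
         // For example, newPPC64ShiftAuxInt(4, 8, 27, 32) returns 0x4081b, from which
         // GetPPC64Shiftsh recovers sh == 4 and GetPPC64Shiftmb recovers mb == 8; the
         // low byte (0x1b) holds me == 27.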
  1556  
   1557  // Test if this value can be encoded as a mask for an rlwinm-like
   1558  // operation.  Masks can also extend from the msb and wrap to
   1559  // the lsb.  That is, the valid masks are 32 bit strings
  1560  // of the form: 0..01..10..0 or 1..10..01..1 or 1...1
  1561  //
  1562  // Note: This ignores the upper 32 bits of the input. When a
   1563  // zero extended result is desired (e.g. a 64 bit result), the
  1564  // user must verify the upper 32 bits are 0 and the mask is
  1565  // contiguous (that is, non-wrapping).
  1566  func isPPC64WordRotateMask(v64 int64) bool {
  1567  	// Isolate rightmost 1 (if none 0) and add.
  1568  	v := uint32(v64)
  1569  	vp := (v & -v) + v
  1570  	// Likewise, for the wrapping case.
  1571  	vn := ^v
  1572  	vpn := (vn & -vn) + vn
  1573  	return (v&vp == 0 || vn&vpn == 0) && v != 0
  1574  }
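
         // For example, for v64 = 0x0ff0 the non-wrapping test fires: v&-v is 0x10,
         // vp is 0x1000, and v&vp == 0. For v64 = 0xf000000f the wrapping test fires
         // on the inverted value instead, so both are valid masks; 0x0f0f fails both
         // tests and is rejected.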
  1575  
  1576  // Test if this mask is a valid, contiguous bitmask which can be
  1577  // represented by a RLWNM mask and also clears the upper 32 bits
  1578  // of the register.
  1579  func isPPC64WordRotateMaskNonWrapping(v64 int64) bool {
  1580  	// Isolate rightmost 1 (if none 0) and add.
  1581  	v := uint32(v64)
  1582  	vp := (v & -v) + v
  1583  	return (v&vp == 0) && v != 0 && uint64(uint32(v64)) == uint64(v64)
  1584  }
  1585  
   1586  // Compress the mask and shift into a single value of the form
  1587  // me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can
  1588  // be used to regenerate the input mask.
  1589  func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
  1590  	var mb, me, mbn, men int
  1591  
  1592  	// Determine boundaries and then decode them
  1593  	if mask == 0 || ^mask == 0 || rotate >= nbits {
  1594  		panic(fmt.Sprintf("invalid PPC64 rotate mask: %x %d %d", uint64(mask), rotate, nbits))
  1595  	} else if nbits == 32 {
  1596  		mb = bits.LeadingZeros32(uint32(mask))
  1597  		me = 32 - bits.TrailingZeros32(uint32(mask))
  1598  		mbn = bits.LeadingZeros32(^uint32(mask))
  1599  		men = 32 - bits.TrailingZeros32(^uint32(mask))
  1600  	} else {
  1601  		mb = bits.LeadingZeros64(uint64(mask))
  1602  		me = 64 - bits.TrailingZeros64(uint64(mask))
  1603  		mbn = bits.LeadingZeros64(^uint64(mask))
  1604  		men = 64 - bits.TrailingZeros64(^uint64(mask))
  1605  	}
  1606  	// Check for a wrapping mask (e.g. bits set at 0 and 63).
  1607  	if mb == 0 && me == int(nbits) {
  1608  		// swap the inverted values
  1609  		mb, me = men, mbn
  1610  	}
  1611  
  1612  	return int64(me) | int64(mb<<8) | rotate<<16 | nbits<<24
  1613  }
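        // For example (illustrative), encodePPC64RotateMask(4, 0xF0, 32) packs
        // me=28, mb=24, rotate=4, nbits=32 into 0x2004181C.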
  1614  
  1615  // Merge (RLDICL [encoded] (SRDconst [s] x)) into (RLDICL [new_encoded] x)
  1616  // SRDconst on PPC64 is an extended mnemonic of RLDICL. If the input to an
  1617  // RLDICL is an SRDconst, and the RLDICL does not rotate its value, the two
  1618  // operations can be combined. This function assumes the two opcodes can
  1619  // be merged, and returns an encoded rotate+mask value of the combined RLDICL.
  1620  func mergePPC64RLDICLandSRDconst(encoded, s int64) int64 {
  1621  	mb := s
  1622  	r := 64 - s
  1623  	// A larger mb is a smaller mask.
  1624  	if (encoded>>8)&0xFF < mb {
  1625  		encoded = (encoded &^ 0xFF00) | mb<<8
  1626  	}
  1627  	// The rotate is expected to be 0.
  1628  	if (encoded & 0xFF0000) != 0 {
  1629  		panic("non-zero rotate")
  1630  	}
  1631  	return encoded | r<<16
  1632  }
  1633  
  1634  // DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask.  The values returned as
  1635  // mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
  1636  func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
  1637  	auxint := uint64(sauxint)
  1638  	rotate = int64((auxint >> 16) & 0xFF)
  1639  	mb = int64((auxint >> 8) & 0xFF)
  1640  	me = int64((auxint >> 0) & 0xFF)
  1641  	nbits := int64((auxint >> 24) & 0xFF)
  1642  	mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
  1643  	if mb > me {
  1644  		mask = ^mask
  1645  	}
  1646  	if nbits == 32 {
  1647  		mask = uint64(uint32(mask))
  1648  	}
  1649  
  1650  	// Fixup ME to match ISA definition.  The second argument to MASK(..,me)
  1651  	// is inclusive.
  1652  	me = (me - 1) & (nbits - 1)
  1653  	return
  1654  }
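        // Continuing the example above (illustrative), DecodePPC64RotateMask(0x2004181C)
        // returns rotate=4, mb=24, me=27 and mask=0xF0; MASK(24,27) selects bits 24..27
        // in the ISA's big-endian bit numbering, which is exactly 0xF0.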
  1655  
  1656  // This verifies that the mask is a set of
  1657  // consecutive bits including the least
  1658  // significant bit.
  1659  func isPPC64ValidShiftMask(v int64) bool {
  1660  	if (v != 0) && ((v+1)&v) == 0 {
  1661  		return true
  1662  	}
  1663  	return false
  1664  }
  1665  
  1666  func getPPC64ShiftMaskLength(v int64) int64 {
  1667  	return int64(bits.Len64(uint64(v)))
  1668  }
  1669  
  1670  // Decompose a shift right into an equivalent rotate/mask,
  1671  // and return mask & m.
  1672  func mergePPC64RShiftMask(m, s, nbits int64) int64 {
  1673  	smask := uint64((1<<uint(nbits))-1) >> uint(s)
  1674  	return m & int64(smask)
  1675  }
  1676  
  1677  // Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
  1678  func mergePPC64AndSrwi(m, s int64) int64 {
  1679  	mask := mergePPC64RShiftMask(m, s, 32)
  1680  	if !isPPC64WordRotateMask(mask) {
  1681  		return 0
  1682  	}
  1683  	return encodePPC64RotateMask((32-s)&31, mask, 32)
  1684  }
  1685  
  1686  // Combine (ANDconst [m] (SRDconst [s])) into (RLWINM [y]) or return 0
  1687  func mergePPC64AndSrdi(m, s int64) int64 {
  1688  	mask := mergePPC64RShiftMask(m, s, 64)
  1689  
  1690  	// Verify the rotate and mask result only uses the lower 32 bits.
  1691  	rv := bits.RotateLeft64(0xFFFFFFFF00000000, -int(s))
  1692  	if rv&uint64(mask) != 0 {
  1693  		return 0
  1694  	}
  1695  	if !isPPC64WordRotateMaskNonWrapping(mask) {
  1696  		return 0
  1697  	}
  1698  	return encodePPC64RotateMask((32-s)&31, mask, 32)
  1699  }
  1700  
  1701  // Combine (ANDconst [m] (SLDconst [s])) into (RLWINM [y]) or return 0
  1702  func mergePPC64AndSldi(m, s int64) int64 {
  1703  	mask := -1 << s & m
  1704  
  1705  	// Verify the rotate and mask result only uses the lower 32 bits.
  1706  	rv := bits.RotateLeft64(0xFFFFFFFF00000000, int(s))
  1707  	if rv&uint64(mask) != 0 {
  1708  		return 0
  1709  	}
  1710  	if !isPPC64WordRotateMaskNonWrapping(mask) {
  1711  		return 0
  1712  	}
  1713  	return encodePPC64RotateMask(s&31, mask, 32)
  1714  }
  1715  
  1716  // Test if a word shift right feeding into a CLRLSLDI can be merged into RLWINM.
  1717  // Return the encoded RLWINM constant, or 0 if they cannot be merged.
  1718  func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
  1719  	mask_1 := uint64(0xFFFFFFFF >> uint(srw))
  1720  	// For CLRLSLDI, it's more convenient to think of it as masking off the leftmost bits, then rotating left.
  1721  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(sld))
  1722  
  1723  	// Rewrite mask to apply after the final left shift.
  1724  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
  1725  
  1726  	r_1 := 32 - srw
  1727  	r_2 := GetPPC64Shiftsh(sld)
  1728  	r_3 := (r_1 + r_2) & 31 // This can wrap.
  1729  
  1730  	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
  1731  		return 0
  1732  	}
  1733  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1734  }
  1735  
  1736  // Test if a doubleword shift right feeding into a CLRLSLDI can be merged into RLWINM.
  1737  // Return the encoded RLWINM constant, or 0 if they cannot be merged.
  1738  func mergePPC64ClrlsldiSrd(sld, srd int64) int64 {
  1739  	mask_1 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(srd)
  1740  	// For CLRLSLDI, it's more convenient to think of it as masking off the leftmost bits, then rotating left.
  1741  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(sld))
  1742  
  1743  	// Rewrite mask to apply after the final left shift.
  1744  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
  1745  
  1746  	r_1 := 64 - srd
  1747  	r_2 := GetPPC64Shiftsh(sld)
  1748  	r_3 := (r_1 + r_2) & 63 // This can wrap.
  1749  
  1750  	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
  1751  		return 0
  1752  	}
  1753  	// This combine only works when selecting and shifting the lower 32 bits.
  1754  	v1 := bits.RotateLeft64(0xFFFFFFFF00000000, int(r_3))
  1755  	if v1&mask_3 != 0 {
  1756  		return 0
  1757  	}
  1758  	return encodePPC64RotateMask(r_3&31, int64(mask_3), 32)
  1759  }
  1760  
  1761  // Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM.  Return
  1762  // the encoded RLWINM constant, or 0 if they cannot be merged.
  1763  func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
  1764  	r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
  1765  	// For CLRLSLDI, it's more convenient to think of it as masking off the leftmost bits, then rotating left.
  1766  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1767  
  1768  	// combine the masks, and adjust for the final left shift.
  1769  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
  1770  	r_2 := GetPPC64Shiftsh(int64(sld))
  1771  	r_3 := (r_1 + r_2) & 31 // This can wrap.
  1772  
  1773  	// Verify the result is still a valid bitmask of <= 32 bits.
  1774  	if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
  1775  		return 0
  1776  	}
  1777  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1778  }
  1779  
  1780  // Test if RLWINM feeding into an ANDconst can be merged. Return the encoded RLWINM constant,
  1781  // or 0 if they cannot be merged.
  1782  func mergePPC64AndRlwinm(mask uint32, rlw int64) int64 {
  1783  	r, _, _, mask_rlw := DecodePPC64RotateMask(rlw)
  1784  	mask_out := (mask_rlw & uint64(mask))
  1785  
  1786  	// Verify the result is still a valid bitmask of <= 32 bits.
  1787  	if !isPPC64WordRotateMask(int64(mask_out)) {
  1788  		return 0
  1789  	}
  1790  	return encodePPC64RotateMask(r, int64(mask_out), 32)
  1791  }
  1792  
  1793  // Test if RLWINM opcode rlw clears the upper 32 bits of the
  1794  // result. Return rlw if it does, 0 otherwise.
  1795  func mergePPC64MovwzregRlwinm(rlw int64) int64 {
  1796  	_, mb, me, _ := DecodePPC64RotateMask(rlw)
  1797  	if mb > me {
  1798  		return 0
  1799  	}
  1800  	return rlw
  1801  }
  1802  
  1803  // Test if an RLWINM feeding into an AND can be merged. Return the encoded RLWINM constant,
  1804  // or 0 if they cannot be merged.
  1805  func mergePPC64RlwinmAnd(rlw int64, mask uint32) int64 {
  1806  	r, _, _, mask_rlw := DecodePPC64RotateMask(rlw)
  1807  
  1808  	// Rotate the input mask, combine with the rlwnm mask, and test if it is still a valid rlwinm mask.
  1809  	r_mask := bits.RotateLeft32(mask, int(r))
  1810  
  1811  	mask_out := (mask_rlw & uint64(r_mask))
  1812  
  1813  	// Verify the result is still a valid bitmask of <= 32 bits.
  1814  	if !isPPC64WordRotateMask(int64(mask_out)) {
  1815  		return 0
  1816  	}
  1817  	return encodePPC64RotateMask(r, int64(mask_out), 32)
  1818  }
  1819  
  1820  // Test if RLWINM feeding into SLDconst can be merged. Return the encoded RLWINM constant,
  1821  // or 0 if they cannot be merged.
  1822  func mergePPC64SldiRlwinm(sldi, rlw int64) int64 {
  1823  	r_1, mb, me, mask_1 := DecodePPC64RotateMask(rlw)
  1824  	if mb > me || mb < sldi {
  1825  		// Wrapping masks cannot be merged as the upper 32 bits are effectively undefined in this case.
  1826  		// Likewise, if mb is less than the shift amount, it cannot be merged.
  1827  		return 0
  1828  	}
  1829  	// combine the masks, and adjust for the final left shift.
  1830  	mask_3 := mask_1 << sldi
  1831  	r_3 := (r_1 + sldi) & 31 // This can wrap.
  1832  
  1833  	// Verify the result is still a valid bitmask of <= 32 bits.
  1834  	if uint64(uint32(mask_3)) != mask_3 {
  1835  		return 0
  1836  	}
  1837  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1838  }
  1839  
  1840  // Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
  1841  // or return 0 if they cannot be combined.
  1842  func mergePPC64SldiSrw(sld, srw int64) int64 {
  1843  	if sld > srw || srw >= 32 {
  1844  		return 0
  1845  	}
  1846  	mask_r := uint32(0xFFFFFFFF) >> uint(srw)
  1847  	mask_l := uint32(0xFFFFFFFF) >> uint(sld)
  1848  	mask := (mask_r & mask_l) << uint(sld)
  1849  	return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
  1850  }
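        // Illustrative example: for (SLDconst [3] (SRWconst [4] x)), mergePPC64SldiSrw(3, 4)
        // computes mask (0x0FFFFFFF & 0x1FFFFFFF) << 3 = 0x7FFFFFF8 and rotate (32-4+3)&31 = 31,
        // i.e. the combined operation keeps bits 3..30 of the rotated low word.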
  1851  
  1852  // Convert a PPC64 opcode from the Op to OpCC form. This converts (op x y)
  1853  // to (Select0 (opCC x y)) without having to explicitly fixup every user
  1854  // of op.
  1855  //
  1856  // E.g. consider the case:
  1857  // a = (ADD x y)
  1858  // b = (CMPconst [0] a)
  1859  // c = (OR a z)
  1860  //
  1861  // A rule like (CMPconst [0] (ADD x y)) => (CMPconst [0] (Select0 (ADDCC x y)))
  1862  // would produce:
  1863  // a  = (ADD x y)
  1864  // a' = (ADDCC x y)
  1865  // a” = (Select0 a')
  1866  // b  = (CMPconst [0] a”)
  1867  // c  = (OR a z)
  1868  //
  1869  // which makes it impossible to rewrite the second user. Instead the result
  1870  // of this conversion is:
  1871  // a' = (ADDCC x y)
  1872  // a  = (Select0 a')
  1873  // b  = (CMPconst [0] a)
  1874  // c  = (OR a z)
  1875  //
  1876  // Which makes it trivial to rewrite b using a lowering rule.
  1877  func convertPPC64OpToOpCC(op *Value) *Value {
  1878  	ccOpMap := map[Op]Op{
  1879  		OpPPC64ADD:      OpPPC64ADDCC,
  1880  		OpPPC64ADDconst: OpPPC64ADDCCconst,
  1881  		OpPPC64AND:      OpPPC64ANDCC,
  1882  		OpPPC64ANDN:     OpPPC64ANDNCC,
  1883  		OpPPC64ANDconst: OpPPC64ANDCCconst,
  1884  		OpPPC64CNTLZD:   OpPPC64CNTLZDCC,
  1885  		OpPPC64MULHDU:   OpPPC64MULHDUCC,
  1886  		OpPPC64NEG:      OpPPC64NEGCC,
  1887  		OpPPC64NOR:      OpPPC64NORCC,
  1888  		OpPPC64OR:       OpPPC64ORCC,
  1889  		OpPPC64RLDICL:   OpPPC64RLDICLCC,
  1890  		OpPPC64SUB:      OpPPC64SUBCC,
  1891  		OpPPC64XOR:      OpPPC64XORCC,
  1892  	}
  1893  	b := op.Block
  1894  	opCC := b.NewValue0I(op.Pos, ccOpMap[op.Op], types.NewTuple(op.Type, types.TypeFlags), op.AuxInt)
  1895  	opCC.AddArgs(op.Args...)
  1896  	op.reset(OpSelect0)
  1897  	op.AddArgs(opCC)
  1898  	return op
  1899  }
  1900  
  1901  // Try converting an RLDICL to ANDCC. If successful, return the mask; otherwise return 0.
  1902  func convertPPC64RldiclAndccconst(sauxint int64) int64 {
  1903  	r, _, _, mask := DecodePPC64RotateMask(sauxint)
  1904  	if r != 0 || mask&0xFFFF != mask {
  1905  		return 0
  1906  	}
  1907  	return int64(mask)
  1908  }
  1909  
  1910  // Convenience function to rotate a 32 bit constant value by another constant.
  1911  func rotateLeft32(v, rotate int64) int64 {
  1912  	return int64(bits.RotateLeft32(uint32(v), int(rotate)))
  1913  }
  1914  
  1915  func rotateRight64(v, rotate int64) int64 {
  1916  	return int64(bits.RotateLeft64(uint64(v), int(-rotate)))
  1917  }
  1918  
  1919  // encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
  1920  func armBFAuxInt(lsb, width int64) arm64BitField {
  1921  	if lsb < 0 || lsb > 63 {
  1922  		panic("ARM(64) bit field lsb constant out of range")
  1923  	}
  1924  	if width < 1 || lsb+width > 64 {
  1925  		panic("ARM(64) bit field width constant out of range")
  1926  	}
  1927  	return arm64BitField(width | lsb<<8)
  1928  }
  1929  
  1930  // returns the lsb part of the auxInt field of arm64 bitfield ops.
  1931  func (bfc arm64BitField) lsb() int64 {
  1932  	return int64(uint64(bfc) >> 8)
  1933  }
  1934  
  1935  // returns the width part of the auxInt field of arm64 bitfield ops.
  1936  func (bfc arm64BitField) width() int64 {
  1937  	return int64(bfc) & 0xff
  1938  }
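        // Illustrative example of the auxInt encoding:
        //	bfc := armBFAuxInt(8, 16) // 16 | 8<<8 == 0x810
        //	bfc.lsb()                 // == 8
        //	bfc.width()               // == 16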
  1939  
  1940  // checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
  1941  func isARM64BFMask(lsb, mask, rshift int64) bool {
  1942  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1943  	return shiftedMask != 0 && isPowerOfTwo(shiftedMask+1) && nto(shiftedMask)+lsb < 64
  1944  }
  1945  
  1946  // returns the bitfield width of mask >> rshift for arm64 bitfield ops.
  1947  func arm64BFWidth(mask, rshift int64) int64 {
  1948  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1949  	if shiftedMask == 0 {
  1950  		panic("ARM64 BF mask is zero")
  1951  	}
  1952  	return nto(shiftedMask)
  1953  }
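        // Illustrative example: mask 0xff (eight trailing ones) with rshift 0 gives
        //	isARM64BFMask(0, 0xff, 0) == true // 0xff+1 is a power of two and 8+0 < 64
        //	arm64BFWidth(0xff, 0)     == 8
        // whereas isARM64BFMask(60, 0xff, 0) == false, since lsb 60 plus width 8 exceeds 64.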
  1954  
  1955  // encodes condition code and NZCV flags into auxint.
  1956  func arm64ConditionalParamsAuxInt(cond Op, nzcv uint8) arm64ConditionalParams {
  1957  	if cond < OpARM64Equal || cond > OpARM64GreaterEqualU {
  1958  		panic("Wrong conditional operation")
  1959  	}
  1960  	if nzcv&0x0f != nzcv {
  1961  		panic("Wrong value of NZCV flag")
  1962  	}
  1963  	return arm64ConditionalParams{cond, nzcv, 0, false}
  1964  }
  1965  
  1966  // encodes condition code, NZCV flags and constant value into auxint.
  1967  func arm64ConditionalParamsAuxIntWithValue(cond Op, nzcv uint8, value uint8) arm64ConditionalParams {
  1968  	if value&0x1f != value {
  1969  		panic("Wrong value of constant")
  1970  	}
  1971  	params := arm64ConditionalParamsAuxInt(cond, nzcv)
  1972  	params.constValue = value
  1973  	params.ind = true
  1974  	return params
  1975  }
  1976  
  1977  // extracts condition code from auxint.
  1978  func (condParams arm64ConditionalParams) Cond() Op {
  1979  	return condParams.cond
  1980  }
  1981  
  1982  // extracts NZCV flags from auxint.
  1983  func (condParams arm64ConditionalParams) Nzcv() int64 {
  1984  	return int64(condParams.nzcv)
  1985  }
  1986  
  1987  // extracts constant value from auxint if present.
  1988  func (condParams arm64ConditionalParams) ConstValue() (int64, bool) {
  1989  	return int64(condParams.constValue), condParams.ind
  1990  }
  1991  
  1992  // registerizable reports whether typ is a primitive type that fits in
  1993  // a register. It assumes float64 values will always fit into registers
  1994  // even if that isn't strictly true.
  1995  func registerizable(b *Block, typ *types.Type) bool {
  1996  	if typ.IsPtrShaped() || typ.IsFloat() || typ.IsBoolean() {
  1997  		return true
  1998  	}
  1999  	if typ.IsInteger() {
  2000  		return typ.Size() <= b.Func.Config.RegSize
  2001  	}
  2002  	return false
  2003  }
  2004  
  2005  // needRaceCleanup reports whether this call to racefuncenter/exit is unneeded and can be removed.
  2006  func needRaceCleanup(sym *AuxCall, v *Value) bool {
  2007  	f := v.Block.Func
  2008  	if !f.Config.Race {
  2009  		return false
  2010  	}
  2011  	if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
  2012  		return false
  2013  	}
  2014  	for _, b := range f.Blocks {
  2015  		for _, v := range b.Values {
  2016  			switch v.Op {
  2017  			case OpStaticCall, OpStaticLECall:
  2018  				// The scan for racefuncenter will also encounter racefuncexit and vice versa;
  2019  				// those are allowed, as are calls to panic*.
  2020  				s := v.Aux.(*AuxCall).Fn.String()
  2021  				switch s {
  2022  				case "runtime.racefuncenter", "runtime.racefuncexit",
  2023  					"runtime.panicdivide", "runtime.panicwrap",
  2024  					"runtime.panicshift":
  2025  					continue
  2026  				}
  2027  				// If we encountered any call, we need to keep racefunc*,
  2028  				// for accurate stacktraces.
  2029  				return false
  2030  			case OpPanicBounds, OpPanicExtend:
  2031  				// Note: these are panic generators that are ok (like the static calls above).
  2032  			case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
  2033  				// We must keep the race functions if there are any other call types.
  2034  				return false
  2035  			}
  2036  		}
  2037  	}
  2038  	if isSameCall(sym, "runtime.racefuncenter") {
  2039  		// TODO REGISTER ABI this needs to be cleaned up.
  2040  		// If we're removing racefuncenter, remove its argument as well.
  2041  		if v.Args[0].Op != OpStore {
  2042  			if v.Op == OpStaticLECall {
  2043  				// there is no store, yet.
  2044  				return true
  2045  			}
  2046  			return false
  2047  		}
  2048  		mem := v.Args[0].Args[2]
  2049  		v.Args[0].reset(OpCopy)
  2050  		v.Args[0].AddArg(mem)
  2051  	}
  2052  	return true
  2053  }
  2054  
  2055  // symIsRO reports whether sym is a read-only global.
  2056  func symIsRO(sym Sym) bool {
  2057  	lsym := sym.(*obj.LSym)
  2058  	return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
  2059  }
  2060  
  2061  // symIsROZero reports whether sym is a read-only global whose data contains all zeros.
  2062  func symIsROZero(sym Sym) bool {
  2063  	lsym := sym.(*obj.LSym)
  2064  	if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
  2065  		return false
  2066  	}
  2067  	for _, b := range lsym.P {
  2068  		if b != 0 {
  2069  			return false
  2070  		}
  2071  	}
  2072  	return true
  2073  }
  2074  
  2075  // isFixedLoad returns true if the load can be resolved to a fixed address or constant,
  2076  // and can be rewritten by rewriteFixedLoad.
  2077  func isFixedLoad(v *Value, sym Sym, off int64) bool {
  2078  	lsym := sym.(*obj.LSym)
  2079  	if (v.Type.IsPtrShaped() || v.Type.IsUintptr()) && lsym.Type == objabi.SRODATA {
  2080  		for _, r := range lsym.R {
  2081  			if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off && r.Add == 0 {
  2082  				return true
  2083  			}
  2084  		}
  2085  		return false
  2086  	}
  2087  
  2088  	if ti := lsym.TypeInfo(); ti != nil {
  2089  		// Type symbols do not contain information about their fields, unlike the cases above.
  2090  		// Hand-implement field accesses.
  2091  		// TODO: can this be replaced with reflectdata.writeType and just use the code above?
  2092  
  2093  		t := ti.Type.(*types.Type)
  2094  
  2095  		for _, f := range rttype.Type.Fields() {
  2096  			if f.Offset == off && copyCompatibleType(v.Type, f.Type) {
  2097  				switch f.Sym.Name {
  2098  				case "Size_", "PtrBytes", "Hash", "Kind_", "GCData":
  2099  					return true
  2100  				default:
  2101  					// fmt.Println("unknown field", f.Sym.Name)
  2102  					return false
  2103  				}
  2104  			}
  2105  		}
  2106  
  2107  		if t.IsPtr() && off == rttype.PtrType.OffsetOf("Elem") {
  2108  			return true
  2109  		}
  2110  
  2111  		return false
  2112  	}
  2113  
  2114  	return false
  2115  }
  2116  
  2117  // rewriteFixedLoad rewrites a load to a fixed address or constant, if isFixedLoad returns true.
  2118  func rewriteFixedLoad(v *Value, sym Sym, sb *Value, off int64) *Value {
  2119  	b := v.Block
  2120  	f := b.Func
  2121  
  2122  	lsym := sym.(*obj.LSym)
  2123  	if (v.Type.IsPtrShaped() || v.Type.IsUintptr()) && lsym.Type == objabi.SRODATA {
  2124  		for _, r := range lsym.R {
  2125  			if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off && r.Add == 0 {
  2126  				if strings.HasPrefix(r.Sym.Name, "type:") {
  2127  					// In case we're loading a type out of a dictionary, we need to record
  2128  					// that the containing function might put that type in an interface.
  2129  					// That information is currently recorded in relocations in the dictionary,
  2130  					// but if we perform this load at compile time then the dictionary
  2131  					// might be dead.
  2132  					reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
  2133  				} else if strings.HasPrefix(r.Sym.Name, "go:itab") {
  2134  					// Same, but if we're using an itab we need to record that the
  2135  					// itab._type might be put in an interface.
  2136  					reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
  2137  				}
  2138  				v.reset(OpAddr)
  2139  				v.Aux = symToAux(r.Sym)
  2140  				v.AddArg(sb)
  2141  				return v
  2142  			}
  2143  		}
  2144  		base.Fatalf("fixedLoad data not known for %s:%d", sym, off)
  2145  	}
  2146  
  2147  	if ti := lsym.TypeInfo(); ti != nil {
  2148  		// Type symbols do not contain information about their fields, unlike the cases above.
  2149  		// Hand-implement field accesses.
  2150  		// TODO: can this be replaced with reflectdata.writeType and just use the code above?
  2151  
  2152  		t := ti.Type.(*types.Type)
  2153  
  2154  		ptrSizedOpConst := OpConst64
  2155  		if f.Config.PtrSize == 4 {
  2156  			ptrSizedOpConst = OpConst32
  2157  		}
  2158  
  2159  		for _, f := range rttype.Type.Fields() {
  2160  			if f.Offset == off && copyCompatibleType(v.Type, f.Type) {
  2161  				switch f.Sym.Name {
  2162  				case "Size_":
  2163  					v.reset(ptrSizedOpConst)
  2164  					v.AuxInt = t.Size()
  2165  					return v
  2166  				case "PtrBytes":
  2167  					v.reset(ptrSizedOpConst)
  2168  					v.AuxInt = types.PtrDataSize(t)
  2169  					return v
  2170  				case "Hash":
  2171  					v.reset(OpConst32)
  2172  					v.AuxInt = int64(types.TypeHash(t))
  2173  					return v
  2174  				case "Kind_":
  2175  					v.reset(OpConst8)
  2176  					v.AuxInt = int64(reflectdata.ABIKindOfType(t))
  2177  					return v
  2178  				case "GCData":
  2179  					gcdata, _ := reflectdata.GCSym(t, true)
  2180  					v.reset(OpAddr)
  2181  					v.Aux = symToAux(gcdata)
  2182  					v.AddArg(sb)
  2183  					return v
  2184  				default:
  2185  					base.Fatalf("unknown field %s for fixedLoad of %s at offset %d", f.Sym.Name, lsym.Name, off)
  2186  				}
  2187  			}
  2188  		}
  2189  
  2190  		if t.IsPtr() && off == rttype.PtrType.OffsetOf("Elem") {
  2191  			elemSym := reflectdata.TypeLinksym(t.Elem())
  2192  			reflectdata.MarkTypeSymUsedInInterface(elemSym, f.fe.Func().Linksym())
  2193  			v.reset(OpAddr)
  2194  			v.Aux = symToAux(elemSym)
  2195  			v.AddArg(sb)
  2196  			return v
  2197  		}
  2198  
  2199  		base.Fatalf("fixedLoad data not known for %s:%d", sym, off)
  2200  	}
  2201  
  2202  	base.Fatalf("fixedLoad data not known for %s:%d", sym, off)
  2203  	return nil
  2204  }
  2205  
  2206  // read8 reads one byte from the read-only global sym at offset off.
  2207  func read8(sym Sym, off int64) uint8 {
  2208  	lsym := sym.(*obj.LSym)
  2209  	if off >= int64(len(lsym.P)) || off < 0 {
  2210  		// Invalid index into the global sym.
  2211  		// This can happen in dead code, so we don't want to panic.
  2212  		// Just return any value, it will eventually get ignored.
  2213  		// See issue 29215.
  2214  		return 0
  2215  	}
  2216  	return lsym.P[off]
  2217  }
  2218  
  2219  // read16 reads two bytes from the read-only global sym at offset off.
  2220  func read16(sym Sym, off int64, byteorder binary.ByteOrder) uint16 {
  2221  	lsym := sym.(*obj.LSym)
  2222  	// lsym.P is written lazily.
  2223  	// Bytes requested after the end of lsym.P are 0.
  2224  	var src []byte
  2225  	if 0 <= off && off < int64(len(lsym.P)) {
  2226  		src = lsym.P[off:]
  2227  	}
  2228  	buf := make([]byte, 2)
  2229  	copy(buf, src)
  2230  	return byteorder.Uint16(buf)
  2231  }
  2232  
  2233  // read32 reads four bytes from the read-only global sym at offset off.
  2234  func read32(sym Sym, off int64, byteorder binary.ByteOrder) uint32 {
  2235  	lsym := sym.(*obj.LSym)
  2236  	var src []byte
  2237  	if 0 <= off && off < int64(len(lsym.P)) {
  2238  		src = lsym.P[off:]
  2239  	}
  2240  	buf := make([]byte, 4)
  2241  	copy(buf, src)
  2242  	return byteorder.Uint32(buf)
  2243  }
  2244  
  2245  // read64 reads eight bytes from the read-only global sym at offset off.
  2246  func read64(sym Sym, off int64, byteorder binary.ByteOrder) uint64 {
  2247  	lsym := sym.(*obj.LSym)
  2248  	var src []byte
  2249  	if 0 <= off && off < int64(len(lsym.P)) {
  2250  		src = lsym.P[off:]
  2251  	}
  2252  	buf := make([]byte, 8)
  2253  	copy(buf, src)
  2254  	return byteorder.Uint64(buf)
  2255  }
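        // Illustrative example: if lsym.P begins with bytes {0x34, 0x12}, then
        // read16(sym, 0, binary.LittleEndian) == 0x1234, while any read starting at or
        // beyond len(lsym.P) sees zero bytes (e.g. read8(sym, 1<<20) == 0).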
  2256  
  2257  // sequentialAddresses reports whether it can prove that x + n == y.
  2258  func sequentialAddresses(x, y *Value, n int64) bool {
  2259  	if x == y && n == 0 {
  2260  		return true
  2261  	}
  2262  	if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
  2263  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2264  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2265  		return true
  2266  	}
  2267  	if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
  2268  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2269  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2270  		return true
  2271  	}
  2272  	if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
  2273  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2274  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2275  		return true
  2276  	}
  2277  	if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
  2278  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2279  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2280  		return true
  2281  	}
  2282  	return false
  2283  }
  2284  
  2285  // flagConstant represents the result of a compile-time comparison.
  2286  // The sense of these flags does not necessarily represent the hardware's notion
  2287  // of a flags register - these are just a compile-time construct.
  2288  // We happen to match the semantics to those of arm/arm64.
  2289  // Note that these semantics differ from x86: the carry flag has the opposite
  2290  // sense on a subtraction!
  2291  //
  2292  //	On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
  2293  //	On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
  2294  //	 (because it does x + ^y + C).
  2295  //
  2296  // See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
  2297  type flagConstant uint8
  2298  
  2299  // N reports whether the result of an operation is negative (high bit set).
  2300  func (fc flagConstant) N() bool {
  2301  	return fc&1 != 0
  2302  }
  2303  
  2304  // Z reports whether the result of an operation is 0.
  2305  func (fc flagConstant) Z() bool {
  2306  	return fc&2 != 0
  2307  }
  2308  
  2309  // C reports whether an unsigned add overflowed (carry), or an
  2310  // unsigned subtract did not underflow (borrow).
  2311  func (fc flagConstant) C() bool {
  2312  	return fc&4 != 0
  2313  }
  2314  
  2315  // V reports whether a signed operation overflowed or underflowed.
  2316  func (fc flagConstant) V() bool {
  2317  	return fc&8 != 0
  2318  }
  2319  
  2320  func (fc flagConstant) eq() bool {
  2321  	return fc.Z()
  2322  }
  2323  func (fc flagConstant) ne() bool {
  2324  	return !fc.Z()
  2325  }
  2326  func (fc flagConstant) lt() bool {
  2327  	return fc.N() != fc.V()
  2328  }
  2329  func (fc flagConstant) le() bool {
  2330  	return fc.Z() || fc.lt()
  2331  }
  2332  func (fc flagConstant) gt() bool {
  2333  	return !fc.Z() && fc.ge()
  2334  }
  2335  func (fc flagConstant) ge() bool {
  2336  	return fc.N() == fc.V()
  2337  }
  2338  func (fc flagConstant) ult() bool {
  2339  	return !fc.C()
  2340  }
  2341  func (fc flagConstant) ule() bool {
  2342  	return fc.Z() || fc.ult()
  2343  }
  2344  func (fc flagConstant) ugt() bool {
  2345  	return !fc.Z() && fc.uge()
  2346  }
  2347  func (fc flagConstant) uge() bool {
  2348  	return fc.C()
  2349  }
  2350  
  2351  func (fc flagConstant) ltNoov() bool {
  2352  	return fc.lt() && !fc.V()
  2353  }
  2354  func (fc flagConstant) leNoov() bool {
  2355  	return fc.le() && !fc.V()
  2356  }
  2357  func (fc flagConstant) gtNoov() bool {
  2358  	return fc.gt() && !fc.V()
  2359  }
  2360  func (fc flagConstant) geNoov() bool {
  2361  	return fc.ge() && !fc.V()
  2362  }
  2363  
  2364  func (fc flagConstant) String() string {
  2365  	return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
  2366  }
  2367  
  2368  type flagConstantBuilder struct {
  2369  	N bool
  2370  	Z bool
  2371  	C bool
  2372  	V bool
  2373  }
  2374  
  2375  func (fcs flagConstantBuilder) encode() flagConstant {
  2376  	var fc flagConstant
  2377  	if fcs.N {
  2378  		fc |= 1
  2379  	}
  2380  	if fcs.Z {
  2381  		fc |= 2
  2382  	}
  2383  	if fcs.C {
  2384  		fc |= 4
  2385  	}
  2386  	if fcs.V {
  2387  		fc |= 8
  2388  	}
  2389  	return fc
  2390  }
  2391  
  2392  // Note: addFlags(x,y) != subFlags(x,-y) in some situations:
  2393  //  - the results of the C flag are different
  2394  //  - the results of the V flag when y==minint are different
  2395  
  2396  // addFlags64 returns the flags that would be set from computing x+y.
  2397  func addFlags64(x, y int64) flagConstant {
  2398  	var fcb flagConstantBuilder
  2399  	fcb.Z = x+y == 0
  2400  	fcb.N = x+y < 0
  2401  	fcb.C = uint64(x+y) < uint64(x)
  2402  	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
  2403  	return fcb.encode()
  2404  }
  2405  
  2406  // subFlags64 returns the flags that would be set from computing x-y.
  2407  func subFlags64(x, y int64) flagConstant {
  2408  	var fcb flagConstantBuilder
  2409  	fcb.Z = x-y == 0
  2410  	fcb.N = x-y < 0
  2411  	fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
  2412  	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
  2413  	return fcb.encode()
  2414  }
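        // Illustrative example: subFlags64(2, 3) computes 2-3 = -1, so N=1, Z=0, C=0 (a
        // borrow occurred under the arm model) and V=0; hence fc.lt() and fc.ult() are
        // true while fc.eq() is false.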
  2415  
  2416  // addFlags32 returns the flags that would be set from computing x+y.
  2417  func addFlags32(x, y int32) flagConstant {
  2418  	var fcb flagConstantBuilder
  2419  	fcb.Z = x+y == 0
  2420  	fcb.N = x+y < 0
  2421  	fcb.C = uint32(x+y) < uint32(x)
  2422  	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
  2423  	return fcb.encode()
  2424  }
  2425  
  2426  // subFlags32 returns the flags that would be set from computing x-y.
  2427  func subFlags32(x, y int32) flagConstant {
  2428  	var fcb flagConstantBuilder
  2429  	fcb.Z = x-y == 0
  2430  	fcb.N = x-y < 0
  2431  	fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
  2432  	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
  2433  	return fcb.encode()
  2434  }
  2435  
  2436  // logicFlags64 returns flags set to the sign/zeroness of x.
  2437  // C and V are set to false.
  2438  func logicFlags64(x int64) flagConstant {
  2439  	var fcb flagConstantBuilder
  2440  	fcb.Z = x == 0
  2441  	fcb.N = x < 0
  2442  	return fcb.encode()
  2443  }
  2444  
  2445  // logicFlags32 returns flags set to the sign/zeroness of x.
  2446  // C and V are set to false.
  2447  func logicFlags32(x int32) flagConstant {
  2448  	var fcb flagConstantBuilder
  2449  	fcb.Z = x == 0
  2450  	fcb.N = x < 0
  2451  	return fcb.encode()
  2452  }
  2453  
  2454  func makeJumpTableSym(b *Block) *obj.LSym {
  2455  	s := base.Ctxt.Lookup(fmt.Sprintf("%s.jump%d", b.Func.fe.Func().LSym.Name, b.ID))
  2456  	// The jump table symbol is accessed only from the function symbol.
  2457  	s.Set(obj.AttrStatic, true)
  2458  	return s
  2459  }
  2460  
  2461  // canRotate reports whether the architecture supports
  2462  // rotates of integer registers with the given number of bits.
  2463  func canRotate(c *Config, bits int64) bool {
  2464  	if bits > c.PtrSize*8 {
  2465  		// Don't rewrite to rotates bigger than the machine word.
  2466  		return false
  2467  	}
  2468  	switch c.arch {
  2469  	case "386", "amd64", "arm64", "loong64", "riscv64":
  2470  		return true
  2471  	case "arm", "s390x", "ppc64", "ppc64le", "wasm":
  2472  		return bits >= 32
  2473  	default:
  2474  		return false
  2475  	}
  2476  }
  2477  
  2478  // isARM64bitcon reports whether a constant can be encoded into a logical instruction.
  2479  func isARM64bitcon(x uint64) bool {
  2480  	if x == 1<<64-1 || x == 0 {
  2481  		return false
  2482  	}
  2483  	// determine the period and sign-extend a unit to 64 bits
  2484  	switch {
  2485  	case x != x>>32|x<<32:
  2486  		// period is 64
  2487  		// nothing to do
  2488  	case x != x>>16|x<<48:
  2489  		// period is 32
  2490  		x = uint64(int64(int32(x)))
  2491  	case x != x>>8|x<<56:
  2492  		// period is 16
  2493  		x = uint64(int64(int16(x)))
  2494  	case x != x>>4|x<<60:
  2495  		// period is 8
  2496  		x = uint64(int64(int8(x)))
  2497  	default:
  2498  		// period is 4 or 2, always true
  2499  		// 0001, 0010, 0100, 1000 -- 0001 rotate
  2500  		// 0011, 0110, 1100, 1001 -- 0011 rotate
  2501  		// 0111, 1011, 1101, 1110 -- 0111 rotate
  2502  		// 0101, 1010             -- 01   rotate, repeat
  2503  		return true
  2504  	}
  2505  	return sequenceOfOnes(x) || sequenceOfOnes(^x)
  2506  }
  2507  
  2508  // sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros.
  2509  func sequenceOfOnes(x uint64) bool {
  2510  	y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2
  2511  	y += x
  2512  	return (y-1)&y == 0
  2513  }
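        // Illustrative examples:
        //	isARM64bitcon(0x00000000000003f0) == true  // a single run of ones
        //	isARM64bitcon(0x00ff00ff00ff00ff) == true  // 0x00ff repeated with period 16
        //	isARM64bitcon(0x0000000012345678) == false // not a (rotated) run of ones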
  2514  
  2515  // isARM64addcon reports whether v can be encoded as the immediate value in an ADD or SUB instruction.
  2516  func isARM64addcon(v int64) bool {
  2517  	/* uimm12 or uimm24? */
  2518  	if v < 0 {
  2519  		return false
  2520  	}
  2521  	if (v & 0xFFF) == 0 {
  2522  		v >>= 12
  2523  	}
  2524  	return v <= 0xFFF
  2525  }
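        // Illustrative examples: isARM64addcon(0xfff) and isARM64addcon(0x123000) are true
        // (uimm12, and uimm12 shifted left by 12, respectively), while isARM64addcon(0x1001)
        // and any negative value are false.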
  2526  
  2527  // setPos sets the position of v to pos, then returns true.
  2528  // Useful for setting the position of a rewrite's result to
  2529  // something other than the default.
  2530  func setPos(v *Value, pos src.XPos) bool {
  2531  	v.Pos = pos
  2532  	return true
  2533  }
  2534  
  2535  // isNonNegative reports whether v is known to be greater or equal to zero.
  2536  // Note that this is pretty simplistic. The prove pass generates more detailed
  2537  // nonnegative information about values.
  2538  func isNonNegative(v *Value) bool {
  2539  	if !v.Type.IsInteger() {
  2540  		v.Fatalf("isNonNegative bad type: %v", v.Type)
  2541  	}
  2542  	// TODO: return true if !v.Type.IsSigned()
  2543  	// SSA isn't type-safe enough to do that now (issue 37753).
  2544  	// The checks below depend only on the pattern of bits.
  2545  
  2546  	switch v.Op {
  2547  	case OpConst64:
  2548  		return v.AuxInt >= 0
  2549  
  2550  	case OpConst32:
  2551  		return int32(v.AuxInt) >= 0
  2552  
  2553  	case OpConst16:
  2554  		return int16(v.AuxInt) >= 0
  2555  
  2556  	case OpConst8:
  2557  		return int8(v.AuxInt) >= 0
  2558  
  2559  	case OpStringLen, OpSliceLen, OpSliceCap,
  2560  		OpZeroExt8to64, OpZeroExt16to64, OpZeroExt32to64,
  2561  		OpZeroExt8to32, OpZeroExt16to32, OpZeroExt8to16,
  2562  		OpCtz64, OpCtz32, OpCtz16, OpCtz8,
  2563  		OpCtz64NonZero, OpCtz32NonZero, OpCtz16NonZero, OpCtz8NonZero,
  2564  		OpBitLen64, OpBitLen32, OpBitLen16, OpBitLen8:
  2565  		return true
  2566  
  2567  	case OpRsh64Ux64, OpRsh32Ux64:
  2568  		by := v.Args[1]
  2569  		return by.Op == OpConst64 && by.AuxInt > 0
  2570  
  2571  	case OpRsh64x64, OpRsh32x64, OpRsh8x64, OpRsh16x64, OpRsh32x32, OpRsh64x32,
  2572  		OpSignExt32to64, OpSignExt16to64, OpSignExt8to64, OpSignExt16to32, OpSignExt8to32:
  2573  		return isNonNegative(v.Args[0])
  2574  
  2575  	case OpAnd64, OpAnd32, OpAnd16, OpAnd8:
  2576  		return isNonNegative(v.Args[0]) || isNonNegative(v.Args[1])
  2577  
  2578  	case OpMod64, OpMod32, OpMod16, OpMod8,
  2579  		OpDiv64, OpDiv32, OpDiv16, OpDiv8,
  2580  		OpOr64, OpOr32, OpOr16, OpOr8,
  2581  		OpXor64, OpXor32, OpXor16, OpXor8:
  2582  		return isNonNegative(v.Args[0]) && isNonNegative(v.Args[1])
  2583  
  2584  		// We could handle OpPhi here, but the improvements from doing
  2585  		// so are very minor, and it is neither simple nor cheap.
  2586  	}
  2587  	return false
  2588  }
  2589  
  2590  func rewriteStructLoad(v *Value) *Value {
  2591  	b := v.Block
  2592  	ptr := v.Args[0]
  2593  	mem := v.Args[1]
  2594  
  2595  	t := v.Type
  2596  	args := make([]*Value, t.NumFields())
  2597  	for i := range args {
  2598  		ft := t.FieldType(i)
  2599  		addr := b.NewValue1I(v.Pos, OpOffPtr, ft.PtrTo(), t.FieldOff(i), ptr)
  2600  		args[i] = b.NewValue2(v.Pos, OpLoad, ft, addr, mem)
  2601  	}
  2602  
  2603  	v.reset(OpStructMake)
  2604  	v.AddArgs(args...)
  2605  	return v
  2606  }
  2607  
  2608  func rewriteStructStore(v *Value) *Value {
  2609  	b := v.Block
  2610  	dst := v.Args[0]
  2611  	x := v.Args[1]
  2612  	if x.Op != OpStructMake {
  2613  		base.Fatalf("invalid struct store: %v", x)
  2614  	}
  2615  	mem := v.Args[2]
  2616  
  2617  	t := x.Type
  2618  	for i, arg := range x.Args {
  2619  		ft := t.FieldType(i)
  2620  
  2621  		addr := b.NewValue1I(v.Pos, OpOffPtr, ft.PtrTo(), t.FieldOff(i), dst)
  2622  		mem = b.NewValue3A(v.Pos, OpStore, types.TypeMem, typeToAux(ft), addr, arg, mem)
  2623  	}
  2624  
  2625  	return mem
  2626  }
  2627  
  2628  // isDirectAndComparableType reports whether v represents a type
  2629  // (a *runtime._type) whose value is stored directly in an
  2630  // interface (i.e., is pointer or pointer-like) and is comparable.
  2631  func isDirectAndComparableType(v *Value) bool {
  2632  	return isDirectAndComparableType1(v)
  2633  }
  2634  
  2635  // v is a type
  2636  func isDirectAndComparableType1(v *Value) bool {
  2637  	switch v.Op {
  2638  	case OpITab:
  2639  		return isDirectAndComparableType2(v.Args[0])
  2640  	case OpAddr:
  2641  		lsym := v.Aux.(*obj.LSym)
  2642  		if ti := lsym.TypeInfo(); ti != nil {
  2643  			t := ti.Type.(*types.Type)
  2644  			return types.IsDirectIface(t) && types.IsComparable(t)
  2645  		}
  2646  	}
  2647  	return false
  2648  }
  2649  
  2650  // v is an empty interface
  2651  func isDirectAndComparableType2(v *Value) bool {
  2652  	switch v.Op {
  2653  	case OpIMake:
  2654  		return isDirectAndComparableType1(v.Args[0])
  2655  	}
  2656  	return false
  2657  }
  2658  
  2659  // isDirectAndComparableIface reports whether v represents an itab
  2660  // (a *runtime._itab) for a type whose value is stored directly
  2661  // in an interface (i.e., is pointer or pointer-like) and is comparable.
  2662  func isDirectAndComparableIface(v *Value) bool {
  2663  	return isDirectAndComparableIface1(v, 9)
  2664  }
  2665  
  2666  // v is an itab
  2667  func isDirectAndComparableIface1(v *Value, depth int) bool {
  2668  	if depth == 0 {
  2669  		return false
  2670  	}
  2671  	switch v.Op {
  2672  	case OpITab:
  2673  		return isDirectAndComparableIface2(v.Args[0], depth-1)
  2674  	case OpAddr:
  2675  		lsym := v.Aux.(*obj.LSym)
  2676  		if ii := lsym.ItabInfo(); ii != nil {
  2677  			t := ii.Type.(*types.Type)
  2678  			return types.IsDirectIface(t) && types.IsComparable(t)
  2679  		}
  2680  	case OpConstNil:
  2681  		// We can treat this as direct, because if the itab is
  2682  		// nil, the data field must be nil also.
  2683  		return true
  2684  	}
  2685  	return false
  2686  }
  2687  
  2688  // v is an interface
  2689  func isDirectAndComparableIface2(v *Value, depth int) bool {
  2690  	if depth == 0 {
  2691  		return false
  2692  	}
  2693  	switch v.Op {
  2694  	case OpIMake:
  2695  		return isDirectAndComparableIface1(v.Args[0], depth-1)
  2696  	case OpPhi:
  2697  		for _, a := range v.Args {
  2698  			if !isDirectAndComparableIface2(a, depth-1) {
  2699  				return false
  2700  			}
  2701  		}
  2702  		return true
  2703  	}
  2704  	return false
  2705  }
  2706  
  2707  func bitsAdd64(x, y, carry int64) (r struct{ sum, carry int64 }) {
  2708  	s, c := bits.Add64(uint64(x), uint64(y), uint64(carry))
  2709  	r.sum, r.carry = int64(s), int64(c)
  2710  	return
  2711  }
  2712  
  2713  func bitsMulU64(x, y int64) (r struct{ hi, lo int64 }) {
  2714  	hi, lo := bits.Mul64(uint64(x), uint64(y))
  2715  	r.hi, r.lo = int64(hi), int64(lo)
  2716  	return
  2717  }
  2718  func bitsMulU32(x, y int32) (r struct{ hi, lo int32 }) {
  2719  	hi, lo := bits.Mul32(uint32(x), uint32(y))
  2720  	r.hi, r.lo = int32(hi), int32(lo)
  2721  	return
  2722  }
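        // Illustrative examples: bitsAdd64(-1, 1, 0) yields sum 0 with carry 1, and
        // bitsMulU64(1<<32, 1<<32) yields hi 1, lo 0 (the full 128-bit product 1<<64).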
  2723  
  2724  // flagify rewrites v which is (X ...) to (Select0 (Xflags ...)).
  2725  func flagify(v *Value) bool {
  2726  	var flagVersion Op
  2727  	switch v.Op {
  2728  	case OpAMD64ADDQconst:
  2729  		flagVersion = OpAMD64ADDQconstflags
  2730  	case OpAMD64ADDLconst:
  2731  		flagVersion = OpAMD64ADDLconstflags
  2732  	default:
  2733  		base.Fatalf("can't flagify op %s", v.Op)
  2734  	}
  2735  	inner := v.copyInto(v.Block)
  2736  	inner.Op = flagVersion
  2737  	inner.Type = types.NewTuple(v.Type, types.TypeFlags)
  2738  	v.reset(OpSelect0)
  2739  	v.AddArg(inner)
  2740  	return true
  2741  }
  2742  
  2743  // PanicBoundsC contains a constant for a bounds failure.
  2744  type PanicBoundsC struct {
  2745  	C int64
  2746  }
  2747  
  2748  // PanicBoundsCC contains 2 constants for a bounds failure.
  2749  type PanicBoundsCC struct {
  2750  	Cx int64
  2751  	Cy int64
  2752  }
  2753  
  2754  func (p PanicBoundsC) CanBeAnSSAAux() {
  2755  }
  2756  func (p PanicBoundsCC) CanBeAnSSAAux() {
  2757  }
  2758  
  2759  func auxToPanicBoundsC(i Aux) PanicBoundsC {
  2760  	return i.(PanicBoundsC)
  2761  }
  2762  func auxToPanicBoundsCC(i Aux) PanicBoundsCC {
  2763  	return i.(PanicBoundsCC)
  2764  }
  2765  func panicBoundsCToAux(p PanicBoundsC) Aux {
  2766  	return p
  2767  }
  2768  func panicBoundsCCToAux(p PanicBoundsCC) Aux {
  2769  	return p
  2770  }
  2771  
  2772  func isDictArgSym(sym Sym) bool {
  2773  	return sym.(*ir.Name).Sym().Name == typecheck.LocalDictName
  2774  }
  2775  
