Source file src/cmd/compile/internal/ssa/rewrite.go

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import (
     8  	"cmd/compile/internal/base"
     9  	"cmd/compile/internal/logopt"
    10  	"cmd/compile/internal/reflectdata"
    11  	"cmd/compile/internal/types"
    12  	"cmd/internal/obj"
    13  	"cmd/internal/obj/s390x"
    14  	"cmd/internal/objabi"
    15  	"cmd/internal/src"
    16  	"encoding/binary"
    17  	"fmt"
    18  	"internal/buildcfg"
    19  	"io"
    20  	"math"
    21  	"math/bits"
    22  	"os"
    23  	"path/filepath"
    24  	"strings"
    25  )
    26  
    27  type deadValueChoice bool
    28  
    29  const (
    30  	leaveDeadValues  deadValueChoice = false
    31  	removeDeadValues                 = true
    32  )
    33  
     34  // applyRewrite repeatedly applies the block rewriter rb and the value rewriter rv to f
         // until no further rewrites apply. deadcode indicates whether it should also remove
         // values that become dead.
    35  func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
    36  	// repeat rewrites until we find no more rewrites
    37  	pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
    38  	pendingLines.clear()
    39  	debug := f.pass.debug
    40  	if debug > 1 {
    41  		fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
    42  	}
    43  	// if the number of rewrite iterations reaches itersLimit we will
    44  	// at that point turn on cycle detection. Instead of a fixed limit,
    45  	// size the limit according to func size to allow for cases such
    46  	// as the one in issue #66773.
    47  	itersLimit := f.NumBlocks()
    48  	if itersLimit < 20 {
    49  		itersLimit = 20
    50  	}
    51  	var iters int
    52  	var states map[string]bool
    53  	for {
    54  		change := false
    55  		deadChange := false
    56  		for _, b := range f.Blocks {
    57  			var b0 *Block
    58  			if debug > 1 {
    59  				b0 = new(Block)
    60  				*b0 = *b
    61  				b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
    62  			}
    63  			for i, c := range b.ControlValues() {
    64  				for c.Op == OpCopy {
    65  					c = c.Args[0]
    66  					b.ReplaceControl(i, c)
    67  				}
    68  			}
    69  			if rb(b) {
    70  				change = true
    71  				if debug > 1 {
    72  					fmt.Printf("rewriting %s  ->  %s\n", b0.LongString(), b.LongString())
    73  				}
    74  			}
    75  			for j, v := range b.Values {
    76  				var v0 *Value
    77  				if debug > 1 {
    78  					v0 = new(Value)
    79  					*v0 = *v
    80  					v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
    81  				}
    82  				if v.Uses == 0 && v.removeable() {
    83  					if v.Op != OpInvalid && deadcode == removeDeadValues {
     84  					// Reset any value that is now unused, so that we decrement
     85  					// the use count of each of its arguments.
    86  						// Not quite a deadcode pass, because it does not handle cycles.
    87  						// But it should help Uses==1 rules to fire.
    88  						v.reset(OpInvalid)
    89  						deadChange = true
    90  					}
    91  					// No point rewriting values which aren't used.
    92  					continue
    93  				}
    94  
    95  				vchange := phielimValue(v)
    96  				if vchange && debug > 1 {
    97  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
    98  				}
    99  
   100  				// Eliminate copy inputs.
   101  				// If any copy input becomes unused, mark it
   102  				// as invalid and discard its argument. Repeat
   103  				// recursively on the discarded argument.
   104  				// This phase helps remove phantom "dead copy" uses
    105  				// of a value so that an x.Uses==1 rule condition
   106  				// fires reliably.
   107  				for i, a := range v.Args {
   108  					if a.Op != OpCopy {
   109  						continue
   110  					}
   111  					aa := copySource(a)
   112  					v.SetArg(i, aa)
   113  					// If a, a copy, has a line boundary indicator, attempt to find a new value
   114  					// to hold it.  The first candidate is the value that will replace a (aa),
   115  					// if it shares the same block and line and is eligible.
   116  					// The second option is v, which has a as an input.  Because aa is earlier in
   117  					// the data flow, it is the better choice.
   118  					if a.Pos.IsStmt() == src.PosIsStmt {
   119  						if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
   120  							aa.Pos = aa.Pos.WithIsStmt()
   121  						} else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
   122  							v.Pos = v.Pos.WithIsStmt()
   123  						} else {
   124  							// Record the lost line and look for a new home after all rewrites are complete.
   125  							// TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
   126  							// line to appear in more than one block, but only one block is stored, so if both end
   127  							// up here, then one will be lost.
   128  							pendingLines.set(a.Pos, int32(a.Block.ID))
   129  						}
   130  						a.Pos = a.Pos.WithNotStmt()
   131  					}
   132  					vchange = true
   133  					for a.Uses == 0 {
   134  						b := a.Args[0]
   135  						a.reset(OpInvalid)
   136  						a = b
   137  					}
   138  				}
   139  				if vchange && debug > 1 {
   140  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   141  				}
   142  
   143  				// apply rewrite function
   144  				if rv(v) {
   145  					vchange = true
   146  					// If value changed to a poor choice for a statement boundary, move the boundary
   147  					if v.Pos.IsStmt() == src.PosIsStmt {
   148  						if k := nextGoodStatementIndex(v, j, b); k != j {
   149  							v.Pos = v.Pos.WithNotStmt()
   150  							b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
   151  						}
   152  					}
   153  				}
   154  
   155  				change = change || vchange
   156  				if vchange && debug > 1 {
   157  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   158  				}
   159  			}
   160  		}
   161  		if !change && !deadChange {
   162  			break
   163  		}
   164  		iters++
   165  		if (iters > itersLimit || debug >= 2) && change {
   166  			// We've done a suspiciously large number of rewrites (or we're in debug mode).
   167  			// As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
   168  			// and the maximum value encountered during make.bash is 12.
   169  			// Start checking for cycles. (This is too expensive to do routinely.)
   170  			// Note: we avoid this path for deadChange-only iterations, to fix #51639.
   171  			if states == nil {
   172  				states = make(map[string]bool)
   173  			}
   174  			h := f.rewriteHash()
   175  			if _, ok := states[h]; ok {
   176  				// We've found a cycle.
   177  				// To diagnose it, set debug to 2 and start again,
   178  				// so that we'll print all rules applied until we complete another cycle.
   179  				// If debug is already >= 2, we've already done that, so it's time to crash.
   180  				if debug < 2 {
   181  					debug = 2
   182  					states = make(map[string]bool)
   183  				} else {
   184  					f.Fatalf("rewrite cycle detected")
   185  				}
   186  			}
   187  			states[h] = true
   188  		}
   189  	}
   190  	// remove clobbered values
   191  	for _, b := range f.Blocks {
   192  		j := 0
   193  		for i, v := range b.Values {
   194  			vl := v.Pos
   195  			if v.Op == OpInvalid {
   196  				if v.Pos.IsStmt() == src.PosIsStmt {
   197  					pendingLines.set(vl, int32(b.ID))
   198  				}
   199  				f.freeValue(v)
   200  				continue
   201  			}
   202  			if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) && pendingLines.get(vl) == int32(b.ID) {
   203  				pendingLines.remove(vl)
   204  				v.Pos = v.Pos.WithIsStmt()
   205  			}
   206  			if i != j {
   207  				b.Values[j] = v
   208  			}
   209  			j++
   210  		}
   211  		if pendingLines.get(b.Pos) == int32(b.ID) {
   212  			b.Pos = b.Pos.WithIsStmt()
   213  			pendingLines.remove(b.Pos)
   214  		}
   215  		b.truncateValues(j)
   216  	}
   217  }
   218  
   219  // Common functions called from rewriting rules
   220  
   221  func is64BitFloat(t *types.Type) bool {
   222  	return t.Size() == 8 && t.IsFloat()
   223  }
   224  
   225  func is32BitFloat(t *types.Type) bool {
   226  	return t.Size() == 4 && t.IsFloat()
   227  }
   228  
   229  func is64BitInt(t *types.Type) bool {
   230  	return t.Size() == 8 && t.IsInteger()
   231  }
   232  
   233  func is32BitInt(t *types.Type) bool {
   234  	return t.Size() == 4 && t.IsInteger()
   235  }
   236  
   237  func is16BitInt(t *types.Type) bool {
   238  	return t.Size() == 2 && t.IsInteger()
   239  }
   240  
   241  func is8BitInt(t *types.Type) bool {
   242  	return t.Size() == 1 && t.IsInteger()
   243  }
   244  
   245  func isPtr(t *types.Type) bool {
   246  	return t.IsPtrShaped()
   247  }
    248  
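         // copyCompatibleType reports whether a value of type t1 can be treated as a value
         // of type t2 for copying purposes: the sizes must match, integers only pair with
         // integers and pointers with pointers; any other types must be identical.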
   249  func copyCompatibleType(t1, t2 *types.Type) bool {
   250  	if t1.Size() != t2.Size() {
   251  		return false
   252  	}
   253  	if t1.IsInteger() {
   254  		return t2.IsInteger()
   255  	}
   256  	if isPtr(t1) {
   257  		return isPtr(t2)
   258  	}
   259  	return t1.Compare(t2) == types.CMPeq
   260  }
   261  
   262  // mergeSym merges two symbolic offsets. There is no real merging of
    263  // offsets; we just pick the non-nil one.
   264  func mergeSym(x, y Sym) Sym {
   265  	if x == nil {
   266  		return y
   267  	}
   268  	if y == nil {
   269  		return x
   270  	}
   271  	panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
   272  }
   273  
   274  func canMergeSym(x, y Sym) bool {
   275  	return x == nil || y == nil
   276  }
   277  
   278  // canMergeLoadClobber reports whether the load can be merged into target without
   279  // invalidating the schedule.
   280  // It also checks that the other non-load argument x is something we
   281  // are ok with clobbering.
   282  func canMergeLoadClobber(target, load, x *Value) bool {
   283  	// The register containing x is going to get clobbered.
   284  	// Don't merge if we still need the value of x.
   285  	// We don't have liveness information here, but we can
   286  	// approximate x dying with:
   287  	//  1) target is x's only use.
   288  	//  2) target is not in a deeper loop than x.
   289  	if x.Uses != 1 {
   290  		return false
   291  	}
   292  	loopnest := x.Block.Func.loopnest()
   293  	loopnest.calculateDepths()
   294  	if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
   295  		return false
   296  	}
   297  	return canMergeLoad(target, load)
   298  }
   299  
   300  // canMergeLoad reports whether the load can be merged into target without
   301  // invalidating the schedule.
   302  func canMergeLoad(target, load *Value) bool {
   303  	if target.Block.ID != load.Block.ID {
   304  		// If the load is in a different block do not merge it.
   305  		return false
   306  	}
   307  
   308  	// We can't merge the load into the target if the load
   309  	// has more than one use.
   310  	if load.Uses != 1 {
   311  		return false
   312  	}
   313  
   314  	mem := load.MemoryArg()
   315  
   316  	// We need the load's memory arg to still be alive at target. That
   317  	// can't be the case if one of target's args depends on a memory
   318  	// state that is a successor of load's memory arg.
   319  	//
   320  	// For example, it would be invalid to merge load into target in
   321  	// the following situation because newmem has killed oldmem
   322  	// before target is reached:
   323  	//     load = read ... oldmem
   324  	//   newmem = write ... oldmem
   325  	//     arg0 = read ... newmem
   326  	//   target = add arg0 load
   327  	//
   328  	// If the argument comes from a different block then we can exclude
   329  	// it immediately because it must dominate load (which is in the
   330  	// same block as target).
   331  	var args []*Value
   332  	for _, a := range target.Args {
   333  		if a != load && a.Block.ID == target.Block.ID {
   334  			args = append(args, a)
   335  		}
   336  	}
   337  
   338  	// memPreds contains memory states known to be predecessors of load's
   339  	// memory state. It is lazily initialized.
   340  	var memPreds map[*Value]bool
   341  	for i := 0; len(args) > 0; i++ {
   342  		const limit = 100
   343  		if i >= limit {
   344  			// Give up if we have done a lot of iterations.
   345  			return false
   346  		}
   347  		v := args[len(args)-1]
   348  		args = args[:len(args)-1]
   349  		if target.Block.ID != v.Block.ID {
   350  			// Since target and load are in the same block
   351  			// we can stop searching when we leave the block.
   352  			continue
   353  		}
   354  		if v.Op == OpPhi {
   355  			// A Phi implies we have reached the top of the block.
   356  			// The memory phi, if it exists, is always
   357  			// the first logical store in the block.
   358  			continue
   359  		}
   360  		if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
    361  			// We could handle this situation, but it is likely
   362  			// to be very rare.
   363  			return false
   364  		}
   365  		if v.Op.SymEffect()&SymAddr != 0 {
   366  			// This case prevents an operation that calculates the
   367  			// address of a local variable from being forced to schedule
   368  			// before its corresponding VarDef.
   369  			// See issue 28445.
   370  			//   v1 = LOAD ...
   371  			//   v2 = VARDEF
   372  			//   v3 = LEAQ
   373  			//   v4 = CMPQ v1 v3
   374  			// We don't want to combine the CMPQ with the load, because
   375  			// that would force the CMPQ to schedule before the VARDEF, which
   376  			// in turn requires the LEAQ to schedule before the VARDEF.
   377  			return false
   378  		}
   379  		if v.Type.IsMemory() {
   380  			if memPreds == nil {
   381  				// Initialise a map containing memory states
   382  				// known to be predecessors of load's memory
   383  				// state.
   384  				memPreds = make(map[*Value]bool)
   385  				m := mem
   386  				const limit = 50
   387  				for i := 0; i < limit; i++ {
   388  					if m.Op == OpPhi {
   389  						// The memory phi, if it exists, is always
   390  						// the first logical store in the block.
   391  						break
   392  					}
   393  					if m.Block.ID != target.Block.ID {
   394  						break
   395  					}
   396  					if !m.Type.IsMemory() {
   397  						break
   398  					}
   399  					memPreds[m] = true
   400  					if len(m.Args) == 0 {
   401  						break
   402  					}
   403  					m = m.MemoryArg()
   404  				}
   405  			}
   406  
   407  			// We can merge if v is a predecessor of mem.
   408  			//
   409  			// For example, we can merge load into target in the
   410  			// following scenario:
   411  			//      x = read ... v
   412  			//    mem = write ... v
   413  			//   load = read ... mem
   414  			// target = add x load
   415  			if memPreds[v] {
   416  				continue
   417  			}
   418  			return false
   419  		}
   420  		if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
   421  			// If v takes mem as an input then we know mem
   422  			// is valid at this point.
   423  			continue
   424  		}
   425  		for _, a := range v.Args {
   426  			if target.Block.ID == a.Block.ID {
   427  				args = append(args, a)
   428  			}
   429  		}
   430  	}
   431  
   432  	return true
   433  }
   434  
    435  // isSameCall reports whether aux describes a call to the function with the given name.
   436  func isSameCall(aux Aux, name string) bool {
   437  	fn := aux.(*AuxCall).Fn
   438  	return fn != nil && fn.String() == name
   439  }
   440  
    441  // canLoadUnaligned reports whether the architecture supports unaligned load operations.
   442  func canLoadUnaligned(c *Config) bool {
   443  	return c.ctxt.Arch.Alignment == 1
   444  }
   445  
   446  // nlzX returns the number of leading zeros.
   447  func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
   448  func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
   449  func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
   450  func nlz8(x int8) int   { return bits.LeadingZeros8(uint8(x)) }
   451  
   452  // ntzX returns the number of trailing zeros.
   453  func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
   454  func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
   455  func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
   456  func ntz8(x int8) int   { return bits.TrailingZeros8(uint8(x)) }
   457  
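         // oneBitX reports whether exactly one bit of x is set.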
   458  func oneBit(x int64) bool   { return x&(x-1) == 0 && x != 0 }
   459  func oneBit8(x int8) bool   { return x&(x-1) == 0 && x != 0 }
   460  func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 }
   461  func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 }
   462  func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 }
   463  
   464  // nto returns the number of trailing ones.
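         // For example, nto(0b0111) == 3 and nto(-1) == 64.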
   465  func nto(x int64) int64 {
   466  	return int64(ntz64(^x))
   467  }
   468  
    469  // logX returns the base-2 logarithm of n.
    470  // n must be a positive power of 2 (isPowerOfTwo reports true).
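         // For example, log64(1) == 0 and log64(8) == 3.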
   471  func log8(n int8) int64 {
   472  	return int64(bits.Len8(uint8(n))) - 1
   473  }
   474  func log16(n int16) int64 {
   475  	return int64(bits.Len16(uint16(n))) - 1
   476  }
   477  func log32(n int32) int64 {
   478  	return int64(bits.Len32(uint32(n))) - 1
   479  }
   480  func log64(n int64) int64 {
   481  	return int64(bits.Len64(uint64(n))) - 1
   482  }
   483  
   484  // log2uint32 returns logarithm in base 2 of uint32(n), with log2(0) = -1.
   485  // Rounds down.
   486  func log2uint32(n int64) int64 {
   487  	return int64(bits.Len32(uint32(n))) - 1
   488  }
   489  
    490  // isPowerOfTwo reports whether n is a power of 2.
   491  func isPowerOfTwo[T int8 | int16 | int32 | int64](n T) bool {
   492  	return n > 0 && n&(n-1) == 0
   493  }
   494  
   495  // isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
   496  func isUint64PowerOfTwo(in int64) bool {
   497  	n := uint64(in)
   498  	return n > 0 && n&(n-1) == 0
   499  }
   500  
   501  // isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
   502  func isUint32PowerOfTwo(in int64) bool {
   503  	n := uint64(uint32(in))
   504  	return n > 0 && n&(n-1) == 0
   505  }
   506  
   507  // is32Bit reports whether n can be represented as a signed 32 bit integer.
   508  func is32Bit(n int64) bool {
   509  	return n == int64(int32(n))
   510  }
   511  
   512  // is16Bit reports whether n can be represented as a signed 16 bit integer.
   513  func is16Bit(n int64) bool {
   514  	return n == int64(int16(n))
   515  }
   516  
   517  // is8Bit reports whether n can be represented as a signed 8 bit integer.
   518  func is8Bit(n int64) bool {
   519  	return n == int64(int8(n))
   520  }
   521  
   522  // isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
   523  func isU8Bit(n int64) bool {
   524  	return n == int64(uint8(n))
   525  }
   526  
   527  // isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
   528  func isU12Bit(n int64) bool {
   529  	return 0 <= n && n < (1<<12)
   530  }
   531  
   532  // isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
   533  func isU16Bit(n int64) bool {
   534  	return n == int64(uint16(n))
   535  }
   536  
   537  // isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
   538  func isU32Bit(n int64) bool {
   539  	return n == int64(uint32(n))
   540  }
   541  
   542  // is20Bit reports whether n can be represented as a signed 20 bit integer.
   543  func is20Bit(n int64) bool {
   544  	return -(1<<19) <= n && n < (1<<19)
   545  }
   546  
   547  // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
   548  func b2i(b bool) int64 {
   549  	if b {
   550  		return 1
   551  	}
   552  	return 0
   553  }
   554  
   555  // b2i32 translates a boolean value to 0 or 1.
   556  func b2i32(b bool) int32 {
   557  	if b {
   558  		return 1
   559  	}
   560  	return 0
   561  }
   562  
   563  func canMulStrengthReduce(config *Config, x int64) bool {
   564  	_, ok := config.mulRecipes[x]
   565  	return ok
   566  }
   567  func canMulStrengthReduce32(config *Config, x int32) bool {
   568  	_, ok := config.mulRecipes[int64(x)]
   569  	return ok
   570  }
   571  
   572  // mulStrengthReduce returns v*x evaluated at the location
   573  // (block and source position) of m.
   574  // canMulStrengthReduce must have returned true.
   575  func mulStrengthReduce(m *Value, v *Value, x int64) *Value {
   576  	return v.Block.Func.Config.mulRecipes[x].build(m, v)
   577  }
   578  
   579  // mulStrengthReduce32 returns v*x evaluated at the location
   580  // (block and source position) of m.
   581  // canMulStrengthReduce32 must have returned true.
    582  // The upper 32 bits of the result might be set to junk.
   583  func mulStrengthReduce32(m *Value, v *Value, x int32) *Value {
   584  	return v.Block.Func.Config.mulRecipes[int64(x)].build(m, v)
   585  }
   586  
   587  // shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
   588  // A shift is bounded if it is shifting by less than the width of the shifted value.
   589  func shiftIsBounded(v *Value) bool {
   590  	return v.AuxInt != 0
   591  }
   592  
   593  // canonLessThan returns whether x is "ordered" less than y, for purposes of normalizing
   594  // generated code as much as possible.
   595  func canonLessThan(x, y *Value) bool {
   596  	if x.Op != y.Op {
   597  		return x.Op < y.Op
   598  	}
   599  	if !x.Pos.SameFileAndLine(y.Pos) {
   600  		return x.Pos.Before(y.Pos)
   601  	}
   602  	return x.ID < y.ID
   603  }
   604  
   605  // truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
   606  // of the mantissa. It will panic if the truncation results in lost information.
   607  func truncate64Fto32F(f float64) float32 {
   608  	if !isExactFloat32(f) {
   609  		panic("truncate64Fto32F: truncation is not exact")
   610  	}
   611  	if !math.IsNaN(f) {
   612  		return float32(f)
   613  	}
   614  	// NaN bit patterns aren't necessarily preserved across conversion
   615  	// instructions so we need to do the conversion manually.
   616  	b := math.Float64bits(f)
   617  	m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
   618  	//          | sign                  | exponent   | mantissa       |
   619  	r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
   620  	return math.Float32frombits(r)
   621  }
   622  
   623  // extend32Fto64F converts a float32 value to a float64 value preserving the bit
   624  // pattern of the mantissa.
   625  func extend32Fto64F(f float32) float64 {
   626  	if !math.IsNaN(float64(f)) {
   627  		return float64(f)
   628  	}
   629  	// NaN bit patterns aren't necessarily preserved across conversion
   630  	// instructions so we need to do the conversion manually.
   631  	b := uint64(math.Float32bits(f))
   632  	//   | sign                  | exponent      | mantissa                    |
   633  	r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
   634  	return math.Float64frombits(r)
   635  }
   636  
   637  // DivisionNeedsFixUp reports whether the division needs fix-up code.
   638  func DivisionNeedsFixUp(v *Value) bool {
   639  	return v.AuxInt == 0
   640  }
   641  
   642  // auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
   643  func auxFrom64F(f float64) int64 {
   644  	if f != f {
   645  		panic("can't encode a NaN in AuxInt field")
   646  	}
   647  	return int64(math.Float64bits(f))
   648  }
   649  
   650  // auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
   651  func auxFrom32F(f float32) int64 {
   652  	if f != f {
   653  		panic("can't encode a NaN in AuxInt field")
   654  	}
   655  	return int64(math.Float64bits(extend32Fto64F(f)))
   656  }
   657  
   658  // auxTo32F decodes a float32 from the AuxInt value provided.
   659  func auxTo32F(i int64) float32 {
   660  	return truncate64Fto32F(math.Float64frombits(uint64(i)))
   661  }
   662  
   663  // auxTo64F decodes a float64 from the AuxInt value provided.
   664  func auxTo64F(i int64) float64 {
   665  	return math.Float64frombits(uint64(i))
   666  }
   667  
   668  func auxIntToBool(i int64) bool {
   669  	if i == 0 {
   670  		return false
   671  	}
   672  	return true
   673  }
   674  func auxIntToInt8(i int64) int8 {
   675  	return int8(i)
   676  }
   677  func auxIntToInt16(i int64) int16 {
   678  	return int16(i)
   679  }
   680  func auxIntToInt32(i int64) int32 {
   681  	return int32(i)
   682  }
   683  func auxIntToInt64(i int64) int64 {
   684  	return i
   685  }
   686  func auxIntToUint8(i int64) uint8 {
   687  	return uint8(i)
   688  }
   689  func auxIntToFloat32(i int64) float32 {
   690  	return float32(math.Float64frombits(uint64(i)))
   691  }
   692  func auxIntToFloat64(i int64) float64 {
   693  	return math.Float64frombits(uint64(i))
   694  }
   695  func auxIntToValAndOff(i int64) ValAndOff {
   696  	return ValAndOff(i)
   697  }
   698  func auxIntToArm64BitField(i int64) arm64BitField {
   699  	return arm64BitField(i)
   700  }
   701  func auxIntToInt128(x int64) int128 {
   702  	if x != 0 {
   703  		panic("nonzero int128 not allowed")
   704  	}
   705  	return 0
   706  }
   707  func auxIntToFlagConstant(x int64) flagConstant {
   708  	return flagConstant(x)
   709  }
   710  
   711  func auxIntToOp(cc int64) Op {
   712  	return Op(cc)
   713  }
   714  
   715  func boolToAuxInt(b bool) int64 {
   716  	if b {
   717  		return 1
   718  	}
   719  	return 0
   720  }
   721  func int8ToAuxInt(i int8) int64 {
   722  	return int64(i)
   723  }
   724  func int16ToAuxInt(i int16) int64 {
   725  	return int64(i)
   726  }
   727  func int32ToAuxInt(i int32) int64 {
   728  	return int64(i)
   729  }
   730  func int64ToAuxInt(i int64) int64 {
   731  	return int64(i)
   732  }
   733  func uint8ToAuxInt(i uint8) int64 {
   734  	return int64(int8(i))
   735  }
   736  func float32ToAuxInt(f float32) int64 {
   737  	return int64(math.Float64bits(float64(f)))
   738  }
   739  func float64ToAuxInt(f float64) int64 {
   740  	return int64(math.Float64bits(f))
   741  }
   742  func valAndOffToAuxInt(v ValAndOff) int64 {
   743  	return int64(v)
   744  }
   745  func arm64BitFieldToAuxInt(v arm64BitField) int64 {
   746  	return int64(v)
   747  }
   748  func int128ToAuxInt(x int128) int64 {
   749  	if x != 0 {
   750  		panic("nonzero int128 not allowed")
   751  	}
   752  	return 0
   753  }
   754  func flagConstantToAuxInt(x flagConstant) int64 {
   755  	return int64(x)
   756  }
   757  
   758  func opToAuxInt(o Op) int64 {
   759  	return int64(o)
   760  }
   761  
   762  // Aux is an interface to hold miscellaneous data in Blocks and Values.
   763  type Aux interface {
   764  	CanBeAnSSAAux()
   765  }
   766  
   767  // for now only used to mark moves that need to avoid clobbering flags
   768  type auxMark bool
   769  
   770  func (auxMark) CanBeAnSSAAux() {}
   771  
   772  var AuxMark auxMark
   773  
   774  // stringAux wraps string values for use in Aux.
   775  type stringAux string
   776  
   777  func (stringAux) CanBeAnSSAAux() {}
   778  
   779  func auxToString(i Aux) string {
   780  	return string(i.(stringAux))
   781  }
   782  func auxToSym(i Aux) Sym {
   783  	// TODO: kind of a hack - allows nil interface through
   784  	s, _ := i.(Sym)
   785  	return s
   786  }
   787  func auxToType(i Aux) *types.Type {
   788  	return i.(*types.Type)
   789  }
   790  func auxToCall(i Aux) *AuxCall {
   791  	return i.(*AuxCall)
   792  }
   793  func auxToS390xCCMask(i Aux) s390x.CCMask {
   794  	return i.(s390x.CCMask)
   795  }
   796  func auxToS390xRotateParams(i Aux) s390x.RotateParams {
   797  	return i.(s390x.RotateParams)
   798  }
   799  
   800  func StringToAux(s string) Aux {
   801  	return stringAux(s)
   802  }
   803  func symToAux(s Sym) Aux {
   804  	return s
   805  }
   806  func callToAux(s *AuxCall) Aux {
   807  	return s
   808  }
   809  func typeToAux(t *types.Type) Aux {
   810  	return t
   811  }
   812  func s390xCCMaskToAux(c s390x.CCMask) Aux {
   813  	return c
   814  }
   815  func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
   816  	return r
   817  }
   818  
   819  // uaddOvf reports whether unsigned a+b would overflow.
   820  func uaddOvf(a, b int64) bool {
   821  	return uint64(a)+uint64(b) < uint64(a)
   822  }
   823  
   824  // loadLSymOffset simulates reading a word at an offset into a
   825  // read-only symbol's runtime memory. If it would read a pointer to
   826  // another symbol, that symbol is returned. Otherwise, it returns nil.
   827  func loadLSymOffset(lsym *obj.LSym, offset int64) *obj.LSym {
   828  	if lsym.Type != objabi.SRODATA {
   829  		return nil
   830  	}
   831  
   832  	for _, r := range lsym.R {
   833  		if int64(r.Off) == offset && r.Type&^objabi.R_WEAK == objabi.R_ADDR && r.Add == 0 {
   834  			return r.Sym
   835  		}
   836  	}
   837  
   838  	return nil
   839  }
   840  
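         // devirtLECall rewrites the interface call v into a static call to the function sym,
         // dropping the first argument (the code pointer), which is no longer needed.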
   841  func devirtLECall(v *Value, sym *obj.LSym) *Value {
   842  	v.Op = OpStaticLECall
   843  	auxcall := v.Aux.(*AuxCall)
   844  	auxcall.Fn = sym
   845  	// Remove first arg
   846  	v.Args[0].Uses--
   847  	copy(v.Args[0:], v.Args[1:])
   848  	v.Args[len(v.Args)-1] = nil // aid GC
   849  	v.Args = v.Args[:len(v.Args)-1]
   850  	if f := v.Block.Func; f.pass.debug > 0 {
   851  		f.Warnl(v.Pos, "de-virtualizing call")
   852  	}
   853  	return v
   854  }
   855  
   856  // isSamePtr reports whether p1 and p2 point to the same address.
   857  func isSamePtr(p1, p2 *Value) bool {
   858  	if p1 == p2 {
   859  		return true
   860  	}
   861  	if p1.Op != p2.Op {
   862  		for p1.Op == OpOffPtr && p1.AuxInt == 0 {
   863  			p1 = p1.Args[0]
   864  		}
   865  		for p2.Op == OpOffPtr && p2.AuxInt == 0 {
   866  			p2 = p2.Args[0]
   867  		}
   868  		if p1 == p2 {
   869  			return true
   870  		}
   871  		if p1.Op != p2.Op {
   872  			return false
   873  		}
   874  	}
   875  	switch p1.Op {
   876  	case OpOffPtr:
   877  		return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
   878  	case OpAddr, OpLocalAddr:
   879  		return p1.Aux == p2.Aux
   880  	case OpAddPtr:
   881  		return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
   882  	}
   883  	return false
   884  }
   885  
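         // isStackPtr reports whether v is a pointer (possibly with offsets applied) into the
         // current function's stack frame.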
   886  func isStackPtr(v *Value) bool {
   887  	for v.Op == OpOffPtr || v.Op == OpAddPtr {
   888  		v = v.Args[0]
   889  	}
   890  	return v.Op == OpSP || v.Op == OpLocalAddr
   891  }
   892  
   893  // disjoint reports whether the memory region specified by [p1:p1+n1)
   894  // does not overlap with [p2:p2+n2).
   895  // A return value of false does not imply the regions overlap.
   896  func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
   897  	if n1 == 0 || n2 == 0 {
   898  		return true
   899  	}
   900  	if p1 == p2 {
   901  		return false
   902  	}
   903  	baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
   904  		base, offset = ptr, 0
   905  		for base.Op == OpOffPtr {
   906  			offset += base.AuxInt
   907  			base = base.Args[0]
   908  		}
   909  		if opcodeTable[base.Op].nilCheck {
   910  			base = base.Args[0]
   911  		}
   912  		return base, offset
   913  	}
   914  
    915  	// Run type-based aliasing analysis.
   916  	if disjointTypes(p1.Type, p2.Type) {
   917  		return true
   918  	}
   919  
   920  	p1, off1 := baseAndOffset(p1)
   921  	p2, off2 := baseAndOffset(p2)
   922  	if isSamePtr(p1, p2) {
   923  		return !overlap(off1, n1, off2, n2)
   924  	}
   925  	// p1 and p2 are not the same, so if they are both OpAddrs then
   926  	// they point to different variables.
   927  	// If one pointer is on the stack and the other is an argument
   928  	// then they can't overlap.
   929  	switch p1.Op {
   930  	case OpAddr, OpLocalAddr:
   931  		if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
   932  			return true
   933  		}
   934  		return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
   935  	case OpArg, OpArgIntReg:
   936  		if p2.Op == OpSP || p2.Op == OpLocalAddr {
   937  			return true
   938  		}
   939  	case OpSP:
   940  		return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
   941  	}
   942  	return false
   943  }
   944  
   945  // disjointTypes reports whether a memory region pointed to by a pointer of type
   946  // t1 does not overlap with a memory region pointed to by a pointer of type t2 --
   947  // based on type aliasing rules.
   948  func disjointTypes(t1 *types.Type, t2 *types.Type) bool {
   949  	// Unsafe pointer can alias with anything.
   950  	if t1.IsUnsafePtr() || t2.IsUnsafePtr() {
   951  		return false
   952  	}
   953  
   954  	if !t1.IsPtr() || !t2.IsPtr() {
   955  		panic("disjointTypes: one of arguments is not a pointer")
   956  	}
   957  
   958  	t1 = t1.Elem()
   959  	t2 = t2.Elem()
   960  
    961  	// Not-in-heap types are not supported -- they are rare and unimportant; also, the
    962  	// type.HasPointers check doesn't work correctly for them.
   963  	if t1.NotInHeap() || t2.NotInHeap() {
   964  		return false
   965  	}
   966  
   967  	isPtrShaped := func(t *types.Type) bool { return int(t.Size()) == types.PtrSize && t.HasPointers() }
   968  
   969  	// Pointers and non-pointers are disjoint (https://pkg.go.dev/unsafe#Pointer).
   970  	if (isPtrShaped(t1) && !t2.HasPointers()) ||
   971  		(isPtrShaped(t2) && !t1.HasPointers()) {
   972  		return true
   973  	}
   974  
   975  	return false
   976  }
   977  
   978  // moveSize returns the number of bytes an aligned MOV instruction moves.
   979  func moveSize(align int64, c *Config) int64 {
   980  	switch {
   981  	case align%8 == 0 && c.PtrSize == 8:
   982  		return 8
   983  	case align%4 == 0:
   984  		return 4
   985  	case align%2 == 0:
   986  		return 2
   987  	}
   988  	return 1
   989  }
   990  
   991  // mergePoint finds a block among a's blocks which dominates b and is itself
   992  // dominated by all of a's blocks. Returns nil if it can't find one.
   993  // Might return nil even if one does exist.
   994  func mergePoint(b *Block, a ...*Value) *Block {
   995  	// Walk backward from b looking for one of the a's blocks.
   996  
   997  	// Max distance
   998  	d := 100
   999  
  1000  	for d > 0 {
  1001  		for _, x := range a {
  1002  			if b == x.Block {
  1003  				goto found
  1004  			}
  1005  		}
  1006  		if len(b.Preds) > 1 {
  1007  			// Don't know which way to go back. Abort.
  1008  			return nil
  1009  		}
  1010  		b = b.Preds[0].b
  1011  		d--
  1012  	}
  1013  	return nil // too far away
  1014  found:
   1015  	// At this point, b is the block of the first value in a found by walking backwards.
   1016  	// If we return anything, r will be it.
  1017  	r := b
  1018  
  1019  	// Keep going, counting the other a's that we find. They must all dominate r.
  1020  	na := 0
  1021  	for d > 0 {
  1022  		for _, x := range a {
  1023  			if b == x.Block {
  1024  				na++
  1025  			}
  1026  		}
  1027  		if na == len(a) {
  1028  			// Found all of a in a backwards walk. We can return r.
  1029  			return r
  1030  		}
  1031  		if len(b.Preds) > 1 {
  1032  			return nil
  1033  		}
  1034  		b = b.Preds[0].b
  1035  		d--
  1036  
  1037  	}
  1038  	return nil // too far away
  1039  }
  1040  
  1041  // clobber invalidates values. Returns true.
  1042  // clobber is used by rewrite rules to:
  1043  //
  1044  //	A) make sure the values are really dead and never used again.
  1045  //	B) decrement use counts of the values' args.
  1046  func clobber(vv ...*Value) bool {
  1047  	for _, v := range vv {
  1048  		v.reset(OpInvalid)
  1049  		// Note: leave v.Block intact.  The Block field is used after clobber.
  1050  	}
  1051  	return true
  1052  }
  1053  
  1054  // resetCopy resets v to be a copy of arg.
  1055  // Always returns true.
  1056  func resetCopy(v *Value, arg *Value) bool {
  1057  	v.reset(OpCopy)
  1058  	v.AddArg(arg)
  1059  	return true
  1060  }
  1061  
  1062  // clobberIfDead resets v when use count is 1. Returns true.
  1063  // clobberIfDead is used by rewrite rules to decrement
  1064  // use counts of v's args when v is dead and never used.
  1065  func clobberIfDead(v *Value) bool {
  1066  	if v.Uses == 1 {
  1067  		v.reset(OpInvalid)
  1068  	}
  1069  	// Note: leave v.Block intact.  The Block field is used after clobberIfDead.
  1070  	return true
  1071  }
  1072  
  1073  // noteRule is an easy way to track if a rule is matched when writing
  1074  // new ones.  Make the rule of interest also conditional on
  1075  //
  1076  //	noteRule("note to self: rule of interest matched")
  1077  //
  1078  // and that message will print when the rule matches.
  1079  func noteRule(s string) bool {
  1080  	fmt.Println(s)
  1081  	return true
  1082  }
  1083  
  1084  // countRule increments Func.ruleMatches[key].
  1085  // If Func.ruleMatches is non-nil at the end
  1086  // of compilation, it will be printed to stdout.
  1087  // This is intended to make it easier to find which functions
   1088  // contain lots of rule matches when developing new rules.
  1089  func countRule(v *Value, key string) bool {
  1090  	f := v.Block.Func
  1091  	if f.ruleMatches == nil {
  1092  		f.ruleMatches = make(map[string]int)
  1093  	}
  1094  	f.ruleMatches[key]++
  1095  	return true
  1096  }
  1097  
  1098  // warnRule generates compiler debug output with string s when
  1099  // v is not in autogenerated code, cond is true and the rule has fired.
  1100  func warnRule(cond bool, v *Value, s string) bool {
  1101  	if pos := v.Pos; pos.Line() > 1 && cond {
  1102  		v.Block.Func.Warnl(pos, s)
  1103  	}
  1104  	return true
  1105  }
  1106  
  1107  // for a pseudo-op like (LessThan x), extract x.
  1108  func flagArg(v *Value) *Value {
  1109  	if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
  1110  		return nil
  1111  	}
  1112  	return v.Args[0]
  1113  }
  1114  
  1115  // arm64Negate finds the complement to an ARM64 condition code,
  1116  // for example !Equal -> NotEqual or !LessThan -> GreaterEqual
  1117  //
  1118  // For floating point, it's more subtle because NaN is unordered. We do
  1119  // !LessThanF -> NotLessThanF, the latter takes care of NaNs.
  1120  func arm64Negate(op Op) Op {
  1121  	switch op {
  1122  	case OpARM64LessThan:
  1123  		return OpARM64GreaterEqual
  1124  	case OpARM64LessThanU:
  1125  		return OpARM64GreaterEqualU
  1126  	case OpARM64GreaterThan:
  1127  		return OpARM64LessEqual
  1128  	case OpARM64GreaterThanU:
  1129  		return OpARM64LessEqualU
  1130  	case OpARM64LessEqual:
  1131  		return OpARM64GreaterThan
  1132  	case OpARM64LessEqualU:
  1133  		return OpARM64GreaterThanU
  1134  	case OpARM64GreaterEqual:
  1135  		return OpARM64LessThan
  1136  	case OpARM64GreaterEqualU:
  1137  		return OpARM64LessThanU
  1138  	case OpARM64Equal:
  1139  		return OpARM64NotEqual
  1140  	case OpARM64NotEqual:
  1141  		return OpARM64Equal
  1142  	case OpARM64LessThanF:
  1143  		return OpARM64NotLessThanF
  1144  	case OpARM64NotLessThanF:
  1145  		return OpARM64LessThanF
  1146  	case OpARM64LessEqualF:
  1147  		return OpARM64NotLessEqualF
  1148  	case OpARM64NotLessEqualF:
  1149  		return OpARM64LessEqualF
  1150  	case OpARM64GreaterThanF:
  1151  		return OpARM64NotGreaterThanF
  1152  	case OpARM64NotGreaterThanF:
  1153  		return OpARM64GreaterThanF
  1154  	case OpARM64GreaterEqualF:
  1155  		return OpARM64NotGreaterEqualF
  1156  	case OpARM64NotGreaterEqualF:
  1157  		return OpARM64GreaterEqualF
  1158  	default:
  1159  		panic("unreachable")
  1160  	}
  1161  }
  1162  
  1163  // arm64Invert evaluates (InvertFlags op), which
  1164  // is the same as altering the condition codes such
  1165  // that the same result would be produced if the arguments
  1166  // to the flag-generating instruction were reversed, e.g.
  1167  // (InvertFlags (CMP x y)) -> (CMP y x)
  1168  func arm64Invert(op Op) Op {
  1169  	switch op {
  1170  	case OpARM64LessThan:
  1171  		return OpARM64GreaterThan
  1172  	case OpARM64LessThanU:
  1173  		return OpARM64GreaterThanU
  1174  	case OpARM64GreaterThan:
  1175  		return OpARM64LessThan
  1176  	case OpARM64GreaterThanU:
  1177  		return OpARM64LessThanU
  1178  	case OpARM64LessEqual:
  1179  		return OpARM64GreaterEqual
  1180  	case OpARM64LessEqualU:
  1181  		return OpARM64GreaterEqualU
  1182  	case OpARM64GreaterEqual:
  1183  		return OpARM64LessEqual
  1184  	case OpARM64GreaterEqualU:
  1185  		return OpARM64LessEqualU
  1186  	case OpARM64Equal, OpARM64NotEqual:
  1187  		return op
  1188  	case OpARM64LessThanF:
  1189  		return OpARM64GreaterThanF
  1190  	case OpARM64GreaterThanF:
  1191  		return OpARM64LessThanF
  1192  	case OpARM64LessEqualF:
  1193  		return OpARM64GreaterEqualF
  1194  	case OpARM64GreaterEqualF:
  1195  		return OpARM64LessEqualF
  1196  	case OpARM64NotLessThanF:
  1197  		return OpARM64NotGreaterThanF
  1198  	case OpARM64NotGreaterThanF:
  1199  		return OpARM64NotLessThanF
  1200  	case OpARM64NotLessEqualF:
  1201  		return OpARM64NotGreaterEqualF
  1202  	case OpARM64NotGreaterEqualF:
  1203  		return OpARM64NotLessEqualF
  1204  	default:
  1205  		panic("unreachable")
  1206  	}
  1207  }
  1208  
   1209  // ccARM64Eval evaluates an ARM64 op against a flags value
  1210  // that is potentially constant; return 1 for true,
  1211  // -1 for false, and 0 for not constant.
  1212  func ccARM64Eval(op Op, flags *Value) int {
  1213  	fop := flags.Op
  1214  	if fop == OpARM64InvertFlags {
  1215  		return -ccARM64Eval(op, flags.Args[0])
  1216  	}
  1217  	if fop != OpARM64FlagConstant {
  1218  		return 0
  1219  	}
  1220  	fc := flagConstant(flags.AuxInt)
  1221  	b2i := func(b bool) int {
  1222  		if b {
  1223  			return 1
  1224  		}
  1225  		return -1
  1226  	}
  1227  	switch op {
  1228  	case OpARM64Equal:
  1229  		return b2i(fc.eq())
  1230  	case OpARM64NotEqual:
  1231  		return b2i(fc.ne())
  1232  	case OpARM64LessThan:
  1233  		return b2i(fc.lt())
  1234  	case OpARM64LessThanU:
  1235  		return b2i(fc.ult())
  1236  	case OpARM64GreaterThan:
  1237  		return b2i(fc.gt())
  1238  	case OpARM64GreaterThanU:
  1239  		return b2i(fc.ugt())
  1240  	case OpARM64LessEqual:
  1241  		return b2i(fc.le())
  1242  	case OpARM64LessEqualU:
  1243  		return b2i(fc.ule())
  1244  	case OpARM64GreaterEqual:
  1245  		return b2i(fc.ge())
  1246  	case OpARM64GreaterEqualU:
  1247  		return b2i(fc.uge())
  1248  	}
  1249  	return 0
  1250  }
  1251  
  1252  // logRule logs the use of the rule s. This will only be enabled if
  1253  // rewrite rules were generated with the -log option, see _gen/rulegen.go.
  1254  func logRule(s string) {
  1255  	if ruleFile == nil {
  1256  		// Open a log file to write log to. We open in append
  1257  		// mode because all.bash runs the compiler lots of times,
  1258  		// and we want the concatenation of all of those logs.
  1259  		// This means, of course, that users need to rm the old log
  1260  		// to get fresh data.
  1261  		// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
  1262  		w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
  1263  			os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
  1264  		if err != nil {
  1265  			panic(err)
  1266  		}
  1267  		ruleFile = w
  1268  	}
  1269  	_, err := fmt.Fprintln(ruleFile, s)
  1270  	if err != nil {
  1271  		panic(err)
  1272  	}
  1273  }
  1274  
  1275  var ruleFile io.Writer
  1276  
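         // isConstZero reports whether v is known to be the zero value of its type.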
  1277  func isConstZero(v *Value) bool {
  1278  	switch v.Op {
  1279  	case OpConstNil:
  1280  		return true
  1281  	case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
  1282  		return v.AuxInt == 0
  1283  	case OpStringMake, OpIMake, OpComplexMake:
  1284  		return isConstZero(v.Args[0]) && isConstZero(v.Args[1])
  1285  	case OpSliceMake:
  1286  		return isConstZero(v.Args[0]) && isConstZero(v.Args[1]) && isConstZero(v.Args[2])
  1287  	case OpStringPtr, OpStringLen, OpSlicePtr, OpSliceLen, OpSliceCap, OpITab, OpIData, OpComplexReal, OpComplexImag:
  1288  		return isConstZero(v.Args[0])
  1289  	}
  1290  	return false
  1291  }
  1292  
  1293  // reciprocalExact64 reports whether 1/c is exactly representable.
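         // For example, reciprocalExact64(4) is true (1/4 == 0.25 is exact), while
         // reciprocalExact64(3) is false (1/3 is not exactly representable).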
  1294  func reciprocalExact64(c float64) bool {
  1295  	b := math.Float64bits(c)
  1296  	man := b & (1<<52 - 1)
  1297  	if man != 0 {
  1298  		return false // not a power of 2, denormal, or NaN
  1299  	}
  1300  	exp := b >> 52 & (1<<11 - 1)
  1301  	// exponent bias is 0x3ff.  So taking the reciprocal of a number
  1302  	// changes the exponent to 0x7fe-exp.
  1303  	switch exp {
  1304  	case 0:
  1305  		return false // ±0
  1306  	case 0x7ff:
  1307  		return false // ±inf
  1308  	case 0x7fe:
  1309  		return false // exponent is not representable
  1310  	default:
  1311  		return true
  1312  	}
  1313  }
  1314  
  1315  // reciprocalExact32 reports whether 1/c is exactly representable.
  1316  func reciprocalExact32(c float32) bool {
  1317  	b := math.Float32bits(c)
  1318  	man := b & (1<<23 - 1)
  1319  	if man != 0 {
  1320  		return false // not a power of 2, denormal, or NaN
  1321  	}
  1322  	exp := b >> 23 & (1<<8 - 1)
  1323  	// exponent bias is 0x7f.  So taking the reciprocal of a number
  1324  	// changes the exponent to 0xfe-exp.
  1325  	switch exp {
  1326  	case 0:
  1327  		return false // ±0
  1328  	case 0xff:
  1329  		return false // ±inf
  1330  	case 0xfe:
  1331  		return false // exponent is not representable
  1332  	default:
  1333  		return true
  1334  	}
  1335  }
  1336  
   1337  // isARMImmRot reports whether the immediate v can be directly encoded in an ARM instruction.
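         // Encodable immediates are an 8-bit value rotated right by an even number of bits,
         // so, for example, 0xff, 0x3fc (0xff<<2), and 0xff000000 are encodable, while 0x101 is not.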
  1338  func isARMImmRot(v uint32) bool {
  1339  	for i := 0; i < 16; i++ {
  1340  		if v&^0xff == 0 {
  1341  			return true
  1342  		}
  1343  		v = v<<2 | v>>30
  1344  	}
  1345  
  1346  	return false
  1347  }
  1348  
  1349  // overlap reports whether the ranges given by the given offset and
  1350  // size pairs overlap.
  1351  func overlap(offset1, size1, offset2, size2 int64) bool {
  1352  	if offset1 >= offset2 && offset2+size2 > offset1 {
  1353  		return true
  1354  	}
  1355  	if offset2 >= offset1 && offset1+size1 > offset2 {
  1356  		return true
  1357  	}
  1358  	return false
  1359  }
  1360  
   1361  // zeroUpper32Bits reports whether x zeroes out the upper 32 bits of a 64-bit register.
   1362  // depth limits the recursion depth. In AMD64.rules, 3 is used as the limit
   1363  // because it catches the same number of cases as 4.
  1364  func zeroUpper32Bits(x *Value, depth int) bool {
  1365  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1366  		// If the value is signed, it might get re-sign-extended
  1367  		// during spill and restore. See issue 68227.
  1368  		return false
  1369  	}
  1370  	switch x.Op {
  1371  	case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
  1372  		OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
  1373  		OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
  1374  		OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
  1375  		OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
  1376  		OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
  1377  		OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
  1378  		OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
  1379  		OpAMD64SHLL, OpAMD64SHLLconst:
  1380  		return true
  1381  	case OpARM64REV16W, OpARM64REVW, OpARM64RBITW, OpARM64CLZW, OpARM64EXTRWconst,
  1382  		OpARM64MULW, OpARM64MNEGW, OpARM64UDIVW, OpARM64DIVW, OpARM64UMODW,
  1383  		OpARM64MADDW, OpARM64MSUBW, OpARM64RORW, OpARM64RORWconst:
  1384  		return true
  1385  	case OpArg: // note: but not ArgIntReg
  1386  		// amd64 always loads args from the stack unsigned.
  1387  		// most other architectures load them sign/zero extended based on the type.
  1388  		return x.Type.Size() == 4 && x.Block.Func.Config.arch == "amd64"
  1389  	case OpPhi, OpSelect0, OpSelect1:
   1390  		// Phis can use each other as arguments; instead of tracking visited values,
   1391  		// just limit the recursion depth.
  1392  		if depth <= 0 {
  1393  			return false
  1394  		}
  1395  		for i := range x.Args {
  1396  			if !zeroUpper32Bits(x.Args[i], depth-1) {
  1397  				return false
  1398  			}
  1399  		}
  1400  		return true
  1401  
  1402  	}
  1403  	return false
  1404  }
  1405  
  1406  // zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits.
  1407  func zeroUpper48Bits(x *Value, depth int) bool {
  1408  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1409  		return false
  1410  	}
  1411  	switch x.Op {
  1412  	case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
  1413  		return true
  1414  	case OpArg: // note: but not ArgIntReg
  1415  		return x.Type.Size() == 2 && x.Block.Func.Config.arch == "amd64"
  1416  	case OpPhi, OpSelect0, OpSelect1:
   1417  		// Phis can use each other as arguments; instead of tracking visited values,
   1418  		// just limit the recursion depth.
  1419  		if depth <= 0 {
  1420  			return false
  1421  		}
  1422  		for i := range x.Args {
  1423  			if !zeroUpper48Bits(x.Args[i], depth-1) {
  1424  				return false
  1425  			}
  1426  		}
  1427  		return true
  1428  
  1429  	}
  1430  	return false
  1431  }
  1432  
  1433  // zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits.
  1434  func zeroUpper56Bits(x *Value, depth int) bool {
  1435  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1436  		return false
  1437  	}
  1438  	switch x.Op {
  1439  	case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
  1440  		return true
  1441  	case OpArg: // note: but not ArgIntReg
  1442  		return x.Type.Size() == 1 && x.Block.Func.Config.arch == "amd64"
  1443  	case OpPhi, OpSelect0, OpSelect1:
   1444  		// Phis can use each other as arguments; instead of tracking visited values,
   1445  		// just limit the recursion depth.
  1446  		if depth <= 0 {
  1447  			return false
  1448  		}
  1449  		for i := range x.Args {
  1450  			if !zeroUpper56Bits(x.Args[i], depth-1) {
  1451  				return false
  1452  			}
  1453  		}
  1454  		return true
  1455  
  1456  	}
  1457  	return false
  1458  }
  1459  
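         // isInlinableMemclr reports whether a memory clear of sz bytes should be inlined
         // on the target architecture rather than calling into the runtime.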
  1460  func isInlinableMemclr(c *Config, sz int64) bool {
  1461  	if sz < 0 {
  1462  		return false
  1463  	}
  1464  	// TODO: expand this check to allow other architectures
  1465  	// see CL 454255 and issue 56997
  1466  	switch c.arch {
  1467  	case "amd64", "arm64":
  1468  		return true
  1469  	case "ppc64le", "ppc64", "loong64":
  1470  		return sz < 512
  1471  	}
  1472  	return false
  1473  }
  1474  
  1475  // isInlinableMemmove reports whether the given arch performs a Move of the given size
  1476  // faster than memmove. It will only return true if replacing the memmove with a Move is
  1477  // safe, either because Move will do all of its loads before any of its stores, or
  1478  // because the arguments are known to be disjoint.
  1479  // This is used as a check for replacing memmove with Move ops.
  1480  func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
  1481  	// It is always safe to convert memmove into Move when its arguments are disjoint.
  1482  	// Move ops may or may not be faster for large sizes depending on how the platform
  1483  	// lowers them, so we only perform this optimization on platforms that we know to
  1484  	// have fast Move ops.
  1485  	switch c.arch {
  1486  	case "amd64":
  1487  		return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
  1488  	case "386", "arm64":
  1489  		return sz <= 8
  1490  	case "s390x", "ppc64", "ppc64le":
  1491  		return sz <= 8 || disjoint(dst, sz, src, sz)
  1492  	case "arm", "loong64", "mips", "mips64", "mipsle", "mips64le":
  1493  		return sz <= 4
  1494  	}
  1495  	return false
  1496  }
  1497  func IsInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
  1498  	return isInlinableMemmove(dst, src, sz, c)
  1499  }
  1500  
  1501  // logLargeCopy logs the occurrence of a large copy.
  1502  // The best place to do this is in the rewrite rules where the size of the move is easy to find.
  1503  // "Large" is arbitrarily chosen to be 128 bytes; this may change.
  1504  func logLargeCopy(v *Value, s int64) bool {
  1505  	if s < 128 {
  1506  		return true
  1507  	}
  1508  	if logopt.Enabled() {
  1509  		logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
  1510  	}
  1511  	return true
  1512  }
  1513  func LogLargeCopy(funcName string, pos src.XPos, s int64) {
  1514  	if s < 128 {
  1515  		return
  1516  	}
  1517  	if logopt.Enabled() {
  1518  		logopt.LogOpt(pos, "copy", "lower", funcName, fmt.Sprintf("%d bytes", s))
  1519  	}
  1520  }
  1521  
  1522  // hasSmallRotate reports whether the architecture has rotate instructions
  1523  // for sizes < 32-bit.  This is used to decide whether to promote some rotations.
  1524  func hasSmallRotate(c *Config) bool {
  1525  	switch c.arch {
  1526  	case "amd64", "386":
  1527  		return true
  1528  	default:
  1529  		return false
  1530  	}
  1531  }
  1532  
  1533  func supportsPPC64PCRel() bool {
  1534  	// PCRel is currently supported for >= power10, linux only
  1535  	// Internal and external linking supports this on ppc64le; internal linking on ppc64.
  1536  	return buildcfg.GOPPC64 >= 10 && buildcfg.GOOS == "linux"
  1537  }
  1538  
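         // newPPC64ShiftAuxInt packs the shift amount sh, mask begin mb, and mask end me
         // into a single aux int of the form sh<<16 | mb<<8 | me, after range-checking each
         // against the operand size sz.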
  1539  func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
  1540  	if sh < 0 || sh >= sz {
  1541  		panic("PPC64 shift arg sh out of range")
  1542  	}
  1543  	if mb < 0 || mb >= sz {
  1544  		panic("PPC64 shift arg mb out of range")
  1545  	}
  1546  	if me < 0 || me >= sz {
  1547  		panic("PPC64 shift arg me out of range")
  1548  	}
  1549  	return int32(sh<<16 | mb<<8 | me)
  1550  }
  1551  
  1552  func GetPPC64Shiftsh(auxint int64) int64 {
  1553  	return int64(int8(auxint >> 16))
  1554  }
  1555  
  1556  func GetPPC64Shiftmb(auxint int64) int64 {
  1557  	return int64(int8(auxint >> 8))
  1558  }
  1559  
  1560  func GetPPC64Shiftme(auxint int64) int64 {
  1561  	return int64(int8(auxint))
  1562  }
  1563  
   1564  // isPPC64WordRotateMask tests whether this value can be encoded as a mask for an
   1565  // rlwinm-like operation.  Masks may also extend from the msb and wrap around to
   1566  // the lsb.  That is, the valid masks are 32-bit strings
   1567  // of the form: 0..01..10..0 or 1..10..01..1 or 1...1
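         // For example, 0x00ff0000 and 0xff0000ff are valid masks, while 0x00ff00ff is not.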
  1568  func isPPC64WordRotateMask(v64 int64) bool {
  1569  	// Isolate rightmost 1 (if none 0) and add.
  1570  	v := uint32(v64)
  1571  	vp := (v & -v) + v
  1572  	// Likewise, for the wrapping case.
  1573  	vn := ^v
  1574  	vpn := (vn & -vn) + vn
  1575  	return (v&vp == 0 || vn&vpn == 0) && v != 0
  1576  }
  1577  
   1578  // encodePPC64RotateMask compresses the mask and shift into a single value of the form
  1579  // me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can
  1580  // be used to regenerate the input mask.
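         // For example, mask 0x000000ff with nbits 32 and rotate 0 encodes as mb=24 and me=32;
         // DecodePPC64RotateMask converts me back to the inclusive ISA value 31 and recovers the mask.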
  1581  func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
  1582  	var mb, me, mbn, men int
  1583  
  1584  	// Determine boundaries and then decode them
  1585  	if mask == 0 || ^mask == 0 || rotate >= nbits {
  1586  		panic(fmt.Sprintf("invalid PPC64 rotate mask: %x %d %d", uint64(mask), rotate, nbits))
  1587  	} else if nbits == 32 {
  1588  		mb = bits.LeadingZeros32(uint32(mask))
  1589  		me = 32 - bits.TrailingZeros32(uint32(mask))
  1590  		mbn = bits.LeadingZeros32(^uint32(mask))
  1591  		men = 32 - bits.TrailingZeros32(^uint32(mask))
  1592  	} else {
  1593  		mb = bits.LeadingZeros64(uint64(mask))
  1594  		me = 64 - bits.TrailingZeros64(uint64(mask))
  1595  		mbn = bits.LeadingZeros64(^uint64(mask))
  1596  		men = 64 - bits.TrailingZeros64(^uint64(mask))
  1597  	}
   1598  	// Check for a wrapping mask (e.g. bits set at 0 and 63).
  1599  	if mb == 0 && me == int(nbits) {
  1600  		// swap the inverted values
  1601  		mb, me = men, mbn
  1602  	}
  1603  
  1604  	return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
  1605  }
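
        // For example, encodePPC64RotateMask(3, 0xF0, 32) encodes rotate=3 with the
        // 32-bit mask 0xF0 (mb=24, me=28 before the ISA fixup) as 0x2003181C.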
  1606  
  1607  // Merge (RLDICL [encoded] (SRDconst [s] x)) into (RLDICL [new_encoded] x)
  1608  // SRDconst on PPC64 is an extended mnemonic of RLDICL. If the input to an
  1609  // RLDICL is an SRDconst, and the RLDICL does not rotate its value, the two
  1610  // operations can be combined. This function assumes the two opcodes can
  1611  // be merged, and returns an encoded rotate+mask value of the combined RLDICL.
  1612  func mergePPC64RLDICLandSRDconst(encoded, s int64) int64 {
  1613  	mb := s
  1614  	r := 64 - s
  1615  	// A larger mb is a smaller mask.
  1616  	if (encoded>>8)&0xFF < mb {
  1617  		encoded = (encoded &^ 0xFF00) | mb<<8
  1618  	}
  1619  	// The rotate is expected to be 0.
  1620  	if (encoded & 0xFF0000) != 0 {
  1621  		panic("non-zero rotate")
  1622  	}
  1623  	return encoded | r<<16
  1624  }
  1625  
  1626  // DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask.  The values returned as
  1627  // mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
  1628  func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
  1629  	auxint := uint64(sauxint)
  1630  	rotate = int64((auxint >> 16) & 0xFF)
  1631  	mb = int64((auxint >> 8) & 0xFF)
  1632  	me = int64((auxint >> 0) & 0xFF)
  1633  	nbits := int64((auxint >> 24) & 0xFF)
  1634  	mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
  1635  	if mb > me {
  1636  		mask = ^mask
  1637  	}
  1638  	if nbits == 32 {
  1639  		mask = uint64(uint32(mask))
  1640  	}
  1641  
  1642  	// Fixup ME to match ISA definition.  The second argument to MASK(..,me)
  1643  	// is inclusive.
  1644  	me = (me - 1) & (nbits - 1)
  1645  	return
  1646  }
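
        // For example, DecodePPC64RotateMask(0x2003181C) (the encoding produced above)
        // returns rotate=3, mb=24, me=27 and mask=0xF0 (MASK(24,27) == 0xF0 in the
        // ISA's msb-0 bit numbering).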
  1647  
  1648  // This verifies that the mask is a set of
  1649  // consecutive bits including the least
  1650  // significant bit.
  1651  func isPPC64ValidShiftMask(v int64) bool {
  1652  	if (v != 0) && ((v+1)&v) == 0 {
  1653  		return true
  1654  	}
  1655  	return false
  1656  }
  1657  
  1658  func getPPC64ShiftMaskLength(v int64) int64 {
  1659  	return int64(bits.Len64(uint64(v)))
  1660  }
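
        // For example, isPPC64ValidShiftMask(0x00FF) is true and
        // getPPC64ShiftMaskLength(0x00FF) is 8, while isPPC64ValidShiftMask(0xFF00)
        // is false because the mask does not include the least significant bit.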
  1661  
  1662  // Decompose a shift right into an equivalent rotate/mask,
  1663  // and return mask & m.
  1664  func mergePPC64RShiftMask(m, s, nbits int64) int64 {
  1665  	smask := uint64((1<<uint(nbits))-1) >> uint(s)
  1666  	return m & int64(smask)
  1667  }
  1668  
  1669  // Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
  1670  func mergePPC64AndSrwi(m, s int64) int64 {
  1671  	mask := mergePPC64RShiftMask(m, s, 32)
  1672  	if !isPPC64WordRotateMask(mask) {
  1673  		return 0
  1674  	}
  1675  	return encodePPC64RotateMask((32-s)&31, mask, 32)
  1676  }
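
        // For example, mergePPC64AndSrwi(0xFF, 8) merges into an RLWINM with
        // rotate 24 and mask 0xFF, while a mask that is no longer a valid rotate
        // mask after shifting yields 0.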
  1677  
  1678  // Combine (ANDconst [m] (SRDconst [s])) into (RLWINM [y]) or return 0
  1679  func mergePPC64AndSrdi(m, s int64) int64 {
  1680  	mask := mergePPC64RShiftMask(m, s, 64)
  1681  
  1682  	// Verify the rotate and mask result only uses the lower 32 bits.
  1683  	rv := bits.RotateLeft64(0xFFFFFFFF00000000, -int(s))
  1684  	if rv&uint64(mask) != 0 {
  1685  		return 0
  1686  	}
  1687  	if !isPPC64WordRotateMask(mask) {
  1688  		return 0
  1689  	}
  1690  	return encodePPC64RotateMask((32-s)&31, mask, 32)
  1691  }
  1692  
  1693  // Combine (ANDconst [m] (SLDconst [s])) into (RLWINM [y]) or return 0
  1694  func mergePPC64AndSldi(m, s int64) int64 {
  1695  	mask := -1 << s & m
  1696  
  1697  	// Verify the rotate and mask result only uses the lower 32 bits.
  1698  	rv := bits.RotateLeft64(0xFFFFFFFF00000000, int(s))
  1699  	if rv&uint64(mask) != 0 {
  1700  		return 0
  1701  	}
  1702  	if !isPPC64WordRotateMask(mask) {
  1703  		return 0
  1704  	}
  1705  	return encodePPC64RotateMask(s&31, mask, 32)
  1706  }
  1707  
  1708  // Test if a word shift right feeding into a CLRLSLDI can be merged into RLWINM.
  1709  // Return the encoded RLWINM constant, or 0 if they cannot be merged.
  1710  func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
  1711  	mask_1 := uint64(0xFFFFFFFF >> uint(srw))
  1712  	// for CLRLSLDI, it's more convenient to think of it as masking the leftmost bits, then rotating left.
  1713  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1714  
  1715  	// Rewrite mask to apply after the final left shift.
  1716  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
  1717  
  1718  	r_1 := 32 - srw
  1719  	r_2 := GetPPC64Shiftsh(sld)
  1720  	r_3 := (r_1 + r_2) & 31 // This can wrap.
  1721  
  1722  	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
  1723  		return 0
  1724  	}
  1725  	return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
  1726  }
  1727  
  1728  // Test if a doubleword shift right feeding into a CLRLSLDI can be merged into RLWINM.
  1729  // Return the encoded RLWINM constant, or 0 if they cannot be merged.
  1730  func mergePPC64ClrlsldiSrd(sld, srd int64) int64 {
  1731  	mask_1 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(srd)
  1732  	// for CLRLSLDI, it's more convenient to think of it as masking the leftmost bits, then rotating left.
  1733  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1734  
  1735  	// Rewrite mask to apply after the final left shift.
  1736  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
  1737  
  1738  	r_1 := 64 - srd
  1739  	r_2 := GetPPC64Shiftsh(sld)
  1740  	r_3 := (r_1 + r_2) & 63 // This can wrap.
  1741  
  1742  	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
  1743  		return 0
  1744  	}
  1745  	// This combine only works when selecting and shifting the lower 32 bits.
  1746  	v1 := bits.RotateLeft64(0xFFFFFFFF00000000, int(r_3))
  1747  	if v1&mask_3 != 0 {
  1748  		return 0
  1749  	}
  1750  	return encodePPC64RotateMask(int64(r_3&31), int64(mask_3), 32)
  1751  }
  1752  
  1753  // Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM.  Return
  1754  // the encoded RLWINM constant, or 0 if they cannot be merged.
  1755  func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
  1756  	r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
  1757  	// for CLRLSLDI, it's more convenient to think of it as masking the leftmost bits, then rotating left.
  1758  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1759  
  1760  	// combine the masks, and adjust for the final left shift.
  1761  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
  1762  	r_2 := GetPPC64Shiftsh(int64(sld))
  1763  	r_3 := (r_1 + r_2) & 31 // This can wrap.
  1764  
  1765  	// Verify the result is still a valid bitmask of <= 32 bits.
  1766  	if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
  1767  		return 0
  1768  	}
  1769  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1770  }
  1771  
  1772  // Test if RLWINM feeding into an ANDconst can be merged. Return the encoded RLWINM constant,
  1773  // or 0 if they cannot be merged.
  1774  func mergePPC64AndRlwinm(mask uint32, rlw int64) int64 {
  1775  	r, _, _, mask_rlw := DecodePPC64RotateMask(rlw)
  1776  	mask_out := (mask_rlw & uint64(mask))
  1777  
  1778  	// Verify the result is still a valid bitmask of <= 32 bits.
  1779  	if !isPPC64WordRotateMask(int64(mask_out)) {
  1780  		return 0
  1781  	}
  1782  	return encodePPC64RotateMask(r, int64(mask_out), 32)
  1783  }
  1784  
  1785  // Test if RLWINM opcode rlw clears the upper 32 bits of the
  1786  // result. Return rlw if it does, 0 otherwise.
  1787  func mergePPC64MovwzregRlwinm(rlw int64) int64 {
  1788  	_, mb, me, _ := DecodePPC64RotateMask(rlw)
  1789  	if mb > me {
  1790  		return 0
  1791  	}
  1792  	return rlw
  1793  }
  1794  
  1795  // Test if an AND feeding into a RLWINM can be merged. Return the encoded RLWINM constant,
  1796  // or 0 if they cannot be merged.
  1797  func mergePPC64RlwinmAnd(rlw int64, mask uint32) int64 {
  1798  	r, _, _, mask_rlw := DecodePPC64RotateMask(rlw)
  1799  
  1800  	// Rotate the input mask, combine with the rlwinm mask, and test if it is still a valid rlwinm mask.
  1801  	r_mask := bits.RotateLeft32(mask, int(r))
  1802  
  1803  	mask_out := (mask_rlw & uint64(r_mask))
  1804  
  1805  	// Verify the result is still a valid bitmask of <= 32 bits.
  1806  	if !isPPC64WordRotateMask(int64(mask_out)) {
  1807  		return 0
  1808  	}
  1809  	return encodePPC64RotateMask(r, int64(mask_out), 32)
  1810  }
  1811  
  1812  // Test if RLWINM feeding into SLDconst can be merged. Return the encoded RLWINM constant,
  1813  // or 0 if they cannot be merged.
  1814  func mergePPC64SldiRlwinm(sldi, rlw int64) int64 {
  1815  	r_1, mb, me, mask_1 := DecodePPC64RotateMask(rlw)
  1816  	if mb > me || mb < sldi {
  1817  		// Wrapping masks cannot be merged as the upper 32 bits are effectively undefined in this case.
  1818  		// Likewise, if mb is less than the shift amount, it cannot be merged.
  1819  		return 0
  1820  	}
  1821  	// combine the masks, and adjust for the final left shift.
  1822  	mask_3 := mask_1 << sldi
  1823  	r_3 := (r_1 + sldi) & 31 // This can wrap.
  1824  
  1825  	// Verify the result is still a valid bitmask of <= 32 bits.
  1826  	if uint64(uint32(mask_3)) != mask_3 {
  1827  		return 0
  1828  	}
  1829  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1830  }
  1831  
  1832  // Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
  1833  // or return 0 if they cannot be combined.
  1834  func mergePPC64SldiSrw(sld, srw int64) int64 {
  1835  	if sld > srw || srw >= 32 {
  1836  		return 0
  1837  	}
  1838  	mask_r := uint32(0xFFFFFFFF) >> uint(srw)
  1839  	mask_l := uint32(0xFFFFFFFF) >> uint(sld)
  1840  	mask := (mask_r & mask_l) << uint(sld)
  1841  	return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
  1842  }
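
        // For example, mergePPC64SldiSrw(8, 16) merges (SLDconst [8] (SRWconst [16] x))
        // into an RLWINM with rotate 24 and mask 0x00FFFF00.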
  1843  
  1844  // Convert a PPC64 opcode from the Op to OpCC form. This converts (op x y)
  1845  // to (Select0 (opCC x y)) without having to explicitly fixup every user
  1846  // of op.
  1847  //
  1848  // E.g. consider the case:
  1849  // a = (ADD x y)
  1850  // b = (CMPconst [0] a)
  1851  // c = (OR a z)
  1852  //
  1853  // A rule like (CMPconst [0] (ADD x y)) => (CMPconst [0] (Select0 (ADDCC x y)))
  1854  // would produce:
  1855  // a  = (ADD x y)
  1856  // a' = (ADDCC x y)
  1857  // a” = (Select0 a')
  1858  // b  = (CMPconst [0] a”)
  1859  // c  = (OR a z)
  1860  //
  1861  // which makes it impossible to rewrite the second user. Instead the result
  1862  // of this conversion is:
  1863  // a' = (ADDCC x y)
  1864  // a  = (Select0 a')
  1865  // b  = (CMPconst [0] a)
  1866  // c  = (OR a z)
  1867  //
  1868  // Which makes it trivial to rewrite b using a lowering rule.
  1869  func convertPPC64OpToOpCC(op *Value) *Value {
  1870  	ccOpMap := map[Op]Op{
  1871  		OpPPC64ADD:      OpPPC64ADDCC,
  1872  		OpPPC64ADDconst: OpPPC64ADDCCconst,
  1873  		OpPPC64AND:      OpPPC64ANDCC,
  1874  		OpPPC64ANDN:     OpPPC64ANDNCC,
  1875  		OpPPC64ANDconst: OpPPC64ANDCCconst,
  1876  		OpPPC64CNTLZD:   OpPPC64CNTLZDCC,
  1877  		OpPPC64MULHDU:   OpPPC64MULHDUCC,
  1878  		OpPPC64NEG:      OpPPC64NEGCC,
  1879  		OpPPC64NOR:      OpPPC64NORCC,
  1880  		OpPPC64OR:       OpPPC64ORCC,
  1881  		OpPPC64RLDICL:   OpPPC64RLDICLCC,
  1882  		OpPPC64SUB:      OpPPC64SUBCC,
  1883  		OpPPC64XOR:      OpPPC64XORCC,
  1884  	}
  1885  	b := op.Block
  1886  	opCC := b.NewValue0I(op.Pos, ccOpMap[op.Op], types.NewTuple(op.Type, types.TypeFlags), op.AuxInt)
  1887  	opCC.AddArgs(op.Args...)
  1888  	op.reset(OpSelect0)
  1889  	op.AddArgs(opCC)
  1890  	return op
  1891  }
  1892  
  1893  // Try converting a RLDICL to ANDCC. If successful, return the mask, otherwise 0.
  1894  func convertPPC64RldiclAndccconst(sauxint int64) int64 {
  1895  	r, _, _, mask := DecodePPC64RotateMask(sauxint)
  1896  	if r != 0 || mask&0xFFFF != mask {
  1897  		return 0
  1898  	}
  1899  	return int64(mask)
  1900  }
  1901  
  1902  // Convenience function to rotate a 32 bit constant value by another constant.
  1903  func rotateLeft32(v, rotate int64) int64 {
  1904  	return int64(bits.RotateLeft32(uint32(v), int(rotate)))
  1905  }
  1906  
  1907  func rotateRight64(v, rotate int64) int64 {
  1908  	return int64(bits.RotateLeft64(uint64(v), int(-rotate)))
  1909  }
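
        // For example, rotateLeft32(0x80000001, 1) returns 0x00000003, and
        // rotateRight64(0xF, 4) returns an int64 with bit pattern 0xF000000000000000.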
  1910  
  1911  // encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
  1912  func armBFAuxInt(lsb, width int64) arm64BitField {
  1913  	if lsb < 0 || lsb > 63 {
  1914  		panic("ARM(64) bit field lsb constant out of range")
  1915  	}
  1916  	if width < 1 || lsb+width > 64 {
  1917  		panic("ARM(64) bit field width constant out of range")
  1918  	}
  1919  	return arm64BitField(width | lsb<<8)
  1920  }
  1921  
  1922  // returns the lsb part of the auxInt field of arm64 bitfield ops.
  1923  func (bfc arm64BitField) lsb() int64 {
  1924  	return int64(uint64(bfc) >> 8)
  1925  }
  1926  
  1927  // returns the width part of the auxInt field of arm64 bitfield ops.
  1928  func (bfc arm64BitField) width() int64 {
  1929  	return int64(bfc) & 0xff
  1930  }
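
        // For example, armBFAuxInt(8, 16) encodes lsb=8, width=16 as 0x0810;
        // its lsb() is 8 and its width() is 16.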
  1931  
  1932  // checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
  1933  func isARM64BFMask(lsb, mask, rshift int64) bool {
  1934  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1935  	return shiftedMask != 0 && isPowerOfTwo(shiftedMask+1) && nto(shiftedMask)+lsb < 64
  1936  }
  1937  
  1938  // returns the bitfield width of mask >> rshift for arm64 bitfield ops.
  1939  func arm64BFWidth(mask, rshift int64) int64 {
  1940  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1941  	if shiftedMask == 0 {
  1942  		panic("ARM64 BF mask is zero")
  1943  	}
  1944  	return nto(shiftedMask)
  1945  }
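
        // For example, with mask 0xFF0 and rshift 4 the shifted mask is 0xFF,
        // so isARM64BFMask(8, 0xFF0, 4) is true and arm64BFWidth(0xFF0, 4) is 8.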
  1946  
  1947  // registerizable reports whether t is a primitive type that fits in
  1948  // a register. It assumes float64 values will always fit into registers
  1949  // even if that isn't strictly true.
  1950  func registerizable(b *Block, typ *types.Type) bool {
  1951  	if typ.IsPtrShaped() || typ.IsFloat() || typ.IsBoolean() {
  1952  		return true
  1953  	}
  1954  	if typ.IsInteger() {
  1955  		return typ.Size() <= b.Func.Config.RegSize
  1956  	}
  1957  	return false
  1958  }
  1959  
  1960  // needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
  1961  func needRaceCleanup(sym *AuxCall, v *Value) bool {
  1962  	f := v.Block.Func
  1963  	if !f.Config.Race {
  1964  		return false
  1965  	}
  1966  	if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
  1967  		return false
  1968  	}
  1969  	for _, b := range f.Blocks {
  1970  		for _, v := range b.Values {
  1971  			switch v.Op {
  1972  			case OpStaticCall, OpStaticLECall:
  1973  				// The check for racefuncenter will also encounter racefuncexit, and vice versa.
  1974  				// Allow calls to panic*.
  1975  				s := v.Aux.(*AuxCall).Fn.String()
  1976  				switch s {
  1977  				case "runtime.racefuncenter", "runtime.racefuncexit",
  1978  					"runtime.panicdivide", "runtime.panicwrap",
  1979  					"runtime.panicshift":
  1980  					continue
  1981  				}
  1982  				// If we encountered any call, we need to keep racefunc*,
  1983  				// for accurate stacktraces.
  1984  				return false
  1985  			case OpPanicBounds, OpPanicExtend:
  1986  				// Note: these are panic generators that are ok (like the static calls above).
  1987  			case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
  1988  				// We must keep the race functions if there are any other call types.
  1989  				return false
  1990  			}
  1991  		}
  1992  	}
  1993  	if isSameCall(sym, "runtime.racefuncenter") {
  1994  		// TODO REGISTER ABI this needs to be cleaned up.
  1995  		// If we're removing racefuncenter, remove its argument as well.
  1996  		if v.Args[0].Op != OpStore {
  1997  			if v.Op == OpStaticLECall {
  1998  				// there is no store, yet.
  1999  				return true
  2000  			}
  2001  			return false
  2002  		}
  2003  		mem := v.Args[0].Args[2]
  2004  		v.Args[0].reset(OpCopy)
  2005  		v.Args[0].AddArg(mem)
  2006  	}
  2007  	return true
  2008  }
  2009  
  2010  // symIsRO reports whether sym is a read-only global.
  2011  func symIsRO(sym Sym) bool {
  2012  	lsym := sym.(*obj.LSym)
  2013  	return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
  2014  }
  2015  
  2016  // symIsROZero reports whether sym is a read-only global whose data contains all zeros.
  2017  func symIsROZero(sym Sym) bool {
  2018  	lsym := sym.(*obj.LSym)
  2019  	if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
  2020  		return false
  2021  	}
  2022  	for _, b := range lsym.P {
  2023  		if b != 0 {
  2024  			return false
  2025  		}
  2026  	}
  2027  	return true
  2028  }
  2029  
  2030  // isFixed32 returns true if the int32 at offset off in symbol sym
  2031  // is known and constant.
  2032  func isFixed32(c *Config, sym Sym, off int64) bool {
  2033  	return isFixed(c, sym, off, 4)
  2034  }
  2035  
  2036  // isFixed returns true if the range [off,off+size] of the symbol sym
  2037  // is known and constant.
  2038  func isFixed(c *Config, sym Sym, off, size int64) bool {
  2039  	lsym := sym.(*obj.LSym)
  2040  	if lsym.Extra == nil {
  2041  		return false
  2042  	}
  2043  	if _, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
  2044  		if off == 2*c.PtrSize && size == 4 {
  2045  			return true // type hash field
  2046  		}
  2047  	}
  2048  	return false
  2049  }
  2050  func fixed32(c *Config, sym Sym, off int64) int32 {
  2051  	lsym := sym.(*obj.LSym)
  2052  	if ti, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
  2053  		if off == 2*c.PtrSize {
  2054  			return int32(types.TypeHash(ti.Type.(*types.Type)))
  2055  		}
  2056  	}
  2057  	base.Fatalf("fixed32 data not known for %s:%d", sym, off)
  2058  	return 0
  2059  }
  2060  
  2061  // isFixedSym returns true if the contents of sym at the given offset
  2062  // is known and is the constant address of another symbol.
  2063  func isFixedSym(sym Sym, off int64) bool {
  2064  	lsym := sym.(*obj.LSym)
  2065  	switch {
  2066  	case lsym.Type == objabi.SRODATA:
  2067  		// itabs, dictionaries
  2068  	default:
  2069  		return false
  2070  	}
  2071  	for _, r := range lsym.R {
  2072  		if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off && r.Add == 0 {
  2073  			return true
  2074  		}
  2075  	}
  2076  	return false
  2077  }
  2078  func fixedSym(f *Func, sym Sym, off int64) Sym {
  2079  	lsym := sym.(*obj.LSym)
  2080  	for _, r := range lsym.R {
  2081  		if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off {
  2082  			if strings.HasPrefix(r.Sym.Name, "type:") {
  2083  				// In case we're loading a type out of a dictionary, we need to record
  2084  				// that the containing function might put that type in an interface.
  2085  				// That information is currently recorded in relocations in the dictionary,
  2086  				// but if we perform this load at compile time then the dictionary
  2087  				// might be dead.
  2088  				reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
  2089  			} else if strings.HasPrefix(r.Sym.Name, "go:itab") {
  2090  				// Same, but if we're using an itab we need to record that the
  2091  				// itab._type might be put in an interface.
  2092  				reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
  2093  			}
  2094  			return r.Sym
  2095  		}
  2096  	}
  2097  	base.Fatalf("fixedSym data not known for %s:%d", sym, off)
  2098  	return nil
  2099  }
  2100  
  2101  // read8 reads one byte from the read-only global sym at offset off.
  2102  func read8(sym Sym, off int64) uint8 {
  2103  	lsym := sym.(*obj.LSym)
  2104  	if off >= int64(len(lsym.P)) || off < 0 {
  2105  		// Invalid index into the global sym.
  2106  		// This can happen in dead code, so we don't want to panic.
  2107  		// Just return any value, it will eventually get ignored.
  2108  		// See issue 29215.
  2109  		return 0
  2110  	}
  2111  	return lsym.P[off]
  2112  }
  2113  
  2114  // read16 reads two bytes from the read-only global sym at offset off.
  2115  func read16(sym Sym, off int64, byteorder binary.ByteOrder) uint16 {
  2116  	lsym := sym.(*obj.LSym)
  2117  	// lsym.P is written lazily.
  2118  	// Bytes requested after the end of lsym.P are 0.
  2119  	var src []byte
  2120  	if 0 <= off && off < int64(len(lsym.P)) {
  2121  		src = lsym.P[off:]
  2122  	}
  2123  	buf := make([]byte, 2)
  2124  	copy(buf, src)
  2125  	return byteorder.Uint16(buf)
  2126  }
  2127  
  2128  // read32 reads four bytes from the read-only global sym at offset off.
  2129  func read32(sym Sym, off int64, byteorder binary.ByteOrder) uint32 {
  2130  	lsym := sym.(*obj.LSym)
  2131  	var src []byte
  2132  	if 0 <= off && off < int64(len(lsym.P)) {
  2133  		src = lsym.P[off:]
  2134  	}
  2135  	buf := make([]byte, 4)
  2136  	copy(buf, src)
  2137  	return byteorder.Uint32(buf)
  2138  }
  2139  
  2140  // read64 reads eight bytes from the read-only global sym at offset off.
  2141  func read64(sym Sym, off int64, byteorder binary.ByteOrder) uint64 {
  2142  	lsym := sym.(*obj.LSym)
  2143  	var src []byte
  2144  	if 0 <= off && off < int64(len(lsym.P)) {
  2145  		src = lsym.P[off:]
  2146  	}
  2147  	buf := make([]byte, 8)
  2148  	copy(buf, src)
  2149  	return byteorder.Uint64(buf)
  2150  }
  2151  
  2152  // sequentialAddresses reports true if it can prove that x + n == y
  2153  func sequentialAddresses(x, y *Value, n int64) bool {
  2154  	if x == y && n == 0 {
  2155  		return true
  2156  	}
  2157  	if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
  2158  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2159  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2160  		return true
  2161  	}
  2162  	if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
  2163  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2164  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2165  		return true
  2166  	}
  2167  	if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
  2168  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2169  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2170  		return true
  2171  	}
  2172  	if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
  2173  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2174  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2175  		return true
  2176  	}
  2177  	return false
  2178  }
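
        // For example, (ADDQ x y) and (LEAQ1 [8] x y) are sequential with n=8:
        // the second address is exactly 8 bytes past the first.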
  2179  
  2180  // flagConstant represents the result of a compile-time comparison.
  2181  // The sense of these flags does not necessarily represent the hardware's notion
  2182  // of a flags register - these are just a compile-time construct.
  2183  // We happen to match the semantics to those of arm/arm64.
  2184  // Note that these semantics differ from x86: the carry flag has the opposite
  2185  // sense on a subtraction!
  2186  //
  2187  //	On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
  2188  //	On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
  2189  //	 (because it does x + ^y + C).
  2190  //
  2191  // See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
  2192  type flagConstant uint8
  2193  
  2194  // N reports whether the result of an operation is negative (high bit set).
  2195  func (fc flagConstant) N() bool {
  2196  	return fc&1 != 0
  2197  }
  2198  
  2199  // Z reports whether the result of an operation is 0.
  2200  func (fc flagConstant) Z() bool {
  2201  	return fc&2 != 0
  2202  }
  2203  
  2204  // C reports whether an unsigned add overflowed (carry), or an
  2205  // unsigned subtract did not underflow (borrow).
  2206  func (fc flagConstant) C() bool {
  2207  	return fc&4 != 0
  2208  }
  2209  
  2210  // V reports whether a signed operation overflowed or underflowed.
  2211  func (fc flagConstant) V() bool {
  2212  	return fc&8 != 0
  2213  }
  2214  
  2215  func (fc flagConstant) eq() bool {
  2216  	return fc.Z()
  2217  }
  2218  func (fc flagConstant) ne() bool {
  2219  	return !fc.Z()
  2220  }
  2221  func (fc flagConstant) lt() bool {
  2222  	return fc.N() != fc.V()
  2223  }
  2224  func (fc flagConstant) le() bool {
  2225  	return fc.Z() || fc.lt()
  2226  }
  2227  func (fc flagConstant) gt() bool {
  2228  	return !fc.Z() && fc.ge()
  2229  }
  2230  func (fc flagConstant) ge() bool {
  2231  	return fc.N() == fc.V()
  2232  }
  2233  func (fc flagConstant) ult() bool {
  2234  	return !fc.C()
  2235  }
  2236  func (fc flagConstant) ule() bool {
  2237  	return fc.Z() || fc.ult()
  2238  }
  2239  func (fc flagConstant) ugt() bool {
  2240  	return !fc.Z() && fc.uge()
  2241  }
  2242  func (fc flagConstant) uge() bool {
  2243  	return fc.C()
  2244  }
  2245  
  2246  func (fc flagConstant) ltNoov() bool {
  2247  	return fc.lt() && !fc.V()
  2248  }
  2249  func (fc flagConstant) leNoov() bool {
  2250  	return fc.le() && !fc.V()
  2251  }
  2252  func (fc flagConstant) gtNoov() bool {
  2253  	return fc.gt() && !fc.V()
  2254  }
  2255  func (fc flagConstant) geNoov() bool {
  2256  	return fc.ge() && !fc.V()
  2257  }
  2258  
  2259  func (fc flagConstant) String() string {
  2260  	return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
  2261  }
  2262  
  2263  type flagConstantBuilder struct {
  2264  	N bool
  2265  	Z bool
  2266  	C bool
  2267  	V bool
  2268  }
  2269  
  2270  func (fcs flagConstantBuilder) encode() flagConstant {
  2271  	var fc flagConstant
  2272  	if fcs.N {
  2273  		fc |= 1
  2274  	}
  2275  	if fcs.Z {
  2276  		fc |= 2
  2277  	}
  2278  	if fcs.C {
  2279  		fc |= 4
  2280  	}
  2281  	if fcs.V {
  2282  		fc |= 8
  2283  	}
  2284  	return fc
  2285  }
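
        // The encoding uses bit 0 for N, bit 1 for Z, bit 2 for C and bit 3 for V.
        // For example, flagConstantBuilder{Z: true, C: true}.encode() == 6.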
  2286  
  2287  // Note: addFlags(x,y) != subFlags(x,-y) in some situations:
  2288  //  - the results of the C flag are different
  2289  //  - the results of the V flag when y==minint are different
  2290  
  2291  // addFlags64 returns the flags that would be set from computing x+y.
  2292  func addFlags64(x, y int64) flagConstant {
  2293  	var fcb flagConstantBuilder
  2294  	fcb.Z = x+y == 0
  2295  	fcb.N = x+y < 0
  2296  	fcb.C = uint64(x+y) < uint64(x)
  2297  	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
  2298  	return fcb.encode()
  2299  }
  2300  
  2301  // subFlags64 returns the flags that would be set from computing x-y.
  2302  func subFlags64(x, y int64) flagConstant {
  2303  	var fcb flagConstantBuilder
  2304  	fcb.Z = x-y == 0
  2305  	fcb.N = x-y < 0
  2306  	fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
  2307  	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
  2308  	return fcb.encode()
  2309  }
  2310  
  2311  // addFlags32 returns the flags that would be set from computing x+y.
  2312  func addFlags32(x, y int32) flagConstant {
  2313  	var fcb flagConstantBuilder
  2314  	fcb.Z = x+y == 0
  2315  	fcb.N = x+y < 0
  2316  	fcb.C = uint32(x+y) < uint32(x)
  2317  	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
  2318  	return fcb.encode()
  2319  }
  2320  
  2321  // subFlags32 returns the flags that would be set from computing x-y.
  2322  func subFlags32(x, y int32) flagConstant {
  2323  	var fcb flagConstantBuilder
  2324  	fcb.Z = x-y == 0
  2325  	fcb.N = x-y < 0
  2326  	fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
  2327  	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
  2328  	return fcb.encode()
  2329  }
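
        // For example, subFlags32(1, 2) computes -1, so N is set, Z is clear,
        // C is clear (a borrow occurred, following the arm model) and V is clear;
        // hence lt() and ult() both report true.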
  2330  
  2331  // logicFlags64 returns flags set to the sign/zeroness of x.
  2332  // C and V are set to false.
  2333  func logicFlags64(x int64) flagConstant {
  2334  	var fcb flagConstantBuilder
  2335  	fcb.Z = x == 0
  2336  	fcb.N = x < 0
  2337  	return fcb.encode()
  2338  }
  2339  
  2340  // logicFlags32 returns flags set to the sign/zeroness of x.
  2341  // C and V are set to false.
  2342  func logicFlags32(x int32) flagConstant {
  2343  	var fcb flagConstantBuilder
  2344  	fcb.Z = x == 0
  2345  	fcb.N = x < 0
  2346  	return fcb.encode()
  2347  }
  2348  
  2349  func makeJumpTableSym(b *Block) *obj.LSym {
  2350  	s := base.Ctxt.Lookup(fmt.Sprintf("%s.jump%d", b.Func.fe.Func().LSym.Name, b.ID))
  2351  	// The jump table symbol is accessed only from the function symbol.
  2352  	s.Set(obj.AttrStatic, true)
  2353  	return s
  2354  }
  2355  
  2356  // canRotate reports whether the architecture supports
  2357  // rotates of integer registers with the given number of bits.
  2358  func canRotate(c *Config, bits int64) bool {
  2359  	if bits > c.PtrSize*8 {
  2360  		// Don't rewrite to rotates bigger than the machine word.
  2361  		return false
  2362  	}
  2363  	switch c.arch {
  2364  	case "386", "amd64", "arm64", "loong64", "riscv64":
  2365  		return true
  2366  	case "arm", "s390x", "ppc64", "ppc64le", "wasm":
  2367  		return bits >= 32
  2368  	default:
  2369  		return false
  2370  	}
  2371  }
  2372  
  2373  // isARM64bitcon reports whether a constant can be encoded into a logical instruction.
  2374  func isARM64bitcon(x uint64) bool {
  2375  	if x == 1<<64-1 || x == 0 {
  2376  		return false
  2377  	}
  2378  	// determine the period and sign-extend a unit to 64 bits
  2379  	switch {
  2380  	case x != x>>32|x<<32:
  2381  		// period is 64
  2382  		// nothing to do
  2383  	case x != x>>16|x<<48:
  2384  		// period is 32
  2385  		x = uint64(int64(int32(x)))
  2386  	case x != x>>8|x<<56:
  2387  		// period is 16
  2388  		x = uint64(int64(int16(x)))
  2389  	case x != x>>4|x<<60:
  2390  		// period is 8
  2391  		x = uint64(int64(int8(x)))
  2392  	default:
  2393  		// period is 4 or 2, always true
  2394  		// 0001, 0010, 0100, 1000 -- 0001 rotate
  2395  		// 0011, 0110, 1100, 1001 -- 0011 rotate
  2396  		// 0111, 1011, 1101, 1110 -- 0111 rotate
  2397  		// 0101, 1010             -- 01   rotate, repeat
  2398  		return true
  2399  	}
  2400  	return sequenceOfOnes(x) || sequenceOfOnes(^x)
  2401  }
  2402  
  2403  // sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros.
  2404  func sequenceOfOnes(x uint64) bool {
  2405  	y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2
  2406  	y += x
  2407  	return (y-1)&y == 0
  2408  }
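
        // For example, sequenceOfOnes(0x0FF0) is true, while sequenceOfOnes(0x0F0F)
        // is false because its ones are not contiguous.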
  2409  
  2410  // isARM64addcon reports whether x can be encoded as the immediate value in an ADD or SUB instruction.
  2411  func isARM64addcon(v int64) bool {
  2412  	/* uimm12 or uimm24? */
  2413  	if v < 0 {
  2414  		return false
  2415  	}
  2416  	if (v & 0xFFF) == 0 {
  2417  		v >>= 12
  2418  	}
  2419  	return v <= 0xFFF
  2420  }
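
        // For example, 0xFFF (a uimm12) and 0xFFF000 (a uimm12 shifted left by 12)
        // are encodable, while 0x1001 and negative values are not.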
  2421  
  2422  // setPos sets the position of v to pos, then returns true.
  2423  // Useful for setting the position of a rewrite's result to
  2424  // something other than the default.
  2425  func setPos(v *Value, pos src.XPos) bool {
  2426  	v.Pos = pos
  2427  	return true
  2428  }
  2429  
  2430  // isNonNegative reports whether v is known to be greater or equal to zero.
  2431  // Note that this is pretty simplistic. The prove pass generates more detailed
  2432  // nonnegative information about values.
  2433  func isNonNegative(v *Value) bool {
  2434  	if !v.Type.IsInteger() {
  2435  		v.Fatalf("isNonNegative bad type: %v", v.Type)
  2436  	}
  2437  	// TODO: return true if !v.Type.IsSigned()
  2438  	// SSA isn't type-safe enough to do that now (issue 37753).
  2439  	// The checks below depend only on the pattern of bits.
  2440  
  2441  	switch v.Op {
  2442  	case OpConst64:
  2443  		return v.AuxInt >= 0
  2444  
  2445  	case OpConst32:
  2446  		return int32(v.AuxInt) >= 0
  2447  
  2448  	case OpConst16:
  2449  		return int16(v.AuxInt) >= 0
  2450  
  2451  	case OpConst8:
  2452  		return int8(v.AuxInt) >= 0
  2453  
  2454  	case OpStringLen, OpSliceLen, OpSliceCap,
  2455  		OpZeroExt8to64, OpZeroExt16to64, OpZeroExt32to64,
  2456  		OpZeroExt8to32, OpZeroExt16to32, OpZeroExt8to16,
  2457  		OpCtz64, OpCtz32, OpCtz16, OpCtz8,
  2458  		OpCtz64NonZero, OpCtz32NonZero, OpCtz16NonZero, OpCtz8NonZero,
  2459  		OpBitLen64, OpBitLen32, OpBitLen16, OpBitLen8:
  2460  		return true
  2461  
  2462  	case OpRsh64Ux64, OpRsh32Ux64:
  2463  		by := v.Args[1]
  2464  		return by.Op == OpConst64 && by.AuxInt > 0
  2465  
  2466  	case OpRsh64x64, OpRsh32x64, OpRsh8x64, OpRsh16x64, OpRsh32x32, OpRsh64x32,
  2467  		OpSignExt32to64, OpSignExt16to64, OpSignExt8to64, OpSignExt16to32, OpSignExt8to32:
  2468  		return isNonNegative(v.Args[0])
  2469  
  2470  	case OpAnd64, OpAnd32, OpAnd16, OpAnd8:
  2471  		return isNonNegative(v.Args[0]) || isNonNegative(v.Args[1])
  2472  
  2473  	case OpMod64, OpMod32, OpMod16, OpMod8,
  2474  		OpDiv64, OpDiv32, OpDiv16, OpDiv8,
  2475  		OpOr64, OpOr32, OpOr16, OpOr8,
  2476  		OpXor64, OpXor32, OpXor16, OpXor8:
  2477  		return isNonNegative(v.Args[0]) && isNonNegative(v.Args[1])
  2478  
  2479  		// We could handle OpPhi here, but the improvements from doing
  2480  		// so are very minor, and it is neither simple nor cheap.
  2481  	}
  2482  	return false
  2483  }
  2484  
  2485  func rewriteStructLoad(v *Value) *Value {
  2486  	b := v.Block
  2487  	ptr := v.Args[0]
  2488  	mem := v.Args[1]
  2489  
  2490  	t := v.Type
  2491  	args := make([]*Value, t.NumFields())
  2492  	for i := range args {
  2493  		ft := t.FieldType(i)
  2494  		addr := b.NewValue1I(v.Pos, OpOffPtr, ft.PtrTo(), t.FieldOff(i), ptr)
  2495  		args[i] = b.NewValue2(v.Pos, OpLoad, ft, addr, mem)
  2496  	}
  2497  
  2498  	v.reset(OpStructMake)
  2499  	v.AddArgs(args...)
  2500  	return v
  2501  }
  2502  
  2503  func rewriteStructStore(v *Value) *Value {
  2504  	b := v.Block
  2505  	dst := v.Args[0]
  2506  	x := v.Args[1]
  2507  	if x.Op != OpStructMake {
  2508  		base.Fatalf("invalid struct store: %v", x)
  2509  	}
  2510  	mem := v.Args[2]
  2511  
  2512  	t := x.Type
  2513  	for i, arg := range x.Args {
  2514  		ft := t.FieldType(i)
  2515  
  2516  		addr := b.NewValue1I(v.Pos, OpOffPtr, ft.PtrTo(), t.FieldOff(i), dst)
  2517  		mem = b.NewValue3A(v.Pos, OpStore, types.TypeMem, typeToAux(ft), addr, arg, mem)
  2518  	}
  2519  
  2520  	return mem
  2521  }
  2522  
  2523  // isDirectType reports whether v represents a type
  2524  // (a *runtime._type) whose value is stored directly in an
  2525  // interface (i.e., is pointer or pointer-like).
  2526  func isDirectType(v *Value) bool {
  2527  	return isDirectType1(v)
  2528  }
  2529  
  2530  // v is a type
  2531  func isDirectType1(v *Value) bool {
  2532  	switch v.Op {
  2533  	case OpITab:
  2534  		return isDirectType2(v.Args[0])
  2535  	case OpAddr:
  2536  		lsym := v.Aux.(*obj.LSym)
  2537  		if lsym.Extra == nil {
  2538  			return false
  2539  		}
  2540  		if ti, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
  2541  			return types.IsDirectIface(ti.Type.(*types.Type))
  2542  		}
  2543  	}
  2544  	return false
  2545  }
  2546  
  2547  // v is an empty interface
  2548  func isDirectType2(v *Value) bool {
  2549  	switch v.Op {
  2550  	case OpIMake:
  2551  		return isDirectType1(v.Args[0])
  2552  	}
  2553  	return false
  2554  }
  2555  
  2556  // isDirectIface reports whether v represents an itab
  2557  // (a *runtime._itab) for a type whose value is stored directly
  2558  // in an interface (i.e., is pointer or pointer-like).
  2559  func isDirectIface(v *Value) bool {
  2560  	return isDirectIface1(v, 9)
  2561  }
  2562  
  2563  // v is an itab
  2564  func isDirectIface1(v *Value, depth int) bool {
  2565  	if depth == 0 {
  2566  		return false
  2567  	}
  2568  	switch v.Op {
  2569  	case OpITab:
  2570  		return isDirectIface2(v.Args[0], depth-1)
  2571  	case OpAddr:
  2572  		lsym := v.Aux.(*obj.LSym)
  2573  		if lsym.Extra == nil {
  2574  			return false
  2575  		}
  2576  		if ii, ok := (*lsym.Extra).(*obj.ItabInfo); ok {
  2577  			return types.IsDirectIface(ii.Type.(*types.Type))
  2578  		}
  2579  	case OpConstNil:
  2580  		// We can treat this as direct, because if the itab is
  2581  		// nil, the data field must be nil also.
  2582  		return true
  2583  	}
  2584  	return false
  2585  }
  2586  
  2587  // v is an interface
  2588  func isDirectIface2(v *Value, depth int) bool {
  2589  	if depth == 0 {
  2590  		return false
  2591  	}
  2592  	switch v.Op {
  2593  	case OpIMake:
  2594  		return isDirectIface1(v.Args[0], depth-1)
  2595  	case OpPhi:
  2596  		for _, a := range v.Args {
  2597  			if !isDirectIface2(a, depth-1) {
  2598  				return false
  2599  			}
  2600  		}
  2601  		return true
  2602  	}
  2603  	return false
  2604  }
  2605  
  2606  func bitsAdd64(x, y, carry int64) (r struct{ sum, carry int64 }) {
  2607  	s, c := bits.Add64(uint64(x), uint64(y), uint64(carry))
  2608  	r.sum, r.carry = int64(s), int64(c)
  2609  	return
  2610  }
  2611  
  2612  func bitsMulU64(x, y int64) (r struct{ hi, lo int64 }) {
  2613  	hi, lo := bits.Mul64(uint64(x), uint64(y))
  2614  	r.hi, r.lo = int64(hi), int64(lo)
  2615  	return
  2616  }
  2617  func bitsMulU32(x, y int32) (r struct{ hi, lo int32 }) {
  2618  	hi, lo := bits.Mul32(uint32(x), uint32(y))
  2619  	r.hi, r.lo = int32(hi), int32(lo)
  2620  	return
  2621  }
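
        // These helpers reinterpret their signed arguments as unsigned so that
        // math/bits can be used for constant folding. For example,
        // bitsMulU32(-1, -1) multiplies 0xFFFFFFFF by itself and returns hi=-2, lo=1.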
  2622  
  2623  // flagify rewrites v which is (X ...) to (Select0 (Xflags ...)).
  2624  func flagify(v *Value) bool {
  2625  	var flagVersion Op
  2626  	switch v.Op {
  2627  	case OpAMD64ADDQconst:
  2628  		flagVersion = OpAMD64ADDQconstflags
  2629  	case OpAMD64ADDLconst:
  2630  		flagVersion = OpAMD64ADDLconstflags
  2631  	default:
  2632  		base.Fatalf("can't flagify op %s", v.Op)
  2633  	}
  2634  	inner := v.copyInto(v.Block)
  2635  	inner.Op = flagVersion
  2636  	inner.Type = types.NewTuple(v.Type, types.TypeFlags)
  2637  	v.reset(OpSelect0)
  2638  	v.AddArg(inner)
  2639  	return true
  2640  }
  2641  
