Source file src/cmd/compile/internal/ssa/rewrite.go

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import (
     8  	"cmd/compile/internal/base"
     9  	"cmd/compile/internal/logopt"
    10  	"cmd/compile/internal/reflectdata"
    11  	"cmd/compile/internal/rttype"
    12  	"cmd/compile/internal/types"
    13  	"cmd/internal/obj"
    14  	"cmd/internal/obj/s390x"
    15  	"cmd/internal/objabi"
    16  	"cmd/internal/src"
    17  	"encoding/binary"
    18  	"fmt"
    19  	"internal/buildcfg"
    20  	"io"
    21  	"math"
    22  	"math/bits"
    23  	"os"
    24  	"path/filepath"
    25  	"strings"
    26  )
    27  
    28  type deadValueChoice bool
    29  
    30  const (
    31  	leaveDeadValues  deadValueChoice = false
    32  	removeDeadValues                 = true
    33  
    34  	repZeroThreshold = 1408 // size beyond which we use REP STOS for zeroing
    35  	repMoveThreshold = 1408 // size beyond which we use REP MOVS for copying
    36  )
    37  
    38  // applyRewrite repeatedly applies rb and rv to f. deadcode indicates whether rewrite should try to remove any values that become dead.
    39  func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
    40  	// repeat rewrites until we find no more rewrites
    41  	pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
    42  	pendingLines.clear()
    43  	debug := f.pass.debug
    44  	if debug > 1 {
    45  		fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
    46  	}
    47  	// if the number of rewrite iterations reaches itersLimit we will
    48  	// at that point turn on cycle detection. Instead of a fixed limit,
    49  	// size the limit according to func size to allow for cases such
    50  	// as the one in issue #66773.
    51  	itersLimit := f.NumBlocks()
    52  	if itersLimit < 20 {
    53  		itersLimit = 20
    54  	}
    55  	var iters int
    56  	var states map[string]bool
    57  	for {
    58  		change := false
    59  		deadChange := false
    60  		for _, b := range f.Blocks {
    61  			var b0 *Block
    62  			if debug > 1 {
    63  				b0 = new(Block)
    64  				*b0 = *b
    65  				b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
    66  			}
    67  			for i, c := range b.ControlValues() {
    68  				for c.Op == OpCopy {
    69  					c = c.Args[0]
    70  					b.ReplaceControl(i, c)
    71  				}
    72  			}
    73  			if rb(b) {
    74  				change = true
    75  				if debug > 1 {
    76  					fmt.Printf("rewriting %s  ->  %s\n", b0.LongString(), b.LongString())
    77  				}
    78  			}
    79  			for j, v := range b.Values {
    80  				var v0 *Value
    81  				if debug > 1 {
    82  					v0 = new(Value)
    83  					*v0 = *v
    84  					v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
    85  				}
    86  				if v.Uses == 0 && v.removeable() {
    87  					if v.Op != OpInvalid && deadcode == removeDeadValues {
    88  						// Reset any values that are now unused, so that we decrement
    89  						// the use count of all of its arguments.
    90  						// Not quite a deadcode pass, because it does not handle cycles.
    91  						// But it should help Uses==1 rules to fire.
    92  						v.reset(OpInvalid)
    93  						deadChange = true
    94  					}
    95  					// No point rewriting values which aren't used.
    96  					continue
    97  				}
    98  
    99  				vchange := phielimValue(v)
   100  				if vchange && debug > 1 {
   101  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   102  				}
   103  
   104  				// Eliminate copy inputs.
   105  				// If any copy input becomes unused, mark it
   106  				// as invalid and discard its argument. Repeat
   107  				// recursively on the discarded argument.
   108  				// This phase helps remove phantom "dead copy" uses
   109  				// of a value so that a x.Uses==1 rule condition
   110  				// fires reliably.
   111  				for i, a := range v.Args {
   112  					if a.Op != OpCopy {
   113  						continue
   114  					}
   115  					aa := copySource(a)
   116  					v.SetArg(i, aa)
   117  					// If a, a copy, has a line boundary indicator, attempt to find a new value
   118  					// to hold it.  The first candidate is the value that will replace a (aa),
   119  					// if it shares the same block and line and is eligible.
   120  					// The second option is v, which has a as an input.  Because aa is earlier in
   121  					// the data flow, it is the better choice.
   122  					if a.Pos.IsStmt() == src.PosIsStmt {
   123  						if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
   124  							aa.Pos = aa.Pos.WithIsStmt()
   125  						} else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
   126  							v.Pos = v.Pos.WithIsStmt()
   127  						} else {
   128  							// Record the lost line and look for a new home after all rewrites are complete.
   129  							// TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
   130  							// line to appear in more than one block, but only one block is stored, so if both end
   131  							// up here, then one will be lost.
   132  							pendingLines.set(a.Pos, int32(a.Block.ID))
   133  						}
   134  						a.Pos = a.Pos.WithNotStmt()
   135  					}
   136  					vchange = true
   137  					for a.Uses == 0 {
   138  						b := a.Args[0]
   139  						a.reset(OpInvalid)
   140  						a = b
   141  					}
   142  				}
   143  				if vchange && debug > 1 {
   144  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   145  				}
   146  
   147  				// apply rewrite function
   148  				if rv(v) {
   149  					vchange = true
   150  					// If value changed to a poor choice for a statement boundary, move the boundary
   151  					if v.Pos.IsStmt() == src.PosIsStmt {
   152  						if k := nextGoodStatementIndex(v, j, b); k != j {
   153  							v.Pos = v.Pos.WithNotStmt()
   154  							b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
   155  						}
   156  					}
   157  				}
   158  
   159  				change = change || vchange
   160  				if vchange && debug > 1 {
   161  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   162  				}
   163  			}
   164  		}
   165  		if !change && !deadChange {
   166  			break
   167  		}
   168  		iters++
   169  		if (iters > itersLimit || debug >= 2) && change {
   170  			// We've done a suspiciously large number of rewrites (or we're in debug mode).
   171  			// As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
   172  			// and the maximum value encountered during make.bash is 12.
   173  			// Start checking for cycles. (This is too expensive to do routinely.)
   174  			// Note: we avoid this path for deadChange-only iterations, to fix #51639.
   175  			if states == nil {
   176  				states = make(map[string]bool)
   177  			}
   178  			h := f.rewriteHash()
   179  			if _, ok := states[h]; ok {
   180  				// We've found a cycle.
   181  				// To diagnose it, set debug to 2 and start again,
   182  				// so that we'll print all rules applied until we complete another cycle.
   183  				// If debug is already >= 2, we've already done that, so it's time to crash.
   184  				if debug < 2 {
   185  					debug = 2
   186  					states = make(map[string]bool)
   187  				} else {
   188  					f.Fatalf("rewrite cycle detected")
   189  				}
   190  			}
   191  			states[h] = true
   192  		}
   193  	}
   194  	// remove clobbered values
   195  	for _, b := range f.Blocks {
   196  		j := 0
   197  		for i, v := range b.Values {
   198  			vl := v.Pos
   199  			if v.Op == OpInvalid {
   200  				if v.Pos.IsStmt() == src.PosIsStmt {
   201  					pendingLines.set(vl, int32(b.ID))
   202  				}
   203  				f.freeValue(v)
   204  				continue
   205  			}
   206  			if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) {
   207  				if pl, ok := pendingLines.get(vl); ok && pl == int32(b.ID) {
   208  					pendingLines.remove(vl)
   209  					v.Pos = v.Pos.WithIsStmt()
   210  				}
   211  			}
   212  			if i != j {
   213  				b.Values[j] = v
   214  			}
   215  			j++
   216  		}
   217  		if pl, ok := pendingLines.get(b.Pos); ok && pl == int32(b.ID) {
   218  			b.Pos = b.Pos.WithIsStmt()
   219  			pendingLines.remove(b.Pos)
   220  		}
   221  		b.truncateValues(j)
   222  	}
   223  }
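
        // Passes are expected to drive applyRewrite with their generated block and
        // value rewriters; a rough, illustrative sketch of an invocation (the real
        // call sites live in the individual passes, not in this file) is:
        //
        //	applyRewrite(f, f.Config.lowerBlock, f.Config.lowerValue, removeDeadValues)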
   224  
   225  // Common functions called from rewriting rules
   226  
   227  func is64BitFloat(t *types.Type) bool {
   228  	return t.Size() == 8 && t.IsFloat()
   229  }
   230  
   231  func is32BitFloat(t *types.Type) bool {
   232  	return t.Size() == 4 && t.IsFloat()
   233  }
   234  
   235  func is64BitInt(t *types.Type) bool {
   236  	return t.Size() == 8 && t.IsInteger()
   237  }
   238  
   239  func is32BitInt(t *types.Type) bool {
   240  	return t.Size() == 4 && t.IsInteger()
   241  }
   242  
   243  func is16BitInt(t *types.Type) bool {
   244  	return t.Size() == 2 && t.IsInteger()
   245  }
   246  
   247  func is8BitInt(t *types.Type) bool {
   248  	return t.Size() == 1 && t.IsInteger()
   249  }
   250  
   251  func isPtr(t *types.Type) bool {
   252  	return t.IsPtrShaped()
   253  }
   254  
   255  func copyCompatibleType(t1, t2 *types.Type) bool {
   256  	if t1.Size() != t2.Size() {
   257  		return false
   258  	}
   259  	if t1.IsInteger() {
   260  		return t2.IsInteger()
   261  	}
   262  	if isPtr(t1) {
   263  		return isPtr(t2)
   264  	}
   265  	return t1.Compare(t2) == types.CMPeq
   266  }
   267  
   268  // mergeSym merges two symbolic offsets. There is no real merging of
   269  // offsets, we just pick the non-nil one.
   270  func mergeSym(x, y Sym) Sym {
   271  	if x == nil {
   272  		return y
   273  	}
   274  	if y == nil {
   275  		return x
   276  	}
   277  	panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
   278  }
   279  
   280  func canMergeSym(x, y Sym) bool {
   281  	return x == nil || y == nil
   282  }
   283  
   284  // canMergeLoadClobber reports whether the load can be merged into target without
   285  // invalidating the schedule.
   286  // It also checks that the other non-load argument x is something we
   287  // are ok with clobbering.
   288  func canMergeLoadClobber(target, load, x *Value) bool {
   289  	// The register containing x is going to get clobbered.
   290  	// Don't merge if we still need the value of x.
   291  	// We don't have liveness information here, but we can
   292  	// approximate x dying with:
   293  	//  1) target is x's only use.
   294  	//  2) target is not in a deeper loop than x.
   295  	switch {
   296  	case x.Uses == 2 && x.Op == OpPhi && len(x.Args) == 2 && (x.Args[0] == target || x.Args[1] == target) && target.Uses == 1:
   297  		// This is a simple detector to determine that x is probably
   298  		// not live after target. (It does not need to be perfect,
   299  		// regalloc will issue a reg-reg move to save it if we are wrong.)
   300  		// We have:
   301  		//   x = Phi(?, target)
   302  		//   target = Op(load, x)
   303  		// Because target has only one use as a Phi argument, we can schedule it
   304  		// very late. Hopefully, later than the other use of x. (The other use died
   305  		// between x and target, or exists on another branch entirely).
   306  	case x.Uses > 1:
   307  		return false
   308  	}
   309  	loopnest := x.Block.Func.loopnest()
   310  	if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
   311  		return false
   312  	}
   313  	return canMergeLoad(target, load)
   314  }
   315  
   316  // canMergeLoad reports whether the load can be merged into target without
   317  // invalidating the schedule.
   318  func canMergeLoad(target, load *Value) bool {
   319  	if target.Block.ID != load.Block.ID {
   320  		// If the load is in a different block do not merge it.
   321  		return false
   322  	}
   323  
   324  	// We can't merge the load into the target if the load
   325  	// has more than one use.
   326  	if load.Uses != 1 {
   327  		return false
   328  	}
   329  
   330  	mem := load.MemoryArg()
   331  
   332  	// We need the load's memory arg to still be alive at target. That
   333  	// can't be the case if one of target's args depends on a memory
   334  	// state that is a successor of load's memory arg.
   335  	//
   336  	// For example, it would be invalid to merge load into target in
   337  	// the following situation because newmem has killed oldmem
   338  	// before target is reached:
   339  	//     load = read ... oldmem
   340  	//   newmem = write ... oldmem
   341  	//     arg0 = read ... newmem
   342  	//   target = add arg0 load
   343  	//
   344  	// If the argument comes from a different block then we can exclude
   345  	// it immediately because it must dominate load (which is in the
   346  	// same block as target).
   347  	var args []*Value
   348  	for _, a := range target.Args {
   349  		if a != load && a.Block.ID == target.Block.ID {
   350  			args = append(args, a)
   351  		}
   352  	}
   353  
   354  	// memPreds contains memory states known to be predecessors of load's
   355  	// memory state. It is lazily initialized.
   356  	var memPreds map[*Value]bool
   357  	for i := 0; len(args) > 0; i++ {
   358  		const limit = 100
   359  		if i >= limit {
   360  			// Give up if we have done a lot of iterations.
   361  			return false
   362  		}
   363  		v := args[len(args)-1]
   364  		args = args[:len(args)-1]
   365  		if target.Block.ID != v.Block.ID {
   366  			// Since target and load are in the same block
   367  			// we can stop searching when we leave the block.
   368  			continue
   369  		}
   370  		if v.Op == OpPhi {
   371  			// A Phi implies we have reached the top of the block.
   372  			// The memory phi, if it exists, is always
   373  			// the first logical store in the block.
   374  			continue
   375  		}
   376  		if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
   377  			// We could handle this situation; however, it is likely
   378  			// to be very rare.
   379  			return false
   380  		}
   381  		if v.Op.SymEffect()&SymAddr != 0 {
   382  			// This case prevents an operation that calculates the
   383  			// address of a local variable from being forced to schedule
   384  			// before its corresponding VarDef.
   385  			// See issue 28445.
   386  			//   v1 = LOAD ...
   387  			//   v2 = VARDEF
   388  			//   v3 = LEAQ
   389  			//   v4 = CMPQ v1 v3
   390  			// We don't want to combine the CMPQ with the load, because
   391  			// that would force the CMPQ to schedule before the VARDEF, which
   392  			// in turn requires the LEAQ to schedule before the VARDEF.
   393  			return false
   394  		}
   395  		if v.Type.IsMemory() {
   396  			if memPreds == nil {
   397  				// Initialise a map containing memory states
   398  				// known to be predecessors of load's memory
   399  				// state.
   400  				memPreds = make(map[*Value]bool)
   401  				m := mem
   402  				const limit = 50
   403  				for i := 0; i < limit; i++ {
   404  					if m.Op == OpPhi {
   405  						// The memory phi, if it exists, is always
   406  						// the first logical store in the block.
   407  						break
   408  					}
   409  					if m.Block.ID != target.Block.ID {
   410  						break
   411  					}
   412  					if !m.Type.IsMemory() {
   413  						break
   414  					}
   415  					memPreds[m] = true
   416  					if len(m.Args) == 0 {
   417  						break
   418  					}
   419  					m = m.MemoryArg()
   420  				}
   421  			}
   422  
   423  			// We can merge if v is a predecessor of mem.
   424  			//
   425  			// For example, we can merge load into target in the
   426  			// following scenario:
   427  			//      x = read ... v
   428  			//    mem = write ... v
   429  			//   load = read ... mem
   430  			// target = add x load
   431  			if memPreds[v] {
   432  				continue
   433  			}
   434  			return false
   435  		}
   436  		if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
   437  			// If v takes mem as an input then we know mem
   438  			// is valid at this point.
   439  			continue
   440  		}
   441  		for _, a := range v.Args {
   442  			if target.Block.ID == a.Block.ID {
   443  				args = append(args, a)
   444  			}
   445  		}
   446  	}
   447  
   448  	return true
   449  }
   450  
   451  // isSameCall reports whether the call described by aux targets the function with the given name.
   452  func isSameCall(aux Aux, name string) bool {
   453  	fn := aux.(*AuxCall).Fn
   454  	return fn != nil && fn.String() == name
   455  }
   456  
   457  // canLoadUnaligned reports whether the architecture supports unaligned load operations.
   458  func canLoadUnaligned(c *Config) bool {
   459  	return c.ctxt.Arch.Alignment == 1
   460  }
   461  
   462  // nlzX returns the number of leading zeros.
   463  func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
   464  func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
   465  func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
   466  func nlz8(x int8) int   { return bits.LeadingZeros8(uint8(x)) }
   467  
   468  // ntzX returns the number of trailing zeros.
   469  func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
   470  func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
   471  func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
   472  func ntz8(x int8) int   { return bits.TrailingZeros8(uint8(x)) }
   473  
   474  // oneBit reports whether x contains exactly one set bit.
   475  func oneBit[T int8 | int16 | int32 | int64](x T) bool {
   476  	return x&(x-1) == 0 && x != 0
   477  }
   478  
   479  // nto returns the number of trailing ones.
   480  func nto(x int64) int64 {
   481  	return int64(ntz64(^x))
   482  }
   483  
   484  // logX returns the base-2 logarithm of n.
   485  // n must be a positive power of 2 (isPowerOfTwo returns true).
   486  func log8(n int8) int64   { return log8u(uint8(n)) }
   487  func log16(n int16) int64 { return log16u(uint16(n)) }
   488  func log32(n int32) int64 { return log32u(uint32(n)) }
   489  func log64(n int64) int64 { return log64u(uint64(n)) }
   490  
   491  // logXu returns the base-2 logarithm of n.
   492  // n must be a power of 2 (isUnsignedPowerOfTwo returns true).
   493  func log8u(n uint8) int64   { return int64(bits.Len8(n)) - 1 }
   494  func log16u(n uint16) int64 { return int64(bits.Len16(n)) - 1 }
   495  func log32u(n uint32) int64 { return int64(bits.Len32(n)) - 1 }
   496  func log64u(n uint64) int64 { return int64(bits.Len64(n)) - 1 }
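
        // For example, log32(8) == 3 and log64u(1) == 0.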
   497  
   498  // isPowerOfTwo reports whether n is a positive power of 2.
   499  func isPowerOfTwo[T int8 | int16 | int32 | int64](n T) bool {
   500  	return n > 0 && n&(n-1) == 0
   501  }
   502  
   503  // isUnsignedPowerOfTwo reports whether n is an unsigned power of 2.
   504  func isUnsignedPowerOfTwo[T uint8 | uint16 | uint32 | uint64](n T) bool {
   505  	return n != 0 && n&(n-1) == 0
   506  }
   507  
   508  // is32Bit reports whether n can be represented as a signed 32 bit integer.
   509  func is32Bit(n int64) bool {
   510  	return n == int64(int32(n))
   511  }
   512  
   513  // is16Bit reports whether n can be represented as a signed 16 bit integer.
   514  func is16Bit(n int64) bool {
   515  	return n == int64(int16(n))
   516  }
   517  
   518  // is8Bit reports whether n can be represented as a signed 8 bit integer.
   519  func is8Bit(n int64) bool {
   520  	return n == int64(int8(n))
   521  }
   522  
   523  // isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
   524  func isU8Bit(n int64) bool {
   525  	return n == int64(uint8(n))
   526  }
   527  
   528  // is12Bit reports whether n can be represented as a signed 12 bit integer.
   529  func is12Bit(n int64) bool {
   530  	return -(1<<11) <= n && n < (1<<11)
   531  }
   532  
   533  // isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
   534  func isU12Bit(n int64) bool {
   535  	return 0 <= n && n < (1<<12)
   536  }
   537  
   538  // isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
   539  func isU16Bit(n int64) bool {
   540  	return n == int64(uint16(n))
   541  }
   542  
   543  // isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
   544  func isU32Bit(n int64) bool {
   545  	return n == int64(uint32(n))
   546  }
   547  
   548  // is20Bit reports whether n can be represented as a signed 20 bit integer.
   549  func is20Bit(n int64) bool {
   550  	return -(1<<19) <= n && n < (1<<19)
   551  }
   552  
   553  // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
   554  func b2i(b bool) int64 {
   555  	if b {
   556  		return 1
   557  	}
   558  	return 0
   559  }
   560  
   561  // b2i32 translates a boolean value to 0 or 1.
   562  func b2i32(b bool) int32 {
   563  	if b {
   564  		return 1
   565  	}
   566  	return 0
   567  }
   568  
   569  func canMulStrengthReduce(config *Config, x int64) bool {
   570  	_, ok := config.mulRecipes[x]
   571  	return ok
   572  }
   573  func canMulStrengthReduce32(config *Config, x int32) bool {
   574  	_, ok := config.mulRecipes[int64(x)]
   575  	return ok
   576  }
   577  
   578  // mulStrengthReduce returns v*x evaluated at the location
   579  // (block and source position) of m.
   580  // canMulStrengthReduce must have returned true.
   581  func mulStrengthReduce(m *Value, v *Value, x int64) *Value {
   582  	return v.Block.Func.Config.mulRecipes[x].build(m, v)
   583  }
   584  
   585  // mulStrengthReduce32 returns v*x evaluated at the location
   586  // (block and source position) of m.
   587  // canMulStrengthReduce32 must have returned true.
   588  // The upper 32 bits of m might be set to junk.
   589  func mulStrengthReduce32(m *Value, v *Value, x int32) *Value {
   590  	return v.Block.Func.Config.mulRecipes[int64(x)].build(m, v)
   591  }
   592  
   593  // shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
   594  // A shift is bounded if it is shifting by less than the width of the shifted value.
   595  func shiftIsBounded(v *Value) bool {
   596  	return v.AuxInt != 0
   597  }
   598  
   599  // canonLessThan reports whether x is "ordered" less than y, for purposes of normalizing
   600  // generated code as much as possible.
   601  func canonLessThan(x, y *Value) bool {
   602  	if x.Op != y.Op {
   603  		return x.Op < y.Op
   604  	}
   605  	if !x.Pos.SameFileAndLine(y.Pos) {
   606  		return x.Pos.Before(y.Pos)
   607  	}
   608  	return x.ID < y.ID
   609  }
   610  
   611  // truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
   612  // of the mantissa. It will panic if the truncation results in lost information.
   613  func truncate64Fto32F(f float64) float32 {
   614  	if !isExactFloat32(f) {
   615  		panic("truncate64Fto32F: truncation is not exact")
   616  	}
   617  	if !math.IsNaN(f) {
   618  		return float32(f)
   619  	}
   620  	// NaN bit patterns aren't necessarily preserved across conversion
   621  	// instructions so we need to do the conversion manually.
   622  	b := math.Float64bits(f)
   623  	m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
   624  	//          | sign                  | exponent   | mantissa       |
   625  	r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
   626  	return math.Float32frombits(r)
   627  }
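
        // For example, a float64 NaN with bit pattern 0x7ff8000000000001 yields the
        // float32 with bit pattern 0x7fc00000: the sign and quiet bit carry over,
        // while the low mantissa bits are shifted away.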
   628  
   629  // DivisionNeedsFixUp reports whether the division needs fix-up code.
   630  func DivisionNeedsFixUp(v *Value) bool {
   631  	return v.AuxInt == 0
   632  }
   633  
   634  // auxTo32F decodes a float32 from the AuxInt value provided.
   635  func auxTo32F(i int64) float32 {
   636  	return truncate64Fto32F(math.Float64frombits(uint64(i)))
   637  }
   638  
   639  func auxIntToBool(i int64) bool {
   640  	if i == 0 {
   641  		return false
   642  	}
   643  	return true
   644  }
   645  func auxIntToInt8(i int64) int8 {
   646  	return int8(i)
   647  }
   648  func auxIntToInt16(i int64) int16 {
   649  	return int16(i)
   650  }
   651  func auxIntToInt32(i int64) int32 {
   652  	return int32(i)
   653  }
   654  func auxIntToInt64(i int64) int64 {
   655  	return i
   656  }
   657  func auxIntToUint8(i int64) uint8 {
   658  	return uint8(i)
   659  }
   660  func auxIntToFloat32(i int64) float32 {
   661  	return float32(math.Float64frombits(uint64(i)))
   662  }
   663  func auxIntToFloat64(i int64) float64 {
   664  	return math.Float64frombits(uint64(i))
   665  }
   666  func auxIntToValAndOff(i int64) ValAndOff {
   667  	return ValAndOff(i)
   668  }
   669  func auxIntToArm64BitField(i int64) arm64BitField {
   670  	return arm64BitField(i)
   671  }
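        // auxIntToArm64ConditionalParams unpacks an AuxInt produced by
        // arm64ConditionalParamsToAuxInt below; the layout is:
        //	bit  25     ind
        //	bits 24-20  constValue
        //	bits 19-16  nzcv
        //	bits 15-0   cond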
   672  func auxIntToArm64ConditionalParams(i int64) arm64ConditionalParams {
   673  	var params arm64ConditionalParams
   674  	params.cond = Op(i & 0xffff)
   675  	i >>= 16
   676  	params.nzcv = uint8(i & 0x0f)
   677  	i >>= 4
   678  	params.constValue = uint8(i & 0x1f)
   679  	i >>= 5
   680  	params.ind = i == 1
   681  	return params
   682  }
   683  func auxIntToFlagConstant(x int64) flagConstant {
   684  	return flagConstant(x)
   685  }
   686  
   687  func auxIntToOp(cc int64) Op {
   688  	return Op(cc)
   689  }
   690  
   691  func boolToAuxInt(b bool) int64 {
   692  	if b {
   693  		return 1
   694  	}
   695  	return 0
   696  }
   697  func int8ToAuxInt(i int8) int64 {
   698  	return int64(i)
   699  }
   700  func int16ToAuxInt(i int16) int64 {
   701  	return int64(i)
   702  }
   703  func int32ToAuxInt(i int32) int64 {
   704  	return int64(i)
   705  }
   706  func int64ToAuxInt(i int64) int64 {
   707  	return int64(i)
   708  }
   709  func uint8ToAuxInt(i uint8) int64 {
   710  	return int64(int8(i))
   711  }
   712  func float32ToAuxInt(f float32) int64 {
   713  	return int64(math.Float64bits(float64(f)))
   714  }
   715  func float64ToAuxInt(f float64) int64 {
   716  	return int64(math.Float64bits(f))
   717  }
   718  func valAndOffToAuxInt(v ValAndOff) int64 {
   719  	return int64(v)
   720  }
   721  func arm64BitFieldToAuxInt(v arm64BitField) int64 {
   722  	return int64(v)
   723  }
   724  func arm64ConditionalParamsToAuxInt(v arm64ConditionalParams) int64 {
   725  	if v.cond&^0xffff != 0 {
   726  		panic("condition value exceeds 16 bits")
   727  	}
   728  
   729  	var i int64
   730  	if v.ind {
   731  		i = 1 << 25
   732  	}
   733  	i |= int64(v.constValue) << 20
   734  	i |= int64(v.nzcv) << 16
   735  	i |= int64(v.cond)
   736  	return i
   737  }
   738  func flagConstantToAuxInt(x flagConstant) int64 {
   739  	return int64(x)
   740  }
   741  
   742  func opToAuxInt(o Op) int64 {
   743  	return int64(o)
   744  }
   745  
   746  // Aux is an interface to hold miscellaneous data in Blocks and Values.
   747  type Aux interface {
   748  	CanBeAnSSAAux()
   749  }
   750  
   751  // auxMark is, for now, only used to mark moves that need to avoid clobbering flags.
   752  type auxMark bool
   753  
   754  func (auxMark) CanBeAnSSAAux() {}
   755  
   756  var AuxMark auxMark
   757  
   758  // stringAux wraps string values for use in Aux.
   759  type stringAux string
   760  
   761  func (stringAux) CanBeAnSSAAux() {}
   762  
   763  func auxToString(i Aux) string {
   764  	return string(i.(stringAux))
   765  }
   766  func auxToSym(i Aux) Sym {
   767  	// TODO: kind of a hack - allows nil interface through
   768  	s, _ := i.(Sym)
   769  	return s
   770  }
   771  func auxToType(i Aux) *types.Type {
   772  	return i.(*types.Type)
   773  }
   774  func auxToCall(i Aux) *AuxCall {
   775  	return i.(*AuxCall)
   776  }
   777  func auxToS390xCCMask(i Aux) s390x.CCMask {
   778  	return i.(s390x.CCMask)
   779  }
   780  func auxToS390xRotateParams(i Aux) s390x.RotateParams {
   781  	return i.(s390x.RotateParams)
   782  }
   783  
   784  func StringToAux(s string) Aux {
   785  	return stringAux(s)
   786  }
   787  func symToAux(s Sym) Aux {
   788  	return s
   789  }
   790  func callToAux(s *AuxCall) Aux {
   791  	return s
   792  }
   793  func typeToAux(t *types.Type) Aux {
   794  	return t
   795  }
   796  func s390xCCMaskToAux(c s390x.CCMask) Aux {
   797  	return c
   798  }
   799  func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
   800  	return r
   801  }
   802  
   803  // uaddOvf reports whether unsigned a+b would overflow.
   804  func uaddOvf(a, b int64) bool {
   805  	return uint64(a)+uint64(b) < uint64(a)
   806  }
   807  
   808  func devirtLECall(v *Value, sym *obj.LSym) *Value {
   809  	v.Op = OpStaticLECall
   810  	auxcall := v.Aux.(*AuxCall)
   811  	auxcall.Fn = sym
   812  	// Remove first arg
   813  	v.Args[0].Uses--
   814  	copy(v.Args[0:], v.Args[1:])
   815  	v.Args[len(v.Args)-1] = nil // aid GC
   816  	v.Args = v.Args[:len(v.Args)-1]
   817  	if f := v.Block.Func; f.pass.debug > 0 {
   818  		f.Warnl(v.Pos, "de-virtualizing call")
   819  	}
   820  	return v
   821  }
   822  
   823  // isSamePtr reports whether p1 and p2 point to the same address.
   824  func isSamePtr(p1, p2 *Value) bool {
   825  	if p1 == p2 {
   826  		return true
   827  	}
   828  	if p1.Op != p2.Op {
   829  		for p1.Op == OpOffPtr && p1.AuxInt == 0 {
   830  			p1 = p1.Args[0]
   831  		}
   832  		for p2.Op == OpOffPtr && p2.AuxInt == 0 {
   833  			p2 = p2.Args[0]
   834  		}
   835  		if p1 == p2 {
   836  			return true
   837  		}
   838  		if p1.Op != p2.Op {
   839  			return false
   840  		}
   841  	}
   842  	switch p1.Op {
   843  	case OpOffPtr:
   844  		return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
   845  	case OpAddr, OpLocalAddr:
   846  		return p1.Aux == p2.Aux
   847  	case OpAddPtr:
   848  		return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
   849  	}
   850  	return false
   851  }
   852  
   853  func isStackPtr(v *Value) bool {
   854  	for v.Op == OpOffPtr || v.Op == OpAddPtr {
   855  		v = v.Args[0]
   856  	}
   857  	return v.Op == OpSP || v.Op == OpLocalAddr
   858  }
   859  
   860  // disjoint reports whether the memory region specified by [p1:p1+n1)
   861  // does not overlap with [p2:p2+n2).
   862  // A return value of false does not imply the regions overlap.
   863  func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
   864  	if n1 == 0 || n2 == 0 {
   865  		return true
   866  	}
   867  	if p1 == p2 {
   868  		return false
   869  	}
   870  	baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
   871  		base, offset = ptr, 0
   872  		for base.Op == OpOffPtr {
   873  			offset += base.AuxInt
   874  			base = base.Args[0]
   875  		}
   876  		if opcodeTable[base.Op].nilCheck {
   877  			base = base.Args[0]
   878  		}
   879  		return base, offset
   880  	}
   881  
   882  	// Run types-based analysis
   883  	if disjointTypes(p1.Type, p2.Type) {
   884  		return true
   885  	}
   886  
   887  	p1, off1 := baseAndOffset(p1)
   888  	p2, off2 := baseAndOffset(p2)
   889  	if isSamePtr(p1, p2) {
   890  		return !overlap(off1, n1, off2, n2)
   891  	}
   892  	// p1 and p2 are not the same, so if they are both OpAddrs then
   893  	// they point to different variables.
   894  	// If one pointer is on the stack and the other is an argument
   895  	// then they can't overlap.
   896  	switch p1.Op {
   897  	case OpAddr, OpLocalAddr:
   898  		if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
   899  			return true
   900  		}
   901  		return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
   902  	case OpArg, OpArgIntReg:
   903  		if p2.Op == OpSP || p2.Op == OpLocalAddr {
   904  			return true
   905  		}
   906  	case OpSP:
   907  		return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
   908  	}
   909  	return false
   910  }
   911  
   912  // disjointTypes reports whether a memory region pointed to by a pointer of type
   913  // t1 does not overlap with a memory region pointed to by a pointer of type t2 --
   914  // based on type aliasing rules.
   915  func disjointTypes(t1 *types.Type, t2 *types.Type) bool {
   916  	// Unsafe pointer can alias with anything.
   917  	if t1.IsUnsafePtr() || t2.IsUnsafePtr() {
   918  		return false
   919  	}
   920  
   921  	if !t1.IsPtr() || !t2.IsPtr() {
   922  		panic("disjointTypes: one of arguments is not a pointer")
   923  	}
   924  
   925  	t1 = t1.Elem()
   926  	t2 = t2.Elem()
   927  
   928  // Not-in-heap types are not supported -- they are rare and unimportant; also,
   929  // the type.HasPointers check doesn't work correctly for them.
   930  	if t1.NotInHeap() || t2.NotInHeap() {
   931  		return false
   932  	}
   933  
   934  	isPtrShaped := func(t *types.Type) bool { return int(t.Size()) == types.PtrSize && t.HasPointers() }
   935  
   936  	// Pointers and non-pointers are disjoint (https://pkg.go.dev/unsafe#Pointer).
   937  	if (isPtrShaped(t1) && !t2.HasPointers()) ||
   938  		(isPtrShaped(t2) && !t1.HasPointers()) {
   939  		return true
   940  	}
   941  
   942  	return false
   943  }
   944  
   945  // moveSize returns the number of bytes an aligned MOV instruction moves.
   946  func moveSize(align int64, c *Config) int64 {
   947  	switch {
   948  	case align%8 == 0 && c.PtrSize == 8:
   949  		return 8
   950  	case align%4 == 0:
   951  		return 4
   952  	case align%2 == 0:
   953  		return 2
   954  	}
   955  	return 1
   956  }
   957  
   958  // mergePoint finds a block among a's blocks which dominates b and is itself
   959  // dominated by all of a's blocks. Returns nil if it can't find one.
   960  // Might return nil even if one does exist.
   961  func mergePoint(b *Block, a ...*Value) *Block {
   962  	// Walk backward from b looking for one of the a's blocks.
   963  
   964  	// Max distance
   965  	d := 100
   966  
   967  	for d > 0 {
   968  		for _, x := range a {
   969  			if b == x.Block {
   970  				goto found
   971  			}
   972  		}
   973  		if len(b.Preds) > 1 {
   974  			// Don't know which way to go back. Abort.
   975  			return nil
   976  		}
   977  		b = b.Preds[0].b
   978  		d--
   979  	}
   980  	return nil // too far away
   981  found:
   982  	// At this point, r is the first value in a that we find by walking backwards.
   983  	// if we return anything, r will be it.
   984  	r := b
   985  
   986  	// Keep going, counting the other a's that we find. They must all dominate r.
   987  	na := 0
   988  	for d > 0 {
   989  		for _, x := range a {
   990  			if b == x.Block {
   991  				na++
   992  			}
   993  		}
   994  		if na == len(a) {
   995  			// Found all of a in a backwards walk. We can return r.
   996  			return r
   997  		}
   998  		if len(b.Preds) > 1 {
   999  			return nil
  1000  		}
  1001  		b = b.Preds[0].b
  1002  		d--
  1003  
  1004  	}
  1005  	return nil // too far away
  1006  }
  1007  
  1008  // clobber invalidates values. Returns true.
  1009  // clobber is used by rewrite rules to:
  1010  //
  1011  //	A) make sure the values are really dead and never used again.
  1012  //	B) decrement use counts of the values' args.
  1013  func clobber(vv ...*Value) bool {
  1014  	for _, v := range vv {
  1015  		v.reset(OpInvalid)
  1016  		// Note: leave v.Block intact.  The Block field is used after clobber.
  1017  	}
  1018  	return true
  1019  }
  1020  
  1021  // resetCopy resets v to be a copy of arg.
  1022  // Always returns true.
  1023  func resetCopy(v *Value, arg *Value) bool {
  1024  	v.reset(OpCopy)
  1025  	v.AddArg(arg)
  1026  	return true
  1027  }
  1028  
  1029  // clobberIfDead resets v when use count is 1. Returns true.
  1030  // clobberIfDead is used by rewrite rules to decrement
  1031  // use counts of v's args when v is dead and never used.
  1032  func clobberIfDead(v *Value) bool {
  1033  	if v.Uses == 1 {
  1034  		v.reset(OpInvalid)
  1035  	}
  1036  	// Note: leave v.Block intact.  The Block field is used after clobberIfDead.
  1037  	return true
  1038  }
  1039  
  1040  // noteRule is an easy way to track if a rule is matched when writing
  1041  // new ones.  Make the rule of interest also conditional on
  1042  //
  1043  //	noteRule("note to self: rule of interest matched")
  1044  //
  1045  // and that message will print when the rule matches.
  1046  func noteRule(s string) bool {
  1047  	fmt.Println(s)
  1048  	return true
  1049  }
  1050  
  1051  // countRule increments Func.ruleMatches[key].
  1052  // If Func.ruleMatches is non-nil at the end
  1053  // of compilation, it will be printed to stdout.
  1054  // This is intended to make it easier to find which functions
  1055  // contain lots of rule matches when developing new rules.
  1056  func countRule(v *Value, key string) bool {
  1057  	f := v.Block.Func
  1058  	if f.ruleMatches == nil {
  1059  		f.ruleMatches = make(map[string]int)
  1060  	}
  1061  	f.ruleMatches[key]++
  1062  	return true
  1063  }
  1064  
  1065  // warnRule generates compiler debug output with string s when
  1066  // v is not in autogenerated code, cond is true and the rule has fired.
  1067  func warnRule(cond bool, v *Value, s string) bool {
  1068  	if pos := v.Pos; pos.Line() > 1 && cond {
  1069  		v.Block.Func.Warnl(pos, s)
  1070  	}
  1071  	return true
  1072  }
  1073  
  1074  // for a pseudo-op like (LessThan x), extract x.
  1075  func flagArg(v *Value) *Value {
  1076  	if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
  1077  		return nil
  1078  	}
  1079  	return v.Args[0]
  1080  }
  1081  
  1082  // arm64Negate finds the complement to an ARM64 condition code,
  1083  // for example !Equal -> NotEqual or !LessThan -> GreaterEqual
  1084  //
  1085  // For floating point, it's more subtle because NaN is unordered. We do
  1086  // !LessThanF -> NotLessThanF, the latter takes care of NaNs.
  1087  func arm64Negate(op Op) Op {
  1088  	switch op {
  1089  	case OpARM64LessThan:
  1090  		return OpARM64GreaterEqual
  1091  	case OpARM64LessThanU:
  1092  		return OpARM64GreaterEqualU
  1093  	case OpARM64GreaterThan:
  1094  		return OpARM64LessEqual
  1095  	case OpARM64GreaterThanU:
  1096  		return OpARM64LessEqualU
  1097  	case OpARM64LessEqual:
  1098  		return OpARM64GreaterThan
  1099  	case OpARM64LessEqualU:
  1100  		return OpARM64GreaterThanU
  1101  	case OpARM64GreaterEqual:
  1102  		return OpARM64LessThan
  1103  	case OpARM64GreaterEqualU:
  1104  		return OpARM64LessThanU
  1105  	case OpARM64Equal:
  1106  		return OpARM64NotEqual
  1107  	case OpARM64NotEqual:
  1108  		return OpARM64Equal
  1109  	case OpARM64LessThanF:
  1110  		return OpARM64NotLessThanF
  1111  	case OpARM64NotLessThanF:
  1112  		return OpARM64LessThanF
  1113  	case OpARM64LessEqualF:
  1114  		return OpARM64NotLessEqualF
  1115  	case OpARM64NotLessEqualF:
  1116  		return OpARM64LessEqualF
  1117  	case OpARM64GreaterThanF:
  1118  		return OpARM64NotGreaterThanF
  1119  	case OpARM64NotGreaterThanF:
  1120  		return OpARM64GreaterThanF
  1121  	case OpARM64GreaterEqualF:
  1122  		return OpARM64NotGreaterEqualF
  1123  	case OpARM64NotGreaterEqualF:
  1124  		return OpARM64GreaterEqualF
  1125  	default:
  1126  		panic("unreachable")
  1127  	}
  1128  }
  1129  
  1130  // arm64Invert evaluates (InvertFlags op), which
  1131  // is the same as altering the condition codes such
  1132  // that the same result would be produced if the arguments
  1133  // to the flag-generating instruction were reversed, e.g.
  1134  // (InvertFlags (CMP x y)) -> (CMP y x)
  1135  func arm64Invert(op Op) Op {
  1136  	switch op {
  1137  	case OpARM64LessThan:
  1138  		return OpARM64GreaterThan
  1139  	case OpARM64LessThanU:
  1140  		return OpARM64GreaterThanU
  1141  	case OpARM64GreaterThan:
  1142  		return OpARM64LessThan
  1143  	case OpARM64GreaterThanU:
  1144  		return OpARM64LessThanU
  1145  	case OpARM64LessEqual:
  1146  		return OpARM64GreaterEqual
  1147  	case OpARM64LessEqualU:
  1148  		return OpARM64GreaterEqualU
  1149  	case OpARM64GreaterEqual:
  1150  		return OpARM64LessEqual
  1151  	case OpARM64GreaterEqualU:
  1152  		return OpARM64LessEqualU
  1153  	case OpARM64Equal, OpARM64NotEqual:
  1154  		return op
  1155  	case OpARM64LessThanF:
  1156  		return OpARM64GreaterThanF
  1157  	case OpARM64GreaterThanF:
  1158  		return OpARM64LessThanF
  1159  	case OpARM64LessEqualF:
  1160  		return OpARM64GreaterEqualF
  1161  	case OpARM64GreaterEqualF:
  1162  		return OpARM64LessEqualF
  1163  	case OpARM64NotLessThanF:
  1164  		return OpARM64NotGreaterThanF
  1165  	case OpARM64NotGreaterThanF:
  1166  		return OpARM64NotLessThanF
  1167  	case OpARM64NotLessEqualF:
  1168  		return OpARM64NotGreaterEqualF
  1169  	case OpARM64NotGreaterEqualF:
  1170  		return OpARM64NotLessEqualF
  1171  	default:
  1172  		panic("unreachable")
  1173  	}
  1174  }
  1175  
  1176  // evaluate an ARM64 op against a flags value
  1177  // that is potentially constant; return 1 for true,
  1178  // -1 for false, and 0 for not constant.
  1179  func ccARM64Eval(op Op, flags *Value) int {
  1180  	fop := flags.Op
  1181  	if fop == OpARM64InvertFlags {
  1182  		return -ccARM64Eval(op, flags.Args[0])
  1183  	}
  1184  	if fop != OpARM64FlagConstant {
  1185  		return 0
  1186  	}
  1187  	fc := flagConstant(flags.AuxInt)
  1188  	b2i := func(b bool) int {
  1189  		if b {
  1190  			return 1
  1191  		}
  1192  		return -1
  1193  	}
  1194  	switch op {
  1195  	case OpARM64Equal:
  1196  		return b2i(fc.eq())
  1197  	case OpARM64NotEqual:
  1198  		return b2i(fc.ne())
  1199  	case OpARM64LessThan:
  1200  		return b2i(fc.lt())
  1201  	case OpARM64LessThanU:
  1202  		return b2i(fc.ult())
  1203  	case OpARM64GreaterThan:
  1204  		return b2i(fc.gt())
  1205  	case OpARM64GreaterThanU:
  1206  		return b2i(fc.ugt())
  1207  	case OpARM64LessEqual:
  1208  		return b2i(fc.le())
  1209  	case OpARM64LessEqualU:
  1210  		return b2i(fc.ule())
  1211  	case OpARM64GreaterEqual:
  1212  		return b2i(fc.ge())
  1213  	case OpARM64GreaterEqualU:
  1214  		return b2i(fc.uge())
  1215  	}
  1216  	return 0
  1217  }
  1218  
  1219  // logRule logs the use of the rule s. This will only be enabled if
  1220  // rewrite rules were generated with the -log option, see _gen/rulegen.go.
  1221  func logRule(s string) {
  1222  	if ruleFile == nil {
  1223  		// Open a log file to write log to. We open in append
  1224  		// mode because all.bash runs the compiler lots of times,
  1225  		// and we want the concatenation of all of those logs.
  1226  		// This means, of course, that users need to rm the old log
  1227  		// to get fresh data.
  1228  		// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
  1229  		w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
  1230  			os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
  1231  		if err != nil {
  1232  			panic(err)
  1233  		}
  1234  		ruleFile = w
  1235  	}
  1236  	_, err := fmt.Fprintln(ruleFile, s)
  1237  	if err != nil {
  1238  		panic(err)
  1239  	}
  1240  }
  1241  
  1242  var ruleFile io.Writer
  1243  
  1244  func isConstZero(v *Value) bool {
  1245  	switch v.Op {
  1246  	case OpConstNil:
  1247  		return true
  1248  	case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
  1249  		return v.AuxInt == 0
  1250  	case OpStringMake, OpIMake, OpComplexMake:
  1251  		return isConstZero(v.Args[0]) && isConstZero(v.Args[1])
  1252  	case OpSliceMake:
  1253  		return isConstZero(v.Args[0]) && isConstZero(v.Args[1]) && isConstZero(v.Args[2])
  1254  	case OpStringPtr, OpStringLen, OpSlicePtr, OpSliceLen, OpSliceCap, OpITab, OpIData, OpComplexReal, OpComplexImag:
  1255  		return isConstZero(v.Args[0])
  1256  	}
  1257  	return false
  1258  }
  1259  
  1260  // reciprocalExact64 reports whether 1/c is exactly representable.
  1261  func reciprocalExact64(c float64) bool {
  1262  	b := math.Float64bits(c)
  1263  	man := b & (1<<52 - 1)
  1264  	if man != 0 {
  1265  		return false // not a power of 2, denormal, or NaN
  1266  	}
  1267  	exp := b >> 52 & (1<<11 - 1)
  1268  	// exponent bias is 0x3ff.  So taking the reciprocal of a number
  1269  	// changes the exponent to 0x7fe-exp.
  1270  	switch exp {
  1271  	case 0:
  1272  		return false // ±0
  1273  	case 0x7ff:
  1274  		return false // ±inf
  1275  	case 0x7fe:
  1276  		return false // exponent is not representable
  1277  	default:
  1278  		return true
  1279  	}
  1280  }
  1281  
  1282  // reciprocalExact32 reports whether 1/c is exactly representable.
  1283  func reciprocalExact32(c float32) bool {
  1284  	b := math.Float32bits(c)
  1285  	man := b & (1<<23 - 1)
  1286  	if man != 0 {
  1287  		return false // not a power of 2, denormal, or NaN
  1288  	}
  1289  	exp := b >> 23 & (1<<8 - 1)
  1290  	// exponent bias is 0x7f.  So taking the reciprocal of a number
  1291  	// changes the exponent to 0xfe-exp.
  1292  	switch exp {
  1293  	case 0:
  1294  		return false // ±0
  1295  	case 0xff:
  1296  		return false // ±inf
  1297  	case 0xfe:
  1298  		return false // exponent is not representable
  1299  	default:
  1300  		return true
  1301  	}
  1302  }
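
        // For example, reciprocalExact64(4.0) is true (1/4 is exact), while
        // reciprocalExact64(3.0) and reciprocalExact64(0) are false.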
  1303  
  1304  // isARMImmRot reports whether an immediate can be directly encoded into an ARM instruction.
  1305  func isARMImmRot(v uint32) bool {
  1306  	for i := 0; i < 16; i++ {
  1307  		if v&^0xff == 0 {
  1308  			return true
  1309  		}
  1310  		v = v<<2 | v>>30
  1311  	}
  1312  
  1313  	return false
  1314  }
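
        // For example, isARMImmRot(0xff) and isARMImmRot(0xff000000) are true (the
        // latter is 0xff rotated by 8 bits), while isARMImmRot(0x1fe) is false since
        // it would require an odd rotation.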
  1315  
  1316  // overlap reports whether the ranges given by the given offset and
  1317  // size pairs overlap.
  1318  func overlap(offset1, size1, offset2, size2 int64) bool {
  1319  	if offset1 >= offset2 && offset2+size2 > offset1 {
  1320  		return true
  1321  	}
  1322  	if offset2 >= offset1 && offset1+size1 > offset2 {
  1323  		return true
  1324  	}
  1325  	return false
  1326  }
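
        // For example, overlap(0, 8, 4, 8) is true ([0,8) and [4,12) share [4,8)),
        // while overlap(0, 4, 4, 4) is false: the ranges merely touch.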
  1327  
  1328  // zeroUpper32Bits reports whether x zeroes out the upper 32 bits of a 64-bit register.
  1329  // depth limits the recursion depth. In AMD64.rules, 3 is used as the limit
  1330  // because it catches the same number of cases as 4.
  1331  func zeroUpper32Bits(x *Value, depth int) bool {
  1332  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1333  		// If the value is signed, it might get re-sign-extended
  1334  		// during spill and restore. See issue 68227.
  1335  		return false
  1336  	}
  1337  	switch x.Op {
  1338  	case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
  1339  		OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
  1340  		OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
  1341  		OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
  1342  		OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
  1343  		OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
  1344  		OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
  1345  		OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
  1346  		OpAMD64SHLL, OpAMD64SHLLconst:
  1347  		return true
  1348  	case OpARM64REV16W, OpARM64REVW, OpARM64RBITW, OpARM64CLZW, OpARM64EXTRWconst,
  1349  		OpARM64MULW, OpARM64MNEGW, OpARM64UDIVW, OpARM64DIVW, OpARM64UMODW,
  1350  		OpARM64MADDW, OpARM64MSUBW, OpARM64RORW, OpARM64RORWconst:
  1351  		return true
  1352  	case OpArg: // note: but not ArgIntReg
  1353  		// amd64 always loads args from the stack unsigned.
  1354  		// most other architectures load them sign/zero extended based on the type.
  1355  		return x.Type.Size() == 4 && x.Block.Func.Config.arch == "amd64"
  1356  	case OpPhi, OpSelect0, OpSelect1:
  1357  		// Phis can use each other as arguments; instead of tracking visited values,
  1358  		// just limit the recursion depth.
  1359  		if depth <= 0 {
  1360  			return false
  1361  		}
  1362  		for i := range x.Args {
  1363  			if !zeroUpper32Bits(x.Args[i], depth-1) {
  1364  				return false
  1365  			}
  1366  		}
  1367  		return true
  1368  
  1369  	}
  1370  	return false
  1371  }
  1372  
  1373  // zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits.
  1374  func zeroUpper48Bits(x *Value, depth int) bool {
  1375  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1376  		return false
  1377  	}
  1378  	switch x.Op {
  1379  	case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
  1380  		return true
  1381  	case OpArg: // note: but not ArgIntReg
  1382  		return x.Type.Size() == 2 && x.Block.Func.Config.arch == "amd64"
  1383  	case OpPhi, OpSelect0, OpSelect1:
  1384  		// Phis can use each other as arguments; instead of tracking visited values,
  1385  		// just limit the recursion depth.
  1386  		if depth <= 0 {
  1387  			return false
  1388  		}
  1389  		for i := range x.Args {
  1390  			if !zeroUpper48Bits(x.Args[i], depth-1) {
  1391  				return false
  1392  			}
  1393  		}
  1394  		return true
  1395  
  1396  	}
  1397  	return false
  1398  }
  1399  
  1400  // zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits.
  1401  func zeroUpper56Bits(x *Value, depth int) bool {
  1402  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1403  		return false
  1404  	}
  1405  	switch x.Op {
  1406  	case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
  1407  		return true
  1408  	case OpArg: // note: but not ArgIntReg
  1409  		return x.Type.Size() == 1 && x.Block.Func.Config.arch == "amd64"
  1410  	case OpPhi, OpSelect0, OpSelect1:
  1411  		// Phis can use each other as arguments; instead of tracking visited values,
  1412  		// just limit the recursion depth.
  1413  		if depth <= 0 {
  1414  			return false
  1415  		}
  1416  		for i := range x.Args {
  1417  			if !zeroUpper56Bits(x.Args[i], depth-1) {
  1418  				return false
  1419  			}
  1420  		}
  1421  		return true
  1422  
  1423  	}
  1424  	return false
  1425  }
  1426  
  1427  func isInlinableMemclr(c *Config, sz int64) bool {
  1428  	if sz < 0 {
  1429  		return false
  1430  	}
  1431  	// TODO: expand this check to allow other architectures
  1432  	// see CL 454255 and issue 56997
  1433  	switch c.arch {
  1434  	case "amd64", "arm64":
  1435  		return true
  1436  	case "ppc64le", "ppc64", "loong64":
  1437  		return sz < 512
  1438  	}
  1439  	return false
  1440  }
  1441  
  1442  // isInlinableMemmove reports whether the given arch performs a Move of the given size
  1443  // faster than memmove. It will only return true if replacing the memmove with a Move is
  1444  // safe, either because Move will do all of its loads before any of its stores, or
  1445  // because the arguments are known to be disjoint.
  1446  // This is used as a check for replacing memmove with Move ops.
  1447  func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
  1448  	// It is always safe to convert memmove into Move when its arguments are disjoint.
  1449  	// Move ops may or may not be faster for large sizes depending on how the platform
  1450  	// lowers them, so we only perform this optimization on platforms that we know to
  1451  	// have fast Move ops.
  1452  	switch c.arch {
  1453  	case "amd64":
  1454  		return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
  1455  	case "arm64":
  1456  		return sz <= 64 || (sz <= 1024 && disjoint(dst, sz, src, sz))
  1457  	case "386":
  1458  		return sz <= 8
  1459  	case "s390x", "ppc64", "ppc64le":
  1460  		return sz <= 8 || disjoint(dst, sz, src, sz)
  1461  	case "arm", "loong64", "mips", "mips64", "mipsle", "mips64le":
  1462  		return sz <= 4
  1463  	}
  1464  	return false
  1465  }
  1466  func IsInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
  1467  	return isInlinableMemmove(dst, src, sz, c)
  1468  }
  1469  
  1470  // logLargeCopy logs the occurrence of a large copy.
  1471  // The best place to do this is in the rewrite rules where the size of the move is easy to find.
  1472  // "Large" is arbitrarily chosen to be 128 bytes; this may change.
  1473  func logLargeCopy(v *Value, s int64) bool {
  1474  	if s < 128 {
  1475  		return true
  1476  	}
  1477  	if logopt.Enabled() {
  1478  		logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
  1479  	}
  1480  	return true
  1481  }
  1482  func LogLargeCopy(funcName string, pos src.XPos, s int64) {
  1483  	if s < 128 {
  1484  		return
  1485  	}
  1486  	if logopt.Enabled() {
  1487  		logopt.LogOpt(pos, "copy", "lower", funcName, fmt.Sprintf("%d bytes", s))
  1488  	}
  1489  }
  1490  
  1491  // hasSmallRotate reports whether the architecture has rotate instructions
  1492  // for sizes < 32-bit.  This is used to decide whether to promote some rotations.
  1493  func hasSmallRotate(c *Config) bool {
  1494  	switch c.arch {
  1495  	case "amd64", "386":
  1496  		return true
  1497  	default:
  1498  		return false
  1499  	}
  1500  }
  1501  
  1502  func supportsPPC64PCRel() bool {
  1503  	// PCRel is currently supported for >= power10, linux only
  1504  	// Internal and external linking supports this on ppc64le; internal linking on ppc64.
  1505  	return buildcfg.GOPPC64 >= 10 && buildcfg.GOOS == "linux"
  1506  }
  1507  
  1508  func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
  1509  	if sh < 0 || sh >= sz {
  1510  		panic("PPC64 shift arg sh out of range")
  1511  	}
  1512  	if mb < 0 || mb >= sz {
  1513  		panic("PPC64 shift arg mb out of range")
  1514  	}
  1515  	if me < 0 || me >= sz {
  1516  		panic("PPC64 shift arg me out of range")
  1517  	}
  1518  	return int32(sh<<16 | mb<<8 | me)
  1519  }
  1520  
  1521  func GetPPC64Shiftsh(auxint int64) int64 {
  1522  	return int64(int8(auxint >> 16))
  1523  }
  1524  
  1525  func GetPPC64Shiftmb(auxint int64) int64 {
  1526  	return int64(int8(auxint >> 8))
  1527  }
  1528  
  1529  // isPPC64WordRotateMask reports whether this value can be encoded as a mask for
  1530  // an rlwinm-like operation. Masks can also extend from the msb and wrap around
  1531  // to the lsb. That is, the valid masks are 32-bit strings
  1532  // of the form: 0..01..10..0 or 1..10..01..1 or 1...1
  1533  //
  1534  // Note: This ignores the upper 32 bits of the input. When a
  1535  // zero-extended result is desired (e.g. a 64-bit result), the
  1536  // user must verify the upper 32 bits are 0 and the mask is
  1537  // contiguous (that is, non-wrapping).
  1538  func isPPC64WordRotateMask(v64 int64) bool {
  1539  	// Isolate rightmost 1 (if none 0) and add.
  1540  	v := uint32(v64)
  1541  	vp := (v & -v) + v
  1542  	// Likewise, for the wrapping case.
  1543  	vn := ^v
  1544  	vpn := (vn & -vn) + vn
  1545  	return (v&vp == 0 || vn&vpn == 0) && v != 0
  1546  }
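
        // For example, 0x000ff000 (contiguous) and 0xff0000ff (wrapping) are valid
        // masks, while 0x0f0f0000 is not.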
  1547  
  1548  // Test if this mask is a valid, contiguous bitmask which can be
  1549  // represented by a RLWNM mask and also clears the upper 32 bits
  1550  // of the register.
  1551  func isPPC64WordRotateMaskNonWrapping(v64 int64) bool {
  1552  	// Isolate rightmost 1 (if none 0) and add.
  1553  	v := uint32(v64)
  1554  	vp := (v & -v) + v
  1555  	return (v&vp == 0) && v != 0 && uint64(uint32(v64)) == uint64(v64)
  1556  }
  1557  
  1558  // Compress the rotate amount and mask into a single value of the form
  1559  // me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can
  1560  // be used to regenerate the input mask.
  1561  func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
  1562  	var mb, me, mbn, men int
  1563  
  1564  	// Determine boundaries and then decode them
  1565  	if mask == 0 || ^mask == 0 || rotate >= nbits {
  1566  		panic(fmt.Sprintf("invalid PPC64 rotate mask: %x %d %d", uint64(mask), rotate, nbits))
  1567  	} else if nbits == 32 {
  1568  		mb = bits.LeadingZeros32(uint32(mask))
  1569  		me = 32 - bits.TrailingZeros32(uint32(mask))
  1570  		mbn = bits.LeadingZeros32(^uint32(mask))
  1571  		men = 32 - bits.TrailingZeros32(^uint32(mask))
  1572  	} else {
  1573  		mb = bits.LeadingZeros64(uint64(mask))
  1574  		me = 64 - bits.TrailingZeros64(uint64(mask))
  1575  		mbn = bits.LeadingZeros64(^uint64(mask))
  1576  		men = 64 - bits.TrailingZeros64(^uint64(mask))
  1577  	}
  1578  	// Check for a wrapping mask (e.g. bits at 0 and 63).
  1579  	if mb == 0 && me == int(nbits) {
  1580  		// swap the inverted values
  1581  		mb, me = men, mbn
  1582  	}
  1583  
  1584  	return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
  1585  }
  1586  
  1587  // Merge (RLDICL [encoded] (SRDconst [s] x)) into (RLDICL [new_encoded] x)
  1588  // SRDconst on PPC64 is an extended mnemonic of RLDICL. If the input to an
  1589  // RLDICL is an SRDconst, and the RLDICL does not rotate its value, the two
  1590  // operations can be combined. This function assumes the two opcodes can
  1591  // be merged, and returns an encoded rotate+mask value of the combined RLDICL.
  1592  func mergePPC64RLDICLandSRDconst(encoded, s int64) int64 {
  1593  	mb := s
  1594  	r := 64 - s
  1595  	// A larger mb is a smaller mask.
  1596  	if (encoded>>8)&0xFF < mb {
  1597  		encoded = (encoded &^ 0xFF00) | mb<<8
  1598  	}
  1599  	// The rotate is expected to be 0.
  1600  	if (encoded & 0xFF0000) != 0 {
  1601  		panic("non-zero rotate")
  1602  	}
  1603  	return encoded | r<<16
  1604  }
  1605  
  1606  // DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask.  The values returned as
  1607  // mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
  1608  func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
  1609  	auxint := uint64(sauxint)
  1610  	rotate = int64((auxint >> 16) & 0xFF)
  1611  	mb = int64((auxint >> 8) & 0xFF)
  1612  	me = int64((auxint >> 0) & 0xFF)
  1613  	nbits := int64((auxint >> 24) & 0xFF)
  1614  	mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
  1615  	if mb > me {
  1616  		mask = ^mask
  1617  	}
  1618  	if nbits == 32 {
  1619  		mask = uint64(uint32(mask))
  1620  	}
  1621  
  1622  	// Fix up ME to match the ISA definition.  The second argument to MASK(..,me)
  1623  	// is inclusive.
  1624  	me = (me - 1) & (nbits - 1)
  1625  	return
  1626  }
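
        // examplePPC64RotateMaskRoundTrip is a hypothetical, illustrative sketch: the
        // auxint built by encodePPC64RotateMask decodes back to the same rotate and
        // mask, with me returned in its inclusive (ISA) form.
        func examplePPC64RotateMaskRoundTrip() bool {
        	enc := encodePPC64RotateMask(1, 0x00000FF0, 32) // rotate=1, mask covers bits 4..11
        	r, mb, me, mask := DecodePPC64RotateMask(enc)
        	return r == 1 && mb == 20 && me == 27 && mask == 0x00000FF0
        }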
  1627  
  1628  // This verifies that the mask is a set of
  1629  // consecutive bits including the least
  1630  // significant bit.
  1631  func isPPC64ValidShiftMask(v int64) bool {
  1632  	if (v != 0) && ((v+1)&v) == 0 {
  1633  		return true
  1634  	}
  1635  	return false
  1636  }
  1637  
  1638  func getPPC64ShiftMaskLength(v int64) int64 {
  1639  	return int64(bits.Len64(uint64(v)))
  1640  }
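
        // examplePPC64ShiftMask is a hypothetical, illustrative sketch: a valid shift
        // mask is a run of ones starting at the least significant bit, and its length
        // is then simply bits.Len64 of the mask.
        func examplePPC64ShiftMask() bool {
        	return isPPC64ValidShiftMask(0x7F) && getPPC64ShiftMaskLength(0x7F) == 7 &&
        		!isPPC64ValidShiftMask(0x7E) // does not include the least significant bit
        }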
  1641  
  1642  // Decompose a shift right into an equivalent rotate/mask,
  1643  // and return mask & m.
  1644  func mergePPC64RShiftMask(m, s, nbits int64) int64 {
  1645  	smask := uint64((1<<uint(nbits))-1) >> uint(s)
  1646  	return m & int64(smask)
  1647  }
  1648  
  1649  // Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
  1650  func mergePPC64AndSrwi(m, s int64) int64 {
  1651  	mask := mergePPC64RShiftMask(m, s, 32)
  1652  	if !isPPC64WordRotateMask(mask) {
  1653  		return 0
  1654  	}
  1655  	return encodePPC64RotateMask((32-s)&31, mask, 32)
  1656  }
  1657  
  1658  // Combine (ANDconst [m] (SRDconst [s])) into (RLWINM [y]) or return 0
  1659  func mergePPC64AndSrdi(m, s int64) int64 {
  1660  	mask := mergePPC64RShiftMask(m, s, 64)
  1661  
  1662  	// Verify the rotate and mask result only uses the lower 32 bits.
  1663  	rv := bits.RotateLeft64(0xFFFFFFFF00000000, -int(s))
  1664  	if rv&uint64(mask) != 0 {
  1665  		return 0
  1666  	}
  1667  	if !isPPC64WordRotateMaskNonWrapping(mask) {
  1668  		return 0
  1669  	}
  1670  	return encodePPC64RotateMask((32-s)&31, mask, 32)
  1671  }
  1672  
  1673  // Combine (ANDconst [m] (SLDconst [s])) into (RLWINM [y]) or return 0
  1674  func mergePPC64AndSldi(m, s int64) int64 {
  1675  	mask := -1 << s & m
  1676  
  1677  	// Verify the rotate and mask result only uses the lower 32 bits.
  1678  	rv := bits.RotateLeft64(0xFFFFFFFF00000000, int(s))
  1679  	if rv&uint64(mask) != 0 {
  1680  		return 0
  1681  	}
  1682  	if !isPPC64WordRotateMaskNonWrapping(mask) {
  1683  		return 0
  1684  	}
  1685  	return encodePPC64RotateMask(s&31, mask, 32)
  1686  }
  1687  
  1688  // Test if a word shift right feeding into a CLRLSLDI can be merged into RLWINM.
  1689  // Return the encoded RLWINM constant, or 0 if they cannot be merged.
  1690  func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
  1691  	mask_1 := uint64(0xFFFFFFFF >> uint(srw))
  1692  	// For CLRLSLDI, it's more convenient to think of it as masking off the leftmost bits, then rotating left.
  1693  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1694  
  1695  	// Rewrite mask to apply after the final left shift.
  1696  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
  1697  
  1698  	r_1 := 32 - srw
  1699  	r_2 := GetPPC64Shiftsh(sld)
  1700  	r_3 := (r_1 + r_2) & 31 // This can wrap.
  1701  
  1702  	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
  1703  		return 0
  1704  	}
  1705  	return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
  1706  }
  1707  
  1708  // Test if a doubleword shift right feeding into a CLRLSLDI can be merged into RLWINM.
  1709  // Return the encoded RLWINM constant, or 0 if they cannot be merged.
  1710  func mergePPC64ClrlsldiSrd(sld, srd int64) int64 {
  1711  	mask_1 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(srd)
  1712  	// For CLRLSLDI, it's more convenient to think of it as masking off the leftmost bits, then rotating left.
  1713  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1714  
  1715  	// Rewrite mask to apply after the final left shift.
  1716  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
  1717  
  1718  	r_1 := 64 - srd
  1719  	r_2 := GetPPC64Shiftsh(sld)
  1720  	r_3 := (r_1 + r_2) & 63 // This can wrap.
  1721  
  1722  	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
  1723  		return 0
  1724  	}
  1725  	// This combine only works when selecting and shifting the lower 32 bits.
  1726  	v1 := bits.RotateLeft64(0xFFFFFFFF00000000, int(r_3))
  1727  	if v1&mask_3 != 0 {
  1728  		return 0
  1729  	}
  1730  	return encodePPC64RotateMask(int64(r_3&31), int64(mask_3), 32)
  1731  }
  1732  
  1733  // Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM.  Return
  1734  // the encoded RLWINM constant, or 0 if they cannot be merged.
  1735  func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
  1736  	r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
  1737  	// For CLRLSLDI, it's more convenient to think of it as masking off the leftmost bits, then rotating left.
  1738  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1739  
  1740  	// combine the masks, and adjust for the final left shift.
  1741  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
  1742  	r_2 := GetPPC64Shiftsh(int64(sld))
  1743  	r_3 := (r_1 + r_2) & 31 // This can wrap.
  1744  
  1745  	// Verify the result is still a valid bitmask of <= 32 bits.
  1746  	if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
  1747  		return 0
  1748  	}
  1749  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1750  }
  1751  
  1752  // Test if RLWINM feeding into an ANDconst can be merged. Return the encoded RLWINM constant,
  1753  // or 0 if they cannot be merged.
  1754  func mergePPC64AndRlwinm(mask uint32, rlw int64) int64 {
  1755  	r, _, _, mask_rlw := DecodePPC64RotateMask(rlw)
  1756  	mask_out := (mask_rlw & uint64(mask))
  1757  
  1758  	// Verify the result is still a valid bitmask of <= 32 bits.
  1759  	if !isPPC64WordRotateMask(int64(mask_out)) {
  1760  		return 0
  1761  	}
  1762  	return encodePPC64RotateMask(r, int64(mask_out), 32)
  1763  }
  1764  
  1765  // Test if RLWINM opcode rlw clears the upper 32 bits of the
  1766  // result. Return rlw if it does, 0 otherwise.
  1767  func mergePPC64MovwzregRlwinm(rlw int64) int64 {
  1768  	_, mb, me, _ := DecodePPC64RotateMask(rlw)
  1769  	if mb > me {
  1770  		return 0
  1771  	}
  1772  	return rlw
  1773  }
  1774  
  1775  // Test if a RLWINM feeding into an AND can be merged. Return the encoded RLWINM constant,
  1776  // or 0 if they cannot be merged.
  1777  func mergePPC64RlwinmAnd(rlw int64, mask uint32) int64 {
  1778  	r, _, _, mask_rlw := DecodePPC64RotateMask(rlw)
  1779  
  1780  	// Rotate the input mask, combine with the rlwnm mask, and test if it is still a valid rlwinm mask.
  1781  	r_mask := bits.RotateLeft32(mask, int(r))
  1782  
  1783  	mask_out := (mask_rlw & uint64(r_mask))
  1784  
  1785  	// Verify the result is still a valid bitmask of <= 32 bits.
  1786  	if !isPPC64WordRotateMask(int64(mask_out)) {
  1787  		return 0
  1788  	}
  1789  	return encodePPC64RotateMask(r, int64(mask_out), 32)
  1790  }
  1791  
  1792  // Test if a RLWINM feeding into an SLDconst can be merged. Return the encoded RLWINM constant,
  1793  // or 0 if they cannot be merged.
  1794  func mergePPC64SldiRlwinm(sldi, rlw int64) int64 {
  1795  	r_1, mb, me, mask_1 := DecodePPC64RotateMask(rlw)
  1796  	if mb > me || mb < sldi {
  1797  		// Wrapping masks cannot be merged as the upper 32 bits are effectively undefined in this case.
  1798  		// Likewise, if mb is less than the shift amount, it cannot be merged.
  1799  		return 0
  1800  	}
  1801  	// combine the masks, and adjust for the final left shift.
  1802  	mask_3 := mask_1 << sldi
  1803  	r_3 := (r_1 + sldi) & 31 // This can wrap.
  1804  
  1805  	// Verify the result is still a valid bitmask of <= 32 bits.
  1806  	if uint64(uint32(mask_3)) != mask_3 {
  1807  		return 0
  1808  	}
  1809  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1810  }
  1811  
  1812  // Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
  1813  // or return 0 if they cannot be combined.
  1814  func mergePPC64SldiSrw(sld, srw int64) int64 {
  1815  	if sld > srw || srw >= 32 {
  1816  		return 0
  1817  	}
  1818  	mask_r := uint32(0xFFFFFFFF) >> uint(srw)
  1819  	mask_l := uint32(0xFFFFFFFF) >> uint(sld)
  1820  	mask := (mask_r & mask_l) << uint(sld)
  1821  	return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
  1822  }
  1823  
  1824  // Convert a PPC64 opcode from the Op to OpCC form. This converts (op x y)
  1825  // to (Select0 (opCC x y)) without having to explicitly fix up every user
  1826  // of op.
  1827  //
  1828  // E.g. consider the case:
  1829  // a = (ADD x y)
  1830  // b = (CMPconst [0] a)
  1831  // c = (OR a z)
  1832  //
  1833  // A rule like (CMPconst [0] (ADD x y)) => (CMPconst [0] (Select0 (ADDCC x y)))
  1834  // would produce:
  1835  // a  = (ADD x y)
  1836  // a' = (ADDCC x y)
  1837  // a'' = (Select0 a')
  1838  // b  = (CMPconst [0] a'')
  1839  // c  = (OR a z)
  1840  //
  1841  // which makes it impossible to rewrite the second user. Instead the result
  1842  // of this conversion is:
  1843  // a' = (ADDCC x y)
  1844  // a  = (Select0 a')
  1845  // b  = (CMPconst [0] a)
  1846  // c  = (OR a z)
  1847  //
  1848  // Which makes it trivial to rewrite b using a lowering rule.
  1849  func convertPPC64OpToOpCC(op *Value) *Value {
  1850  	ccOpMap := map[Op]Op{
  1851  		OpPPC64ADD:      OpPPC64ADDCC,
  1852  		OpPPC64ADDconst: OpPPC64ADDCCconst,
  1853  		OpPPC64AND:      OpPPC64ANDCC,
  1854  		OpPPC64ANDN:     OpPPC64ANDNCC,
  1855  		OpPPC64ANDconst: OpPPC64ANDCCconst,
  1856  		OpPPC64CNTLZD:   OpPPC64CNTLZDCC,
  1857  		OpPPC64MULHDU:   OpPPC64MULHDUCC,
  1858  		OpPPC64NEG:      OpPPC64NEGCC,
  1859  		OpPPC64NOR:      OpPPC64NORCC,
  1860  		OpPPC64OR:       OpPPC64ORCC,
  1861  		OpPPC64RLDICL:   OpPPC64RLDICLCC,
  1862  		OpPPC64SUB:      OpPPC64SUBCC,
  1863  		OpPPC64XOR:      OpPPC64XORCC,
  1864  	}
  1865  	b := op.Block
  1866  	opCC := b.NewValue0I(op.Pos, ccOpMap[op.Op], types.NewTuple(op.Type, types.TypeFlags), op.AuxInt)
  1867  	opCC.AddArgs(op.Args...)
  1868  	op.reset(OpSelect0)
  1869  	op.AddArgs(opCC)
  1870  	return op
  1871  }
  1872  
  1873  // Try converting a RLDICL to ANDCC. If successful, return the mask; otherwise, return 0.
  1874  func convertPPC64RldiclAndccconst(sauxint int64) int64 {
  1875  	r, _, _, mask := DecodePPC64RotateMask(sauxint)
  1876  	if r != 0 || mask&0xFFFF != mask {
  1877  		return 0
  1878  	}
  1879  	return int64(mask)
  1880  }
  1881  
  1882  // Convenience function to rotate a 32 bit constant value by another constant.
  1883  func rotateLeft32(v, rotate int64) int64 {
  1884  	return int64(bits.RotateLeft32(uint32(v), int(rotate)))
  1885  }
  1886  
  1887  func rotateRight64(v, rotate int64) int64 {
  1888  	return int64(bits.RotateLeft64(uint64(v), int(-rotate)))
  1889  }
  1890  
  1891  // encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
  1892  func armBFAuxInt(lsb, width int64) arm64BitField {
  1893  	if lsb < 0 || lsb > 63 {
  1894  		panic("ARM(64) bit field lsb constant out of range")
  1895  	}
  1896  	if width < 1 || lsb+width > 64 {
  1897  		panic("ARM(64) bit field width constant out of range")
  1898  	}
  1899  	return arm64BitField(width | lsb<<8)
  1900  }
  1901  
  1902  // returns the lsb part of the auxInt field of arm64 bitfield ops.
  1903  func (bfc arm64BitField) lsb() int64 {
  1904  	return int64(uint64(bfc) >> 8)
  1905  }
  1906  
  1907  // returns the width part of the auxInt field of arm64 bitfield ops.
  1908  func (bfc arm64BitField) width() int64 {
  1909  	return int64(bfc) & 0xff
  1910  }
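
        // exampleARM64BitField is a hypothetical, illustrative sketch: the bitfield
        // auxint packs the width in the low byte and the lsb above it, as the
        // accessors above show.
        func exampleARM64BitField() bool {
        	bfc := armBFAuxInt(8, 16) // lsb=8, width=16
        	return bfc.lsb() == 8 && bfc.width() == 16
        }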
  1911  
  1912  // checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
  1913  func isARM64BFMask(lsb, mask, rshift int64) bool {
  1914  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1915  	return shiftedMask != 0 && isPowerOfTwo(shiftedMask+1) && nto(shiftedMask)+lsb < 64
  1916  }
  1917  
  1918  // returns the bitfield width of mask >> rshift for arm64 bitfield ops.
  1919  func arm64BFWidth(mask, rshift int64) int64 {
  1920  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1921  	if shiftedMask == 0 {
  1922  		panic("ARM64 BF mask is zero")
  1923  	}
  1924  	return nto(shiftedMask)
  1925  }
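
        // exampleARM64BFMask is a hypothetical, illustrative sketch: 0xFF0 shifted
        // right by 4 is 0xFF, a contiguous run of 8 ones starting at bit 0, so it is
        // a valid bitfield mask as long as lsb keeps the field within 64 bits.
        func exampleARM64BFMask() bool {
        	return isARM64BFMask(8, 0xFF0, 4) && arm64BFWidth(0xFF0, 4) == 8
        }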
  1926  
  1927  // encodes condition code and NZCV flags into auxint.
  1928  func arm64ConditionalParamsAuxInt(cond Op, nzcv uint8) arm64ConditionalParams {
  1929  	if cond < OpARM64Equal || cond > OpARM64GreaterEqualU {
  1930  		panic("Wrong conditional operation")
  1931  	}
  1932  	if nzcv&0x0f != nzcv {
  1933  		panic("Wrong value of NZCV flag")
  1934  	}
  1935  	return arm64ConditionalParams{cond, nzcv, 0, false}
  1936  }
  1937  
  1938  // encodes condition code, NZCV flags and constant value into auxint.
  1939  func arm64ConditionalParamsAuxIntWithValue(cond Op, nzcv uint8, value uint8) arm64ConditionalParams {
  1940  	if value&0x1f != value {
  1941  		panic("Wrong value of constant")
  1942  	}
  1943  	params := arm64ConditionalParamsAuxInt(cond, nzcv)
  1944  	params.constValue = value
  1945  	params.ind = true
  1946  	return params
  1947  }
  1948  
  1949  // extracts condition code from auxint.
  1950  func (condParams arm64ConditionalParams) Cond() Op {
  1951  	return condParams.cond
  1952  }
  1953  
  1954  // extracts NZCV flags from auxint.
  1955  func (condParams arm64ConditionalParams) Nzcv() int64 {
  1956  	return int64(condParams.nzcv)
  1957  }
  1958  
  1959  // extracts constant value from auxint if present.
  1960  func (condParams arm64ConditionalParams) ConstValue() (int64, bool) {
  1961  	return int64(condParams.constValue), condParams.ind
  1962  }
  1963  
  1964  // registerizable reports whether typ is a primitive type that fits in
  1965  // a register. It assumes float64 values will always fit into registers
  1966  // even if that isn't strictly true.
  1967  func registerizable(b *Block, typ *types.Type) bool {
  1968  	if typ.IsPtrShaped() || typ.IsFloat() || typ.IsBoolean() {
  1969  		return true
  1970  	}
  1971  	if typ.IsInteger() {
  1972  		return typ.Size() <= b.Func.Config.RegSize
  1973  	}
  1974  	return false
  1975  }
  1976  
  1977  // needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
  1978  func needRaceCleanup(sym *AuxCall, v *Value) bool {
  1979  	f := v.Block.Func
  1980  	if !f.Config.Race {
  1981  		return false
  1982  	}
  1983  	if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
  1984  		return false
  1985  	}
  1986  	for _, b := range f.Blocks {
  1987  		for _, v := range b.Values {
  1988  			switch v.Op {
  1989  			case OpStaticCall, OpStaticLECall:
  1990  				// The check for racefuncenter will also encounter racefuncexit and vice versa.
  1991  				// Allow those, along with calls to panic*.
  1992  				s := v.Aux.(*AuxCall).Fn.String()
  1993  				switch s {
  1994  				case "runtime.racefuncenter", "runtime.racefuncexit",
  1995  					"runtime.panicdivide", "runtime.panicwrap",
  1996  					"runtime.panicshift":
  1997  					continue
  1998  				}
  1999  				// If we encountered any call, we need to keep racefunc*,
  2000  				// for accurate stacktraces.
  2001  				return false
  2002  			case OpPanicBounds, OpPanicExtend:
  2003  				// Note: these are panic generators that are ok (like the static calls above).
  2004  			case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
  2005  				// We must keep the race functions if there are any other call types.
  2006  				return false
  2007  			}
  2008  		}
  2009  	}
  2010  	if isSameCall(sym, "runtime.racefuncenter") {
  2011  		// TODO REGISTER ABI this needs to be cleaned up.
  2012  		// If we're removing racefuncenter, remove its argument as well.
  2013  		if v.Args[0].Op != OpStore {
  2014  			if v.Op == OpStaticLECall {
  2015  				// there is no store, yet.
  2016  				return true
  2017  			}
  2018  			return false
  2019  		}
  2020  		mem := v.Args[0].Args[2]
  2021  		v.Args[0].reset(OpCopy)
  2022  		v.Args[0].AddArg(mem)
  2023  	}
  2024  	return true
  2025  }
  2026  
  2027  // symIsRO reports whether sym is a read-only global.
  2028  func symIsRO(sym Sym) bool {
  2029  	lsym := sym.(*obj.LSym)
  2030  	return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
  2031  }
  2032  
  2033  // symIsROZero reports whether sym is a read-only global whose data contains all zeros.
  2034  func symIsROZero(sym Sym) bool {
  2035  	lsym := sym.(*obj.LSym)
  2036  	if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
  2037  		return false
  2038  	}
  2039  	for _, b := range lsym.P {
  2040  		if b != 0 {
  2041  			return false
  2042  		}
  2043  	}
  2044  	return true
  2045  }
  2046  
  2047  // isFixedLoad reports whether the load can be resolved to a fixed address or constant,
  2048  // and can be rewritten by rewriteFixedLoad.
  2049  func isFixedLoad(v *Value, sym Sym, off int64) bool {
  2050  	lsym := sym.(*obj.LSym)
  2051  	if (v.Type.IsPtrShaped() || v.Type.IsUintptr()) && lsym.Type == objabi.SRODATA {
  2052  		for _, r := range lsym.R {
  2053  			if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off && r.Add == 0 {
  2054  				return true
  2055  			}
  2056  		}
  2057  		return false
  2058  	}
  2059  
  2060  	if strings.HasPrefix(lsym.Name, "type:") {
  2061  		// Type symbols do not contain information about their fields, unlike the cases above.
  2062  		// Hand-implement field accesses.
  2063  		// TODO: can this be replaced with reflectdata.writeType and just use the code above?
  2064  
  2065  		t := (*lsym.Extra).(*obj.TypeInfo).Type.(*types.Type)
  2066  
  2067  		for _, f := range rttype.Type.Fields() {
  2068  			if f.Offset == off && copyCompatibleType(v.Type, f.Type) {
  2069  				switch f.Sym.Name {
  2070  				case "Size_", "PtrBytes", "Hash", "Kind_", "GCData":
  2071  					return true
  2072  				default:
  2073  					// fmt.Println("unknown field", f.Sym.Name)
  2074  					return false
  2075  				}
  2076  			}
  2077  		}
  2078  
  2079  		if t.IsPtr() && off == rttype.PtrType.OffsetOf("Elem") {
  2080  			return true
  2081  		}
  2082  
  2083  		return false
  2084  	}
  2085  
  2086  	return false
  2087  }
  2088  
  2089  // rewriteFixedLoad rewrites a load to a fixed address or constant, if isFixedLoad returns true.
  2090  func rewriteFixedLoad(v *Value, sym Sym, sb *Value, off int64) *Value {
  2091  	b := v.Block
  2092  	f := b.Func
  2093  
  2094  	lsym := sym.(*obj.LSym)
  2095  	if (v.Type.IsPtrShaped() || v.Type.IsUintptr()) && lsym.Type == objabi.SRODATA {
  2096  		for _, r := range lsym.R {
  2097  			if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off && r.Add == 0 {
  2098  				if strings.HasPrefix(r.Sym.Name, "type:") {
  2099  					// In case we're loading a type out of a dictionary, we need to record
  2100  					// that the containing function might put that type in an interface.
  2101  					// That information is currently recorded in relocations in the dictionary,
  2102  					// but if we perform this load at compile time then the dictionary
  2103  					// might be dead.
  2104  					reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
  2105  				} else if strings.HasPrefix(r.Sym.Name, "go:itab") {
  2106  					// Same, but if we're using an itab we need to record that the
  2107  					// itab._type might be put in an interface.
  2108  					reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
  2109  				}
  2110  				v.reset(OpAddr)
  2111  				v.Aux = symToAux(r.Sym)
  2112  				v.AddArg(sb)
  2113  				return v
  2114  			}
  2115  		}
  2116  		base.Fatalf("fixedLoad data not known for %s:%d", sym, off)
  2117  	}
  2118  
  2119  	if strings.HasPrefix(lsym.Name, "type:") {
  2120  		// Type symbols do not contain information about their fields, unlike the cases above.
  2121  		// Hand-implement field accesses.
  2122  		// TODO: can this be replaced with reflectdata.writeType and just use the code above?
  2123  
  2124  		t := (*lsym.Extra).(*obj.TypeInfo).Type.(*types.Type)
  2125  
  2126  		ptrSizedOpConst := OpConst64
  2127  		if f.Config.PtrSize == 4 {
  2128  			ptrSizedOpConst = OpConst32
  2129  		}
  2130  
  2131  		for _, f := range rttype.Type.Fields() {
  2132  			if f.Offset == off && copyCompatibleType(v.Type, f.Type) {
  2133  				switch f.Sym.Name {
  2134  				case "Size_":
  2135  					v.reset(ptrSizedOpConst)
  2136  					v.AuxInt = int64(t.Size())
  2137  					return v
  2138  				case "PtrBytes":
  2139  					v.reset(ptrSizedOpConst)
  2140  					v.AuxInt = int64(types.PtrDataSize(t))
  2141  					return v
  2142  				case "Hash":
  2143  					v.reset(OpConst32)
  2144  					v.AuxInt = int64(types.TypeHash(t))
  2145  					return v
  2146  				case "Kind_":
  2147  					v.reset(OpConst8)
  2148  					v.AuxInt = int64(reflectdata.ABIKindOfType(t))
  2149  					return v
  2150  				case "GCData":
  2151  					gcdata, _ := reflectdata.GCSym(t, true)
  2152  					v.reset(OpAddr)
  2153  					v.Aux = symToAux(gcdata)
  2154  					v.AddArg(sb)
  2155  					return v
  2156  				default:
  2157  					base.Fatalf("unknown field %s for fixedLoad of %s at offset %d", f.Sym.Name, lsym.Name, off)
  2158  				}
  2159  			}
  2160  		}
  2161  
  2162  		if t.IsPtr() && off == rttype.PtrType.OffsetOf("Elem") {
  2163  			elemSym := reflectdata.TypeLinksym(t.Elem())
  2164  			reflectdata.MarkTypeSymUsedInInterface(elemSym, f.fe.Func().Linksym())
  2165  			v.reset(OpAddr)
  2166  			v.Aux = symToAux(elemSym)
  2167  			v.AddArg(sb)
  2168  			return v
  2169  		}
  2170  
  2171  		base.Fatalf("fixedLoad data not known for %s:%d", sym, off)
  2172  	}
  2173  
  2174  	base.Fatalf("fixedLoad data not known for %s:%d", sym, off)
  2175  	return nil
  2176  }
  2177  
  2178  // read8 reads one byte from the read-only global sym at offset off.
  2179  func read8(sym Sym, off int64) uint8 {
  2180  	lsym := sym.(*obj.LSym)
  2181  	if off >= int64(len(lsym.P)) || off < 0 {
  2182  		// Invalid index into the global sym.
  2183  		// This can happen in dead code, so we don't want to panic.
  2184  		// Just return any value, it will eventually get ignored.
  2185  		// See issue 29215.
  2186  		return 0
  2187  	}
  2188  	return lsym.P[off]
  2189  }
  2190  
  2191  // read16 reads two bytes from the read-only global sym at offset off.
  2192  func read16(sym Sym, off int64, byteorder binary.ByteOrder) uint16 {
  2193  	lsym := sym.(*obj.LSym)
  2194  	// lsym.P is written lazily.
  2195  	// Bytes requested after the end of lsym.P are 0.
  2196  	var src []byte
  2197  	if 0 <= off && off < int64(len(lsym.P)) {
  2198  		src = lsym.P[off:]
  2199  	}
  2200  	buf := make([]byte, 2)
  2201  	copy(buf, src)
  2202  	return byteorder.Uint16(buf)
  2203  }
  2204  
  2205  // read32 reads four bytes from the read-only global sym at offset off.
  2206  func read32(sym Sym, off int64, byteorder binary.ByteOrder) uint32 {
  2207  	lsym := sym.(*obj.LSym)
  2208  	var src []byte
  2209  	if 0 <= off && off < int64(len(lsym.P)) {
  2210  		src = lsym.P[off:]
  2211  	}
  2212  	buf := make([]byte, 4)
  2213  	copy(buf, src)
  2214  	return byteorder.Uint32(buf)
  2215  }
  2216  
  2217  // read64 reads eight bytes from the read-only global sym at offset off.
  2218  func read64(sym Sym, off int64, byteorder binary.ByteOrder) uint64 {
  2219  	lsym := sym.(*obj.LSym)
  2220  	var src []byte
  2221  	if 0 <= off && off < int64(len(lsym.P)) {
  2222  		src = lsym.P[off:]
  2223  	}
  2224  	buf := make([]byte, 8)
  2225  	copy(buf, src)
  2226  	return byteorder.Uint64(buf)
  2227  }
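
        // exampleReadRO is a hypothetical, illustrative sketch: bytes requested past
        // len(lsym.P) are zero-filled, so short symbol data still decodes cleanly.
        func exampleReadRO() bool {
        	lsym := &obj.LSym{P: []byte{0x78, 0x56, 0x34, 0x12}}
        	return read32(lsym, 0, binary.LittleEndian) == 0x12345678 &&
        		read32(lsym, 2, binary.LittleEndian) == 0x1234 // bytes past len(P) read as 0
        }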
  2228  
  2229  // sequentialAddresses reports whether it can prove that x + n == y.
  2230  func sequentialAddresses(x, y *Value, n int64) bool {
  2231  	if x == y && n == 0 {
  2232  		return true
  2233  	}
  2234  	if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
  2235  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2236  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2237  		return true
  2238  	}
  2239  	if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
  2240  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2241  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2242  		return true
  2243  	}
  2244  	if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
  2245  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2246  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2247  		return true
  2248  	}
  2249  	if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
  2250  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2251  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2252  		return true
  2253  	}
  2254  	return false
  2255  }
  2256  
  2257  // flagConstant represents the result of a compile-time comparison.
  2258  // The sense of these flags does not necessarily represent the hardware's notion
  2259  // of a flags register - these are just a compile-time construct.
  2260  // We happen to match the semantics to those of arm/arm64.
  2261  // Note that these semantics differ from x86: the carry flag has the opposite
  2262  // sense on a subtraction!
  2263  //
  2264  //	On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
  2265  //	On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
  2266  //	 (because it does x + ^y + C).
  2267  //
  2268  // See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
  2269  type flagConstant uint8
  2270  
  2271  // N reports whether the result of an operation is negative (high bit set).
  2272  func (fc flagConstant) N() bool {
  2273  	return fc&1 != 0
  2274  }
  2275  
  2276  // Z reports whether the result of an operation is 0.
  2277  func (fc flagConstant) Z() bool {
  2278  	return fc&2 != 0
  2279  }
  2280  
  2281  // C reports whether an unsigned add overflowed (carry), or an
  2282  // unsigned subtract did not underflow (borrow).
  2283  func (fc flagConstant) C() bool {
  2284  	return fc&4 != 0
  2285  }
  2286  
  2287  // V reports whether a signed operation overflowed or underflowed.
  2288  func (fc flagConstant) V() bool {
  2289  	return fc&8 != 0
  2290  }
  2291  
  2292  func (fc flagConstant) eq() bool {
  2293  	return fc.Z()
  2294  }
  2295  func (fc flagConstant) ne() bool {
  2296  	return !fc.Z()
  2297  }
  2298  func (fc flagConstant) lt() bool {
  2299  	return fc.N() != fc.V()
  2300  }
  2301  func (fc flagConstant) le() bool {
  2302  	return fc.Z() || fc.lt()
  2303  }
  2304  func (fc flagConstant) gt() bool {
  2305  	return !fc.Z() && fc.ge()
  2306  }
  2307  func (fc flagConstant) ge() bool {
  2308  	return fc.N() == fc.V()
  2309  }
  2310  func (fc flagConstant) ult() bool {
  2311  	return !fc.C()
  2312  }
  2313  func (fc flagConstant) ule() bool {
  2314  	return fc.Z() || fc.ult()
  2315  }
  2316  func (fc flagConstant) ugt() bool {
  2317  	return !fc.Z() && fc.uge()
  2318  }
  2319  func (fc flagConstant) uge() bool {
  2320  	return fc.C()
  2321  }
  2322  
  2323  func (fc flagConstant) ltNoov() bool {
  2324  	return fc.lt() && !fc.V()
  2325  }
  2326  func (fc flagConstant) leNoov() bool {
  2327  	return fc.le() && !fc.V()
  2328  }
  2329  func (fc flagConstant) gtNoov() bool {
  2330  	return fc.gt() && !fc.V()
  2331  }
  2332  func (fc flagConstant) geNoov() bool {
  2333  	return fc.ge() && !fc.V()
  2334  }
  2335  
  2336  func (fc flagConstant) String() string {
  2337  	return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
  2338  }
  2339  
  2340  type flagConstantBuilder struct {
  2341  	N bool
  2342  	Z bool
  2343  	C bool
  2344  	V bool
  2345  }
  2346  
  2347  func (fcs flagConstantBuilder) encode() flagConstant {
  2348  	var fc flagConstant
  2349  	if fcs.N {
  2350  		fc |= 1
  2351  	}
  2352  	if fcs.Z {
  2353  		fc |= 2
  2354  	}
  2355  	if fcs.C {
  2356  		fc |= 4
  2357  	}
  2358  	if fcs.V {
  2359  		fc |= 8
  2360  	}
  2361  	return fc
  2362  }
  2363  
  2364  // Note: addFlags(x,y) != subFlags(x,-y) in some situations:
  2365  //  - the results of the C flag are different
  2366  //  - the results of the V flag when y==minint are different
  2367  
  2368  // addFlags64 returns the flags that would be set from computing x+y.
  2369  func addFlags64(x, y int64) flagConstant {
  2370  	var fcb flagConstantBuilder
  2371  	fcb.Z = x+y == 0
  2372  	fcb.N = x+y < 0
  2373  	fcb.C = uint64(x+y) < uint64(x)
  2374  	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
  2375  	return fcb.encode()
  2376  }
  2377  
  2378  // subFlags64 returns the flags that would be set from computing x-y.
  2379  func subFlags64(x, y int64) flagConstant {
  2380  	var fcb flagConstantBuilder
  2381  	fcb.Z = x-y == 0
  2382  	fcb.N = x-y < 0
  2383  	fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
  2384  	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
  2385  	return fcb.encode()
  2386  }
  2387  
  2388  // addFlags32 returns the flags that would be set from computing x+y.
  2389  func addFlags32(x, y int32) flagConstant {
  2390  	var fcb flagConstantBuilder
  2391  	fcb.Z = x+y == 0
  2392  	fcb.N = x+y < 0
  2393  	fcb.C = uint32(x+y) < uint32(x)
  2394  	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
  2395  	return fcb.encode()
  2396  }
  2397  
  2398  // subFlags32 returns the flags that would be set from computing x-y.
  2399  func subFlags32(x, y int32) flagConstant {
  2400  	var fcb flagConstantBuilder
  2401  	fcb.Z = x-y == 0
  2402  	fcb.N = x-y < 0
  2403  	fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
  2404  	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
  2405  	return fcb.encode()
  2406  }
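
        // exampleSubFlags is a hypothetical, illustrative sketch: for the subtraction
        // 3 - 5 the borrow case applies, so C is clear (arm convention) and N is set,
        // and both the signed and unsigned less-than predicates report true.
        func exampleSubFlags() bool {
        	fc := subFlags32(3, 5)
        	return fc.N() && !fc.Z() && !fc.C() && !fc.V() && fc.lt() && fc.ult()
        }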
  2407  
  2408  // logicFlags64 returns flags set to the sign/zeroness of x.
  2409  // C and V are set to false.
  2410  func logicFlags64(x int64) flagConstant {
  2411  	var fcb flagConstantBuilder
  2412  	fcb.Z = x == 0
  2413  	fcb.N = x < 0
  2414  	return fcb.encode()
  2415  }
  2416  
  2417  // logicFlags32 returns flags set to the sign/zeroness of x.
  2418  // C and V are set to false.
  2419  func logicFlags32(x int32) flagConstant {
  2420  	var fcb flagConstantBuilder
  2421  	fcb.Z = x == 0
  2422  	fcb.N = x < 0
  2423  	return fcb.encode()
  2424  }
  2425  
  2426  func makeJumpTableSym(b *Block) *obj.LSym {
  2427  	s := base.Ctxt.Lookup(fmt.Sprintf("%s.jump%d", b.Func.fe.Func().LSym.Name, b.ID))
  2428  	// The jump table symbol is accessed only from the function symbol.
  2429  	s.Set(obj.AttrStatic, true)
  2430  	return s
  2431  }
  2432  
  2433  // canRotate reports whether the architecture supports
  2434  // rotates of integer registers with the given number of bits.
  2435  func canRotate(c *Config, bits int64) bool {
  2436  	if bits > c.PtrSize*8 {
  2437  		// Don't rewrite to rotates bigger than the machine word.
  2438  		return false
  2439  	}
  2440  	switch c.arch {
  2441  	case "386", "amd64", "arm64", "loong64", "riscv64":
  2442  		return true
  2443  	case "arm", "s390x", "ppc64", "ppc64le", "wasm":
  2444  		return bits >= 32
  2445  	default:
  2446  		return false
  2447  	}
  2448  }
  2449  
  2450  // isARM64bitcon reports whether a constant can be encoded into a logical instruction.
  2451  func isARM64bitcon(x uint64) bool {
  2452  	if x == 1<<64-1 || x == 0 {
  2453  		return false
  2454  	}
  2455  	// determine the period and sign-extend a unit to 64 bits
  2456  	switch {
  2457  	case x != x>>32|x<<32:
  2458  		// period is 64
  2459  		// nothing to do
  2460  	case x != x>>16|x<<48:
  2461  		// period is 32
  2462  		x = uint64(int64(int32(x)))
  2463  	case x != x>>8|x<<56:
  2464  		// period is 16
  2465  		x = uint64(int64(int16(x)))
  2466  	case x != x>>4|x<<60:
  2467  		// period is 8
  2468  		x = uint64(int64(int8(x)))
  2469  	default:
  2470  		// period is 4 or 2, always true
  2471  		// 0001, 0010, 0100, 1000 -- 0001 rotate
  2472  		// 0011, 0110, 1100, 1001 -- 0011 rotate
  2473  		// 0111, 1011, 1101, 1110 -- 0111 rotate
  2474  		// 0101, 1010             -- 01   rotate, repeat
  2475  		return true
  2476  	}
  2477  	return sequenceOfOnes(x) || sequenceOfOnes(^x)
  2478  }
  2479  
  2480  // sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros.
  2481  func sequenceOfOnes(x uint64) bool {
  2482  	y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2
  2483  	y += x
  2484  	return (y-1)&y == 0
  2485  }
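
        // exampleARM64Bitcon is a hypothetical, illustrative sketch: a value that
        // repeats every 16 bits and reduces to a single run of ones is encodable,
        // while 0xF1 (two separate runs of ones) is not.
        func exampleARM64Bitcon() bool {
        	return isARM64bitcon(0x00F000F000F000F0) && !isARM64bitcon(0xF1)
        }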
  2486  
  2487  // isARM64addcon reports whether x can be encoded as the immediate value in an ADD or SUB instruction.
  2488  func isARM64addcon(v int64) bool {
  2489  	/* uimm12 or uimm24? */
  2490  	if v < 0 {
  2491  		return false
  2492  	}
  2493  	if (v & 0xFFF) == 0 {
  2494  		v >>= 12
  2495  	}
  2496  	return v <= 0xFFF
  2497  }
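
        // exampleARM64Addcon is a hypothetical, illustrative sketch: an ADD/SUB
        // immediate is either a 12-bit value or a 12-bit value shifted left by 12.
        func exampleARM64Addcon() bool {
        	return isARM64addcon(0xFFF) && isARM64addcon(0x123000) && !isARM64addcon(0x1001)
        }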
  2498  
  2499  // setPos sets the position of v to pos, then returns true.
  2500  // Useful for setting the result of a rewrite's position to
  2501  // something other than the default.
  2502  func setPos(v *Value, pos src.XPos) bool {
  2503  	v.Pos = pos
  2504  	return true
  2505  }
  2506  
  2507  // isNonNegative reports whether v is known to be greater than or equal to zero.
  2508  // Note that this is pretty simplistic. The prove pass generates more detailed
  2509  // nonnegative information about values.
  2510  func isNonNegative(v *Value) bool {
  2511  	if !v.Type.IsInteger() {
  2512  		v.Fatalf("isNonNegative bad type: %v", v.Type)
  2513  	}
  2514  	// TODO: return true if !v.Type.IsSigned()
  2515  	// SSA isn't type-safe enough to do that now (issue 37753).
  2516  	// The checks below depend only on the pattern of bits.
  2517  
  2518  	switch v.Op {
  2519  	case OpConst64:
  2520  		return v.AuxInt >= 0
  2521  
  2522  	case OpConst32:
  2523  		return int32(v.AuxInt) >= 0
  2524  
  2525  	case OpConst16:
  2526  		return int16(v.AuxInt) >= 0
  2527  
  2528  	case OpConst8:
  2529  		return int8(v.AuxInt) >= 0
  2530  
  2531  	case OpStringLen, OpSliceLen, OpSliceCap,
  2532  		OpZeroExt8to64, OpZeroExt16to64, OpZeroExt32to64,
  2533  		OpZeroExt8to32, OpZeroExt16to32, OpZeroExt8to16,
  2534  		OpCtz64, OpCtz32, OpCtz16, OpCtz8,
  2535  		OpCtz64NonZero, OpCtz32NonZero, OpCtz16NonZero, OpCtz8NonZero,
  2536  		OpBitLen64, OpBitLen32, OpBitLen16, OpBitLen8:
  2537  		return true
  2538  
  2539  	case OpRsh64Ux64, OpRsh32Ux64:
  2540  		by := v.Args[1]
  2541  		return by.Op == OpConst64 && by.AuxInt > 0
  2542  
  2543  	case OpRsh64x64, OpRsh32x64, OpRsh8x64, OpRsh16x64, OpRsh32x32, OpRsh64x32,
  2544  		OpSignExt32to64, OpSignExt16to64, OpSignExt8to64, OpSignExt16to32, OpSignExt8to32:
  2545  		return isNonNegative(v.Args[0])
  2546  
  2547  	case OpAnd64, OpAnd32, OpAnd16, OpAnd8:
  2548  		return isNonNegative(v.Args[0]) || isNonNegative(v.Args[1])
  2549  
  2550  	case OpMod64, OpMod32, OpMod16, OpMod8,
  2551  		OpDiv64, OpDiv32, OpDiv16, OpDiv8,
  2552  		OpOr64, OpOr32, OpOr16, OpOr8,
  2553  		OpXor64, OpXor32, OpXor16, OpXor8:
  2554  		return isNonNegative(v.Args[0]) && isNonNegative(v.Args[1])
  2555  
  2556  		// We could handle OpPhi here, but the improvements from doing
  2557  		// so are very minor, and it is neither simple nor cheap.
  2558  	}
  2559  	return false
  2560  }
  2561  
  2562  func rewriteStructLoad(v *Value) *Value {
  2563  	b := v.Block
  2564  	ptr := v.Args[0]
  2565  	mem := v.Args[1]
  2566  
  2567  	t := v.Type
  2568  	args := make([]*Value, t.NumFields())
  2569  	for i := range args {
  2570  		ft := t.FieldType(i)
  2571  		addr := b.NewValue1I(v.Pos, OpOffPtr, ft.PtrTo(), t.FieldOff(i), ptr)
  2572  		args[i] = b.NewValue2(v.Pos, OpLoad, ft, addr, mem)
  2573  	}
  2574  
  2575  	v.reset(OpStructMake)
  2576  	v.AddArgs(args...)
  2577  	return v
  2578  }
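
        // Illustrative sketch (hypothetical example; field offsets assume a 64-bit
        // target): for a struct { a, b int64 }, the rewrite above turns
        //
        //	(Load <struct{a,b int64}> ptr mem)
        //
        // into
        //
        //	(StructMake
        //		(Load <int64> (OffPtr <*int64> [0] ptr) mem)
        //		(Load <int64> (OffPtr <*int64> [8] ptr) mem))
        //
        // with each offset taken from t.FieldOff(i).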
  2579  
  2580  func rewriteStructStore(v *Value) *Value {
  2581  	b := v.Block
  2582  	dst := v.Args[0]
  2583  	x := v.Args[1]
  2584  	if x.Op != OpStructMake {
  2585  		base.Fatalf("invalid struct store: %v", x)
  2586  	}
  2587  	mem := v.Args[2]
  2588  
  2589  	t := x.Type
  2590  	for i, arg := range x.Args {
  2591  		ft := t.FieldType(i)
  2592  
  2593  		addr := b.NewValue1I(v.Pos, OpOffPtr, ft.PtrTo(), t.FieldOff(i), dst)
  2594  		mem = b.NewValue3A(v.Pos, OpStore, types.TypeMem, typeToAux(ft), addr, arg, mem)
  2595  	}
  2596  
  2597  	return mem
  2598  }
  2599  
  2600  // isDirectType reports whether v represents a type
  2601  // (a *runtime._type) whose value is stored directly in an
  2602  // interface (i.e., is pointer or pointer-like).
  2603  func isDirectType(v *Value) bool {
  2604  	return isDirectType1(v)
  2605  }
  2606  
  2607  // v is a type
  2608  func isDirectType1(v *Value) bool {
  2609  	switch v.Op {
  2610  	case OpITab:
  2611  		return isDirectType2(v.Args[0])
  2612  	case OpAddr:
  2613  		lsym := v.Aux.(*obj.LSym)
  2614  		if lsym.Extra == nil {
  2615  			return false
  2616  		}
  2617  		if ti, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
  2618  			return types.IsDirectIface(ti.Type.(*types.Type))
  2619  		}
  2620  	}
  2621  	return false
  2622  }
  2623  
  2624  // v is an empty interface
  2625  func isDirectType2(v *Value) bool {
  2626  	switch v.Op {
  2627  	case OpIMake:
  2628  		return isDirectType1(v.Args[0])
  2629  	}
  2630  	return false
  2631  }
  2632  
  2633  // isDirectIface reports whether v represents an itab
  2634  // (a *runtime._itab) for a type whose value is stored directly
  2635  // in an interface (i.e., is pointer or pointer-like).
  2636  func isDirectIface(v *Value) bool {
  2637  	return isDirectIface1(v, 9)
  2638  }
  2639  
  2640  // v is an itab
  2641  func isDirectIface1(v *Value, depth int) bool {
  2642  	if depth == 0 {
  2643  		return false
  2644  	}
  2645  	switch v.Op {
  2646  	case OpITab:
  2647  		return isDirectIface2(v.Args[0], depth-1)
  2648  	case OpAddr:
  2649  		lsym := v.Aux.(*obj.LSym)
  2650  		if lsym.Extra == nil {
  2651  			return false
  2652  		}
  2653  		if ii, ok := (*lsym.Extra).(*obj.ItabInfo); ok {
  2654  			return types.IsDirectIface(ii.Type.(*types.Type))
  2655  		}
  2656  	case OpConstNil:
  2657  		// We can treat this as direct, because if the itab is
  2658  		// nil, the data field must be nil also.
  2659  		return true
  2660  	}
  2661  	return false
  2662  }
  2663  
  2664  // v is an interface
  2665  func isDirectIface2(v *Value, depth int) bool {
  2666  	if depth == 0 {
  2667  		return false
  2668  	}
  2669  	switch v.Op {
  2670  	case OpIMake:
  2671  		return isDirectIface1(v.Args[0], depth-1)
  2672  	case OpPhi:
  2673  		for _, a := range v.Args {
  2674  			if !isDirectIface2(a, depth-1) {
  2675  				return false
  2676  			}
  2677  		}
  2678  		return true
  2679  	}
  2680  	return false
  2681  }
  2682  
  2683  func bitsAdd64(x, y, carry int64) (r struct{ sum, carry int64 }) {
  2684  	s, c := bits.Add64(uint64(x), uint64(y), uint64(carry))
  2685  	r.sum, r.carry = int64(s), int64(c)
  2686  	return
  2687  }
  2688  
  2689  func bitsMulU64(x, y int64) (r struct{ hi, lo int64 }) {
  2690  	hi, lo := bits.Mul64(uint64(x), uint64(y))
  2691  	r.hi, r.lo = int64(hi), int64(lo)
  2692  	return
  2693  }
  2694  func bitsMulU32(x, y int32) (r struct{ hi, lo int32 }) {
  2695  	hi, lo := bits.Mul32(uint32(x), uint32(y))
  2696  	r.hi, r.lo = int32(hi), int32(lo)
  2697  	return
  2698  }
  2699  
  2700  // flagify rewrites v which is (X ...) to (Select0 (Xflags ...)).
  2701  func flagify(v *Value) bool {
  2702  	var flagVersion Op
  2703  	switch v.Op {
  2704  	case OpAMD64ADDQconst:
  2705  		flagVersion = OpAMD64ADDQconstflags
  2706  	case OpAMD64ADDLconst:
  2707  		flagVersion = OpAMD64ADDLconstflags
  2708  	default:
  2709  		base.Fatalf("can't flagify op %s", v.Op)
  2710  	}
  2711  	inner := v.copyInto(v.Block)
  2712  	inner.Op = flagVersion
  2713  	inner.Type = types.NewTuple(v.Type, types.TypeFlags)
  2714  	v.reset(OpSelect0)
  2715  	v.AddArg(inner)
  2716  	return true
  2717  }
  2718  
  2719  // PanicBoundsC contains a constant for a bounds failure.
  2720  type PanicBoundsC struct {
  2721  	C int64
  2722  }
  2723  
  2724  // PanicBoundsCC contains 2 constants for a bounds failure.
  2725  type PanicBoundsCC struct {
  2726  	Cx int64
  2727  	Cy int64
  2728  }
  2729  
  2730  func (p PanicBoundsC) CanBeAnSSAAux() {
  2731  }
  2732  func (p PanicBoundsCC) CanBeAnSSAAux() {
  2733  }
  2734  
  2735  func auxToPanicBoundsC(i Aux) PanicBoundsC {
  2736  	return i.(PanicBoundsC)
  2737  }
  2738  func auxToPanicBoundsCC(i Aux) PanicBoundsCC {
  2739  	return i.(PanicBoundsCC)
  2740  }
  2741  func panicBoundsCToAux(p PanicBoundsC) Aux {
  2742  	return p
  2743  }
  2744  func panicBoundsCCToAux(p PanicBoundsCC) Aux {
  2745  	return p
  2746  }
  2747  
