rewrite.go

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import (
     8  	"cmd/compile/internal/base"
     9  	"cmd/compile/internal/logopt"
    10  	"cmd/compile/internal/reflectdata"
    11  	"cmd/compile/internal/types"
    12  	"cmd/internal/obj"
    13  	"cmd/internal/obj/s390x"
    14  	"cmd/internal/objabi"
    15  	"cmd/internal/src"
    16  	"encoding/binary"
    17  	"fmt"
    18  	"internal/buildcfg"
    19  	"io"
    20  	"math"
    21  	"math/bits"
    22  	"os"
    23  	"path/filepath"
    24  	"strings"
    25  )
    26  
    27  type deadValueChoice bool
    28  
    29  const (
    30  	leaveDeadValues  deadValueChoice = false
    31  	removeDeadValues                 = true
    32  )
    33  
    34  // applyRewrite runs the block rewriter rb and the value rewriter rv over every
        // block and value of f, repeating whole passes until one finishes without
        // any change. Each pass also bypasses OpCopy values that feed block controls
        // and value arguments, and tries to keep statement-boundary position marks
        // attached to a surviving value or block (through pendingLines) when the
        // value that carried them is replaced. After the last pass, values whose Op
        // is OpInvalid are freed and each block's value list is compacted.
        //
        // deadcode indicates whether rewrite should try to remove any values that become dead.
    35  func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
    36  	// repeat rewrites until we find no more rewrites
    37  	pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
    38  	pendingLines.clear()
    39  	debug := f.pass.debug
    40  	if debug > 1 {
    41  		fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
    42  	}
    43  	// if the number of rewrite iterations reaches itersLimit we will
    44  	// at that point turn on cycle detection. Instead of a fixed limit,
    45  	// size the limit according to func size to allow for cases such
    46  	// as the one in issue #66773.
    47  	itersLimit := f.NumBlocks()
    48  	if itersLimit < 20 {
    49  		itersLimit = 20
    50  	}
    51  	var iters int
        	// states holds the rewrite hashes seen so far; it is allocated only
        	// once cycle detection is switched on.
    52  	var states map[string]bool
    53  	for {
        		// change records rewrites made in this pass; deadChange records
        		// that an unused value was reset to OpInvalid.
    54  		change := false
    55  		deadChange := false
    56  		for _, b := range f.Blocks {
        			// b0 is a snapshot of b taken before rewriting; it is only used
        			// for the debug trace below.
    57  			var b0 *Block
    58  			if debug > 1 {
    59  				b0 = new(Block)
    60  				*b0 = *b
    61  				b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
    62  			}
        			// Replace control values that are OpCopy with the value they copy.
    63  			for i, c := range b.ControlValues() {
    64  				for c.Op == OpCopy {
    65  					c = c.Args[0]
    66  					b.ReplaceControl(i, c)
    67  				}
    68  			}
    69  			if rb(b) {
    70  				change = true
    71  				if debug > 1 {
    72  					fmt.Printf("rewriting %s  ->  %s\n", b0.LongString(), b.LongString())
    73  				}
    74  			}
    75  			for j, v := range b.Values {
        				// v0 is a snapshot of v taken before rewriting; it is only
        				// used for the debug trace below.
    76  				var v0 *Value
    77  				if debug > 1 {
    78  					v0 = new(Value)
    79  					*v0 = *v
    80  					v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
    81  				}
    82  				if v.Uses == 0 && v.removeable() {
    83  					if v.Op != OpInvalid && deadcode == removeDeadValues {
    84  						// Reset any values that are now unused, so that we decrement
    85  						// the use count of all of its arguments.
    86  						// Not quite a deadcode pass, because it does not handle cycles.
    87  						// But it should help Uses==1 rules to fire.
    88  						v.reset(OpInvalid)
    89  						deadChange = true
    90  					}
    91  					// No point rewriting values which aren't used.
    92  					continue
    93  				}
    94  
    95  				vchange := phielimValue(v)
    96  				if vchange && debug > 1 {
    97  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
    98  				}
    99  
   100  				// Eliminate copy inputs.
   101  				// If any copy input becomes unused, mark it
   102  				// as invalid and discard its argument. Repeat
   103  				// recursively on the discarded argument.
   104  				// This phase helps remove phantom "dead copy" uses
   105  				// of a value so that a x.Uses==1 rule condition
   106  				// fires reliably.
   107  				for i, a := range v.Args {
   108  					if a.Op != OpCopy {
   109  						continue
   110  					}
   111  					aa := copySource(a)
   112  					v.SetArg(i, aa)
   113  					// If a, a copy, has a line boundary indicator, attempt to find a new value
   114  					// to hold it.  The first candidate is the value that will replace a (aa),
   115  					// if it shares the same block and line and is eligible.
   116  					// The second option is v, which has a as an input.  Because aa is earlier in
   117  					// the data flow, it is the better choice.
   118  					if a.Pos.IsStmt() == src.PosIsStmt {
   119  						if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
   120  							aa.Pos = aa.Pos.WithIsStmt()
   121  						} else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
   122  							v.Pos = v.Pos.WithIsStmt()
   123  						} else {
   124  							// Record the lost line and look for a new home after all rewrites are complete.
   125  							// TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
   126  							// line to appear in more than one block, but only one block is stored, so if both end
   127  							// up here, then one will be lost.
   128  							pendingLines.set(a.Pos, int32(a.Block.ID))
   129  						}
   130  						a.Pos = a.Pos.WithNotStmt()
   131  					}
   132  					vchange = true
   133  					for a.Uses == 0 {
   134  						b := a.Args[0]
   135  						a.reset(OpInvalid)
   136  						a = b
   137  					}
   138  				}
   139  				if vchange && debug > 1 {
   140  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   141  				}
   142  
   143  				// apply rewrite function
   144  				if rv(v) {
   145  					vchange = true
   146  					// If value changed to a poor choice for a statement boundary, move the boundary
   147  					if v.Pos.IsStmt() == src.PosIsStmt {
   148  						if k := nextGoodStatementIndex(v, j, b); k != j {
   149  							v.Pos = v.Pos.WithNotStmt()
   150  							b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
   151  						}
   152  					}
   153  				}
   154  
   155  				change = change || vchange
   156  				if vchange && debug > 1 {
   157  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   158  				}
   159  			}
   160  		}
        		// Stop once a full pass neither rewrote anything nor reset a dead value.
   161  		if !change && !deadChange {
   162  			break
   163  		}
   164  		iters++
   165  		if (iters > itersLimit || debug >= 2) && change {
   166  			// We've done a suspiciously large number of rewrites (or we're in debug mode).
   167  			// As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
   168  			// and the maximum value encountered during make.bash is 12.
   169  			// Start checking for cycles. (This is too expensive to do routinely.)
   170  			// Note: we avoid this path for deadChange-only iterations, to fix #51639.
   171  			if states == nil {
   172  				states = make(map[string]bool)
   173  			}
   174  			h := f.rewriteHash()
   175  			if _, ok := states[h]; ok {
   176  				// We've found a cycle.
   177  				// To diagnose it, set debug to 2 and start again,
   178  				// so that we'll print all rules applied until we complete another cycle.
   179  				// If debug is already >= 2, we've already done that, so it's time to crash.
   180  				if debug < 2 {
   181  					debug = 2
   182  					states = make(map[string]bool)
   183  				} else {
   184  					f.Fatalf("rewrite cycle detected")
   185  				}
   186  			}
   187  			states[h] = true
   188  		}
   189  	}
   190  	// remove clobbered values
   191  	for _, b := range f.Blocks {
        		// j is the index at which the next surviving value is stored, so
        		// b.Values is compacted in place.
   192  		j := 0
   193  		for i, v := range b.Values {
   194  			vl := v.Pos
   195  			if v.Op == OpInvalid {
        				// Remember a statement boundary carried by the value being
        				// freed so that a later value or the block can take it over.
   196  				if v.Pos.IsStmt() == src.PosIsStmt {
   197  					pendingLines.set(vl, int32(b.ID))
   198  				}
   199  				f.freeValue(v)
   200  				continue
   201  			}
        			// A boundary was recorded for this position in this block and v is
        			// eligible to carry it: move the mark onto v.
   202  			if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) && pendingLines.get(vl) == int32(b.ID) {
   203  				pendingLines.remove(vl)
   204  				v.Pos = v.Pos.WithIsStmt()
   205  			}
   206  			if i != j {
   207  				b.Values[j] = v
   208  			}
   209  			j++
   210  		}
        		// The block itself takes over a boundary still pending at its position.
   211  		if pendingLines.get(b.Pos) == int32(b.ID) {
   212  			b.Pos = b.Pos.WithIsStmt()
   213  			pendingLines.remove(b.Pos)
   214  		}
   215  		b.truncateValues(j)
   216  	}
   217  }
   218  
   219  // Common functions called from rewriting rules
   220  
   221  func is64BitFloat(t *types.Type) bool {
   222  	return t.Size() == 8 && t.IsFloat()
   223  }
   224  
   225  func is32BitFloat(t *types.Type) bool {
   226  	return t.Size() == 4 && t.IsFloat()
   227  }
   228  
   229  func is64BitInt(t *types.Type) bool {
   230  	return t.Size() == 8 && t.IsInteger()
   231  }
   232  
   233  func is32BitInt(t *types.Type) bool {
   234  	return t.Size() == 4 && t.IsInteger()
   235  }
   236  
   237  func is16BitInt(t *types.Type) bool {
   238  	return t.Size() == 2 && t.IsInteger()
   239  }
   240  
   241  func is8BitInt(t *types.Type) bool {
   242  	return t.Size() == 1 && t.IsInteger()
   243  }
   244  
   245  func isPtr(t *types.Type) bool {
   246  	return t.IsPtrShaped()
   247  }
   248  
   249  func copyCompatibleType(t1, t2 *types.Type) bool {
   250  	if t1.Size() != t2.Size() {
   251  		return false
   252  	}
   253  	if t1.IsInteger() {
   254  		return t2.IsInteger()
   255  	}
   256  	if isPtr(t1) {
   257  		return isPtr(t2)
   258  	}
   259  	return t1.Compare(t2) == types.CMPeq
   260  }
   261  
   262  // mergeSym merges two symbolic offsets. There is no real merging of
   263  // offsets, we just pick the non-nil one.
   264  func mergeSym(x, y Sym) Sym {
   265  	if x == nil {
   266  		return y
   267  	}
   268  	if y == nil {
   269  		return x
   270  	}
   271  	panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
   272  }
   273  
   274  func canMergeSym(x, y Sym) bool {
   275  	return x == nil || y == nil
   276  }
   277  
   278  // canMergeLoadClobber reports whether the load can be merged into target without
   279  // invalidating the schedule.
   280  // It also checks that the other non-load argument x is something we
   281  // are ok with clobbering.
   282  func canMergeLoadClobber(target, load, x *Value) bool {
   283  	// The register containing x is going to get clobbered.
   284  	// Don't merge if we still need the value of x.
   285  	// We don't have liveness information here, but we can
   286  	// approximate x dying with:
   287  	//  1) target is x's only use.
   288  	//  2) target is not in a deeper loop than x.
   289  	switch {
   290  	case x.Uses == 2 && x.Op == OpPhi && len(x.Args) == 2 && (x.Args[0] == target || x.Args[1] == target) && target.Uses == 1:
   291  		// This is a simple detector to determine that x is probably
   292  		// not live after target. (It does not need to be perfect,
   293  		// regalloc will issue a reg-reg move to save it if we are wrong.)
   294  		// We have:
   295  		//   x = Phi(?, target)
   296  		//   target = Op(load, x)
   297  		// Because target has only one use as a Phi argument, we can schedule it
   298  		// very late. Hopefully, later than the other use of x. (The other use died
   299  		// between x and target, or exists on another branch entirely).
   300  	case x.Uses > 1:
   301  		return false
   302  	}
   303  	loopnest := x.Block.Func.loopnest()
   304  	loopnest.calculateDepths()
   305  	if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
   306  		return false
   307  	}
   308  	return canMergeLoad(target, load)
   309  }
   310  
   311  // canMergeLoad reports whether the load can be merged into target without
   312  // invalidating the schedule.
   313  func canMergeLoad(target, load *Value) bool {
   314  	if target.Block.ID != load.Block.ID {
   315  		// If the load is in a different block do not merge it.
   316  		return false
   317  	}
   318  
   319  	// We can't merge the load into the target if the load
   320  	// has more than one use.
   321  	if load.Uses != 1 {
   322  		return false
   323  	}
   324  
   325  	mem := load.MemoryArg()
   326  
   327  	// We need the load's memory arg to still be alive at target. That
   328  	// can't be the case if one of target's args depends on a memory
   329  	// state that is a successor of load's memory arg.
   330  	//
   331  	// For example, it would be invalid to merge load into target in
   332  	// the following situation because newmem has killed oldmem
   333  	// before target is reached:
   334  	//     load = read ... oldmem
   335  	//   newmem = write ... oldmem
   336  	//     arg0 = read ... newmem
   337  	//   target = add arg0 load
   338  	//
   339  	// If the argument comes from a different block then we can exclude
   340  	// it immediately because it must dominate load (which is in the
   341  	// same block as target).
   342  	var args []*Value
   343  	for _, a := range target.Args {
   344  		if a != load && a.Block.ID == target.Block.ID {
   345  			args = append(args, a)
   346  		}
   347  	}
   348  
   349  	// memPreds contains memory states known to be predecessors of load's
   350  	// memory state. It is lazily initialized.
   351  	var memPreds map[*Value]bool
   352  	for i := 0; len(args) > 0; i++ {
   353  		const limit = 100
   354  		if i >= limit {
   355  			// Give up if we have done a lot of iterations.
   356  			return false
   357  		}
   358  		v := args[len(args)-1]
   359  		args = args[:len(args)-1]
   360  		if target.Block.ID != v.Block.ID {
   361  			// Since target and load are in the same block
   362  			// we can stop searching when we leave the block.
   363  			continue
   364  		}
   365  		if v.Op == OpPhi {
   366  			// A Phi implies we have reached the top of the block.
   367  			// The memory phi, if it exists, is always
   368  			// the first logical store in the block.
   369  			continue
   370  		}
   371  		if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
   372  			// We could handle this situation however it is likely
   373  			// to be very rare.
   374  			return false
   375  		}
   376  		if v.Op.SymEffect()&SymAddr != 0 {
   377  			// This case prevents an operation that calculates the
   378  			// address of a local variable from being forced to schedule
   379  			// before its corresponding VarDef.
   380  			// See issue 28445.
   381  			//   v1 = LOAD ...
   382  			//   v2 = VARDEF
   383  			//   v3 = LEAQ
   384  			//   v4 = CMPQ v1 v3
   385  			// We don't want to combine the CMPQ with the load, because
   386  			// that would force the CMPQ to schedule before the VARDEF, which
   387  			// in turn requires the LEAQ to schedule before the VARDEF.
   388  			return false
   389  		}
   390  		if v.Type.IsMemory() {
   391  			if memPreds == nil {
   392  				// Initialise a map containing memory states
   393  				// known to be predecessors of load's memory
   394  				// state.
   395  				memPreds = make(map[*Value]bool)
   396  				m := mem
   397  				const limit = 50
   398  				for i := 0; i < limit; i++ {
   399  					if m.Op == OpPhi {
   400  						// The memory phi, if it exists, is always
   401  						// the first logical store in the block.
   402  						break
   403  					}
   404  					if m.Block.ID != target.Block.ID {
   405  						break
   406  					}
   407  					if !m.Type.IsMemory() {
   408  						break
   409  					}
   410  					memPreds[m] = true
   411  					if len(m.Args) == 0 {
   412  						break
   413  					}
   414  					m = m.MemoryArg()
   415  				}
   416  			}
   417  
   418  			// We can merge if v is a predecessor of mem.
   419  			//
   420  			// For example, we can merge load into target in the
   421  			// following scenario:
   422  			//      x = read ... v
   423  			//    mem = write ... v
   424  			//   load = read ... mem
   425  			// target = add x load
   426  			if memPreds[v] {
   427  				continue
   428  			}
   429  			return false
   430  		}
   431  		if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
   432  			// If v takes mem as an input then we know mem
   433  			// is valid at this point.
   434  			continue
   435  		}
   436  		for _, a := range v.Args {
   437  			if target.Block.ID == a.Block.ID {
   438  				args = append(args, a)
   439  			}
   440  		}
   441  	}
   442  
   443  	return true
   444  }
   445  
   446  // isSameCall reports whether aux is the same as the given named symbol.
   447  func isSameCall(aux Aux, name string) bool {
   448  	fn := aux.(*AuxCall).Fn
   449  	return fn != nil && fn.String() == name
   450  }
   451  
   452  // canLoadUnaligned reports if the architecture supports unaligned load operations.
   453  func canLoadUnaligned(c *Config) bool {
   454  	return c.ctxt.Arch.Alignment == 1
   455  }
   456  
   457  // nlzX returns the number of leading zeros.
   458  func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
   459  func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
   460  func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
   461  func nlz8(x int8) int   { return bits.LeadingZeros8(uint8(x)) }
   462  
   463  // ntzX returns the number of trailing zeros.
   464  func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
   465  func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
   466  func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
   467  func ntz8(x int8) int   { return bits.TrailingZeros8(uint8(x)) }
   468  
   469  func oneBit(x int64) bool   { return x&(x-1) == 0 && x != 0 }
   470  func oneBit8(x int8) bool   { return x&(x-1) == 0 && x != 0 }
   471  func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 }
   472  func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 }
   473  func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 }
   474  
   475  // nto returns the number of trailing ones.
   476  func nto(x int64) int64 {
   477  	return int64(ntz64(^x))
   478  }
   479  
   480  // logX returns logarithm of n base 2.
   481  // n must be a positive power of 2 (isPowerOfTwoX returns true).
   482  func log8(n int8) int64 {
   483  	return int64(bits.Len8(uint8(n))) - 1
   484  }
   485  func log16(n int16) int64 {
   486  	return int64(bits.Len16(uint16(n))) - 1
   487  }
   488  func log32(n int32) int64 {
   489  	return int64(bits.Len32(uint32(n))) - 1
   490  }
   491  func log64(n int64) int64 {
   492  	return int64(bits.Len64(uint64(n))) - 1
   493  }
   494  
   495  // log2uint32 returns logarithm in base 2 of uint32(n), with log2(0) = -1.
   496  // Rounds down.
   497  func log2uint32(n int64) int64 {
   498  	return int64(bits.Len32(uint32(n))) - 1
   499  }
   500  
   501  // isPowerOfTwoX functions report whether n is a power of 2.
   502  func isPowerOfTwo[T int8 | int16 | int32 | int64](n T) bool {
   503  	return n > 0 && n&(n-1) == 0
   504  }
   505  
   506  // isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
   507  func isUint64PowerOfTwo(in int64) bool {
   508  	n := uint64(in)
   509  	return n > 0 && n&(n-1) == 0
   510  }
   511  
   512  // isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
   513  func isUint32PowerOfTwo(in int64) bool {
   514  	n := uint64(uint32(in))
   515  	return n > 0 && n&(n-1) == 0
   516  }
   517  
   518  // is32Bit reports whether n can be represented as a signed 32 bit integer.
   519  func is32Bit(n int64) bool {
   520  	return n == int64(int32(n))
   521  }
   522  
   523  // is16Bit reports whether n can be represented as a signed 16 bit integer.
   524  func is16Bit(n int64) bool {
   525  	return n == int64(int16(n))
   526  }
   527  
   528  // is8Bit reports whether n can be represented as a signed 8 bit integer.
   529  func is8Bit(n int64) bool {
   530  	return n == int64(int8(n))
   531  }
   532  
   533  // isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
   534  func isU8Bit(n int64) bool {
   535  	return n == int64(uint8(n))
   536  }
   537  
   538  // is12Bit reports whether n can be represented as a signed 12 bit integer.
   539  func is12Bit(n int64) bool {
   540  	return -(1<<11) <= n && n < (1<<11)
   541  }
   542  
   543  // isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
   544  func isU12Bit(n int64) bool {
   545  	return 0 <= n && n < (1<<12)
   546  }
   547  
   548  // isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
   549  func isU16Bit(n int64) bool {
   550  	return n == int64(uint16(n))
   551  }
   552  
   553  // isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
   554  func isU32Bit(n int64) bool {
   555  	return n == int64(uint32(n))
   556  }
   557  
   558  // is20Bit reports whether n can be represented as a signed 20 bit integer.
   559  func is20Bit(n int64) bool {
   560  	return -(1<<19) <= n && n < (1<<19)
   561  }
   562  
   563  // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
   564  func b2i(b bool) int64 {
   565  	if b {
   566  		return 1
   567  	}
   568  	return 0
   569  }
   570  
   571  // b2i32 translates a boolean value to 0 or 1.
   572  func b2i32(b bool) int32 {
   573  	if b {
   574  		return 1
   575  	}
   576  	return 0
   577  }
   578  
   579  func canMulStrengthReduce(config *Config, x int64) bool {
   580  	_, ok := config.mulRecipes[x]
   581  	return ok
   582  }
   583  func canMulStrengthReduce32(config *Config, x int32) bool {
   584  	_, ok := config.mulRecipes[int64(x)]
   585  	return ok
   586  }
   587  
   588  // mulStrengthReduce returns v*x evaluated at the location
   589  // (block and source position) of m.
   590  // canMulStrengthReduce must have returned true.
   591  func mulStrengthReduce(m *Value, v *Value, x int64) *Value {
   592  	return v.Block.Func.Config.mulRecipes[x].build(m, v)
   593  }
   594  
   595  // mulStrengthReduce32 returns v*x evaluated at the location
   596  // (block and source position) of m.
   597  // canMulStrengthReduce32 must have returned true.
   598  // The upper 32 bits of m might be set to junk.
   599  func mulStrengthReduce32(m *Value, v *Value, x int32) *Value {
   600  	return v.Block.Func.Config.mulRecipes[int64(x)].build(m, v)
   601  }
   602  
   603  // shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
   604  // A shift is bounded if it is shifting by less than the width of the shifted value.
   605  func shiftIsBounded(v *Value) bool {
   606  	return v.AuxInt != 0
   607  }
   608  
   609  // canonLessThan returns whether x is "ordered" less than y, for purposes of normalizing
   610  // generated code as much as possible.
   611  func canonLessThan(x, y *Value) bool {
   612  	if x.Op != y.Op {
   613  		return x.Op < y.Op
   614  	}
   615  	if !x.Pos.SameFileAndLine(y.Pos) {
   616  		return x.Pos.Before(y.Pos)
   617  	}
   618  	return x.ID < y.ID
   619  }
   620  
   621  // truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
   622  // of the mantissa. It will panic if the truncation results in lost information.
   623  func truncate64Fto32F(f float64) float32 {
   624  	if !isExactFloat32(f) {
   625  		panic("truncate64Fto32F: truncation is not exact")
   626  	}
   627  	if !math.IsNaN(f) {
   628  		return float32(f)
   629  	}
   630  	// NaN bit patterns aren't necessarily preserved across conversion
   631  	// instructions so we need to do the conversion manually.
   632  	b := math.Float64bits(f)
   633  	m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
   634  	//          | sign                  | exponent   | mantissa       |
   635  	r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
   636  	return math.Float32frombits(r)
   637  }
   638  
   639  // extend32Fto64F converts a float32 value to a float64 value preserving the bit
   640  // pattern of the mantissa.
   641  func extend32Fto64F(f float32) float64 {
   642  	if !math.IsNaN(float64(f)) {
   643  		return float64(f)
   644  	}
   645  	// NaN bit patterns aren't necessarily preserved across conversion
   646  	// instructions so we need to do the conversion manually.
   647  	b := uint64(math.Float32bits(f))
   648  	//   | sign                  | exponent      | mantissa                    |
   649  	r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
   650  	return math.Float64frombits(r)
   651  }
   652  
   653  // DivisionNeedsFixUp reports whether the division needs fix-up code.
   654  func DivisionNeedsFixUp(v *Value) bool {
   655  	return v.AuxInt == 0
   656  }
   657  
   658  // auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
   659  func auxFrom64F(f float64) int64 {
   660  	if f != f {
   661  		panic("can't encode a NaN in AuxInt field")
   662  	}
   663  	return int64(math.Float64bits(f))
   664  }
   665  
   666  // auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
   667  func auxFrom32F(f float32) int64 {
   668  	if f != f {
   669  		panic("can't encode a NaN in AuxInt field")
   670  	}
   671  	return int64(math.Float64bits(extend32Fto64F(f)))
   672  }
   673  
   674  // auxTo32F decodes a float32 from the AuxInt value provided.
   675  func auxTo32F(i int64) float32 {
   676  	return truncate64Fto32F(math.Float64frombits(uint64(i)))
   677  }
   678  
   679  // auxTo64F decodes a float64 from the AuxInt value provided.
   680  func auxTo64F(i int64) float64 {
   681  	return math.Float64frombits(uint64(i))
   682  }
   683  
   684  func auxIntToBool(i int64) bool {
   685  	if i == 0 {
   686  		return false
   687  	}
   688  	return true
   689  }
   690  func auxIntToInt8(i int64) int8 {
   691  	return int8(i)
   692  }
   693  func auxIntToInt16(i int64) int16 {
   694  	return int16(i)
   695  }
   696  func auxIntToInt32(i int64) int32 {
   697  	return int32(i)
   698  }
   699  func auxIntToInt64(i int64) int64 {
   700  	return i
   701  }
   702  func auxIntToUint8(i int64) uint8 {
   703  	return uint8(i)
   704  }
   705  func auxIntToFloat32(i int64) float32 {
   706  	return float32(math.Float64frombits(uint64(i)))
   707  }
   708  func auxIntToFloat64(i int64) float64 {
   709  	return math.Float64frombits(uint64(i))
   710  }
   711  func auxIntToValAndOff(i int64) ValAndOff {
   712  	return ValAndOff(i)
   713  }
   714  func auxIntToArm64BitField(i int64) arm64BitField {
   715  	return arm64BitField(i)
   716  }
   717  func auxIntToInt128(x int64) int128 {
   718  	if x != 0 {
   719  		panic("nonzero int128 not allowed")
   720  	}
   721  	return 0
   722  }
   723  func auxIntToFlagConstant(x int64) flagConstant {
   724  	return flagConstant(x)
   725  }
   726  
   727  func auxIntToOp(cc int64) Op {
   728  	return Op(cc)
   729  }
   730  
   731  func boolToAuxInt(b bool) int64 {
   732  	if b {
   733  		return 1
   734  	}
   735  	return 0
   736  }
   737  func int8ToAuxInt(i int8) int64 {
   738  	return int64(i)
   739  }
   740  func int16ToAuxInt(i int16) int64 {
   741  	return int64(i)
   742  }
   743  func int32ToAuxInt(i int32) int64 {
   744  	return int64(i)
   745  }
   746  func int64ToAuxInt(i int64) int64 {
   747  	return int64(i)
   748  }
   749  func uint8ToAuxInt(i uint8) int64 {
   750  	return int64(int8(i))
   751  }
   752  func float32ToAuxInt(f float32) int64 {
   753  	return int64(math.Float64bits(float64(f)))
   754  }
   755  func float64ToAuxInt(f float64) int64 {
   756  	return int64(math.Float64bits(f))
   757  }
   758  func valAndOffToAuxInt(v ValAndOff) int64 {
   759  	return int64(v)
   760  }
   761  func arm64BitFieldToAuxInt(v arm64BitField) int64 {
   762  	return int64(v)
   763  }
   764  func int128ToAuxInt(x int128) int64 {
   765  	if x != 0 {
   766  		panic("nonzero int128 not allowed")
   767  	}
   768  	return 0
   769  }
   770  func flagConstantToAuxInt(x flagConstant) int64 {
   771  	return int64(x)
   772  }
   773  
   774  func opToAuxInt(o Op) int64 {
   775  	return int64(o)
   776  }
   777  
   778  // Aux is an interface to hold miscellaneous data in Blocks and Values.
        // A type satisfies it by declaring the CanBeAnSSAAux marker method.
   779  type Aux interface {
   780  	CanBeAnSSAAux()
   781  }
   782  
   783  // for now only used to mark moves that need to avoid clobbering flags
   784  type auxMark bool
   785  
        // CanBeAnSSAAux makes auxMark satisfy Aux; it does nothing.
   786  func (auxMark) CanBeAnSSAAux() {}
   787  
        // AuxMark is a package-level auxMark value, declared here with its zero value.
   788  var AuxMark auxMark
   789  
   790  // stringAux wraps string values for use in Aux.
   791  type stringAux string
   792  
        // CanBeAnSSAAux makes stringAux satisfy Aux; it does nothing.
   793  func (stringAux) CanBeAnSSAAux() {}
   794  
   795  func auxToString(i Aux) string {
   796  	return string(i.(stringAux))
   797  }
   798  func auxToSym(i Aux) Sym {
   799  	// TODO: kind of a hack - allows nil interface through
   800  	s, _ := i.(Sym)
   801  	return s
   802  }
   803  func auxToType(i Aux) *types.Type {
   804  	return i.(*types.Type)
   805  }
   806  func auxToCall(i Aux) *AuxCall {
   807  	return i.(*AuxCall)
   808  }
   809  func auxToS390xCCMask(i Aux) s390x.CCMask {
   810  	return i.(s390x.CCMask)
   811  }
   812  func auxToS390xRotateParams(i Aux) s390x.RotateParams {
   813  	return i.(s390x.RotateParams)
   814  }
   815  
   816  func StringToAux(s string) Aux {
   817  	return stringAux(s)
   818  }
   819  func symToAux(s Sym) Aux {
   820  	return s
   821  }
   822  func callToAux(s *AuxCall) Aux {
   823  	return s
   824  }
   825  func typeToAux(t *types.Type) Aux {
   826  	return t
   827  }
   828  func s390xCCMaskToAux(c s390x.CCMask) Aux {
   829  	return c
   830  }
   831  func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
   832  	return r
   833  }
   834  
   835  // uaddOvf reports whether unsigned a+b would overflow.
   836  func uaddOvf(a, b int64) bool {
   837  	return uint64(a)+uint64(b) < uint64(a)
   838  }
   839  
   840  // loadLSymOffset simulates reading a word at an offset into a
   841  // read-only symbol's runtime memory. If it would read a pointer to
   842  // another symbol, that symbol is returned. Otherwise, it returns nil.
   843  func loadLSymOffset(lsym *obj.LSym, offset int64) *obj.LSym {
   844  	if lsym.Type != objabi.SRODATA {
   845  		return nil
   846  	}
   847  
   848  	for _, r := range lsym.R {
   849  		if int64(r.Off) == offset && r.Type&^objabi.R_WEAK == objabi.R_ADDR && r.Add == 0 {
   850  			return r.Sym
   851  		}
   852  	}
   853  
   854  	return nil
   855  }
   856  
   857  func devirtLECall(v *Value, sym *obj.LSym) *Value {
   858  	v.Op = OpStaticLECall
   859  	auxcall := v.Aux.(*AuxCall)
   860  	auxcall.Fn = sym
   861  	// Remove first arg
   862  	v.Args[0].Uses--
   863  	copy(v.Args[0:], v.Args[1:])
   864  	v.Args[len(v.Args)-1] = nil // aid GC
   865  	v.Args = v.Args[:len(v.Args)-1]
   866  	if f := v.Block.Func; f.pass.debug > 0 {
   867  		f.Warnl(v.Pos, "de-virtualizing call")
   868  	}
   869  	return v
   870  }
   871  
   872  // isSamePtr reports whether p1 and p2 point to the same address.
   873  func isSamePtr(p1, p2 *Value) bool {
   874  	if p1 == p2 {
   875  		return true
   876  	}
   877  	if p1.Op != p2.Op {
   878  		for p1.Op == OpOffPtr && p1.AuxInt == 0 {
   879  			p1 = p1.Args[0]
   880  		}
   881  		for p2.Op == OpOffPtr && p2.AuxInt == 0 {
   882  			p2 = p2.Args[0]
   883  		}
   884  		if p1 == p2 {
   885  			return true
   886  		}
   887  		if p1.Op != p2.Op {
   888  			return false
   889  		}
   890  	}
   891  	switch p1.Op {
   892  	case OpOffPtr:
   893  		return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
   894  	case OpAddr, OpLocalAddr:
   895  		return p1.Aux == p2.Aux
   896  	case OpAddPtr:
   897  		return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
   898  	}
   899  	return false
   900  }
   901  
   902  func isStackPtr(v *Value) bool {
   903  	for v.Op == OpOffPtr || v.Op == OpAddPtr {
   904  		v = v.Args[0]
   905  	}
   906  	return v.Op == OpSP || v.Op == OpLocalAddr
   907  }
   908  
   909  // disjoint reports whether the memory region specified by [p1:p1+n1)
   910  // does not overlap with [p2:p2+n2).
   911  // A return value of false does not imply the regions overlap.
   912  func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
   913  	if n1 == 0 || n2 == 0 {
   914  		return true
   915  	}
   916  	if p1 == p2 {
   917  		return false
   918  	}
   919  	baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
   920  		base, offset = ptr, 0
   921  		for base.Op == OpOffPtr {
   922  			offset += base.AuxInt
   923  			base = base.Args[0]
   924  		}
   925  		if opcodeTable[base.Op].nilCheck {
   926  			base = base.Args[0]
   927  		}
   928  		return base, offset
   929  	}
   930  
   931  	// Run types-based analysis
   932  	if disjointTypes(p1.Type, p2.Type) {
   933  		return true
   934  	}
   935  
   936  	p1, off1 := baseAndOffset(p1)
   937  	p2, off2 := baseAndOffset(p2)
   938  	if isSamePtr(p1, p2) {
   939  		return !overlap(off1, n1, off2, n2)
   940  	}
   941  	// p1 and p2 are not the same, so if they are both OpAddrs then
   942  	// they point to different variables.
   943  	// If one pointer is on the stack and the other is an argument
   944  	// then they can't overlap.
   945  	switch p1.Op {
   946  	case OpAddr, OpLocalAddr:
   947  		if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
   948  			return true
   949  		}
   950  		return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
   951  	case OpArg, OpArgIntReg:
   952  		if p2.Op == OpSP || p2.Op == OpLocalAddr {
   953  			return true
   954  		}
   955  	case OpSP:
   956  		return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
   957  	}
   958  	return false
   959  }
   960  
   961  // disjointTypes reports whether a memory region pointed to by a pointer of type
   962  // t1 does not overlap with a memory region pointed to by a pointer of type t2 --
   963  // based on type aliasing rules.
   964  func disjointTypes(t1 *types.Type, t2 *types.Type) bool {
   965  	// Unsafe pointer can alias with anything.
   966  	if t1.IsUnsafePtr() || t2.IsUnsafePtr() {
   967  		return false
   968  	}
   969  
   970  	if !t1.IsPtr() || !t2.IsPtr() {
   971  		panic("disjointTypes: one of arguments is not a pointer")
   972  	}
   973  
   974  	t1 = t1.Elem()
   975  	t2 = t2.Elem()
   976  
   977  	// Not-in-heap types are not supported -- they are rare and non-important; also,
   978  	// type.HasPointers check doesn't work for them correctly.
   979  	if t1.NotInHeap() || t2.NotInHeap() {
   980  		return false
   981  	}
   982  
   983  	isPtrShaped := func(t *types.Type) bool { return int(t.Size()) == types.PtrSize && t.HasPointers() }
   984  
   985  	// Pointers and non-pointers are disjoint (https://pkg.go.dev/unsafe#Pointer).
   986  	if (isPtrShaped(t1) && !t2.HasPointers()) ||
   987  		(isPtrShaped(t2) && !t1.HasPointers()) {
   988  		return true
   989  	}
   990  
   991  	return false
   992  }
   993  
   994  // moveSize returns the number of bytes an aligned MOV instruction moves.
   995  func moveSize(align int64, c *Config) int64 {
   996  	switch {
   997  	case align%8 == 0 && c.PtrSize == 8:
   998  		return 8
   999  	case align%4 == 0:
  1000  		return 4
  1001  	case align%2 == 0:
  1002  		return 2
  1003  	}
  1004  	return 1
  1005  }
  1006  
  1007  // mergePoint finds a block among a's blocks which dominates b and is itself
  1008  // dominated by all of a's blocks. Returns nil if it can't find one.
  1009  // Might return nil even if one does exist.
  1010  func mergePoint(b *Block, a ...*Value) *Block {
  1011  	// Walk backward from b looking for one of the a's blocks.
  1012  
  1013  	// Max distance
  1014  	d := 100
  1015  
  1016  	for d > 0 {
  1017  		for _, x := range a {
  1018  			if b == x.Block {
  1019  				goto found
  1020  			}
  1021  		}
  1022  		if len(b.Preds) > 1 {
  1023  			// Don't know which way to go back. Abort.
  1024  			return nil
  1025  		}
  1026  		b = b.Preds[0].b
  1027  		d--
  1028  	}
  1029  	return nil // too far away
  1030  found:
  1031  	// At this point, r is the first value in a that we find by walking backwards.
  1032  	// if we return anything, r will be it.
  1033  	r := b
  1034  
  1035  	// Keep going, counting the other a's that we find. They must all dominate r.
  1036  	na := 0
  1037  	for d > 0 {
  1038  		for _, x := range a {
  1039  			if b == x.Block {
  1040  				na++
  1041  			}
  1042  		}
  1043  		if na == len(a) {
  1044  			// Found all of a in a backwards walk. We can return r.
  1045  			return r
  1046  		}
  1047  		if len(b.Preds) > 1 {
  1048  			return nil
  1049  		}
  1050  		b = b.Preds[0].b
  1051  		d--
  1052  
  1053  	}
  1054  	return nil // too far away
  1055  }
  1056  
  1057  // clobber invalidates values. Returns true.
  1058  // clobber is used by rewrite rules to:
  1059  //
  1060  //	A) make sure the values are really dead and never used again.
  1061  //	B) decrement use counts of the values' args.
  1062  func clobber(vv ...*Value) bool {
  1063  	for _, v := range vv {
  1064  		v.reset(OpInvalid)
  1065  		// Note: leave v.Block intact.  The Block field is used after clobber.
  1066  	}
  1067  	return true
  1068  }
  1069  
// resetCopy resets v to be a copy of arg: v is reset to OpCopy and arg is
// then added as its argument.
// Always returns true, so it can be used where a boolean condition is
// expected.
func resetCopy(v *Value, arg *Value) bool {
	v.reset(OpCopy)
	v.AddArg(arg)
	return true
}
  1077  
  1078  // clobberIfDead resets v when use count is 1. Returns true.
  1079  // clobberIfDead is used by rewrite rules to decrement
  1080  // use counts of v's args when v is dead and never used.
  1081  func clobberIfDead(v *Value) bool {
  1082  	if v.Uses == 1 {
  1083  		v.reset(OpInvalid)
  1084  	}
  1085  	// Note: leave v.Block intact.  The Block field is used after clobberIfDead.
  1086  	return true
  1087  }
  1088  
  1089  // noteRule is an easy way to track if a rule is matched when writing
  1090  // new ones.  Make the rule of interest also conditional on
  1091  //
  1092  //	noteRule("note to self: rule of interest matched")
  1093  //
  1094  // and that message will print when the rule matches.
  1095  func noteRule(s string) bool {
  1096  	fmt.Println(s)
  1097  	return true
  1098  }
  1099  
  1100  // countRule increments Func.ruleMatches[key].
  1101  // If Func.ruleMatches is non-nil at the end
  1102  // of compilation, it will be printed to stdout.
  1103  // This is intended to make it easier to find which functions
  1104  // which contain lots of rules matches when developing new rules.
  1105  func countRule(v *Value, key string) bool {
  1106  	f := v.Block.Func
  1107  	if f.ruleMatches == nil {
  1108  		f.ruleMatches = make(map[string]int)
  1109  	}
  1110  	f.ruleMatches[key]++
  1111  	return true
  1112  }
  1113  
  1114  // warnRule generates compiler debug output with string s when
  1115  // v is not in autogenerated code, cond is true and the rule has fired.
  1116  func warnRule(cond bool, v *Value, s string) bool {
  1117  	if pos := v.Pos; pos.Line() > 1 && cond {
  1118  		v.Block.Func.Warnl(pos, s)
  1119  	}
  1120  	return true
  1121  }
  1122  
  1123  // for a pseudo-op like (LessThan x), extract x.
  1124  func flagArg(v *Value) *Value {
  1125  	if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
  1126  		return nil
  1127  	}
  1128  	return v.Args[0]
  1129  }
  1130  
  1131  // arm64Negate finds the complement to an ARM64 condition code,
  1132  // for example !Equal -> NotEqual or !LessThan -> GreaterEqual
  1133  //
  1134  // For floating point, it's more subtle because NaN is unordered. We do
  1135  // !LessThanF -> NotLessThanF, the latter takes care of NaNs.
  1136  func arm64Negate(op Op) Op {
  1137  	switch op {
  1138  	case OpARM64LessThan:
  1139  		return OpARM64GreaterEqual
  1140  	case OpARM64LessThanU:
  1141  		return OpARM64GreaterEqualU
  1142  	case OpARM64GreaterThan:
  1143  		return OpARM64LessEqual
  1144  	case OpARM64GreaterThanU:
  1145  		return OpARM64LessEqualU
  1146  	case OpARM64LessEqual:
  1147  		return OpARM64GreaterThan
  1148  	case OpARM64LessEqualU:
  1149  		return OpARM64GreaterThanU
  1150  	case OpARM64GreaterEqual:
  1151  		return OpARM64LessThan
  1152  	case OpARM64GreaterEqualU:
  1153  		return OpARM64LessThanU
  1154  	case OpARM64Equal:
  1155  		return OpARM64NotEqual
  1156  	case OpARM64NotEqual:
  1157  		return OpARM64Equal
  1158  	case OpARM64LessThanF:
  1159  		return OpARM64NotLessThanF
  1160  	case OpARM64NotLessThanF:
  1161  		return OpARM64LessThanF
  1162  	case OpARM64LessEqualF:
  1163  		return OpARM64NotLessEqualF
  1164  	case OpARM64NotLessEqualF:
  1165  		return OpARM64LessEqualF
  1166  	case OpARM64GreaterThanF:
  1167  		return OpARM64NotGreaterThanF
  1168  	case OpARM64NotGreaterThanF:
  1169  		return OpARM64GreaterThanF
  1170  	case OpARM64GreaterEqualF:
  1171  		return OpARM64NotGreaterEqualF
  1172  	case OpARM64NotGreaterEqualF:
  1173  		return OpARM64GreaterEqualF
  1174  	default:
  1175  		panic("unreachable")
  1176  	}
  1177  }
  1178  
  1179  // arm64Invert evaluates (InvertFlags op), which
  1180  // is the same as altering the condition codes such
  1181  // that the same result would be produced if the arguments
  1182  // to the flag-generating instruction were reversed, e.g.
  1183  // (InvertFlags (CMP x y)) -> (CMP y x)
  1184  func arm64Invert(op Op) Op {
  1185  	switch op {
  1186  	case OpARM64LessThan:
  1187  		return OpARM64GreaterThan
  1188  	case OpARM64LessThanU:
  1189  		return OpARM64GreaterThanU
  1190  	case OpARM64GreaterThan:
  1191  		return OpARM64LessThan
  1192  	case OpARM64GreaterThanU:
  1193  		return OpARM64LessThanU
  1194  	case OpARM64LessEqual:
  1195  		return OpARM64GreaterEqual
  1196  	case OpARM64LessEqualU:
  1197  		return OpARM64GreaterEqualU
  1198  	case OpARM64GreaterEqual:
  1199  		return OpARM64LessEqual
  1200  	case OpARM64GreaterEqualU:
  1201  		return OpARM64LessEqualU
  1202  	case OpARM64Equal, OpARM64NotEqual:
  1203  		return op
  1204  	case OpARM64LessThanF:
  1205  		return OpARM64GreaterThanF
  1206  	case OpARM64GreaterThanF:
  1207  		return OpARM64LessThanF
  1208  	case OpARM64LessEqualF:
  1209  		return OpARM64GreaterEqualF
  1210  	case OpARM64GreaterEqualF:
  1211  		return OpARM64LessEqualF
  1212  	case OpARM64NotLessThanF:
  1213  		return OpARM64NotGreaterThanF
  1214  	case OpARM64NotGreaterThanF:
  1215  		return OpARM64NotLessThanF
  1216  	case OpARM64NotLessEqualF:
  1217  		return OpARM64NotGreaterEqualF
  1218  	case OpARM64NotGreaterEqualF:
  1219  		return OpARM64NotLessEqualF
  1220  	default:
  1221  		panic("unreachable")
  1222  	}
  1223  }
  1224  
  1225  // evaluate an ARM64 op against a flags value
  1226  // that is potentially constant; return 1 for true,
  1227  // -1 for false, and 0 for not constant.
  1228  func ccARM64Eval(op Op, flags *Value) int {
  1229  	fop := flags.Op
  1230  	if fop == OpARM64InvertFlags {
  1231  		return -ccARM64Eval(op, flags.Args[0])
  1232  	}
  1233  	if fop != OpARM64FlagConstant {
  1234  		return 0
  1235  	}
  1236  	fc := flagConstant(flags.AuxInt)
  1237  	b2i := func(b bool) int {
  1238  		if b {
  1239  			return 1
  1240  		}
  1241  		return -1
  1242  	}
  1243  	switch op {
  1244  	case OpARM64Equal:
  1245  		return b2i(fc.eq())
  1246  	case OpARM64NotEqual:
  1247  		return b2i(fc.ne())
  1248  	case OpARM64LessThan:
  1249  		return b2i(fc.lt())
  1250  	case OpARM64LessThanU:
  1251  		return b2i(fc.ult())
  1252  	case OpARM64GreaterThan:
  1253  		return b2i(fc.gt())
  1254  	case OpARM64GreaterThanU:
  1255  		return b2i(fc.ugt())
  1256  	case OpARM64LessEqual:
  1257  		return b2i(fc.le())
  1258  	case OpARM64LessEqualU:
  1259  		return b2i(fc.ule())
  1260  	case OpARM64GreaterEqual:
  1261  		return b2i(fc.ge())
  1262  	case OpARM64GreaterEqualU:
  1263  		return b2i(fc.uge())
  1264  	}
  1265  	return 0
  1266  }
  1267  
  1268  // logRule logs the use of the rule s. This will only be enabled if
  1269  // rewrite rules were generated with the -log option, see _gen/rulegen.go.
  1270  func logRule(s string) {
  1271  	if ruleFile == nil {
  1272  		// Open a log file to write log to. We open in append
  1273  		// mode because all.bash runs the compiler lots of times,
  1274  		// and we want the concatenation of all of those logs.
  1275  		// This means, of course, that users need to rm the old log
  1276  		// to get fresh data.
  1277  		// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
  1278  		w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
  1279  			os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
  1280  		if err != nil {
  1281  			panic(err)
  1282  		}
  1283  		ruleFile = w
  1284  	}
  1285  	_, err := fmt.Fprintln(ruleFile, s)
  1286  	if err != nil {
  1287  		panic(err)
  1288  	}
  1289  }
  1290  
  1291  var ruleFile io.Writer
  1292  
  1293  func isConstZero(v *Value) bool {
  1294  	switch v.Op {
  1295  	case OpConstNil:
  1296  		return true
  1297  	case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
  1298  		return v.AuxInt == 0
  1299  	case OpStringMake, OpIMake, OpComplexMake:
  1300  		return isConstZero(v.Args[0]) && isConstZero(v.Args[1])
  1301  	case OpSliceMake:
  1302  		return isConstZero(v.Args[0]) && isConstZero(v.Args[1]) && isConstZero(v.Args[2])
  1303  	case OpStringPtr, OpStringLen, OpSlicePtr, OpSliceLen, OpSliceCap, OpITab, OpIData, OpComplexReal, OpComplexImag:
  1304  		return isConstZero(v.Args[0])
  1305  	}
  1306  	return false
  1307  }
  1308  
  1309  // reciprocalExact64 reports whether 1/c is exactly representable.
  1310  func reciprocalExact64(c float64) bool {
  1311  	b := math.Float64bits(c)
  1312  	man := b & (1<<52 - 1)
  1313  	if man != 0 {
  1314  		return false // not a power of 2, denormal, or NaN
  1315  	}
  1316  	exp := b >> 52 & (1<<11 - 1)
  1317  	// exponent bias is 0x3ff.  So taking the reciprocal of a number
  1318  	// changes the exponent to 0x7fe-exp.
  1319  	switch exp {
  1320  	case 0:
  1321  		return false // ±0
  1322  	case 0x7ff:
  1323  		return false // ±inf
  1324  	case 0x7fe:
  1325  		return false // exponent is not representable
  1326  	default:
  1327  		return true
  1328  	}
  1329  }
  1330  
  1331  // reciprocalExact32 reports whether 1/c is exactly representable.
  1332  func reciprocalExact32(c float32) bool {
  1333  	b := math.Float32bits(c)
  1334  	man := b & (1<<23 - 1)
  1335  	if man != 0 {
  1336  		return false // not a power of 2, denormal, or NaN
  1337  	}
  1338  	exp := b >> 23 & (1<<8 - 1)
  1339  	// exponent bias is 0x7f.  So taking the reciprocal of a number
  1340  	// changes the exponent to 0xfe-exp.
  1341  	switch exp {
  1342  	case 0:
  1343  		return false // ±0
  1344  	case 0xff:
  1345  		return false // ±inf
  1346  	case 0xfe:
  1347  		return false // exponent is not representable
  1348  	default:
  1349  		return true
  1350  	}
  1351  }
  1352  
  1353  // check if an immediate can be directly encoded into an ARM's instruction.
  1354  func isARMImmRot(v uint32) bool {
  1355  	for i := 0; i < 16; i++ {
  1356  		if v&^0xff == 0 {
  1357  			return true
  1358  		}
  1359  		v = v<<2 | v>>30
  1360  	}
  1361  
  1362  	return false
  1363  }
  1364  
  1365  // overlap reports whether the ranges given by the given offset and
  1366  // size pairs overlap.
  1367  func overlap(offset1, size1, offset2, size2 int64) bool {
  1368  	if offset1 >= offset2 && offset2+size2 > offset1 {
  1369  		return true
  1370  	}
  1371  	if offset2 >= offset1 && offset1+size1 > offset2 {
  1372  		return true
  1373  	}
  1374  	return false
  1375  }
  1376  
  1377  // check if value zeroes out upper 32-bit of 64-bit register.
  1378  // depth limits recursion depth. In AMD64.rules 3 is used as limit,
  1379  // because it catches same amount of cases as 4.
  1380  func zeroUpper32Bits(x *Value, depth int) bool {
  1381  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1382  		// If the value is signed, it might get re-sign-extended
  1383  		// during spill and restore. See issue 68227.
  1384  		return false
  1385  	}
  1386  	switch x.Op {
  1387  	case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
  1388  		OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
  1389  		OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
  1390  		OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
  1391  		OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
  1392  		OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
  1393  		OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
  1394  		OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
  1395  		OpAMD64SHLL, OpAMD64SHLLconst:
  1396  		return true
  1397  	case OpARM64REV16W, OpARM64REVW, OpARM64RBITW, OpARM64CLZW, OpARM64EXTRWconst,
  1398  		OpARM64MULW, OpARM64MNEGW, OpARM64UDIVW, OpARM64DIVW, OpARM64UMODW,
  1399  		OpARM64MADDW, OpARM64MSUBW, OpARM64RORW, OpARM64RORWconst:
  1400  		return true
  1401  	case OpArg: // note: but not ArgIntReg
  1402  		// amd64 always loads args from the stack unsigned.
  1403  		// most other architectures load them sign/zero extended based on the type.
  1404  		return x.Type.Size() == 4 && x.Block.Func.Config.arch == "amd64"
  1405  	case OpPhi, OpSelect0, OpSelect1:
  1406  		// Phis can use each-other as an arguments, instead of tracking visited values,
  1407  		// just limit recursion depth.
  1408  		if depth <= 0 {
  1409  			return false
  1410  		}
  1411  		for i := range x.Args {
  1412  			if !zeroUpper32Bits(x.Args[i], depth-1) {
  1413  				return false
  1414  			}
  1415  		}
  1416  		return true
  1417  
  1418  	}
  1419  	return false
  1420  }
  1421  
  1422  // zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits.
  1423  func zeroUpper48Bits(x *Value, depth int) bool {
  1424  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1425  		return false
  1426  	}
  1427  	switch x.Op {
  1428  	case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
  1429  		return true
  1430  	case OpArg: // note: but not ArgIntReg
  1431  		return x.Type.Size() == 2 && x.Block.Func.Config.arch == "amd64"
  1432  	case OpPhi, OpSelect0, OpSelect1:
  1433  		// Phis can use each-other as an arguments, instead of tracking visited values,
  1434  		// just limit recursion depth.
  1435  		if depth <= 0 {
  1436  			return false
  1437  		}
  1438  		for i := range x.Args {
  1439  			if !zeroUpper48Bits(x.Args[i], depth-1) {
  1440  				return false
  1441  			}
  1442  		}
  1443  		return true
  1444  
  1445  	}
  1446  	return false
  1447  }
  1448  
  1449  // zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits.
  1450  func zeroUpper56Bits(x *Value, depth int) bool {
  1451  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1452  		return false
  1453  	}
  1454  	switch x.Op {
  1455  	case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
  1456  		return true
  1457  	case OpArg: // note: but not ArgIntReg
  1458  		return x.Type.Size() == 1 && x.Block.Func.Config.arch == "amd64"
  1459  	case OpPhi, OpSelect0, OpSelect1:
  1460  		// Phis can use each-other as an arguments, instead of tracking visited values,
  1461  		// just limit recursion depth.
  1462  		if depth <= 0 {
  1463  			return false
  1464  		}
  1465  		for i := range x.Args {
  1466  			if !zeroUpper56Bits(x.Args[i], depth-1) {
  1467  				return false
  1468  			}
  1469  		}
  1470  		return true
  1471  
  1472  	}
  1473  	return false
  1474  }
  1475  
  1476  func isInlinableMemclr(c *Config, sz int64) bool {
  1477  	if sz < 0 {
  1478  		return false
  1479  	}
  1480  	// TODO: expand this check to allow other architectures
  1481  	// see CL 454255 and issue 56997
  1482  	switch c.arch {
  1483  	case "amd64", "arm64":
  1484  		return true
  1485  	case "ppc64le", "ppc64", "loong64":
  1486  		return sz < 512
  1487  	}
  1488  	return false
  1489  }
  1490  
  1491  // isInlinableMemmove reports whether the given arch performs a Move of the given size
  1492  // faster than memmove. It will only return true if replacing the memmove with a Move is
  1493  // safe, either because Move will do all of its loads before any of its stores, or
  1494  // because the arguments are known to be disjoint.
  1495  // This is used as a check for replacing memmove with Move ops.
  1496  func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
  1497  	// It is always safe to convert memmove into Move when its arguments are disjoint.
  1498  	// Move ops may or may not be faster for large sizes depending on how the platform
  1499  	// lowers them, so we only perform this optimization on platforms that we know to
  1500  	// have fast Move ops.
  1501  	switch c.arch {
  1502  	case "amd64":
  1503  		return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
  1504  	case "arm64":
  1505  		return sz <= 64 || (sz <= 1024 && disjoint(dst, sz, src, sz))
  1506  	case "386":
  1507  		return sz <= 8
  1508  	case "s390x", "ppc64", "ppc64le":
  1509  		return sz <= 8 || disjoint(dst, sz, src, sz)
  1510  	case "arm", "loong64", "mips", "mips64", "mipsle", "mips64le":
  1511  		return sz <= 4
  1512  	}
  1513  	return false
  1514  }
// IsInlinableMemmove is the exported form of isInlinableMemmove: it passes
// dst, src, sz and c through unchanged and returns that function's result.
func IsInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
	return isInlinableMemmove(dst, src, sz, c)
}
  1518  
  1519  // logLargeCopy logs the occurrence of a large copy.
  1520  // The best place to do this is in the rewrite rules where the size of the move is easy to find.
  1521  // "Large" is arbitrarily chosen to be 128 bytes; this may change.
  1522  func logLargeCopy(v *Value, s int64) bool {
  1523  	if s < 128 {
  1524  		return true
  1525  	}
  1526  	if logopt.Enabled() {
  1527  		logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
  1528  	}
  1529  	return true
  1530  }
  1531  func LogLargeCopy(funcName string, pos src.XPos, s int64) {
  1532  	if s < 128 {
  1533  		return
  1534  	}
  1535  	if logopt.Enabled() {
  1536  		logopt.LogOpt(pos, "copy", "lower", funcName, fmt.Sprintf("%d bytes", s))
  1537  	}
  1538  }
  1539  
  1540  // hasSmallRotate reports whether the architecture has rotate instructions
  1541  // for sizes < 32-bit.  This is used to decide whether to promote some rotations.
  1542  func hasSmallRotate(c *Config) bool {
  1543  	switch c.arch {
  1544  	case "amd64", "386":
  1545  		return true
  1546  	default:
  1547  		return false
  1548  	}
  1549  }
  1550  
  1551  func supportsPPC64PCRel() bool {
  1552  	// PCRel is currently supported for >= power10, linux only
  1553  	// Internal and external linking supports this on ppc64le; internal linking on ppc64.
  1554  	return buildcfg.GOPPC64 >= 10 && buildcfg.GOOS == "linux"
  1555  }
  1556  
  1557  func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
  1558  	if sh < 0 || sh >= sz {
  1559  		panic("PPC64 shift arg sh out of range")
  1560  	}
  1561  	if mb < 0 || mb >= sz {
  1562  		panic("PPC64 shift arg mb out of range")
  1563  	}
  1564  	if me < 0 || me >= sz {
  1565  		panic("PPC64 shift arg me out of range")
  1566  	}
  1567  	return int32(sh<<16 | mb<<8 | me)
  1568  }
  1569  
  1570  func GetPPC64Shiftsh(auxint int64) int64 {
  1571  	return int64(int8(auxint >> 16))
  1572  }
  1573  
  1574  func GetPPC64Shiftmb(auxint int64) int64 {
  1575  	return int64(int8(auxint >> 8))
  1576  }
  1577  
  1578  func GetPPC64Shiftme(auxint int64) int64 {
  1579  	return int64(int8(auxint))
  1580  }
  1581  
  1582  // Test if this value can encoded as a mask for a rlwinm like
  1583  // operation.  Masks can also extend from the msb and wrap to
  1584  // the lsb too.  That is, the valid masks are 32 bit strings
  1585  // of the form: 0..01..10..0 or 1..10..01..1 or 1...1
  1586  //
  1587  // Note: This ignores the upper 32 bits of the input. When a
  1588  // zero extended result is desired (e.g a 64 bit result), the
  1589  // user must verify the upper 32 bits are 0 and the mask is
  1590  // contiguous (that is, non-wrapping).
  1591  func isPPC64WordRotateMask(v64 int64) bool {
  1592  	// Isolate rightmost 1 (if none 0) and add.
  1593  	v := uint32(v64)
  1594  	vp := (v & -v) + v
  1595  	// Likewise, for the wrapping case.
  1596  	vn := ^v
  1597  	vpn := (vn & -vn) + vn
  1598  	return (v&vp == 0 || vn&vpn == 0) && v != 0
  1599  }
  1600  
  1601  // Test if this mask is a valid, contiguous bitmask which can be
  1602  // represented by a RLWNM mask and also clears the upper 32 bits
  1603  // of the register.
  1604  func isPPC64WordRotateMaskNonWrapping(v64 int64) bool {
  1605  	// Isolate rightmost 1 (if none 0) and add.
  1606  	v := uint32(v64)
  1607  	vp := (v & -v) + v
  1608  	return (v&vp == 0) && v != 0 && uint64(uint32(v64)) == uint64(v64)
  1609  }
  1610  
  1611  // Compress mask and shift into single value of the form
  1612  // me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can
  1613  // be used to regenerate the input mask.
  1614  func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
  1615  	var mb, me, mbn, men int
  1616  
  1617  	// Determine boundaries and then decode them
  1618  	if mask == 0 || ^mask == 0 || rotate >= nbits {
  1619  		panic(fmt.Sprintf("invalid PPC64 rotate mask: %x %d %d", uint64(mask), rotate, nbits))
  1620  	} else if nbits == 32 {
  1621  		mb = bits.LeadingZeros32(uint32(mask))
  1622  		me = 32 - bits.TrailingZeros32(uint32(mask))
  1623  		mbn = bits.LeadingZeros32(^uint32(mask))
  1624  		men = 32 - bits.TrailingZeros32(^uint32(mask))
  1625  	} else {
  1626  		mb = bits.LeadingZeros64(uint64(mask))
  1627  		me = 64 - bits.TrailingZeros64(uint64(mask))
  1628  		mbn = bits.LeadingZeros64(^uint64(mask))
  1629  		men = 64 - bits.TrailingZeros64(^uint64(mask))
  1630  	}
  1631  	// Check for a wrapping mask (e.g bits at 0 and 63)
  1632  	if mb == 0 && me == int(nbits) {
  1633  		// swap the inverted values
  1634  		mb, me = men, mbn
  1635  	}
  1636  
  1637  	return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
  1638  }
  1639  
  1640  // Merge (RLDICL [encoded] (SRDconst [s] x)) into (RLDICL [new_encoded] x)
  1641  // SRDconst on PPC64 is an extended mnemonic of RLDICL. If the input to an
  1642  // RLDICL is an SRDconst, and the RLDICL does not rotate its value, the two
  1643  // operations can be combined. This functions assumes the two opcodes can
  1644  // be merged, and returns an encoded rotate+mask value of the combined RLDICL.
  1645  func mergePPC64RLDICLandSRDconst(encoded, s int64) int64 {
  1646  	mb := s
  1647  	r := 64 - s
  1648  	// A larger mb is a smaller mask.
  1649  	if (encoded>>8)&0xFF < mb {
  1650  		encoded = (encoded &^ 0xFF00) | mb<<8
  1651  	}
  1652  	// The rotate is expected to be 0.
  1653  	if (encoded & 0xFF0000) != 0 {
  1654  		panic("non-zero rotate")
  1655  	}
  1656  	return encoded | r<<16
  1657  }
  1658  
  1659  // DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask.  The values returned as
  1660  // mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
  1661  func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
  1662  	auxint := uint64(sauxint)
  1663  	rotate = int64((auxint >> 16) & 0xFF)
  1664  	mb = int64((auxint >> 8) & 0xFF)
  1665  	me = int64((auxint >> 0) & 0xFF)
  1666  	nbits := int64((auxint >> 24) & 0xFF)
  1667  	mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
  1668  	if mb > me {
  1669  		mask = ^mask
  1670  	}
  1671  	if nbits == 32 {
  1672  		mask = uint64(uint32(mask))
  1673  	}
  1674  
  1675  	// Fixup ME to match ISA definition.  The second argument to MASK(..,me)
  1676  	// is inclusive.
  1677  	me = (me - 1) & (nbits - 1)
  1678  	return
  1679  }
  1680  
  1681  // This verifies that the mask is a set of
  1682  // consecutive bits including the least
  1683  // significant bit.
  1684  func isPPC64ValidShiftMask(v int64) bool {
  1685  	if (v != 0) && ((v+1)&v) == 0 {
  1686  		return true
  1687  	}
  1688  	return false
  1689  }
  1690  
  1691  func getPPC64ShiftMaskLength(v int64) int64 {
  1692  	return int64(bits.Len64(uint64(v)))
  1693  }
  1694  
  1695  // Decompose a shift right into an equivalent rotate/mask,
  1696  // and return mask & m.
  1697  func mergePPC64RShiftMask(m, s, nbits int64) int64 {
  1698  	smask := uint64((1<<uint(nbits))-1) >> uint(s)
  1699  	return m & int64(smask)
  1700  }
  1701  
  1702  // Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
  1703  func mergePPC64AndSrwi(m, s int64) int64 {
  1704  	mask := mergePPC64RShiftMask(m, s, 32)
  1705  	if !isPPC64WordRotateMask(mask) {
  1706  		return 0
  1707  	}
  1708  	return encodePPC64RotateMask((32-s)&31, mask, 32)
  1709  }
  1710  
  1711  // Combine (ANDconst [m] (SRDconst [s])) into (RLWINM [y]) or return 0
  1712  func mergePPC64AndSrdi(m, s int64) int64 {
  1713  	mask := mergePPC64RShiftMask(m, s, 64)
  1714  
  1715  	// Verify the rotate and mask result only uses the lower 32 bits.
  1716  	rv := bits.RotateLeft64(0xFFFFFFFF00000000, -int(s))
  1717  	if rv&uint64(mask) != 0 {
  1718  		return 0
  1719  	}
  1720  	if !isPPC64WordRotateMaskNonWrapping(mask) {
  1721  		return 0
  1722  	}
  1723  	return encodePPC64RotateMask((32-s)&31, mask, 32)
  1724  }
  1725  
  1726  // Combine (ANDconst [m] (SLDconst [s])) into (RLWINM [y]) or return 0
  1727  func mergePPC64AndSldi(m, s int64) int64 {
  1728  	mask := -1 << s & m
  1729  
  1730  	// Verify the rotate and mask result only uses the lower 32 bits.
  1731  	rv := bits.RotateLeft64(0xFFFFFFFF00000000, int(s))
  1732  	if rv&uint64(mask) != 0 {
  1733  		return 0
  1734  	}
  1735  	if !isPPC64WordRotateMaskNonWrapping(mask) {
  1736  		return 0
  1737  	}
  1738  	return encodePPC64RotateMask(s&31, mask, 32)
  1739  }
  1740  
  1741  // Test if a word shift right feeding into a CLRLSLDI can be merged into RLWINM.
  1742  // Return the encoded RLWINM constant, or 0 if they cannot be merged.
  1743  func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
  1744  	mask_1 := uint64(0xFFFFFFFF >> uint(srw))
  1745  	// for CLRLSLDI, it's more convenient to think of it as a mask left bits then rotate left.
  1746  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1747  
  1748  	// Rewrite mask to apply after the final left shift.
  1749  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
  1750  
  1751  	r_1 := 32 - srw
  1752  	r_2 := GetPPC64Shiftsh(sld)
  1753  	r_3 := (r_1 + r_2) & 31 // This can wrap.
  1754  
  1755  	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
  1756  		return 0
  1757  	}
  1758  	return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
  1759  }
  1760  
  1761  // Test if a doubleword shift right feeding into a CLRLSLDI can be merged into RLWINM.
  1762  // Return the encoded RLWINM constant, or 0 if they cannot be merged.
  1763  func mergePPC64ClrlsldiSrd(sld, srd int64) int64 {
  1764  	mask_1 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(srd)
  1765  	// for CLRLSLDI, it's more convenient to think of it as a mask left bits then rotate left.
  1766  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1767  
  1768  	// Rewrite mask to apply after the final left shift.
  1769  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
  1770  
  1771  	r_1 := 64 - srd
  1772  	r_2 := GetPPC64Shiftsh(sld)
  1773  	r_3 := (r_1 + r_2) & 63 // This can wrap.
  1774  
  1775  	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
  1776  		return 0
  1777  	}
  1778  	// This combine only works when selecting and shifting the lower 32 bits.
  1779  	v1 := bits.RotateLeft64(0xFFFFFFFF00000000, int(r_3))
  1780  	if v1&mask_3 != 0 {
  1781  		return 0
  1782  	}
  1783  	return encodePPC64RotateMask(int64(r_3&31), int64(mask_3), 32)
  1784  }
  1785  
  1786  // Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM.  Return
  1787  // the encoded RLWINM constant, or 0 if they cannot be merged.
  1788  func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
  1789  	r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
  1790  	// for CLRLSLDI, it's more convenient to think of it as a mask left bits then rotate left.
  1791  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1792  
  1793  	// combine the masks, and adjust for the final left shift.
  1794  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
  1795  	r_2 := GetPPC64Shiftsh(int64(sld))
  1796  	r_3 := (r_1 + r_2) & 31 // This can wrap.
  1797  
  1798  	// Verify the result is still a valid bitmask of <= 32 bits.
  1799  	if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
  1800  		return 0
  1801  	}
  1802  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1803  }
  1804  
  1805  // Test if RLWINM feeding into an ANDconst can be merged. Return the encoded RLWINM constant,
  1806  // or 0 if they cannot be merged.
  1807  func mergePPC64AndRlwinm(mask uint32, rlw int64) int64 {
  1808  	r, _, _, mask_rlw := DecodePPC64RotateMask(rlw)
  1809  	mask_out := (mask_rlw & uint64(mask))
  1810  
  1811  	// Verify the result is still a valid bitmask of <= 32 bits.
  1812  	if !isPPC64WordRotateMask(int64(mask_out)) {
  1813  		return 0
  1814  	}
  1815  	return encodePPC64RotateMask(r, int64(mask_out), 32)
  1816  }
  1817  
  1818  // Test if RLWINM opcode rlw clears the upper 32 bits of the
  1819  // result. Return rlw if it does, 0 otherwise.
  1820  func mergePPC64MovwzregRlwinm(rlw int64) int64 {
  1821  	_, mb, me, _ := DecodePPC64RotateMask(rlw)
  1822  	if mb > me {
  1823  		return 0
  1824  	}
  1825  	return rlw
  1826  }
  1827  
  1828  // Test if AND feeding into an ANDconst can be merged. Return the encoded RLWINM constant,
  1829  // or 0 if they cannot be merged.
  1830  func mergePPC64RlwinmAnd(rlw int64, mask uint32) int64 {
  1831  	r, _, _, mask_rlw := DecodePPC64RotateMask(rlw)
  1832  
  1833  	// Rotate the input mask, combine with the rlwnm mask, and test if it is still a valid rlwinm mask.
  1834  	r_mask := bits.RotateLeft32(mask, int(r))
  1835  
  1836  	mask_out := (mask_rlw & uint64(r_mask))
  1837  
  1838  	// Verify the result is still a valid bitmask of <= 32 bits.
  1839  	if !isPPC64WordRotateMask(int64(mask_out)) {
  1840  		return 0
  1841  	}
  1842  	return encodePPC64RotateMask(r, int64(mask_out), 32)
  1843  }
  1844  
  1845  // Test if RLWINM feeding into SRDconst can be merged. Return the encoded RLIWNM constant,
  1846  // or 0 if they cannot be merged.
  1847  func mergePPC64SldiRlwinm(sldi, rlw int64) int64 {
  1848  	r_1, mb, me, mask_1 := DecodePPC64RotateMask(rlw)
  1849  	if mb > me || mb < sldi {
  1850  		// Wrapping masks cannot be merged as the upper 32 bits are effectively undefined in this case.
  1851  		// Likewise, if mb is less than the shift amount, it cannot be merged.
  1852  		return 0
  1853  	}
  1854  	// combine the masks, and adjust for the final left shift.
  1855  	mask_3 := mask_1 << sldi
  1856  	r_3 := (r_1 + sldi) & 31 // This can wrap.
  1857  
  1858  	// Verify the result is still a valid bitmask of <= 32 bits.
  1859  	if uint64(uint32(mask_3)) != mask_3 {
  1860  		return 0
  1861  	}
  1862  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1863  }
  1864  
  1865  // Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
  1866  // or return 0 if they cannot be combined.
  1867  func mergePPC64SldiSrw(sld, srw int64) int64 {
  1868  	if sld > srw || srw >= 32 {
  1869  		return 0
  1870  	}
  1871  	mask_r := uint32(0xFFFFFFFF) >> uint(srw)
  1872  	mask_l := uint32(0xFFFFFFFF) >> uint(sld)
  1873  	mask := (mask_r & mask_l) << uint(sld)
  1874  	return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
  1875  }
  1876  
  1877  // Convert a PPC64 opcode from the Op to OpCC form. This converts (op x y)
  1878  // to (Select0 (opCC x y)) without having to explicitly fixup every user
  1879  // of op.
  1880  //
  1881  // E.g consider the case:
  1882  // a = (ADD x y)
  1883  // b = (CMPconst [0] a)
  1884  // c = (OR a z)
  1885  //
  1886  // A rule like (CMPconst [0] (ADD x y)) => (CMPconst [0] (Select0 (ADDCC x y)))
  1887  // would produce:
  1888  // a  = (ADD x y)
  1889  // a' = (ADDCC x y)
  1890  // a” = (Select0 a')
  1891  // b  = (CMPconst [0] a”)
  1892  // c  = (OR a z)
  1893  //
  1894  // which makes it impossible to rewrite the second user. Instead the result
  1895  // of this conversion is:
  1896  // a' = (ADDCC x y)
  1897  // a  = (Select0 a')
  1898  // b  = (CMPconst [0] a)
  1899  // c  = (OR a z)
  1900  //
  1901  // Which makes it trivial to rewrite b using a lowering rule.
  1902  func convertPPC64OpToOpCC(op *Value) *Value {
  1903  	ccOpMap := map[Op]Op{
  1904  		OpPPC64ADD:      OpPPC64ADDCC,
  1905  		OpPPC64ADDconst: OpPPC64ADDCCconst,
  1906  		OpPPC64AND:      OpPPC64ANDCC,
  1907  		OpPPC64ANDN:     OpPPC64ANDNCC,
  1908  		OpPPC64ANDconst: OpPPC64ANDCCconst,
  1909  		OpPPC64CNTLZD:   OpPPC64CNTLZDCC,
  1910  		OpPPC64MULHDU:   OpPPC64MULHDUCC,
  1911  		OpPPC64NEG:      OpPPC64NEGCC,
  1912  		OpPPC64NOR:      OpPPC64NORCC,
  1913  		OpPPC64OR:       OpPPC64ORCC,
  1914  		OpPPC64RLDICL:   OpPPC64RLDICLCC,
  1915  		OpPPC64SUB:      OpPPC64SUBCC,
  1916  		OpPPC64XOR:      OpPPC64XORCC,
  1917  	}
  1918  	b := op.Block
  1919  	opCC := b.NewValue0I(op.Pos, ccOpMap[op.Op], types.NewTuple(op.Type, types.TypeFlags), op.AuxInt)
  1920  	opCC.AddArgs(op.Args...)
  1921  	op.reset(OpSelect0)
  1922  	op.AddArgs(opCC)
  1923  	return op
  1924  }
  1925  
  1926  // Try converting a RLDICL to ANDCC. If successful, return the mask otherwise 0.
  1927  func convertPPC64RldiclAndccconst(sauxint int64) int64 {
  1928  	r, _, _, mask := DecodePPC64RotateMask(sauxint)
  1929  	if r != 0 || mask&0xFFFF != mask {
  1930  		return 0
  1931  	}
  1932  	return int64(mask)
  1933  }
  1934  
  1935  // Convenience function to rotate a 32 bit constant value by another constant.
  1936  func rotateLeft32(v, rotate int64) int64 {
  1937  	return int64(bits.RotateLeft32(uint32(v), int(rotate)))
  1938  }
  1939  
  1940  func rotateRight64(v, rotate int64) int64 {
  1941  	return int64(bits.RotateLeft64(uint64(v), int(-rotate)))
  1942  }
  1943  
  1944  // encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
  1945  func armBFAuxInt(lsb, width int64) arm64BitField {
  1946  	if lsb < 0 || lsb > 63 {
  1947  		panic("ARM(64) bit field lsb constant out of range")
  1948  	}
  1949  	if width < 1 || lsb+width > 64 {
  1950  		panic("ARM(64) bit field width constant out of range")
  1951  	}
  1952  	return arm64BitField(width | lsb<<8)
  1953  }
  1954  
  1955  // returns the lsb part of the auxInt field of arm64 bitfield ops.
  1956  func (bfc arm64BitField) lsb() int64 {
  1957  	return int64(uint64(bfc) >> 8)
  1958  }
  1959  
  1960  // returns the width part of the auxInt field of arm64 bitfield ops.
  1961  func (bfc arm64BitField) width() int64 {
  1962  	return int64(bfc) & 0xff
  1963  }
  1964  
  1965  // checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
  1966  func isARM64BFMask(lsb, mask, rshift int64) bool {
  1967  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1968  	return shiftedMask != 0 && isPowerOfTwo(shiftedMask+1) && nto(shiftedMask)+lsb < 64
  1969  }
  1970  
  1971  // returns the bitfield width of mask >> rshift for arm64 bitfield ops.
  1972  func arm64BFWidth(mask, rshift int64) int64 {
  1973  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1974  	if shiftedMask == 0 {
  1975  		panic("ARM64 BF mask is zero")
  1976  	}
  1977  	return nto(shiftedMask)
  1978  }
  1979  
  1980  // registerizable reports whether t is a primitive type that fits in
  1981  // a register. It assumes float64 values will always fit into registers
  1982  // even if that isn't strictly true.
  1983  func registerizable(b *Block, typ *types.Type) bool {
  1984  	if typ.IsPtrShaped() || typ.IsFloat() || typ.IsBoolean() {
  1985  		return true
  1986  	}
  1987  	if typ.IsInteger() {
  1988  		return typ.Size() <= b.Func.Config.RegSize
  1989  	}
  1990  	return false
  1991  }
  1992  
  1993  // needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
  1994  func needRaceCleanup(sym *AuxCall, v *Value) bool {
  1995  	f := v.Block.Func
  1996  	if !f.Config.Race {
  1997  		return false
  1998  	}
  1999  	if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
  2000  		return false
  2001  	}
  2002  	for _, b := range f.Blocks {
  2003  		for _, v := range b.Values {
  2004  			switch v.Op {
  2005  			case OpStaticCall, OpStaticLECall:
  2006  				// Check for racefuncenter will encounter racefuncexit and vice versa.
  2007  				// Allow calls to panic*
  2008  				s := v.Aux.(*AuxCall).Fn.String()
  2009  				switch s {
  2010  				case "runtime.racefuncenter", "runtime.racefuncexit",
  2011  					"runtime.panicdivide", "runtime.panicwrap",
  2012  					"runtime.panicshift":
  2013  					continue
  2014  				}
  2015  				// If we encountered any call, we need to keep racefunc*,
  2016  				// for accurate stacktraces.
  2017  				return false
  2018  			case OpPanicBounds, OpPanicExtend:
  2019  				// Note: these are panic generators that are ok (like the static calls above).
  2020  			case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
  2021  				// We must keep the race functions if there are any other call types.
  2022  				return false
  2023  			}
  2024  		}
  2025  	}
  2026  	if isSameCall(sym, "runtime.racefuncenter") {
  2027  		// TODO REGISTER ABI this needs to be cleaned up.
  2028  		// If we're removing racefuncenter, remove its argument as well.
  2029  		if v.Args[0].Op != OpStore {
  2030  			if v.Op == OpStaticLECall {
  2031  				// there is no store, yet.
  2032  				return true
  2033  			}
  2034  			return false
  2035  		}
  2036  		mem := v.Args[0].Args[2]
  2037  		v.Args[0].reset(OpCopy)
  2038  		v.Args[0].AddArg(mem)
  2039  	}
  2040  	return true
  2041  }
  2042  
  2043  // symIsRO reports whether sym is a read-only global.
  2044  func symIsRO(sym Sym) bool {
  2045  	lsym := sym.(*obj.LSym)
  2046  	return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
  2047  }
  2048  
  2049  // symIsROZero reports whether sym is a read-only global whose data contains all zeros.
  2050  func symIsROZero(sym Sym) bool {
  2051  	lsym := sym.(*obj.LSym)
  2052  	if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
  2053  		return false
  2054  	}
  2055  	for _, b := range lsym.P {
  2056  		if b != 0 {
  2057  			return false
  2058  		}
  2059  	}
  2060  	return true
  2061  }
  2062  
  2063  // isFixed32 returns true if the int32 at offset off in symbol sym
  2064  // is known and constant.
  2065  func isFixed32(c *Config, sym Sym, off int64) bool {
  2066  	return isFixed(c, sym, off, 4)
  2067  }
  2068  
  2069  // isFixed returns true if the range [off,off+size] of the symbol sym
  2070  // is known and constant.
  2071  func isFixed(c *Config, sym Sym, off, size int64) bool {
  2072  	lsym := sym.(*obj.LSym)
  2073  	if lsym.Extra == nil {
  2074  		return false
  2075  	}
  2076  	if _, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
  2077  		if off == 2*c.PtrSize && size == 4 {
  2078  			return true // type hash field
  2079  		}
  2080  	}
  2081  	return false
  2082  }
  2083  func fixed32(c *Config, sym Sym, off int64) int32 {
  2084  	lsym := sym.(*obj.LSym)
  2085  	if ti, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
  2086  		if off == 2*c.PtrSize {
  2087  			return int32(types.TypeHash(ti.Type.(*types.Type)))
  2088  		}
  2089  	}
  2090  	base.Fatalf("fixed32 data not known for %s:%d", sym, off)
  2091  	return 0
  2092  }
  2093  
  2094  // isFixedSym returns true if the contents of sym at the given offset
  2095  // is known and is the constant address of another symbol.
  2096  func isFixedSym(sym Sym, off int64) bool {
  2097  	lsym := sym.(*obj.LSym)
  2098  	switch {
  2099  	case lsym.Type == objabi.SRODATA:
  2100  		// itabs, dictionaries
  2101  	default:
  2102  		return false
  2103  	}
  2104  	for _, r := range lsym.R {
  2105  		if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off && r.Add == 0 {
  2106  			return true
  2107  		}
  2108  	}
  2109  	return false
  2110  }
  2111  func fixedSym(f *Func, sym Sym, off int64) Sym {
  2112  	lsym := sym.(*obj.LSym)
  2113  	for _, r := range lsym.R {
  2114  		if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off {
  2115  			if strings.HasPrefix(r.Sym.Name, "type:") {
  2116  				// In case we're loading a type out of a dictionary, we need to record
  2117  				// that the containing function might put that type in an interface.
  2118  				// That information is currently recorded in relocations in the dictionary,
  2119  				// but if we perform this load at compile time then the dictionary
  2120  				// might be dead.
  2121  				reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
  2122  			} else if strings.HasPrefix(r.Sym.Name, "go:itab") {
  2123  				// Same, but if we're using an itab we need to record that the
  2124  				// itab._type might be put in an interface.
  2125  				reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
  2126  			}
  2127  			return r.Sym
  2128  		}
  2129  	}
  2130  	base.Fatalf("fixedSym data not known for %s:%d", sym, off)
  2131  	return nil
  2132  }
  2133  
  2134  // read8 reads one byte from the read-only global sym at offset off.
  2135  func read8(sym Sym, off int64) uint8 {
  2136  	lsym := sym.(*obj.LSym)
  2137  	if off >= int64(len(lsym.P)) || off < 0 {
  2138  		// Invalid index into the global sym.
  2139  		// This can happen in dead code, so we don't want to panic.
  2140  		// Just return any value, it will eventually get ignored.
  2141  		// See issue 29215.
  2142  		return 0
  2143  	}
  2144  	return lsym.P[off]
  2145  }
  2146  
  2147  // read16 reads two bytes from the read-only global sym at offset off.
  2148  func read16(sym Sym, off int64, byteorder binary.ByteOrder) uint16 {
  2149  	lsym := sym.(*obj.LSym)
  2150  	// lsym.P is written lazily.
  2151  	// Bytes requested after the end of lsym.P are 0.
  2152  	var src []byte
  2153  	if 0 <= off && off < int64(len(lsym.P)) {
  2154  		src = lsym.P[off:]
  2155  	}
  2156  	buf := make([]byte, 2)
  2157  	copy(buf, src)
  2158  	return byteorder.Uint16(buf)
  2159  }
  2160  
  2161  // read32 reads four bytes from the read-only global sym at offset off.
  2162  func read32(sym Sym, off int64, byteorder binary.ByteOrder) uint32 {
  2163  	lsym := sym.(*obj.LSym)
  2164  	var src []byte
  2165  	if 0 <= off && off < int64(len(lsym.P)) {
  2166  		src = lsym.P[off:]
  2167  	}
  2168  	buf := make([]byte, 4)
  2169  	copy(buf, src)
  2170  	return byteorder.Uint32(buf)
  2171  }
  2172  
  2173  // read64 reads eight bytes from the read-only global sym at offset off.
  2174  func read64(sym Sym, off int64, byteorder binary.ByteOrder) uint64 {
  2175  	lsym := sym.(*obj.LSym)
  2176  	var src []byte
  2177  	if 0 <= off && off < int64(len(lsym.P)) {
  2178  		src = lsym.P[off:]
  2179  	}
  2180  	buf := make([]byte, 8)
  2181  	copy(buf, src)
  2182  	return byteorder.Uint64(buf)
  2183  }
  2184  
  2185  // sequentialAddresses reports true if it can prove that x + n == y
  2186  func sequentialAddresses(x, y *Value, n int64) bool {
  2187  	if x == y && n == 0 {
  2188  		return true
  2189  	}
  2190  	if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
  2191  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2192  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2193  		return true
  2194  	}
  2195  	if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
  2196  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2197  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2198  		return true
  2199  	}
  2200  	if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
  2201  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2202  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2203  		return true
  2204  	}
  2205  	if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
  2206  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2207  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2208  		return true
  2209  	}
  2210  	return false
  2211  }
  2212  
  2213  // flagConstant represents the result of a compile-time comparison.
  2214  // The sense of these flags does not necessarily represent the hardware's notion
  2215  // of a flags register - these are just a compile-time construct.
  2216  // We happen to match the semantics to those of arm/arm64.
  2217  // Note that these semantics differ from x86: the carry flag has the opposite
  2218  // sense on a subtraction!
  2219  //
  2220  //	On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
  2221  //	On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
  2222  //	 (because it does x + ^y + C).
  2223  //
  2224  // See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
  2225  type flagConstant uint8
  2226  
  2227  // N reports whether the result of an operation is negative (high bit set).
  2228  func (fc flagConstant) N() bool {
  2229  	return fc&1 != 0
  2230  }
  2231  
  2232  // Z reports whether the result of an operation is 0.
  2233  func (fc flagConstant) Z() bool {
  2234  	return fc&2 != 0
  2235  }
  2236  
  2237  // C reports whether an unsigned add overflowed (carry), or an
  2238  // unsigned subtract did not underflow (borrow).
  2239  func (fc flagConstant) C() bool {
  2240  	return fc&4 != 0
  2241  }
  2242  
  2243  // V reports whether a signed operation overflowed or underflowed.
  2244  func (fc flagConstant) V() bool {
  2245  	return fc&8 != 0
  2246  }
  2247  
  2248  func (fc flagConstant) eq() bool {
  2249  	return fc.Z()
  2250  }
  2251  func (fc flagConstant) ne() bool {
  2252  	return !fc.Z()
  2253  }
  2254  func (fc flagConstant) lt() bool {
  2255  	return fc.N() != fc.V()
  2256  }
  2257  func (fc flagConstant) le() bool {
  2258  	return fc.Z() || fc.lt()
  2259  }
  2260  func (fc flagConstant) gt() bool {
  2261  	return !fc.Z() && fc.ge()
  2262  }
  2263  func (fc flagConstant) ge() bool {
  2264  	return fc.N() == fc.V()
  2265  }
  2266  func (fc flagConstant) ult() bool {
  2267  	return !fc.C()
  2268  }
  2269  func (fc flagConstant) ule() bool {
  2270  	return fc.Z() || fc.ult()
  2271  }
  2272  func (fc flagConstant) ugt() bool {
  2273  	return !fc.Z() && fc.uge()
  2274  }
  2275  func (fc flagConstant) uge() bool {
  2276  	return fc.C()
  2277  }
  2278  
  2279  func (fc flagConstant) ltNoov() bool {
  2280  	return fc.lt() && !fc.V()
  2281  }
  2282  func (fc flagConstant) leNoov() bool {
  2283  	return fc.le() && !fc.V()
  2284  }
  2285  func (fc flagConstant) gtNoov() bool {
  2286  	return fc.gt() && !fc.V()
  2287  }
  2288  func (fc flagConstant) geNoov() bool {
  2289  	return fc.ge() && !fc.V()
  2290  }
  2291  
  2292  func (fc flagConstant) String() string {
  2293  	return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
  2294  }
  2295  
  2296  type flagConstantBuilder struct {
  2297  	N bool
  2298  	Z bool
  2299  	C bool
  2300  	V bool
  2301  }
  2302  
  2303  func (fcs flagConstantBuilder) encode() flagConstant {
  2304  	var fc flagConstant
  2305  	if fcs.N {
  2306  		fc |= 1
  2307  	}
  2308  	if fcs.Z {
  2309  		fc |= 2
  2310  	}
  2311  	if fcs.C {
  2312  		fc |= 4
  2313  	}
  2314  	if fcs.V {
  2315  		fc |= 8
  2316  	}
  2317  	return fc
  2318  }
  2319  
  2320  // Note: addFlags(x,y) != subFlags(x,-y) in some situations:
  2321  //  - the results of the C flag are different
  2322  //  - the results of the V flag when y==minint are different
  2323  
  2324  // addFlags64 returns the flags that would be set from computing x+y.
  2325  func addFlags64(x, y int64) flagConstant {
  2326  	var fcb flagConstantBuilder
  2327  	fcb.Z = x+y == 0
  2328  	fcb.N = x+y < 0
  2329  	fcb.C = uint64(x+y) < uint64(x)
  2330  	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
  2331  	return fcb.encode()
  2332  }
  2333  
  2334  // subFlags64 returns the flags that would be set from computing x-y.
  2335  func subFlags64(x, y int64) flagConstant {
  2336  	var fcb flagConstantBuilder
  2337  	fcb.Z = x-y == 0
  2338  	fcb.N = x-y < 0
  2339  	fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
  2340  	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
  2341  	return fcb.encode()
  2342  }
  2343  
  2344  // addFlags32 returns the flags that would be set from computing x+y.
  2345  func addFlags32(x, y int32) flagConstant {
  2346  	var fcb flagConstantBuilder
  2347  	fcb.Z = x+y == 0
  2348  	fcb.N = x+y < 0
  2349  	fcb.C = uint32(x+y) < uint32(x)
  2350  	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
  2351  	return fcb.encode()
  2352  }
  2353  
  2354  // subFlags32 returns the flags that would be set from computing x-y.
  2355  func subFlags32(x, y int32) flagConstant {
  2356  	var fcb flagConstantBuilder
  2357  	fcb.Z = x-y == 0
  2358  	fcb.N = x-y < 0
  2359  	fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
  2360  	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
  2361  	return fcb.encode()
  2362  }
  2363  
  2364  // logicFlags64 returns flags set to the sign/zeroness of x.
  2365  // C and V are set to false.
  2366  func logicFlags64(x int64) flagConstant {
  2367  	var fcb flagConstantBuilder
  2368  	fcb.Z = x == 0
  2369  	fcb.N = x < 0
  2370  	return fcb.encode()
  2371  }
  2372  
  2373  // logicFlags32 returns flags set to the sign/zeroness of x.
  2374  // C and V are set to false.
  2375  func logicFlags32(x int32) flagConstant {
  2376  	var fcb flagConstantBuilder
  2377  	fcb.Z = x == 0
  2378  	fcb.N = x < 0
  2379  	return fcb.encode()
  2380  }
  2381  
  2382  func makeJumpTableSym(b *Block) *obj.LSym {
  2383  	s := base.Ctxt.Lookup(fmt.Sprintf("%s.jump%d", b.Func.fe.Func().LSym.Name, b.ID))
  2384  	// The jump table symbol is accessed only from the function symbol.
  2385  	s.Set(obj.AttrStatic, true)
  2386  	return s
  2387  }
  2388  
  2389  // canRotate reports whether the architecture supports
  2390  // rotates of integer registers with the given number of bits.
  2391  func canRotate(c *Config, bits int64) bool {
  2392  	if bits > c.PtrSize*8 {
  2393  		// Don't rewrite to rotates bigger than the machine word.
  2394  		return false
  2395  	}
  2396  	switch c.arch {
  2397  	case "386", "amd64", "arm64", "loong64", "riscv64":
  2398  		return true
  2399  	case "arm", "s390x", "ppc64", "ppc64le", "wasm":
  2400  		return bits >= 32
  2401  	default:
  2402  		return false
  2403  	}
  2404  }
  2405  
  2406  // isARM64bitcon reports whether a constant can be encoded into a logical instruction.
  2407  func isARM64bitcon(x uint64) bool {
  2408  	if x == 1<<64-1 || x == 0 {
  2409  		return false
  2410  	}
  2411  	// determine the period and sign-extend a unit to 64 bits
  2412  	switch {
  2413  	case x != x>>32|x<<32:
  2414  		// period is 64
  2415  		// nothing to do
  2416  	case x != x>>16|x<<48:
  2417  		// period is 32
  2418  		x = uint64(int64(int32(x)))
  2419  	case x != x>>8|x<<56:
  2420  		// period is 16
  2421  		x = uint64(int64(int16(x)))
  2422  	case x != x>>4|x<<60:
  2423  		// period is 8
  2424  		x = uint64(int64(int8(x)))
  2425  	default:
  2426  		// period is 4 or 2, always true
  2427  		// 0001, 0010, 0100, 1000 -- 0001 rotate
  2428  		// 0011, 0110, 1100, 1001 -- 0011 rotate
  2429  		// 0111, 1011, 1101, 1110 -- 0111 rotate
  2430  		// 0101, 1010             -- 01   rotate, repeat
  2431  		return true
  2432  	}
  2433  	return sequenceOfOnes(x) || sequenceOfOnes(^x)
  2434  }
  2435  
  2436  // sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros.
  2437  func sequenceOfOnes(x uint64) bool {
  2438  	y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2
  2439  	y += x
  2440  	return (y-1)&y == 0
  2441  }
  2442  
  2443  // isARM64addcon reports whether x can be encoded as the immediate value in an ADD or SUB instruction.
  2444  func isARM64addcon(v int64) bool {
  2445  	/* uimm12 or uimm24? */
  2446  	if v < 0 {
  2447  		return false
  2448  	}
  2449  	if (v & 0xFFF) == 0 {
  2450  		v >>= 12
  2451  	}
  2452  	return v <= 0xFFF
  2453  }
  2454  
  2455  // setPos sets the position of v to pos, then returns true.
  2456  // Useful for setting the result of a rewrite's position to
  2457  // something other than the default.
  2458  func setPos(v *Value, pos src.XPos) bool {
  2459  	v.Pos = pos
  2460  	return true
  2461  }
  2462  
  2463  // isNonNegative reports whether v is known to be greater or equal to zero.
  2464  // Note that this is pretty simplistic. The prove pass generates more detailed
  2465  // nonnegative information about values.
  2466  func isNonNegative(v *Value) bool {
  2467  	if !v.Type.IsInteger() {
  2468  		v.Fatalf("isNonNegative bad type: %v", v.Type)
  2469  	}
  2470  	// TODO: return true if !v.Type.IsSigned()
  2471  	// SSA isn't type-safe enough to do that now (issue 37753).
  2472  	// The checks below depend only on the pattern of bits.
  2473  
  2474  	switch v.Op {
  2475  	case OpConst64:
  2476  		return v.AuxInt >= 0
  2477  
  2478  	case OpConst32:
  2479  		return int32(v.AuxInt) >= 0
  2480  
  2481  	case OpConst16:
  2482  		return int16(v.AuxInt) >= 0
  2483  
  2484  	case OpConst8:
  2485  		return int8(v.AuxInt) >= 0
  2486  
  2487  	case OpStringLen, OpSliceLen, OpSliceCap,
  2488  		OpZeroExt8to64, OpZeroExt16to64, OpZeroExt32to64,
  2489  		OpZeroExt8to32, OpZeroExt16to32, OpZeroExt8to16,
  2490  		OpCtz64, OpCtz32, OpCtz16, OpCtz8,
  2491  		OpCtz64NonZero, OpCtz32NonZero, OpCtz16NonZero, OpCtz8NonZero,
  2492  		OpBitLen64, OpBitLen32, OpBitLen16, OpBitLen8:
  2493  		return true
  2494  
  2495  	case OpRsh64Ux64, OpRsh32Ux64:
  2496  		by := v.Args[1]
  2497  		return by.Op == OpConst64 && by.AuxInt > 0
  2498  
  2499  	case OpRsh64x64, OpRsh32x64, OpRsh8x64, OpRsh16x64, OpRsh32x32, OpRsh64x32,
  2500  		OpSignExt32to64, OpSignExt16to64, OpSignExt8to64, OpSignExt16to32, OpSignExt8to32:
  2501  		return isNonNegative(v.Args[0])
  2502  
  2503  	case OpAnd64, OpAnd32, OpAnd16, OpAnd8:
  2504  		return isNonNegative(v.Args[0]) || isNonNegative(v.Args[1])
  2505  
  2506  	case OpMod64, OpMod32, OpMod16, OpMod8,
  2507  		OpDiv64, OpDiv32, OpDiv16, OpDiv8,
  2508  		OpOr64, OpOr32, OpOr16, OpOr8,
  2509  		OpXor64, OpXor32, OpXor16, OpXor8:
  2510  		return isNonNegative(v.Args[0]) && isNonNegative(v.Args[1])
  2511  
  2512  		// We could handle OpPhi here, but the improvements from doing
  2513  		// so are very minor, and it is neither simple nor cheap.
  2514  	}
  2515  	return false
  2516  }
  2517  
  2518  func rewriteStructLoad(v *Value) *Value {
  2519  	b := v.Block
  2520  	ptr := v.Args[0]
  2521  	mem := v.Args[1]
  2522  
  2523  	t := v.Type
  2524  	args := make([]*Value, t.NumFields())
  2525  	for i := range args {
  2526  		ft := t.FieldType(i)
  2527  		addr := b.NewValue1I(v.Pos, OpOffPtr, ft.PtrTo(), t.FieldOff(i), ptr)
  2528  		args[i] = b.NewValue2(v.Pos, OpLoad, ft, addr, mem)
  2529  	}
  2530  
  2531  	v.reset(OpStructMake)
  2532  	v.AddArgs(args...)
  2533  	return v
  2534  }
  2535  
  2536  func rewriteStructStore(v *Value) *Value {
  2537  	b := v.Block
  2538  	dst := v.Args[0]
  2539  	x := v.Args[1]
  2540  	if x.Op != OpStructMake {
  2541  		base.Fatalf("invalid struct store: %v", x)
  2542  	}
  2543  	mem := v.Args[2]
  2544  
  2545  	t := x.Type
  2546  	for i, arg := range x.Args {
  2547  		ft := t.FieldType(i)
  2548  
  2549  		addr := b.NewValue1I(v.Pos, OpOffPtr, ft.PtrTo(), t.FieldOff(i), dst)
  2550  		mem = b.NewValue3A(v.Pos, OpStore, types.TypeMem, typeToAux(ft), addr, arg, mem)
  2551  	}
  2552  
  2553  	return mem
  2554  }
  2555  
  2556  // isDirectType reports whether v represents a type
  2557  // (a *runtime._type) whose value is stored directly in an
  2558  // interface (i.e., is pointer or pointer-like).
  2559  func isDirectType(v *Value) bool {
  2560  	return isDirectType1(v)
  2561  }
  2562  
  2563  // v is a type
  2564  func isDirectType1(v *Value) bool {
  2565  	switch v.Op {
  2566  	case OpITab:
  2567  		return isDirectType2(v.Args[0])
  2568  	case OpAddr:
  2569  		lsym := v.Aux.(*obj.LSym)
  2570  		if lsym.Extra == nil {
  2571  			return false
  2572  		}
  2573  		if ti, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
  2574  			return types.IsDirectIface(ti.Type.(*types.Type))
  2575  		}
  2576  	}
  2577  	return false
  2578  }
  2579  
  2580  // v is an empty interface
  2581  func isDirectType2(v *Value) bool {
  2582  	switch v.Op {
  2583  	case OpIMake:
  2584  		return isDirectType1(v.Args[0])
  2585  	}
  2586  	return false
  2587  }
  2588  
  2589  // isDirectIface reports whether v represents an itab
  2590  // (a *runtime._itab) for a type whose value is stored directly
  2591  // in an interface (i.e., is pointer or pointer-like).
  2592  func isDirectIface(v *Value) bool {
  2593  	return isDirectIface1(v, 9)
  2594  }
  2595  
  2596  // v is an itab
  2597  func isDirectIface1(v *Value, depth int) bool {
  2598  	if depth == 0 {
  2599  		return false
  2600  	}
  2601  	switch v.Op {
  2602  	case OpITab:
  2603  		return isDirectIface2(v.Args[0], depth-1)
  2604  	case OpAddr:
  2605  		lsym := v.Aux.(*obj.LSym)
  2606  		if lsym.Extra == nil {
  2607  			return false
  2608  		}
  2609  		if ii, ok := (*lsym.Extra).(*obj.ItabInfo); ok {
  2610  			return types.IsDirectIface(ii.Type.(*types.Type))
  2611  		}
  2612  	case OpConstNil:
  2613  		// We can treat this as direct, because if the itab is
  2614  		// nil, the data field must be nil also.
  2615  		return true
  2616  	}
  2617  	return false
  2618  }
  2619  
  2620  // v is an interface
  2621  func isDirectIface2(v *Value, depth int) bool {
  2622  	if depth == 0 {
  2623  		return false
  2624  	}
  2625  	switch v.Op {
  2626  	case OpIMake:
  2627  		return isDirectIface1(v.Args[0], depth-1)
  2628  	case OpPhi:
  2629  		for _, a := range v.Args {
  2630  			if !isDirectIface2(a, depth-1) {
  2631  				return false
  2632  			}
  2633  		}
  2634  		return true
  2635  	}
  2636  	return false
  2637  }
  2638  
  2639  func bitsAdd64(x, y, carry int64) (r struct{ sum, carry int64 }) {
  2640  	s, c := bits.Add64(uint64(x), uint64(y), uint64(carry))
  2641  	r.sum, r.carry = int64(s), int64(c)
  2642  	return
  2643  }
  2644  
  2645  func bitsMulU64(x, y int64) (r struct{ hi, lo int64 }) {
  2646  	hi, lo := bits.Mul64(uint64(x), uint64(y))
  2647  	r.hi, r.lo = int64(hi), int64(lo)
  2648  	return
  2649  }
  2650  func bitsMulU32(x, y int32) (r struct{ hi, lo int32 }) {
  2651  	hi, lo := bits.Mul32(uint32(x), uint32(y))
  2652  	r.hi, r.lo = int32(hi), int32(lo)
  2653  	return
  2654  }
  2655  
  2656  // flagify rewrites v which is (X ...) to (Select0 (Xflags ...)).
  2657  func flagify(v *Value) bool {
  2658  	var flagVersion Op
  2659  	switch v.Op {
  2660  	case OpAMD64ADDQconst:
  2661  		flagVersion = OpAMD64ADDQconstflags
  2662  	case OpAMD64ADDLconst:
  2663  		flagVersion = OpAMD64ADDLconstflags
  2664  	default:
  2665  		base.Fatalf("can't flagify op %s", v.Op)
  2666  	}
  2667  	inner := v.copyInto(v.Block)
  2668  	inner.Op = flagVersion
  2669  	inner.Type = types.NewTuple(v.Type, types.TypeFlags)
  2670  	v.reset(OpSelect0)
  2671  	v.AddArg(inner)
  2672  	return true
  2673  }
  2674
View as plain text