Source file src/cmd/internal/obj/arm64/inst.go

     1  // Copyright 2026 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package arm64
     6  
     7  import (
     8  	"cmd/internal/obj"
     9  	"fmt"
    10  	"iter"
    11  	"math"
    12  	"math/bits"
    13  )
    14  
// instEncoder represents an instruction encoder: one candidate encoding of a
// Go opcode. tryEncode starts from fixedBits and ORs in the bits produced by
// each operand in args.
type instEncoder struct {
	goOp      obj.As    // Go opcode mnemonic
	fixedBits uint32    // Known bits
	args      []operand // Operands, in Go order
}
    21  
// varBits describes a bit field of the binary encoding together with its value.
// NOTE(review): not referenced by the code visible in this part of the file;
// confirm its users before changing it.
type varBits struct {
	// The low and high bit index in the binary encoding, exclusive on hi
	lo, hi  int
	encoded bool // If true then its value is already encoded
	bits    uint32
}
    28  
// component identifies a named field (component) of a binary encoding.
// e.g. for operand <Zda>.<T>, <T>'s encoding function might be described as:
//
//	For the "Byte and halfword" variant: is the size specifier,
//	sz	<T>
//	0	B
//	1	H
//	bit range mappings:
//	sz: [22:23)
//
// Then sz is the component of the binary encoding.
//
// tryEncode uses the component as a key: two elements that encode into the
// same component must produce the same bits.
type component uint16
    41  
// elemEncoder encodes one element of an operand.
type elemEncoder struct {
	// fn maps the element value (as returned by addrComponent) to its encoded
	// bits. The second result is the ok signal. tryEncode additionally treats a
	// non-zero first result with ok == false as one of the code* sentinel
	// values, selecting an encoder that needs more context than a single value.
	fn func(uint32) (uint32, bool)
	// comp is the component of the binary encoding.
	comp component
}
    47  
// operand is the operand type of an instruction.
type operand struct {
	class AClass // Operand class: register, constant, memory operation etc.
	// The elements that this operand includes; only the encoding-related parts
	// are listed. Each element is represented by its encoding function.
	// The first value an encoding function returns is the encoded binary, the
	// second is the ok signal.
	// The encoding functions return the ok signal for deduplication purposes.
	// For example:
	//	SDOT  <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
	//	SDOT  <Zda>.H, <Zn>.B, <Zm>.B
	//	SDOT  <Zda>.S, <Zn>.H, <Zm>.H
	//
	// The encoding text specifies a constraint between <T> and <Tb>: "T = 4*Tb".
	// That fact is not visible from the encoding format alone, and without it
	// the first encoding domain entails the other two, so they cannot be
	// deduplicated at the instruction matching phase. The deduplication is
	// therefore deferred to the encoding phase, which needs the ok signal
	// together with the [elemEncoder.comp] field.
	elemEncoders []elemEncoder
}
    67  
    68  // opsInProg returns an iterator over the operands ([Addr]) of p
    69  func opsInProg(p *obj.Prog) iter.Seq[*obj.Addr] {
    70  	return func(yield func(*obj.Addr) bool) {
    71  		// Go order: From, Reg, RestArgs..., To
    72  		// For SVE, Reg is unused as it's so common that registers have arrangements.
    73  		if p.From.Type != obj.TYPE_NONE {
    74  			if !yield(&p.From) {
    75  				return
    76  			}
    77  		}
    78  		for j := range p.RestArgs {
    79  			if !yield(&p.RestArgs[j].Addr) {
    80  				return
    81  			}
    82  		}
    83  		if p.To.Type != obj.TYPE_NONE {
    84  			if !yield(&p.To) {
    85  				return
    86  			}
    87  		}
    88  	}
    89  }
    90  
    91  // aclass returns the AClass of an Addr.
    92  func aclass(a *obj.Addr) AClass {
    93  	if a.Type == obj.TYPE_REG {
    94  		if a.Offset&(int64(1)<<62) != 0 {
    95  			return AC_PREGSEL
    96  		}
    97  		if a.Reg >= REG_Z0 && a.Reg <= REG_Z31 {
    98  			return AC_ZREG
    99  		}
   100  		if a.Reg >= REG_P0 && a.Reg <= REG_PN15 {
   101  			return AC_PREG
   102  		}
   103  		if a.Reg >= REG_ARNG && a.Reg < REG_ELEM {
   104  			return AC_ARNG
   105  		}
   106  		if a.Reg >= REG_ZARNG && a.Reg < REG_ZARNGELEM {
   107  			return AC_ARNG
   108  		}
   109  		if a.Reg >= REG_ZARNGELEM && a.Reg < REG_PZELEM {
   110  			return AC_ARNGIDX
   111  		}
   112  		if a.Reg >= REG_PZELEM && a.Reg < REG_PARNGZM {
   113  			if a.Reg&(1<<5) == 0 {
   114  				return AC_ZREGIDX
   115  			} else {
   116  				return AC_PREGIDX
   117  			}
   118  		}
   119  		if a.Reg >= REG_PARNGZM && a.Reg < REG_PARNGZM_END {
   120  			switch (a.Reg >> 5) & 15 {
   121  			case PRED_M, PRED_Z:
   122  				return AC_PREGZM
   123  			default:
   124  				return AC_ARNG
   125  			}
   126  		}
   127  		if a.Reg >= REG_V0 && a.Reg <= REG_V31 {
   128  			return AC_VREG
   129  		}
   130  		if a.Reg >= REG_R0 && a.Reg <= REG_R31 || a.Reg == REG_RSP {
   131  			return AC_SPZGREG
   132  		}
   133  	}
   134  	if a.Type == obj.TYPE_CONST || a.Type == obj.TYPE_FCONST {
   135  		return AC_IMM
   136  	}
   137  	if a.Type == obj.TYPE_REGLIST {
   138  		if a.Scale > 0 {
   139  			return AC_REGLIST_RANGE
   140  		}
   141  		switch (a.Offset >> 12) & 0xf {
   142  		case 0x7:
   143  			return AC_REGLIST1
   144  		case 0xa:
   145  			return AC_REGLIST2
   146  		case 0x6:
   147  			return AC_REGLIST3
   148  		case 0x2:
   149  			return AC_REGLIST4
   150  		}
   151  	}
   152  	if a.Type == obj.TYPE_MEM {
   153  		if a.Index == 0 {
   154  			if a.Scale&-32768 != 0 {
   155  				return AC_MEMOFFMULVL
   156  			}
   157  			return AC_MEMOFF
   158  		}
   159  		return AC_MEMEXT
   160  	}
   161  	if a.Type == obj.TYPE_SPECIAL {
   162  		return AC_SPECIAL
   163  	}
   164  	panic(fmt.Errorf("unknown AClass, addr = %v\n", a))
   165  }
   166  
   167  // addrComponent returns the binary (component) of the stored element in a at index, for operand
   168  // of type aclass.
   169  //
   170  // For example, for operand of type AC_ARNG, it has 2 permissible components (identified by index)
   171  //  0. register: <reg>
   172  //  1. arrangement: <T>
   173  //
   174  // They are stored in a.Reg as:
   175  //
   176  //	reg | (arrangement << 5)
   177  //
   178  // More details are in the comments in the switch cases of this function.
   179  func addrComponent(a *obj.Addr, acl AClass, index int) uint32 {
   180  	switch acl {
   181  	//	AClass: AC_PREGSEL
   182  	//	GNU mnemonic: <preg>.<T>[<selreg>, <imm>]
   183  	//	Go mnemonic:
   184  	//		[selreg, $idximm](preg.T)
   185  	//	Encoding:
   186  	//		Type = TYPE_REG
   187  	// 		Offset = packed bits: preg (5 bits) | T (4 bits) | selreg (5 bits) | idximm (6 bits) | sentinel (bit 62)
   188  	case AC_PREGSEL:
   189  		switch index {
   190  		case 0:
   191  			return uint32(a.Offset & 31)
   192  		case 1:
   193  			return uint32((a.Offset >> 5) & 15)
   194  		case 2:
   195  			return uint32((a.Offset>>9)&31 + REG_R0) // encoding functions assume a ARM64 register
   196  		case 3:
   197  			// This is to check the width of selreg, which is guaranteed to be W in AC_PREGSEL.
   198  			// W check always returns true as it's resolved in the generator already.
   199  			// So we just return a default value 0 here.
   200  			return 0
   201  		case 4:
   202  			return uint32((a.Offset >> 14) & 63)
   203  		default:
   204  			panic(fmt.Errorf("unknown elm index at %d in AClass %d", index, acl))
   205  		}
   206  	//	AClass: AC_ARNG, AC_PREG, AC_PREGZ, AC_PREGM, AC_ZREG
   207  	//	GNU mnemonic: <reg>.<T> Or <reg>/<T> (T is M or Z)
   208  	//	Go mnemonic:
   209  	//		reg.<T>
   210  	//	Encoding:
   211  	//		Type = TYPE_REG
   212  	// 		Reg = reg | (arrangement or predication << 5)
   213  	case AC_ARNG, AC_PREG, AC_PREGZM, AC_ZREG:
   214  		switch index {
   215  		case 0:
   216  			return uint32(a.Reg & 31)
   217  		case 1:
   218  			return uint32((a.Reg >> 5) & 15)
   219  		default:
   220  			panic(fmt.Errorf("unknown elm index at %d in AClass %d", index, acl))
   221  		}
   222  	//	AClass: AC_ARNGIDX, AC_PREGIDX, AC_ZREGIDX
   223  	//	GNU mnemonic: <reg>.<T>[<index>]
   224  	//	Go mnemonic:
   225  	//		reg.T[index]
   226  	//	Encoding:
   227  	//		Type = TYPE_REG
   228  	// 		Reg = reg | (arrangement << 5)
   229  	//		Index = index
   230  	case AC_ARNGIDX, AC_PREGIDX, AC_ZREGIDX:
   231  		switch index {
   232  		case 0:
   233  			return uint32(a.Reg & 31)
   234  		case 1:
   235  			// Arrangement
   236  			return uint32((a.Reg >> 5) & 15)
   237  		case 2:
   238  			// Index
   239  			return uint32(a.Index)
   240  		default:
   241  			panic(fmt.Errorf("unknown elm index at %d in AClass %d", index, acl))
   242  		}
   243  	//	AClass: AC_SPZGREG, AC_VREG
   244  	//	GNU mnemonic: <width><reg>
   245  	//	Go mnemonic:
   246  	//		reg (the width is already represented in the opcode)
   247  	//	Encoding:
   248  	//		Type = TYPE_REG
   249  	// 		Reg = reg
   250  	case AC_SPZGREG, AC_VREG:
   251  		switch index {
   252  		case 0:
   253  			// These are all width checks, they should map to no-op checks altogether.
   254  			return 0
   255  		case 1:
   256  			return uint32(a.Reg)
   257  		default:
   258  			panic(fmt.Errorf("unknown elm index at %d in AClass %d", index, acl))
   259  		}
   260  	//	AClass: AC_IMM
   261  	//	GNU mnemonic: <imm>, <shift>
   262  	//	Go mnemonic:
   263  	//		$imm<<shift
   264  	//	Encoding:
   265  	//		Type = TYPE_CONST or TYPE_FCONST
   266  	//		Offset = imm (shift already applied)
   267  	case AC_IMM:
   268  		switch index {
   269  		case 0:
   270  			if a.Type == obj.TYPE_FCONST {
   271  				switch v := a.Val.(type) {
   272  				case float64:
   273  					return math.Float32bits(float32(v))
   274  				default:
   275  					panic(fmt.Errorf("unknown float immediate value %v", a.Val))
   276  				}
   277  			}
   278  			return uint32(a.Offset)
   279  		default:
   280  			panic(fmt.Errorf("unknown elm index at %d in AClass %d", index, acl))
   281  		}
   282  	//	AClass: AC_REGLIST1, AC_REGLIST2, AC_REGLIST3, AC_REGLIST4, AC_REGLIST_RANGE
   283  	//	GNU mnemonic: {reg1.T, reg2.T, ...}
   284  	//	Go mnemonic:
   285  	//		[reg1.T, reg2.T, ...]
   286  	//	Encoding:
   287  	//		Type = TYPE_REGLIST
   288  	// 		Offset = register prefix | register count | arrangement (opcode) | first register
   289  	//		Scale = range size - 1 (if REGLIST_RANGE)
   290  	case AC_REGLIST1, AC_REGLIST2, AC_REGLIST3, AC_REGLIST4, AC_REGLIST_RANGE:
   291  		firstReg := int(a.Offset & 31)
   292  		prefix := a.Offset >> 32 & 0b11
   293  		sum := 32
   294  		if prefix == 2 {
   295  			sum = 16
   296  		}
   297  		switch acl {
   298  		case AC_REGLIST1:
   299  			if index > 2 {
   300  				panic(fmt.Errorf("unknown elm index at %d in AClass %d", index, acl))
   301  			}
   302  		case AC_REGLIST2:
   303  			if index > 4 {
   304  				panic(fmt.Errorf("unknown elm index at %d in AClass %d", index, acl))
   305  			}
   306  		case AC_REGLIST3:
   307  			if index > 6 {
   308  				panic(fmt.Errorf("unknown elm index at %d in AClass %d", index, acl))
   309  			}
   310  		case AC_REGLIST4:
   311  			if index > 8 {
   312  				panic(fmt.Errorf("unknown elm index at %d in AClass %d", index, acl))
   313  			}
   314  		case AC_REGLIST_RANGE:
   315  			// It behaves just like a AC_REGLIST2
   316  			if index > 4 {
   317  				panic(fmt.Errorf("unknown elm index at %d in AClass %d", index, acl))
   318  			}
   319  		}
   320  		switch index % 2 {
   321  		case 0:
   322  			// register
   323  			if a.Scale > 0 {
   324  				// For register ranges in SVE we allow discontiguous registers.
   325  				return uint32((firstReg + (index/2)*int(a.Scale)) % sum)
   326  			}
   327  			return uint32((firstReg + index/2) % sum)
   328  		case 1:
   329  			// arrangement
   330  			curQ := a.Offset >> 30 & 0b11
   331  			curSize := a.Offset >> 10 & 0b11
   332  			switch curQ {
   333  			case 0:
   334  				switch curSize {
   335  				case 0:
   336  					return ARNG_8B
   337  				case 1:
   338  					return ARNG_4H
   339  				case 2:
   340  					return ARNG_2S
   341  				case 3:
   342  					return ARNG_1D
   343  				default:
   344  					panic(fmt.Errorf("unknown size value at %d in AClass %d", index, acl))
   345  				}
   346  			case 1:
   347  				switch curSize {
   348  				case 0:
   349  					return ARNG_16B
   350  				case 1:
   351  					return ARNG_8H
   352  				case 2:
   353  					return ARNG_4S
   354  				case 3:
   355  					return ARNG_2D
   356  				default:
   357  					panic(fmt.Errorf("unknown size value at %d in AClass %d", index, acl))
   358  				}
   359  			case 2:
   360  				switch curSize {
   361  				case 1:
   362  					return ARNG_B
   363  				case 2:
   364  					return ARNG_H
   365  				case 3:
   366  					return ARNG_S
   367  				default:
   368  					panic(fmt.Errorf("unknown size value at %d in AClass %d", index, acl))
   369  				}
   370  			case 3:
   371  				switch curSize {
   372  				case 1:
   373  					return ARNG_D
   374  				case 2:
   375  					return ARNG_Q
   376  				default:
   377  					panic(fmt.Errorf("unknown size value at %d in AClass %d", index, acl))
   378  				}
   379  			default:
   380  				panic(fmt.Errorf("unknown Q value at %d in AClass %d", index, acl))
   381  			}
   382  		default:
   383  			panic(fmt.Errorf("unknown elm index at %d in AClass %d", index, acl))
   384  		}
   385  	//	AClass: AC_SPECIAL
   386  	//	GNU mnemonic: <special>
   387  	//	Go mnemonic:
   388  	//		special
   389  	//	Encoding:
   390  	//		Type = TYPE_SPECIAL
   391  	//		Offset = SpecialOperand enum value
   392  	case AC_SPECIAL:
   393  		switch index {
   394  		case 0:
   395  			return uint32(a.Offset)
   396  		default:
   397  			panic(fmt.Errorf("unknown elm index at %d in AClass %d", index, acl))
   398  		}
   399  	//	AClass: AC_MEMOFF, AC_MEMOFFMULVL
   400  	//	GNU mnemonic: [<reg>.<T>, #<imm>]
   401  	//	Go mnemonic:
   402  	//		imm(reg.T)
   403  	//	Encoding:
   404  	//		Type = TYPE_MEM
   405  	//		Reg = Base register (with arrangement if applicable)
   406  	//		Offset = Immediate offset
   407  	case AC_MEMOFF, AC_MEMOFFMULVL:
   408  		switch index {
   409  		case 0:
   410  			return uint32(a.Reg & 31)
   411  		case 1:
   412  			return uint32((a.Reg >> 5) & 15)
   413  		case 2:
   414  			return uint32(a.Offset)
   415  		default:
   416  			panic(fmt.Errorf("unknown elm index at %d in AClass %d", index, acl))
   417  		}
   418  	//	AClass: AC_MEMEXT
   419  	//	GNU mnemonic: [<reg1>.<T1>, <reg2>.<T2>, <mod> <amount>]
   420  	//	Go mnemonic:
   421  	//		(reg2.T2.mod<<amount)(reg1.T1)
   422  	//	Encoding:
   423  	//		Type = TYPE_MEM
   424  	//		Reg = Index register (with arrangement if applicable)
   425  	//		Index = Base register (with arrangement if applicable)
   426  	//		Scale = Packed mod and amount
   427  	case AC_MEMEXT:
   428  		switch index {
   429  		case 0:
   430  			return uint32(a.Index)
   431  		case 1:
   432  			return uint32((a.Index >> 5) & 15)
   433  		case 2:
   434  			return uint32(a.Reg)
   435  		case 3:
   436  			return uint32((a.Reg >> 5) & 15)
   437  		case 4:
   438  			// mod is either 1 (UXTW), 2 (SXTW), or 4 (LSL)
   439  			mod := uint32((a.Scale >> 9) & 0x7)
   440  			amount := uint32((a.Scale >> 12) & 0x7)
   441  			if mod == 0 && amount > 0 {
   442  				// LSL is implied when no extension is specified but amount > 0
   443  				mod |= 1 << 2
   444  			}
   445  			return mod
   446  		case 5:
   447  			return uint32((a.Scale >> 12) & 0x7)
   448  		default:
   449  			panic(fmt.Errorf("unknown elm index at %d in AClass %d", index, acl))
   450  		}
   451  	}
   452  	// TODO: handle more AClasses.
   453  	panic(fmt.Errorf("unknown AClass %d", acl))
   454  }
   455  
   456  var codeI1Tsz uint32 = 0xffffffff
   457  var codeImm2Tsz uint32 = 0xfffffffe
   458  var codeShift161919212223 uint32 = 0xfffffffd
   459  var codeShift161919212224 uint32 = 0xfffffffc
   460  var codeShift588102224 uint32 = 0xfffffffb
   461  var codeLogicalImmArrEncoding uint32 = 0xfffffffa
   462  var codeImm3Tsize1621 uint32 = 0xfffffff9
   463  var codeShiftI1TszhTszl uint32 = 0xfffffff8
   464  var codeNoOp uint32 = 0xfffffff7
   465  
   466  // encodeI1Tsz is the implementation of the following encoding logic:
   467  // Is the immediate index, in the range 0 to one less than the number of elements in 128 bits, encoded in "i1:tsz".
   468  // bit range mappings:
   469  // i1: [20:21)
   470  // tsz: [16:20)
   471  // Note:
   472  //
   473  //	arr is the arrangement.
   474  //	This encoding is aligned to the high bit of the box, according to the spec.
   475  func encodeI1Tsz(v, arr uint32) (uint32, bool) {
   476  	switch arr {
   477  	case ARNG_B:
   478  		if v > 15 {
   479  			return 0, false
   480  		}
   481  		return v << 17, true
   482  	case ARNG_H:
   483  		if v > 7 {
   484  			return 0, false
   485  		}
   486  		return v << 18, true
   487  	case ARNG_S:
   488  		if v > 3 {
   489  			return 0, false
   490  		}
   491  		return v << 19, true
   492  	case ARNG_D:
   493  		if v > 1 {
   494  			return 0, false
   495  		}
   496  		return v << 20, true
   497  	case ARNG_Q:
   498  		if v > 0 {
   499  			return 0, false
   500  		}
   501  		return 0, true
   502  	default:
   503  		return 0, false
   504  	}
   505  }
   506  
   507  // encodeImm2Tsz is the implementation of the following encoding logic:
   508  // Is the immediate index, in the range 0 to one less than the number of elements in 512 bits, encoded in "imm2:tsz".
   509  // bit range mappings:
   510  // imm2: [22:24)
   511  // tsz: [16:21)
   512  // Note:
   513  //
   514  //	arr is the arrangement.
   515  //	This encoding is aligned to the high bit of the box, according to the spec.
   516  func encodeImm2Tsz(v, arr uint32) (uint32, bool) {
   517  	switch arr {
   518  	case ARNG_B:
   519  		if v > 63 {
   520  			return 0, false
   521  		}
   522  		v <<= 1
   523  		return (v&31)<<16 | (v>>5)<<22, true
   524  	case ARNG_H:
   525  		if v > 31 {
   526  			return 0, false
   527  		}
   528  		v <<= 2
   529  		return (v&31)<<16 | (v>>5)<<22, true
   530  	case ARNG_S:
   531  		if v > 15 {
   532  			return 0, false
   533  		}
   534  		v <<= 3
   535  		return (v&31)<<16 | (v>>5)<<22, true
   536  	case ARNG_D:
   537  		if v > 7 {
   538  			return 0, false
   539  		}
   540  		v <<= 4
   541  		return (v&31)<<16 | (v>>5)<<22, true
   542  	case ARNG_Q:
   543  		if v > 3 {
   544  			return 0, false
   545  		}
   546  		v <<= 5
   547  		return (v&31)<<16 | (v>>5)<<22, true
   548  	default:
   549  		return 0, false
   550  	}
   551  }
   552  
// arrAlignType enumerates sets of arrangements, named after the element sizes
// they contain.
// NOTE(review): not referenced by the code visible in this part of the file;
// the exact meaning of "align" should be confirmed against its users.
type arrAlignType int

const (
	arrAlignBHSD arrAlignType = iota // B, H, S, D
	arrAlignHSD                      // H, S, D
	arrAlignBHS                      // B, H, S
)
   560  
   561  // encodeShiftTriple encodes an shift immediate value in "tszh:tszl:imm3".
   562  // tszh, tszl, imm3 are in ranges, sorted by bit position.
   563  // These shifts are also bounded by arrangement element size.
   564  func encodeShiftTriple(v uint32, r [6]int, prevAddr *obj.Addr, op obj.As) (uint32, bool) {
   565  	// The previous op must be a scalable vector, and we need its arrangement.
   566  	acl := aclass(prevAddr)
   567  	if acl != AC_ARNG {
   568  		return 0, false
   569  	}
   570  	arr := addrComponent(prevAddr, acl, 1) // Get arrangement
   571  	elemBits := uint32(0)
   572  	switch arr {
   573  	case ARNG_B:
   574  		elemBits = 8
   575  	case ARNG_H:
   576  		elemBits = 16
   577  	case ARNG_S:
   578  		elemBits = 32
   579  	case ARNG_D:
   580  		elemBits = 64
   581  	default:
   582  		return 0, false
   583  	}
   584  	if v >= elemBits {
   585  		return 0, false
   586  	}
   587  	var C uint32
   588  	// Unfortunately these information are in the decoding ASL.
   589  	// For these instructions, the esize (see comment in the switch below)
   590  	// is derived from the destination arrangement, however how this function is called is deriving
   591  	// the esize from one of the source.
   592  	// We need to address this discrepancy.
   593  	effectiveEsize := elemBits
   594  	switch op {
   595  	case AZRSHRNB, AZRSHRNT, AZSHRNB, AZSHRNT, AZSQRSHRNB, AZSQRSHRNT, AZSQRSHRUNB, AZSQRSHRUNT,
   596  		AZSQSHRNB, AZSQSHRNT, AZSQSHRUNB, AZSQSHRUNT, AZUQRSHRNB, AZUQRSHRNT, AZUQSHRNB, AZUQSHRNT:
   597  		effectiveEsize = elemBits / 2
   598  	}
   599  	switch op {
   600  	case AZASR, AZLSR, AZURSHR, AZASRD,
   601  		AZRSHRNB, AZRSHRNT, AZSHRNB, AZSHRNT, AZSQRSHRNB, AZSQRSHRNT, AZSQRSHRUNB, AZSQRSHRUNT,
   602  		AZSQSHRNB, AZSQSHRNT, AZSQSHRUNB, AZSQSHRUNT, AZSRSHR, AZUQRSHRNB, AZUQRSHRNT, AZUQSHRNB, AZUQSHRNT,
   603  		AZURSRA, AZUSRA, AZXAR, AZSRI, AZSRSRA, AZSSRA:
   604  		// ASL: let shift : integer = (2 * esize) - UInt(tsize::imm3);
   605  		if v == 0 {
   606  			return 0, false
   607  		}
   608  		C = (2 * effectiveEsize) - v
   609  	default:
   610  		// ASL: let shift : integer = UInt(tsize::imm3) - esize;
   611  		C = effectiveEsize + v
   612  	}
   613  	var chunks [3]uint32
   614  	for i := 0; i < 6; i += 2 {
   615  		chunks[i/2] = C & ((1 << (r[i+1] - r[i])) - 1)
   616  		C >>= (r[i+1] - r[i])
   617  	}
   618  	return uint32((chunks[0] << r[0]) |
   619  		(chunks[1] << r[2]) |
   620  		(chunks[2] << r[4])), true
   621  }
   622  
   623  // encodeLogicalImmEncoding is the implementation of the following encoding logic:
   624  // Is the size specifier,
   625  // imm13	<T>
   626  // 0xxxxxx0xxxxx	S
   627  // 0xxxxxx10xxxx	H
   628  // 0xxxxxx110xxx	B
   629  // 0xxxxxx1110xx	B
   630  // 0xxxxxx11110x	B
   631  // 0xxxxxx11111x	RESERVED
   632  // 1xxxxxxxxxxxx	D
   633  // At the meantime:
   634  // Is a 64, 32, 16 or 8-bit bitmask consisting of replicated 2, 4, 8, 16, 32 or 64 bit fields,
   635  // each field containing a rotated run of non-zero bits, encoded in the "imm13" field.
   636  //
   637  // bit range mappings:
   638  // imm13: [5:18)
   639  //
   640  // ARM created a "clever" recipe that can generate useful repeating 8-64 bit bitmasks.
   641  // Instead of storing the literal binary number, the processor reads a 13-bit recipe
   642  // using three fields (bits from high to low):
   643  // N (1 bit), immr (6 bits), and imms (6 bits).
   644  //
   645  // How the recipe works:
   646  // Every logical immediate represents a repeating pattern (like repeating tiles). The processor
   647  // uses the three fields to figure out the size of the tile, how many 1s are in the tile, and
   648  // how far to rotate it.
   649  // The N bit combined with the upper bits of imms determines the width of the repeating block.
   650  // Depending on these bits, the fundamental block can be 2, 4, 8, 16, 32, or 64 bits wide.
   651  // The lower bits of imms dictate exactly how many contiguous 1s exist inside that block.
   652  // The immr value tells the processor how many bits to rotate that block to the right.
   653  // Finally, the resulting block is duplicated to fill a standard 64-bit lane.
   654  func encodeLogicalImmArrEncoding(v uint64, adjacentAddr *obj.Addr) (uint32, bool) {
   655  	acl := aclass(adjacentAddr)
   656  	if acl != AC_ARNG {
   657  		return 0, false
   658  	}
   659  	arr := addrComponent(adjacentAddr, acl, 1)
   660  
   661  	// Replicate the given immediate to fill a full 64-bit lane.
   662  	// This ensures our pattern-shrinking logic naturally respects the vector lane bounds.
   663  	var val uint64
   664  	switch arr {
   665  	case ARNG_B: // 8-bit lane
   666  		v8 := uint64(v & 0xFF)
   667  		val = v8 * 0x0101010101010101
   668  	case ARNG_H: // 16-bit lane
   669  		v16 := uint64(v & 0xFFFF)
   670  		val = v16 * 0x0001000100010001
   671  	case ARNG_S: // 32-bit lane
   672  		v32 := uint64(v)
   673  		val = v32 | (v32 << 32)
   674  	case ARNG_D: // 64-bit lane
   675  		val = uint64(v)
   676  	default:
   677  		return 0, false
   678  	}
   679  
   680  	// Reject all zeros or all ones (handled by MOV/EOR, invalid for AND/ORR immediates)
   681  	if val == 0 || val == ^uint64(0) {
   682  		return 0, false
   683  	}
   684  
   685  	// Find the absolute smallest repeating pattern size (64 down to 2)
   686  	size := uint64(64)
   687  	for size > 2 {
   688  		half := size / 2
   689  		mask := (uint64(1) << half) - 1
   690  		lower := val & mask
   691  		upper := (val >> half) & mask
   692  
   693  		// If the top half matches the bottom half, shrink our window
   694  		if lower == upper {
   695  			size = half
   696  			val = lower
   697  		} else {
   698  			break
   699  		}
   700  	}
   701  
   702  	// Count the contiguous ones in this minimal pattern
   703  	mask := (uint64(1) << size) - 1
   704  	val &= mask
   705  	ones := bits.OnesCount64(val)
   706  
   707  	// Find the right-rotation (rot) needed to align the 1s at the bottom
   708  	expected := (uint64(1) << ones) - 1
   709  	rot := -1
   710  	for r := 0; r < int(size); r++ {
   711  		// Right rotate 'val' by 'r' bits within a 'size'-bit window
   712  		rotated := ((val >> r) | (val << (int(size) - r))) & mask
   713  		if rotated == expected {
   714  			rot = r
   715  			break
   716  		}
   717  	}
   718  
   719  	if rot == -1 {
   720  		return 0, false
   721  	}
   722  
   723  	// immr is the amount the hardware must right-rotate the base pattern.
   724  	// Since 'rot' is how much we right-rotated the target to find the base,
   725  	// the hardware needs the inverse rotation.
   726  	immr := uint32((int(size) - rot) % int(size))
   727  
   728  	// If we couldn't find a rotation that forms a perfect contiguous block of 1s, it's invalid.
   729  	if rot == -1 {
   730  		return 0, false
   731  	}
   732  
   733  	// Encode N, immr, and imms
   734  	n := uint32(0)
   735  	if size == 64 {
   736  		n = 1
   737  	}
   738  
   739  	// The imms prefix is mathematically generated by (~(size*2 - 1) & 0x3F).
   740  	// We then OR it with the number of ones (minus 1).
   741  	imms := (uint32(^(size*2 - 1)) & 0x3F) | uint32(ones-1)
   742  
   743  	// Construct the final 13-bit field: N (1) | immr (6) | imms (6)
   744  	imm13 := (n << 12) | (immr << 6) | imms
   745  
   746  	// Shift by 5 to place imm13 into instruction bits [5:17]
   747  	return imm13 << 5, true
   748  }
   749  
   750  // encodeImm3Tsize1621 is the implementation of the following encoding logic:
   751  // Is the immediate shift amount, in the range 1 to number of bits per element, encoded in "tsize:imm3".
   752  // bit range mappings:
   753  // imm3: [16:19)
   754  // tsize: [19:21)
   755  //
   756  // srcArr is the <Tb> in the source reglist (ranged).
   757  func encodeImm3Tsize1621(v uint32, srcArr uint32) (uint32, bool) {
   758  	// From ARM ASL: let shift : integer = (2 * esize) - UInt(tsize::imm3);
   759  	// Then tsize::imm3 = (2 * esize) - shift.
   760  	var size uint32
   761  	switch srcArr {
   762  	case ARNG_H:
   763  		// It's the destination size, which is half the source.
   764  		size = 8
   765  	case ARNG_S:
   766  		size = 16
   767  	default:
   768  		return 0, false
   769  	}
   770  	if v < 1 || v > size {
   771  		return 0, false
   772  	}
   773  	return (((2*size - v) & 0x1f) << 16), true
   774  }
   775  
   776  // encodeShiftI1TszhTszl is the implementation of the following encoding logic:
   777  // Is the element index, in the range 0 to one less than the number of vector elements in a 128-bit vector register, encoded in "i1:tszh:tszl".
   778  // bit range mappings:
   779  // i1: [23:24)
   780  // tszh: [22:23)
   781  // tszl: [18:21)
   782  //
   783  // arr is the arrangement
   784  func encodeShiftI1TszhTszl(v uint32, arr uint32) (uint32, bool) {
   785  	var shift, max uint32
   786  	switch arr {
   787  	case ARNG_B:
   788  		shift = 1
   789  		max = 16
   790  	case ARNG_H:
   791  		shift = 2
   792  		max = 8
   793  	case ARNG_S:
   794  		shift = 3
   795  		max = 4
   796  	case ARNG_D:
   797  		shift = 4
   798  		max = 2
   799  	default:
   800  		return 0, false
   801  	}
   802  	if v >= max {
   803  		return 0, false
   804  	}
   805  	v <<= shift
   806  	return ((v & 0x7) << 18) | ((v >> 3) << 22), true
   807  }
   808  
   809  // tryEncode tries to encode p with i, it returns the encoded binary and ok signal.
   810  func (i *instEncoder) tryEncode(p *obj.Prog) (uint32, bool) {
   811  	bin := i.fixedBits
   812  	// Some elements are encoded in the same component, they need to be equal.
   813  	// For example { <Zn1>.<Tb>-<Zn2>.<Tb> }.
   814  	// The 2 instances of <Tb> must encode to the same value.
   815  	encoded := map[component]uint32{}
   816  	var addrs []*obj.Addr
   817  	for addr := range opsInProg(p) {
   818  		addrs = append(addrs, addr)
   819  	}
   820  	if len(addrs) != len(i.args) {
   821  		return 0, false
   822  	}
   823  	for opIdx, addr := range addrs {
   824  		if opIdx >= len(i.args) {
   825  			return 0, false
   826  		}
   827  		op := i.args[opIdx]
   828  		acl := aclass(addr)
   829  		if acl != op.class {
   830  			return 0, false
   831  		}
   832  		for i, enc := range op.elemEncoders {
   833  			val := addrComponent(addr, acl, i)
   834  			if (p.As == AZFCPY || p.As == AZFDUP) && acl == AC_IMM {
   835  				// These instructions expects ARM's 8-bit float encoding.
   836  				// Reinterpret the uint32 bits back as a float32, then convert to float64 for chipfloat7
   837  				fval := float64(math.Float32frombits(val))
   838  				encode := (&ctxt7{}).chipfloat7(fval)
   839  				if encode == -1 {
   840  					// Handle error or return false to indicate mismatch
   841  					return 0, false
   842  				}
   843  				val = uint32(encode)
   844  			}
   845  			if b, ok := enc.fn(val); ok || b != 0 {
   846  				specialB := uint32(b)
   847  				if !ok {
   848  					specialB = b
   849  					switch b {
   850  					case codeI1Tsz:
   851  						b, ok = encodeI1Tsz(val, addrComponent(addr, acl, i-1))
   852  					case codeImm2Tsz:
   853  						b, ok = encodeImm2Tsz(val, addrComponent(addr, acl, i-1))
   854  					case codeShift161919212223:
   855  						b, ok = encodeShiftTriple(val, [6]int{16, 19, 19, 21, 22, 23}, addrs[opIdx+1], p.As)
   856  					case codeShift161919212224:
   857  						b, ok = encodeShiftTriple(val, [6]int{16, 19, 19, 21, 22, 24}, addrs[opIdx+1], p.As)
   858  					case codeShift588102224:
   859  						b, ok = encodeShiftTriple(val, [6]int{5, 8, 8, 10, 22, 24}, addrs[opIdx+1], p.As)
   860  					case codeLogicalImmArrEncoding:
   861  						// Now that we know this is an immediate.
   862  						// ARM64 allows imm13 to encode up to 64 bits of immediates.
   863  						// addrComponent is not the right fit here, we need to extract [Offset] fields manually.
   864  						b, ok = encodeLogicalImmArrEncoding(uint64(addr.Offset), addrs[opIdx+1])
   865  					case codeImm3Tsize1621:
   866  						b, ok = encodeImm3Tsize1621(val, addrComponent(addrs[opIdx+1], aclass(addrs[opIdx+1]), 1))
   867  					case codeShiftI1TszhTszl:
   868  						b, ok = encodeShiftI1TszhTszl(val, addrComponent(addr, AC_PREGSEL, 1))
   869  					case codeNoOp:
   870  						b, ok = 0, true
   871  					default:
   872  						panic(fmt.Errorf("unknown encoding function code %d", b))
   873  					}
   874  				}
   875  				if !ok {
   876  					return 0, false
   877  				}
   878  				bin |= b
   879  				if _, ok := encoded[enc.comp]; ok && b != encoded[enc.comp] {
   880  					if specialB == codeNoOp {
   881  						// NoOp encodings don't need checks.
   882  						continue
   883  					}
   884  					return 0, false
   885  				}
   886  				if enc.comp != enc_NIL && specialB != codeNoOp {
   887  					// NoOp encodings don't need bookkeeping.
   888  					encoded[enc.comp] = b
   889  				}
   890  			} else {
   891  				return 0, false
   892  			}
   893  		}
   894  	}
   895  	return bin, true
   896  }
   897  

View as plain text