Source file src/simd/archsimd/_gen/simdgen/xed.go

     1  // Copyright 2025 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package main
     6  
     7  import (
     8  	"fmt"
     9  	"log"
    10  	"maps"
    11  	"reflect"
    12  	"regexp"
    13  	"slices"
    14  	"strconv"
    15  	"strings"
    16  
    17  	"simd/archsimd/_gen/unify"
    18  
    19  	"golang.org/x/arch/x86/xeddata"
    20  	"gopkg.in/yaml.v3"
    21  )
    22  
// Register classification results; see decodeReg, which maps a XED register
// operand to one of these classes.
const (
	NOT_REG_CLASS = iota // not a register
	VREG_CLASS           // classify as a vector register; see decodeReg
	GREG_CLASS           // classify as a general register
)
    28  
// instVariant is a bitmap indicating a variant of an instruction that has
// optional parameters.
type instVariant uint8

const (
	// instVariantNone selects the variant with all optional parameters
	// omitted.
	instVariantNone instVariant = 0

	// instVariantMasked indicates that this is the masked variant of an
	// optionally-masked instruction.
	instVariantMasked instVariant = 1 << iota
)
    40  
    41  var operandRemarks int
    42  
    43  // TODO: Doc. Returns Values with Def domains.
    44  func loadXED(xedPath string) []*unify.Value {
    45  	// TODO: Obviously a bunch more to do here.
    46  
    47  	db, err := xeddata.NewDatabase(xedPath)
    48  	if err != nil {
    49  		log.Fatalf("open database: %v", err)
    50  	}
    51  
    52  	var defs []*unify.Value
    53  	type opData struct {
    54  		inst *xeddata.Inst
    55  		ops  []operand
    56  		mem  string
    57  	}
    58  	// Maps from opcode to opdata(s).
    59  	memOps := make(map[string][]opData, 0)
    60  	otherOps := make(map[string][]opData, 0)
    61  	appendDefs := func(inst *xeddata.Inst, ops []operand, addFields map[string]string) {
    62  		applyQuirks(inst, ops)
    63  
    64  		defsPos := len(defs)
    65  		defs = append(defs, instToUVal(inst, ops, addFields)...)
    66  
    67  		if *flagDebugXED {
    68  			for i := defsPos; i < len(defs); i++ {
    69  				y, _ := yaml.Marshal(defs[i])
    70  				fmt.Printf("==>\n%s\n", y)
    71  			}
    72  		}
    73  	}
    74  	err = xeddata.WalkInsts(xedPath, func(inst *xeddata.Inst) {
    75  		inst.Pattern = xeddata.ExpandStates(db, inst.Pattern)
    76  
    77  		switch {
    78  		case inst.RealOpcode == "N":
    79  			return // Skip unstable instructions
    80  		case !(strings.HasPrefix(inst.Extension, "AVX") || strings.HasPrefix(inst.Extension, "SHA") ||
    81  			inst.Extension == "FMA" || inst.Extension == "VAES"):
    82  			// We're only interested in AVX and SHA instructions.
    83  			return
    84  		}
    85  
    86  		if *flagDebugXED {
    87  			fmt.Printf("%s:\n%+v\n", inst.Pos, inst)
    88  		}
    89  
    90  		ops, err := decodeOperands(db, strings.Fields(inst.Operands))
    91  		if err != nil {
    92  			operandRemarks++
    93  			if *Verbose {
    94  				log.Printf("%s: [%s] %s", inst.Pos, inst.Opcode(), err)
    95  			}
    96  			return
    97  		}
    98  		var data map[string][]opData
    99  		mem := checkMem(ops)
   100  		if mem == "vbcst" {
   101  			// A pure vreg variant might exist, wait for later to see if we can
   102  			// merge them
   103  			data = memOps
   104  		} else {
   105  			data = otherOps
   106  		}
   107  		opcode := inst.Opcode()
   108  		if _, ok := data[opcode]; !ok {
   109  			s := make([]opData, 1)
   110  			s[0] = opData{inst, ops, mem}
   111  			data[opcode] = s
   112  		} else {
   113  			data[opcode] = append(data[opcode], opData{inst, ops, mem})
   114  		}
   115  	})
   116  	for _, s := range otherOps {
   117  		for _, o := range s {
   118  			addFields := map[string]string{}
   119  			if o.mem == "noMem" {
   120  				opcode := o.inst.Opcode()
   121  				// Checking if there is a vbcst variant of this operation exist
   122  				// First check the opcode
   123  				// Keep this logic in sync with [decodeOperands]
   124  				if ms, ok := memOps[opcode]; ok {
   125  					feat1, ok1 := decodeCPUFeature(o.inst)
   126  					// Then check if there exist such an operation that for all vreg
   127  					// shapes they are the same at the same index
   128  					var feat1Match, feat2Match string
   129  					matchIdx := -1
   130  					var featMismatchCnt int
   131  				outer:
   132  					for i, m := range ms {
   133  						// Their CPU feature should match first
   134  						var featMismatch bool
   135  						feat2, ok2 := decodeCPUFeature(m.inst)
   136  						if !ok1 || !ok2 {
   137  							continue
   138  						}
   139  						if feat1 != feat2 {
   140  							featMismatch = true
   141  							featMismatchCnt++
   142  						}
   143  						if len(o.ops) == len(m.ops) {
   144  							for j := range o.ops {
   145  								if reflect.TypeOf(o.ops[j]) == reflect.TypeOf(m.ops[j]) {
   146  									v1, ok3 := o.ops[j].(operandVReg)
   147  									v2, _ := m.ops[j].(operandVReg)
   148  									if !ok3 {
   149  										continue
   150  									}
   151  									if v1.vecShape != v2.vecShape {
   152  										// A mismatch, skip this memOp
   153  										continue outer
   154  									}
   155  								} else {
   156  									_, ok3 := o.ops[j].(operandVReg)
   157  									_, ok4 := m.ops[j].(operandMem)
   158  									// The only difference must be the vreg and mem, no other cases.
   159  									if !ok3 || !ok4 {
   160  										// A mismatch, skip this memOp
   161  										continue outer
   162  									}
   163  								}
   164  							}
   165  							// Found a match, break early
   166  							matchIdx = i
   167  							feat1Match = feat1
   168  							feat2Match = feat2
   169  							if featMismatchCnt > 1 {
   170  								panic("multiple feature mismatch vbcst memops detected, simdgen failed to distinguish")
   171  							}
   172  							if !featMismatch {
   173  								// Mismatch feat is ok but should prioritize matching cases.
   174  								break
   175  							}
   176  						}
   177  					}
   178  					// Remove the match from memOps, it's now merged to this pure vreg operation
   179  					if matchIdx != -1 {
   180  						memOps[opcode] = append(memOps[opcode][:matchIdx], memOps[opcode][matchIdx+1:]...)
   181  						// Merge is done by adding a new field
   182  						// Right now we only have vbcst
   183  						addFields["memFeatures"] = "vbcst"
   184  						if feat1Match != feat2Match {
   185  							addFields["memFeaturesData"] = fmt.Sprintf("feat1=%s;feat2=%s", feat1Match, feat2Match)
   186  						}
   187  					}
   188  				}
   189  			}
   190  			appendDefs(o.inst, o.ops, addFields)
   191  		}
   192  	}
   193  	for _, ms := range memOps {
   194  		for _, m := range ms {
   195  			if *Verbose {
   196  				log.Printf("mem op not merged: %s, %v\n", m.inst.Opcode(), m)
   197  			}
   198  			appendDefs(m.inst, m.ops, nil)
   199  		}
   200  	}
   201  	if err != nil {
   202  		log.Fatalf("walk insts: %v", err)
   203  	}
   204  
   205  	if len(unknownFeatures) > 0 {
   206  		if !*Verbose {
   207  			nInst := 0
   208  			for _, insts := range unknownFeatures {
   209  				nInst += len(insts)
   210  			}
   211  			log.Printf("%d unhandled CPU features for %d instructions (use -v for details)", len(unknownFeatures), nInst)
   212  		} else {
   213  			keys := slices.Sorted(maps.Keys(unknownFeatures))
   214  			for _, key := range keys {
   215  				log.Printf("unhandled ISASet %s", key)
   216  				log.Printf("  opcodes: %s", slices.Sorted(maps.Keys(unknownFeatures[key])))
   217  			}
   218  		}
   219  	}
   220  
   221  	return defs
   222  }
   223  
var (
	// maskRequiredRe matches opcodes whose XED data marks the mask optional
	// even though the instruction is pointless without it; see applyQuirks.
	maskRequiredRe = regexp.MustCompile(`VPCOMPRESS[BWDQ]|VCOMPRESSP[SD]|VPEXPAND[BWDQ]|VEXPANDP[SD]`)
	// maskOptionalRe matches opcodes whose read masks should be optional but
	// aren't marked as such in the XED data; see applyQuirks.
	maskOptionalRe = regexp.MustCompile(`VPCMP(EQ|GT|U)?[BWDQ]|VCMPP[SD]`)
)
   228  
   229  func applyQuirks(inst *xeddata.Inst, ops []operand) {
   230  	opc := inst.Opcode()
   231  	switch {
   232  	case maskRequiredRe.MatchString(opc):
   233  		// The mask on these instructions is marked optional, but the
   234  		// instruction is pointless without the mask.
   235  		for i, op := range ops {
   236  			if op, ok := op.(operandMask); ok {
   237  				op.optional = false
   238  				ops[i] = op
   239  			}
   240  		}
   241  
   242  	case maskOptionalRe.MatchString(opc):
   243  		// Conversely, these masks should be marked optional and aren't.
   244  		for i, op := range ops {
   245  			if op, ok := op.(operandMask); ok && op.action.r {
   246  				op.optional = true
   247  				ops[i] = op
   248  			}
   249  		}
   250  	}
   251  }
   252  
// operandCommon holds the fields shared by every operand kind.
type operandCommon struct {
	action operandAction
}

// operandAction defines whether this operand is read and/or written.
//
// TODO: Should this live in [xeddata.Operand]?
type operandAction struct {
	r  bool // Read
	w  bool // Written
	cr bool // Read is conditional (implies r==true)
	cw bool // Write is conditional (implies w==true)
}
   266  
// operandMem is a memory operand.
type operandMem struct {
	operandCommon
	vecShape
	elemBaseType scalarBaseType
	// The following fields are not flushed to the final output.
	// vbcst: supports full-vector broadcasting; implies the operand having a
	// "vv" (vector vector) type specified in width and the instruction
	// carrying the attribute TXT=BCASTSTR.
	vbcst   bool
	unknown bool // unknown kind; addToDef emits only "class" for these
}

// vecShape describes the shape of a vector-like operand.
type vecShape struct {
	elemBits  int    // Element size in bits
	bits      int    // Register width in bits (total vector bits)
	fixedName string // the fixed register name
}
   283  
type operandVReg struct { // Vector register
	operandCommon
	vecShape
	elemBaseType scalarBaseType
}

type operandGReg struct { // General-purpose register
	operandCommon
	vecShape
	elemBaseType scalarBaseType
}
   295  
// operandMask is a vector mask.
//
// Regardless of the actual mask representation, the [vecShape] of this operand
// corresponds to the "bit for bit" type of mask. That is, elemBits gives the
// element width covered by each mask element, and bits/elemBits gives the total
// number of mask elements. (bits gives the total number of bits as if this were
// a bit-for-bit mask, which may be meaningless on its own.)
type operandMask struct {
	operandCommon
	vecShape
	// Bits in the mask is w/bits.

	allMasks bool // If set, size cannot be inferred because all operands are masks.

	// Mask can be omitted, in which case it defaults to K0/"no mask"
	optional bool
}

// operandImm is an immediate operand.
type operandImm struct {
	operandCommon
	bits int // Immediate size in bits
}

// operand is implemented by every operand kind.
type operand interface {
	common() operandCommon
	// addToDef adds this operand's fields to a Def being built.
	addToDef(b *unify.DefBuilder)
}
   323  
   324  func strVal(s any) *unify.Value {
   325  	return unify.NewValue(unify.NewStringExact(fmt.Sprint(s)))
   326  }
   327  
// common returns the shared operand fields. Embedding operandCommon thus
// gives every operand kind this part of the operand interface for free.
func (o operandCommon) common() operandCommon {
	return o
}
   331  
// addToDef adds this memory operand's fields to b. Operands of unknown kind
// get only the class; otherwise base and bits are emitted, plus elemBits
// when it differs from bits.
func (o operandMem) addToDef(b *unify.DefBuilder) {
	b.Add("class", strVal("memory"))
	if o.unknown {
		return
	}
	baseDomain, err := unify.NewStringRegex(o.elemBaseType.regex())
	if err != nil {
		panic("parsing baseRe: " + err.Error())
	}
	b.Add("base", unify.NewValue(baseDomain))
	b.Add("bits", strVal(o.bits))
	if o.elemBits != o.bits {
		b.Add("elemBits", strVal(o.elemBits))
	}
}
   347  
// addToDef adds this vector register operand's fields to b.
func (o operandVReg) addToDef(b *unify.DefBuilder) {
	baseDomain, err := unify.NewStringRegex(o.elemBaseType.regex())
	if err != nil {
		panic("parsing baseRe: " + err.Error())
	}
	b.Add("class", strVal("vreg"))
	b.Add("bits", strVal(o.bits))
	b.Add("base", unify.NewValue(baseDomain))
	// If elemBits == bits, then the vector can be ANY shape. This happens with,
	// for example, logical ops.
	if o.elemBits != o.bits {
		b.Add("elemBits", strVal(o.elemBits))
	}
	if o.fixedName != "" {
		b.Add("fixedReg", strVal(o.fixedName))
	}
}
   365  
// addToDef adds this general register operand's fields to b. Like the vreg
// case, elemBits is omitted when it equals bits.
func (o operandGReg) addToDef(b *unify.DefBuilder) {
	baseDomain, err := unify.NewStringRegex(o.elemBaseType.regex())
	if err != nil {
		panic("parsing baseRe: " + err.Error())
	}
	b.Add("class", strVal("greg"))
	b.Add("bits", strVal(o.bits))
	b.Add("base", unify.NewValue(baseDomain))
	if o.elemBits != o.bits {
		b.Add("elemBits", strVal(o.elemBits))
	}
	if o.fixedName != "" {
		b.Add("fixedReg", strVal(o.fixedName))
	}
}
   381  
// addToDef adds this mask operand's fields to b. When allMasks is set the
// sizes are omitted entirely so that unification can determine them.
func (o operandMask) addToDef(b *unify.DefBuilder) {
	b.Add("class", strVal("mask"))
	if o.allMasks {
		// If all operands are masks, omit sizes and let unification determine mask sizes.
		return
	}
	b.Add("elemBits", strVal(o.elemBits))
	b.Add("bits", strVal(o.bits))
	if o.fixedName != "" {
		b.Add("fixedReg", strVal(o.fixedName))
	}
}
   394  
// addToDef adds this immediate operand's fields to b.
func (o operandImm) addToDef(b *unify.DefBuilder) {
	b.Add("class", strVal("immediate"))
	b.Add("bits", strVal(o.bits))
}
   399  
// actionEncoding maps XED operand action strings to their decoded form.
// Note the asymmetry: "crw" is conditional-read + write, while "rcw" is
// read + conditional-write.
var actionEncoding = map[string]operandAction{
	"r":   {r: true},
	"cr":  {r: true, cr: true},
	"w":   {w: true},
	"cw":  {w: true, cw: true},
	"rw":  {r: true, w: true},
	"crw": {r: true, w: true, cr: true},
	"rcw": {r: true, w: true, cw: true},
}
   409  
   410  func decodeOperand(db *xeddata.Database, operand string) (operand, error) {
   411  	op, err := xeddata.NewOperand(db, operand)
   412  	if err != nil {
   413  		log.Fatalf("parsing operand %q: %v", operand, err)
   414  	}
   415  	if *flagDebugXED {
   416  		fmt.Printf("  %+v\n", op)
   417  	}
   418  
   419  	if strings.HasPrefix(op.Name, "EMX_BROADCAST") {
   420  		// This refers to a set of macros defined in all-state.txt that set a
   421  		// BCAST operand to various fixed values. But the BCAST operand is
   422  		// itself suppressed and "internal", so I think we can just ignore this
   423  		// operand.
   424  		return nil, nil
   425  	}
   426  
   427  	// TODO: See xed_decoded_inst_operand_action. This might need to be more
   428  	// complicated.
   429  	action, ok := actionEncoding[op.Action]
   430  	if !ok {
   431  		return nil, fmt.Errorf("unknown action %q", op.Action)
   432  	}
   433  	common := operandCommon{action: action}
   434  
   435  	lhs := op.NameLHS()
   436  	if strings.HasPrefix(lhs, "MEM") {
   437  		// looks like XED data has an inconsistency on VPADDD, marking attribute
   438  		// VPBROADCASTD instead of the canonical BCASTSTR.
   439  		if op.Width == "vv" && (op.Attributes["TXT=BCASTSTR"] ||
   440  			op.Attributes["TXT=VPBROADCASTD"]) {
   441  			baseType, elemBits, ok := decodeType(op)
   442  			if !ok {
   443  				return nil, fmt.Errorf("failed to decode memory width %q", operand)
   444  			}
   445  			// This operand has two possible width([bits]):
   446  			// 1. the same as the other operands
   447  			// 2. the element width as the other operands (broaccasting)
   448  			// left it default to 2, later we will set a new field in the operation
   449  			// to indicate this dual-width property.
   450  			shape := vecShape{elemBits: elemBits, bits: elemBits}
   451  			return operandMem{
   452  				operandCommon: common,
   453  				vecShape:      shape,
   454  				elemBaseType:  baseType,
   455  				vbcst:         true,
   456  				unknown:       false,
   457  			}, nil
   458  		}
   459  		// TODO: parse op.Width better to handle all cases
   460  		// Right now this will at least miss VPBROADCAST.
   461  		return operandMem{
   462  			operandCommon: common,
   463  			unknown:       true,
   464  		}, nil
   465  	} else if strings.HasPrefix(lhs, "REG") {
   466  		if op.Width == "mskw" {
   467  			// The mask operand doesn't specify a width. We have to infer it.
   468  			//
   469  			// XED uses the marker ZEROSTR to indicate that a mask operand is
   470  			// optional and, if omitted, implies K0, aka "no mask".
   471  			return operandMask{
   472  				operandCommon: common,
   473  				optional:      op.Attributes["TXT=ZEROSTR"],
   474  			}, nil
   475  		} else {
   476  			class, regBits, fixedReg := decodeReg(op)
   477  			if class == NOT_REG_CLASS {
   478  				return nil, fmt.Errorf("failed to decode register %q", operand)
   479  			}
   480  			baseType, elemBits, ok := decodeType(op)
   481  			if !ok {
   482  				return nil, fmt.Errorf("failed to decode register width %q", operand)
   483  			}
   484  			shape := vecShape{elemBits: elemBits, bits: regBits, fixedName: fixedReg}
   485  			if class == VREG_CLASS {
   486  				return operandVReg{
   487  					operandCommon: common,
   488  					vecShape:      shape,
   489  					elemBaseType:  baseType,
   490  				}, nil
   491  			}
   492  			// general register
   493  			m := min(shape.bits, shape.elemBits)
   494  			shape.bits, shape.elemBits = m, m
   495  			return operandGReg{
   496  				operandCommon: common,
   497  				vecShape:      shape,
   498  				elemBaseType:  baseType,
   499  			}, nil
   500  
   501  		}
   502  	} else if strings.HasPrefix(lhs, "IMM") {
   503  		_, bits, ok := decodeType(op)
   504  		if !ok {
   505  			return nil, fmt.Errorf("failed to decode register width %q", operand)
   506  		}
   507  		return operandImm{
   508  			operandCommon: common,
   509  			bits:          bits,
   510  		}, nil
   511  	}
   512  
   513  	// TODO: BASE and SEG
   514  	return nil, fmt.Errorf("unknown operand LHS %q in %q", lhs, operand)
   515  }
   516  
   517  func decodeOperands(db *xeddata.Database, operands []string) (ops []operand, err error) {
   518  	// Decode the XED operand descriptions.
   519  	for _, o := range operands {
   520  		op, err := decodeOperand(db, o)
   521  		if err != nil {
   522  			return nil, err
   523  		}
   524  		if op != nil {
   525  			ops = append(ops, op)
   526  		}
   527  	}
   528  
   529  	// XED doesn't encode the size of mask operands. If there are mask operands,
   530  	// try to infer their sizes from other operands.
   531  	if err := inferMaskSizes(ops); err != nil {
   532  		return nil, fmt.Errorf("%w in operands %+v", err, operands)
   533  	}
   534  
   535  	return ops, nil
   536  }
   537  
// inferMaskSizes assigns a vecShape to each mask operand in ops, inferred
// from the vector register operands, since XED does not encode mask sizes.
// Read masks are sized from read registers (falling back to written ones)
// and write masks vice versa. If every operand is a mask, the operands are
// flagged allMasks and inference is left to the consumer. Returns an error
// when the register sizes are ambiguous.
func inferMaskSizes(ops []operand) error {
	// This is a heuristic and it falls apart in some cases:
	//
	// - Mask operations like KAND[BWDQ] have *nothing* in the XED to indicate
	// mask size.
	//
	// - VINSERT*, VPSLL*, VPSRA*, and VPSRL* and some others naturally have
	// mixed input sizes and the XED doesn't indicate which operands the mask
	// applies to.
	//
	// - VPDP* and VP4DP* have really complex mixed operand patterns.
	//
	// I think for these we may just have to hand-write a table of which
	// operands each mask applies to.
	inferMask := func(r, w bool) error {
		var masks []int
		var rSizes, wSizes, sizes []vecShape
		allMasks := true
		hasWMask := false
		for i, op := range ops {
			action := op.common().action
			if _, ok := op.(operandMask); ok {
				if action.r && action.w {
					return fmt.Errorf("unexpected rw mask")
				}
				if action.r == r || action.w == w {
					masks = append(masks, i)
				}
				if action.w {
					hasWMask = true
				}
			} else {
				allMasks = false
				if reg, ok := op.(operandVReg); ok {
					if action.r {
						rSizes = append(rSizes, reg.vecShape)
					}
					if action.w {
						wSizes = append(wSizes, reg.vecShape)
					}
				}
			}
		}
		if len(masks) == 0 {
			return nil
		}

		// Prefer sizes from the same direction as the masks being inferred,
		// falling back to the opposite direction.
		if r {
			sizes = rSizes
			if len(sizes) == 0 {
				sizes = wSizes
			}
		}
		if w {
			sizes = wSizes
			if len(sizes) == 0 {
				sizes = rSizes
			}
		}

		if len(sizes) == 0 {
			// If all operands are masks, leave the mask inference to the users.
			if allMasks {
				for _, i := range masks {
					m := ops[i].(operandMask)
					m.allMasks = true
					ops[i] = m
				}
				return nil
			}
			return fmt.Errorf("cannot infer mask size: no register operands")
		}
		shape, ok := singular(sizes)
		if !ok {
			if !hasWMask && len(wSizes) == 1 && len(masks) == 1 {
				// This pattern looks like a predicate mask, so its shape should align with the
				// output. TODO: verify this is a safe assumption.
				shape = wSizes[0]
			} else {
				return fmt.Errorf("cannot infer mask size: multiple register sizes %v", sizes)
			}
		}
		for _, i := range masks {
			m := ops[i].(operandMask)
			m.vecShape = shape
			ops[i] = m
		}
		return nil
	}
	// Infer read masks, then write masks.
	if err := inferMask(true, false); err != nil {
		return err
	}
	if err := inferMask(false, true); err != nil {
		return err
	}
	return nil
}
   635  
// addOperandsToDef adds "in", "inVariant", and "out" to an instruction Def.
//
// Optional mask input operands are added to the inVariant field if
// variant&instVariantMasked, and omitted otherwise.
func addOperandsToDef(ops []operand, instDB *unify.DefBuilder, variant instVariant) {
	var inVals, inVar, outVals []*unify.Value
	asmPos := 0
	for _, op := range ops {
		var db unify.DefBuilder
		op.addToDef(&db)
		db.Add("asmPos", unify.NewValue(unify.NewStringExact(fmt.Sprint(asmPos))))

		action := op.common().action
		asmCount := 1 // # of assembly operands; 0 or 1
		if action.r {
			inVal := unify.NewValue(db.Build())
			// If this is an optional mask, put it in the input variant tuple.
			if mask, ok := op.(operandMask); ok && mask.optional {
				if variant&instVariantMasked != 0 {
					inVar = append(inVar, inVal)
				} else {
					// This operand doesn't appear in the assembly at all.
					asmCount = 0
				}
			} else {
				// Just a regular input operand.
				inVals = append(inVals, inVal)
			}
		}
		if action.w {
			outVal := unify.NewValue(db.Build())
			outVals = append(outVals, outVal)
		}

		asmPos += asmCount
	}

	instDB.Add("in", unify.NewValue(unify.NewTuple(inVals...)))
	instDB.Add("inVariant", unify.NewValue(unify.NewTuple(inVar...)))
	instDB.Add("out", unify.NewValue(unify.NewTuple(outVals...)))
	// Record memory-operand capabilities (e.g. "vbcst") on the instruction.
	memFeatures := checkMem(ops)
	if memFeatures != "noMem" {
		instDB.Add("memFeatures", unify.NewValue(unify.NewStringExact(memFeatures)))
	}
}
   681  
   682  // checkMem checks the shapes of memory operand in the operation and returns the shape.
   683  // Keep this function in sync with [decodeOperand].
   684  func checkMem(ops []operand) string {
   685  	memState := "noMem"
   686  	var mem *operandMem
   687  	memCnt := 0
   688  	for _, op := range ops {
   689  		if m, ok := op.(operandMem); ok {
   690  			mem = &m
   691  			memCnt++
   692  		}
   693  	}
   694  	if mem != nil {
   695  		if mem.unknown {
   696  			memState = "unknown"
   697  		} else if memCnt > 1 {
   698  			memState = "tooManyMem"
   699  		} else {
   700  			// We only have vbcst case as of now.
   701  			// This shape has an indication that [bits] fields has two possible value:
   702  			// 1. The element broadcast width, which is its peer vreg operand's [elemBits] (default val in the parsed XED data)
   703  			// 2. The full vector width, which is its peer vreg operand's [bits] (godefs should be aware of this)
   704  			memState = "vbcst"
   705  		}
   706  	}
   707  	return memState
   708  }
   709  
   710  func instToUVal(inst *xeddata.Inst, ops []operand, addFields map[string]string) []*unify.Value {
   711  	feature, ok := decodeCPUFeature(inst)
   712  	if !ok {
   713  		return nil
   714  	}
   715  
   716  	var vals []*unify.Value
   717  	vals = append(vals, instToUVal1(inst, ops, feature, instVariantNone, addFields))
   718  	if hasOptionalMask(ops) {
   719  		vals = append(vals, instToUVal1(inst, ops, feature, instVariantMasked, addFields))
   720  	}
   721  	return vals
   722  }
   723  
// instToUVal1 builds a single instruction Value for the given variant. It
// records goarch, asm opcode, operands, CPU feature, and any extra fields
// from addFields, and marks zeroing=false when EVEX.z is disallowed.
func instToUVal1(inst *xeddata.Inst, ops []operand, feature string, variant instVariant, addFields map[string]string) *unify.Value {
	var db unify.DefBuilder
	db.Add("goarch", unify.NewValue(unify.NewStringExact("amd64")))
	db.Add("asm", unify.NewValue(unify.NewStringExact(inst.Opcode())))
	addOperandsToDef(ops, &db, variant)
	db.Add("cpuFeature", unify.NewValue(unify.NewStringExact(feature)))
	for k, v := range addFields {
		db.Add(k, unify.NewValue(unify.NewStringExact(v)))
	}

	if strings.Contains(inst.Pattern, "ZEROING=0") {
		// This is an EVEX instruction, but the ".Z" (zero-merging)
		// instruction flag is NOT valid. EVEX.z must be zero.
		//
		// This can mean a few things:
		//
		// - The output of an instruction is a mask, so merging modes don't
		// make any sense. E.g., VCMPPS.
		//
		// - There are no masks involved anywhere. (Maybe MASK=0 is also set
		// in this case?) E.g., VINSERTPS.
		//
		// - The operation inherently performs merging. E.g., VCOMPRESSPS
		// with a mem operand.
		//
		// There may be other reasons.
		db.Add("zeroing", unify.NewValue(unify.NewStringExact("false")))
	}
	pos := unify.Pos{Path: inst.Pos.Path, Line: inst.Pos.Line}
	return unify.NewValuePos(db.Build(), pos)
}
   755  
   756  // decodeCPUFeature returns the CPU feature name required by inst. These match
   757  // the names of the "Has*" feature checks in the simd package.
   758  func decodeCPUFeature(inst *xeddata.Inst) (string, bool) {
   759  	isaSet := inst.ISASet
   760  	if isaSet == "" {
   761  		// Older instructions don't have an ISA set. Use their "extension"
   762  		// instead.
   763  		isaSet = inst.Extension
   764  	}
   765  	// We require AVX512VL to use AVX512 at all, so strip off the vector length
   766  	// suffixes.
   767  	if strings.HasPrefix(isaSet, "AVX512") {
   768  		isaSet = isaSetVL.ReplaceAllLiteralString(isaSet, "")
   769  	}
   770  
   771  	feat, ok := cpuFeatureMap[isaSet]
   772  	if !ok {
   773  		imap := unknownFeatures[isaSet]
   774  		if imap == nil {
   775  			imap = make(map[string]struct{})
   776  			unknownFeatures[isaSet] = imap
   777  		}
   778  		imap[inst.Opcode()] = struct{}{}
   779  		return "", false
   780  	}
   781  	if feat == "ignore" {
   782  		return "", false
   783  	}
   784  	return feat, true
   785  }
   786  
   787  var isaSetVL = regexp.MustCompile("_(128N?|256N?|512)$")
   788  
// cpuFeatureMap maps from XED's "ISA_SET" (or "EXTENSION") to a CPU feature
// name to expose in the SIMD feature check API. The special value "ignore"
// causes matching instructions to be dropped (see decodeCPUFeature).
//
// See XED's datafiles/*/cpuid.xed.txt for how ISA set names map to CPUID flags.
var cpuFeatureMap = map[string]string{
	"AVX":      "AVX",
	"AVX_VNNI": "AVXVNNI",
	"AVX2":     "AVX2",
	"AVXAES":   "AVXAES",
	"SHA":      "SHA",
	"FMA":      "FMA",
	"VAES":     "VAES",

	// AVX-512 foundational features. We combine all of these into one "AVX512" feature.
	"AVX512F":  "AVX512",
	"AVX512BW": "AVX512",
	"AVX512CD": "AVX512",
	"AVX512DQ": "AVX512",
	// AVX512VL doesn't appear as its own ISASet; instead, the CPUID flag is
	// required by the *_128 and *_256 ISASets. We fold it into "AVX512" anyway.

	// AVX-512 extension features
	"AVX512_BITALG":     "AVX512BITALG",
	"AVX512_GFNI":       "AVX512GFNI",
	"AVX512_VBMI":       "AVX512VBMI",
	"AVX512_VBMI2":      "AVX512VBMI2",
	"AVX512_VNNI":       "AVX512VNNI",
	"AVX512_VPOPCNTDQ":  "AVX512VPOPCNTDQ",
	"AVX512_VAES":       "AVX512VAES",
	"AVX512_VPCLMULQDQ": "AVX512VPCLMULQDQ",

	// AVX 10.2 (not yet supported)
	"AVX10_2_RC": "ignore",
}
   823  
// init registers the amd64 feature-implication table: for each CPU feature
// name used above, which other features it implies.
// NOTE(review): "Virtual" presumably marks a feature with no dedicated
// CPUID bit of its own — confirm against featureInfo's declaration.
func init() {
	// TODO: In general, Intel doesn't make any guarantees about what flags are
	// set, so this means our feature checks need to ensure these, just to be
	// sure.
	var features = map[string]featureInfo{
		"AVX2":   {Implies: []string{"AVX"}},
		"AVX512": {Implies: []string{"AVX2"}},

		"AVXAES": {Virtual: true, Implies: []string{"AVX", "AES"}},
		"FMA":    {Implies: []string{"AVX"}},
		"VAES":   {Implies: []string{"AVX"}},

		// AVX-512 subfeatures.
		"AVX512BITALG":    {Implies: []string{"AVX512"}},
		"AVX512GFNI":      {Implies: []string{"AVX512"}},
		"AVX512VBMI":      {Implies: []string{"AVX512"}},
		"AVX512VBMI2":     {Implies: []string{"AVX512"}},
		"AVX512VNNI":      {Implies: []string{"AVX512"}},
		"AVX512VPOPCNTDQ": {Implies: []string{"AVX512"}},
		"AVX512VAES":      {Implies: []string{"AVX512"}},

		// AVX-VNNI and AVX-IFMA are "backports" of the AVX512-VNNI/IFMA
		// instructions to VEX encoding, limited to 256 bit vectors. They're
		// intended for lower end CPUs that want to support VNNI/IFMA without
		// supporting AVX-512. As such, they're built on AVX2's VEX encoding.
		"AVXVNNI": {Implies: []string{"AVX2"}},
		"AVXIFMA": {Implies: []string{"AVX2"}},
	}
	registerFeatureInfo("amd64", goarchFeatures{
		featureVar: "X86",
		features:   features,
	})
}
   857  
   858  var unknownFeatures = map[string]map[string]struct{}{}
   859  
   860  // hasOptionalMask returns whether there is an optional mask operand in ops.
   861  func hasOptionalMask(ops []operand) bool {
   862  	for _, op := range ops {
   863  		if op, ok := op.(operandMask); ok && op.optional {
   864  			return true
   865  		}
   866  	}
   867  	return false
   868  }
   869  
   870  func singular[T comparable](xs []T) (T, bool) {
   871  	if len(xs) == 0 {
   872  		return *new(T), false
   873  	}
   874  	for _, x := range xs[1:] {
   875  		if x != xs[0] {
   876  			return *new(T), false
   877  		}
   878  	}
   879  	return xs[0], true
   880  }
   881  
// fixedReg describes a single, fixed architectural register (one that an
// instruction encoding hard-codes rather than selecting from a register set).
type fixedReg struct {
	class int    // register class: VREG_CLASS or GREG_CLASS
	name  string // assembler name, e.g. "x0"
	width int    // register width in bits
}
   887  
// fixedRegMap maps a XED fixed-register operand name (the non-macro RHS of a
// REG operand) to its register description. Used by decodeReg for operands
// that name a specific register instead of a register-set macro.
var fixedRegMap = map[string]fixedReg{
	"XED_REG_XMM0": {VREG_CLASS, "x0", 128},
}
   891  
// decodeReg returns the register class (NOT_REG_CLASS, VREG_CLASS, or
// GREG_CLASS), the width in bits, and the register name. The name is
// non-empty only for fixed registers found in fixedRegMap.
// If the operand cannot be decoded as a register, the class is NOT_REG_CLASS.
func decodeReg(op *xeddata.Operand) (class, width int, name string) {
	// op.Width tells us the total width, e.g.,:
	//
	//    dq => 128 bits (XMM)
	//    qq => 256 bits (YMM)
	//    mskw => K
	//    z[iuf?](8|16|32|...) => 512 bits (ZMM)
	//
	// But the encoding is really weird and it's not clear if these *always*
	// mean XMM/YMM/ZMM or if other irregular things can use these large widths.
	// Hence, we dig into the register sets themselves.

	// Register operands have a "REG..." left-hand side.
	if !strings.HasPrefix(op.NameLHS(), "REG") {
		return NOT_REG_CLASS, 0, ""
	}
	// TODO: We shouldn't be relying on the macro naming conventions. We should
	// use all-dec-patterns.txt, but xeddata doesn't support that table right now.
	rhs := op.NameRHS()
	if !strings.HasSuffix(rhs, "()") {
		// Not a register-set macro call; it may name one fixed register
		// (e.g. XED_REG_XMM0).
		if fixedReg, ok := fixedRegMap[rhs]; ok {
			return fixedReg.class, fixedReg.width, fixedReg.name
		}
		return NOT_REG_CLASS, 0, ""
	}
	// Classify by the register-set macro's naming convention.
	switch {
	case strings.HasPrefix(rhs, "XMM_"):
		return VREG_CLASS, 128, ""
	case strings.HasPrefix(rhs, "YMM_"):
		return VREG_CLASS, 256, ""
	case strings.HasPrefix(rhs, "ZMM_"):
		return VREG_CLASS, 512, ""
	case strings.HasPrefix(rhs, "GPR64_"), strings.HasPrefix(rhs, "VGPR64_"):
		return GREG_CLASS, 64, ""
	case strings.HasPrefix(rhs, "GPR32_"), strings.HasPrefix(rhs, "VGPR32_"):
		return GREG_CLASS, 32, ""
	}
	return NOT_REG_CLASS, 0, ""
}
   933  
// xtypeRe matches the regular XED xtype pattern: a base-type letter
// (i=int, u=uint, f=float) followed by a decimal bit width, e.g. "i8", "f32".
var xtypeRe = regexp.MustCompile(`^([iuf])([0-9]+)$`)
   935  
// scalarBaseType describes the base type of a scalar element. This is a Go
// type, but without the bit width suffix (with the exception of
// scalarBaseIntOrUint).
type scalarBaseType int

const (
	scalarBaseInt scalarBaseType = iota
	scalarBaseUint
	scalarBaseIntOrUint // Signed or unsigned is unspecified
	scalarBaseFloat
	scalarBaseComplex
	scalarBaseBFloat // e.g. bf16, and OFP8 "bf8" (E5M2)
	scalarBaseHFloat // e.g. OFP8 "hf8" (E4M3)
)
   950  
   951  func (s scalarBaseType) regex() string {
   952  	switch s {
   953  	case scalarBaseInt:
   954  		return "int"
   955  	case scalarBaseUint:
   956  		return "uint"
   957  	case scalarBaseIntOrUint:
   958  		return "int|uint"
   959  	case scalarBaseFloat:
   960  		return "float"
   961  	case scalarBaseComplex:
   962  		return "complex"
   963  	case scalarBaseBFloat:
   964  		return "BFloat"
   965  	case scalarBaseHFloat:
   966  		return "HFloat"
   967  	}
   968  	panic(fmt.Sprintf("unknown scalar base type %d", s))
   969  }
   970  
   971  func decodeType(op *xeddata.Operand) (base scalarBaseType, bits int, ok bool) {
   972  	// The xtype tells you the element type. i8, i16, i32, i64, f32, etc.
   973  	//
   974  	// TODO: Things like AVX2 VPAND have an xtype of u256 because they're
   975  	// element-width agnostic. Do I map that to all widths, or just omit the
   976  	// element width and let unification flesh it out? There's no u512
   977  	// (presumably those are all masked, so elem width matters). These are all
   978  	// Category: LOGICAL, so maybe we could use that info?
   979  
   980  	// Handle some weird ones.
   981  	switch op.Xtype {
   982  	// 8-bit float formats as defined by Open Compute Project "OCP 8-bit
   983  	// Floating Point Specification (OFP8)".
   984  	case "bf8": // E5M2 float
   985  		return scalarBaseBFloat, 8, true
   986  	case "hf8": // E4M3 float
   987  		return scalarBaseHFloat, 8, true
   988  	case "bf16": // bfloat16 float
   989  		return scalarBaseBFloat, 16, true
   990  	case "2f16":
   991  		// Complex consisting of 2 float16s. Doesn't exist in Go, but we can say
   992  		// what it would be.
   993  		return scalarBaseComplex, 32, true
   994  	case "2i8", "2I8":
   995  		// These just use the lower INT8 in each 16 bit field.
   996  		// As far as I can tell, "2I8" is a typo.
   997  		return scalarBaseInt, 8, true
   998  	case "2u16", "2U16":
   999  		// some VPDP* has it
  1000  		// TODO: does "z" means it has zeroing?
  1001  		return scalarBaseUint, 16, true
  1002  	case "2i16", "2I16":
  1003  		// some VPDP* has it
  1004  		return scalarBaseInt, 16, true
  1005  	case "4u8", "4U8":
  1006  		// some VPDP* has it
  1007  		return scalarBaseUint, 8, true
  1008  	case "4i8", "4I8":
  1009  		// some VPDP* has it
  1010  		return scalarBaseInt, 8, true
  1011  	}
  1012  
  1013  	// The rest follow a simple pattern.
  1014  	m := xtypeRe.FindStringSubmatch(op.Xtype)
  1015  	if m == nil {
  1016  		// TODO: Report unrecognized xtype
  1017  		return 0, 0, false
  1018  	}
  1019  	bits, _ = strconv.Atoi(m[2])
  1020  	switch m[1] {
  1021  	case "i", "u":
  1022  		// XED is rather inconsistent about what's signed, unsigned, or doesn't
  1023  		// matter, so merge them together and let the Go definitions narrow as
  1024  		// appropriate. Maybe there's a better way to do this.
  1025  		return scalarBaseIntOrUint, bits, true
  1026  	case "f":
  1027  		return scalarBaseFloat, bits, true
  1028  	default:
  1029  		panic("unreachable")
  1030  	}
  1031  }
  1032  

View as plain text