gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

cpuid.go (48515B)


      1 // Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
      2 
      3 // Package cpuid provides information about the CPU running the current program.
      4 //
      5 // CPU features are detected on startup, and kept for fast access through the life of the application.
// Currently x86 / x64 (AMD64) as well as arm64 are supported.
      7 //
      8 // You can access the CPU information by accessing the shared CPU variable of the cpuid library.
      9 //
     10 // Package home: https://github.com/klauspost/cpuid
     11 package cpuid
     12 
     13 import (
     14 	"flag"
     15 	"fmt"
     16 	"math"
     17 	"math/bits"
     18 	"os"
     19 	"runtime"
     20 	"strings"
     21 )
     22 
// AMD reference: https://www.amd.com/system/files/TechDocs/25481.pdf
     24 // and Processor Programming Reference (PPR)
     25 
// Vendor is a representation of a CPU vendor.
// The zero value is VendorUnknown.
type Vendor int

// Known CPU vendors. Values are assigned sequentially by iota, so the
// declaration order fixes each constant's numeric value; new vendors must be
// added immediately before lastVendor to keep existing values stable.
const (
	VendorUnknown Vendor = iota
	Intel
	AMD
	VIA
	Transmeta
	NSC
	KVM  // Kernel-based Virtual Machine
	MSVM // Microsoft Hyper-V or Windows Virtual PC
	VMware
	XenHVM
	Bhyve
	Hygon
	SiS
	RDC

	Ampere
	ARM
	Broadcom
	Cavium
	DEC
	Fujitsu
	Infineon
	Motorola
	NVIDIA
	AMCC
	Qualcomm
	Marvell

	// lastVendor is declared last, so its value is one past the final vendor.
	lastVendor
)
     60 
     61 //go:generate stringer -type=FeatureID,Vendor
     62 
// FeatureID is the ID of a specific cpu feature.
// Each ID is used as a bit index into a flagSet.
type FeatureID int

const (
	// Keep index -1 as unknown
	UNKNOWN = -1

	// Add features
	// Note: iota is already 1 on the ADX line because UNKNOWN occupies the
	// first spec of this const block, so ADX == 1 and the value 0 (firstID)
	// is not assigned to any named feature.
	ADX                FeatureID = iota // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	AESNI                               // Advanced Encryption Standard New Instructions
	AMD3DNOW                            // AMD 3DNOW
	AMD3DNOWEXT                         // AMD 3DNowExt
	AMXBF16                             // Tile computational operations on BFLOAT16 numbers
	AMXFP16                             // Tile computational operations on FP16 numbers
	AMXINT8                             // Tile computational operations on 8-bit integers
	AMXTILE                             // Tile architecture
	AVX                                 // AVX functions
	AVX2                                // AVX2 functions
	AVX512BF16                          // AVX-512 BFLOAT16 Instructions
	AVX512BITALG                        // AVX-512 Bit Algorithms
	AVX512BW                            // AVX-512 Byte and Word Instructions
	AVX512CD                            // AVX-512 Conflict Detection Instructions
	AVX512DQ                            // AVX-512 Doubleword and Quadword Instructions
	AVX512ER                            // AVX-512 Exponential and Reciprocal Instructions
	AVX512F                             // AVX-512 Foundation
	AVX512FP16                          // AVX-512 FP16 Instructions
	AVX512IFMA                          // AVX-512 Integer Fused Multiply-Add Instructions
	AVX512PF                            // AVX-512 Prefetch Instructions
	AVX512VBMI                          // AVX-512 Vector Bit Manipulation Instructions
	AVX512VBMI2                         // AVX-512 Vector Bit Manipulation Instructions, Version 2
	AVX512VL                            // AVX-512 Vector Length Extensions
	AVX512VNNI                          // AVX-512 Vector Neural Network Instructions
	AVX512VP2INTERSECT                  // AVX-512 Intersect for D/Q
	AVX512VPOPCNTDQ                     // AVX-512 Vector Population Count Doubleword and Quadword
	AVXIFMA                             // AVX-IFMA instructions
	AVXNECONVERT                        // AVX-NE-CONVERT instructions
	AVXSLOW                             // Indicates the CPU performs 2 128 bit operations instead of one
	AVXVNNI                             // AVX (VEX encoded) VNNI neural network instructions
	AVXVNNIINT8                         // AVX-VNNI-INT8 instructions
	BHI_CTRL                            // Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598
	BMI1                                // Bit Manipulation Instruction Set 1
	BMI2                                // Bit Manipulation Instruction Set 2
	CETIBT                              // Intel CET Indirect Branch Tracking
	CETSS                               // Intel CET Shadow Stack
	CLDEMOTE                            // Cache Line Demote
	CLMUL                               // Carry-less Multiplication
	CLZERO                              // CLZERO instruction supported
	CMOV                                // i686 CMOV
	CMPCCXADD                           // CMPCCXADD instructions
	CMPSB_SCADBS_SHORT                  // Fast short CMPSB and SCASB
	CMPXCHG8                            // CMPXCHG8 instruction
	CPBOOST                             // Core Performance Boost
	CPPC                                // AMD: Collaborative Processor Performance Control
	CX16                                // CMPXCHG16B Instruction
	EFER_LMSLE_UNS                      // AMD: =Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ
	ENQCMD                              // Enqueue Command
	ERMS                                // Enhanced REP MOVSB/STOSB
	F16C                                // Half-precision floating-point conversion
	FLUSH_L1D                           // Flush L1D cache
	FMA3                                // Intel FMA 3. Does not imply AVX.
	FMA4                                // Bulldozer FMA4 functions
	FP128                               // AMD: When set, the internal FP/SIMD execution datapath is no more than 128-bits wide
	FP256                               // AMD: When set, the internal FP/SIMD execution datapath is no more than 256-bits wide
	FSRM                                // Fast Short Rep Mov
	FXSR                                // FXSAVE, FXRESTOR instructions, CR4 bit 9
	FXSROPT                             // FXSAVE/FXRSTOR optimizations
	GFNI                                // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage.
	HLE                                 // Hardware Lock Elision
	HRESET                              // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR
	HTT                                 // Hyperthreading (enabled)
	HWA                                 // Hardware assert supported. Indicates support for MSRC001_10
	HYBRID_CPU                          // This part has CPUs of more than one type.
	HYPERVISOR                          // This bit has been reserved by Intel & AMD for use by hypervisors
	IA32_ARCH_CAP                       // IA32_ARCH_CAPABILITIES MSR (Intel)
	IA32_CORE_CAP                       // IA32_CORE_CAPABILITIES MSR
	IBPB                                // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
	IBRS                                // AMD: Indirect Branch Restricted Speculation
	IBRS_PREFERRED                      // AMD: IBRS is preferred over software solution
	IBRS_PROVIDES_SMP                   // AMD: IBRS provides Same Mode Protection
	IBS                                 // Instruction Based Sampling (AMD)
	IBSBRNTRGT                          // Instruction Based Sampling Feature (AMD)
	IBSFETCHSAM                         // Instruction Based Sampling Feature (AMD)
	IBSFFV                              // Instruction Based Sampling Feature (AMD)
	IBSOPCNT                            // Instruction Based Sampling Feature (AMD)
	IBSOPCNTEXT                         // Instruction Based Sampling Feature (AMD)
	IBSOPSAM                            // Instruction Based Sampling Feature (AMD)
	IBSRDWROPCNT                        // Instruction Based Sampling Feature (AMD)
	IBSRIPINVALIDCHK                    // Instruction Based Sampling Feature (AMD)
	IBS_FETCH_CTLX                      // AMD: IBS fetch control extended MSR supported
	IBS_OPDATA4                         // AMD: IBS op data 4 MSR supported
	IBS_OPFUSE                          // AMD: Indicates support for IbsOpFuse
	IBS_PREVENTHOST                     // Disallowing IBS use by the host supported
	IBS_ZEN4                            // AMD: Fetch and Op IBS support IBS extensions added with Zen4
	IDPRED_CTRL                         // IPRED_DIS
	INT_WBINVD                          // WBINVD/WBNOINVD are interruptible.
	INVLPGB                             // INVLPGB and TLBSYNC instruction supported
	LAHF                                // LAHF/SAHF in long mode
	LAM                                 // If set, CPU supports Linear Address Masking
	LBRVIRT                             // LBR virtualization
	LZCNT                               // LZCNT instruction
	MCAOVERFLOW                         // MCA overflow recovery support.
	MCDT_NO                             // Processor does not exhibit MXCSR Configuration Dependent Timing behavior and does not need to mitigate it.
	MCOMMIT                             // MCOMMIT instruction supported
	MD_CLEAR                            // VERW clears CPU buffers
	MMX                                 // standard MMX
	MMXEXT                              // SSE integer functions or AMD MMX ext
	MOVBE                               // MOVBE instruction (big-endian)
	MOVDIR64B                           // Move 64 Bytes as Direct Store
	MOVDIRI                             // Move Doubleword as Direct Store
	MOVSB_ZL                            // Fast Zero-Length MOVSB
	MOVU                                // AMD: MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD
	MPX                                 // Intel MPX (Memory Protection Extensions)
	MSRIRC                              // Instruction Retired Counter MSR available
	MSRLIST                             // Read/Write List of Model Specific Registers
	MSR_PAGEFLUSH                       // Page Flush MSR available
	NRIPS                               // Indicates support for NRIP save on VMEXIT
	NX                                  // NX (No-Execute) bit
	OSXSAVE                             // XSAVE enabled by OS
	PCONFIG                             // PCONFIG for Intel Multi-Key Total Memory Encryption
	POPCNT                              // POPCNT instruction
	PPIN                                // AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled
	PREFETCHI                           // PREFETCHIT0/1 instructions
	PSFD                                // Predictive Store Forward Disable
	RDPRU                               // RDPRU instruction supported
	RDRAND                              // RDRAND instruction is available
	RDSEED                              // RDSEED instruction is available
	RDTSCP                              // RDTSCP Instruction
	RRSBA_CTRL                          // Restricted RSB Alternate
	RTM                                 // Restricted Transactional Memory
	RTM_ALWAYS_ABORT                    // Indicates that the loaded microcode is forcing RTM abort.
	SERIALIZE                           // Serialize Instruction Execution
	SEV                                 // AMD Secure Encrypted Virtualization supported
	SEV_64BIT                           // AMD SEV guest execution only allowed from a 64-bit host
	SEV_ALTERNATIVE                     // AMD SEV Alternate Injection supported
	SEV_DEBUGSWAP                       // Full debug state swap supported for SEV-ES guests
	SEV_ES                              // AMD SEV Encrypted State supported
	SEV_RESTRICTED                      // AMD SEV Restricted Injection supported
	SEV_SNP                             // AMD SEV Secure Nested Paging supported
	SGX                                 // Software Guard Extensions
	SGXLC                               // Software Guard Extensions Launch Control
	SHA                                 // Intel SHA Extensions
	SME                                 // AMD Secure Memory Encryption supported
	SME_COHERENT                        // AMD Hardware cache coherency across encryption domains enforced
	SPEC_CTRL_SSBD                      // Speculative Store Bypass Disable
	SRBDS_CTRL                          // SRBDS mitigation MSR available
	SSE                                 // SSE functions
	SSE2                                // P4 SSE functions
	SSE3                                // Prescott SSE3 functions
	SSE4                                // Penryn SSE4.1 functions
	SSE42                               // Nehalem SSE4.2 functions
	SSE4A                               // AMD Barcelona microarchitecture SSE4a instructions
	SSSE3                               // Conroe SSSE3 functions
	STIBP                               // Single Thread Indirect Branch Predictors
	STIBP_ALWAYSON                      // AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On
	STOSB_SHORT                         // Fast short STOSB
	SUCCOR                              // Software uncorrectable error containment and recovery capability.
	SVM                                 // AMD Secure Virtual Machine
	SVMDA                               // Indicates support for the SVM decode assists.
	SVMFBASID                           // SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control
	SVML                                // AMD SVM lock. Indicates support for SVM-Lock.
	SVMNP                               // AMD SVM nested paging
	SVMPF                               // SVM pause intercept filter. Indicates support for the pause intercept filter
	SVMPFT                              // SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold
	SYSCALL                             // System-Call Extension (SCE): SYSCALL and SYSRET instructions.
	SYSEE                               // SYSENTER and SYSEXIT instructions
	TBM                                 // AMD Trailing Bit Manipulation
	TLB_FLUSH_NESTED                    // AMD: Flushing includes all the nested translations for guest translations
	TME                                 // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE.
	TOPEXT                              // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX.
	TSCRATEMSR                          // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104
	TSXLDTRK                            // Intel TSX Suspend Load Address Tracking
	VAES                                // Vector AES. AVX(512) versions requires additional checks.
	VMCBCLEAN                           // VMCB clean bits. Indicates support for VMCB clean bits.
	VMPL                                // AMD VM Permission Levels supported
	VMSA_REGPROT                        // AMD VMSA Register Protection supported
	VMX                                 // Virtual Machine Extensions
	VPCLMULQDQ                          // Carry-Less Multiplication Quadword. Requires AVX for 3 register versions.
	VTE                                 // AMD Virtual Transparent Encryption supported
	WAITPKG                             // TPAUSE, UMONITOR, UMWAIT
	WBNOINVD                            // Write Back and Do Not Invalidate Cache
	WRMSRNS                             // Non-Serializing Write to Model Specific Register
	X87                                 // FPU
	XGETBV1                             // Supports XGETBV with ECX = 1
	XOP                                 // Bulldozer XOP functions
	XSAVE                               // XSAVE, XRESTOR, XSETBV, XGETBV
	XSAVEC                              // Supports XSAVEC and the compacted form of XRSTOR.
	XSAVEOPT                            // XSAVEOPT available
	XSAVES                              // Supports XSAVES/XRSTORS and IA32_XSS

	// ARM features:
	AESARM   // AES instructions
	ARMCPUID // Some CPU ID registers readable at user-level
	ASIMD    // Advanced SIMD
	ASIMDDP  // SIMD Dot Product
	ASIMDHP  // Advanced SIMD half-precision floating point
	ASIMDRDM // Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH)
	ATOMICS  // Large System Extensions (LSE)
	CRC32    // CRC32/CRC32C instructions
	DCPOP    // Data cache clean to Point of Persistence (DC CVAP)
	EVTSTRM  // Generic timer
	FCMA     // Floating point complex number addition and multiplication
	FP       // Single-precision and double-precision floating point
	FPHP     // Half-precision floating point
	GPA      // Generic Pointer Authentication
	JSCVT    // Javascript-style double->int convert (FJCVTZS)
	LRCPC    // Weaker release consistency (LDAPR, etc)
	PMULL    // Polynomial Multiply instructions (PMULL/PMULL2)
	SHA1     // SHA-1 instructions (SHA1C, etc)
	SHA2     // SHA-2 instructions (SHA256H, etc)
	SHA3     // SHA-3 instructions (EOR3, RAX1, XAR, BCAX)
	SHA512   // SHA512 instructions
	SM3      // SM3 instructions
	SM4      // SM4 instructions
	SVE      // Scalable Vector Extension
	// Keep it last. It automatically defines the size of []flagSet
	lastID

	// firstID is the value immediately after UNKNOWN, i.e. 0.
	firstID FeatureID = UNKNOWN + 1
)
    282 
// CPUInfo contains information about the detected system CPU.
// Exported fields are plain data; the feature bits themselves are kept in the
// unexported featureSet and queried through the methods on CPUInfo.
type CPUInfo struct {
	BrandName      string  // Brand name reported by the CPU
	VendorID       Vendor  // Comparable CPU vendor ID
	VendorString   string  // Raw vendor string.
	featureSet     flagSet // Features of the CPU
	PhysicalCores  int     // Number of physical processor cores in your CPU. Will be 0 if undetectable.
	ThreadsPerCore int     // Number of threads per physical core. Will be 1 if undetectable.
	LogicalCores   int     // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
	Family         int     // CPU family number
	Model          int     // CPU model number
	Stepping       int     // CPU stepping info
	CacheLine      int     // Cache line size in bytes. Will be 0 if undetectable.
	Hz             int64   // Clock speed, if known, 0 otherwise. Will attempt to contain base clock speed.
	BoostFreq      int64   // Max clock speed, if known, 0 otherwise
	Cache          struct {
		L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
		L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected
		L2  int // L2 Cache (per core or shared). Will be -1 if undetected
		L3  int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected
	}
	// SGX holds Software Guard Extensions details; SGXSupport is declared
	// elsewhere in the package.
	SGX SGXSupport
	// maxFunc is checked before issuing CPUID leaf 1 in LogicalCPU;
	// presumably the highest supported standard CPUID leaf — confirm where it is set.
	maxFunc uint32
	// maxExFunc is presumably the highest supported extended CPUID leaf — confirm where it is set.
	maxExFunc uint32
}
    308 
    309 var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)
    310 var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)
    311 var xgetbv func(index uint32) (eax, edx uint32)
    312 var rdtscpAsm func() (eax, ebx, ecx, edx uint32)
    313 var darwinHasAVX512 = func() bool { return false }
    314 
// CPU contains information about the CPU as detected on startup,
// or when Detect last was called.
//
// Use this as the primary entry point to your data.
var CPU CPUInfo
    320 
// init runs initCPU and then Detect at package load, so the exported CPU
// variable is populated before any user code runs.
func init() {
	// initCPU is defined elsewhere; presumably it installs the
	// platform-specific cpuid/xgetbv hooks — confirm.
	initCPU()
	Detect()
}
    325 
    326 // Detect will re-detect current CPU info.
    327 // This will replace the content of the exported CPU variable.
    328 //
    329 // Unless you expect the CPU to change while you are running your program
    330 // you should not need to call this function.
    331 // If you call this, you must ensure that no other goroutine is accessing the
    332 // exported CPU variable.
    333 func Detect() {
    334 	// Set defaults
    335 	CPU.ThreadsPerCore = 1
    336 	CPU.Cache.L1I = -1
    337 	CPU.Cache.L1D = -1
    338 	CPU.Cache.L2 = -1
    339 	CPU.Cache.L3 = -1
    340 	safe := true
    341 	if detectArmFlag != nil {
    342 		safe = !*detectArmFlag
    343 	}
    344 	addInfo(&CPU, safe)
    345 	if displayFeats != nil && *displayFeats {
    346 		fmt.Println("cpu features:", strings.Join(CPU.FeatureSet(), ","))
    347 		// Exit with non-zero so tests will print value.
    348 		os.Exit(1)
    349 	}
    350 	if disableFlag != nil {
    351 		s := strings.Split(*disableFlag, ",")
    352 		for _, feat := range s {
    353 			feat := ParseFeature(strings.TrimSpace(feat))
    354 			if feat != UNKNOWN {
    355 				CPU.featureSet.unset(feat)
    356 			}
    357 		}
    358 	}
    359 }
    360 
// DetectARM will detect ARM64 features.
// This is NOT done automatically since it can potentially crash
// if the OS does not handle the command.
// If in the future this can be done safely this function may not
// do anything.
//
// It updates the exported CPU variable in place.
func DetectARM() {
	// false disables the "safe" mode that Detect uses by default.
	addInfo(&CPU, false)
}
    369 
    370 var detectArmFlag *bool
    371 var displayFeats *bool
    372 var disableFlag *string
    373 
    374 // Flags will enable flags.
    375 // This must be called *before* flag.Parse AND
    376 // Detect must be called after the flags have been parsed.
    377 // Note that this means that any detection used in init() functions
    378 // will not contain these flags.
    379 func Flags() {
    380 	disableFlag = flag.String("cpu.disable", "", "disable cpu features; comma separated list")
    381 	displayFeats = flag.Bool("cpu.features", false, "lists cpu features and exits")
    382 	detectArmFlag = flag.Bool("cpu.arm", false, "allow ARM features to be detected; can potentially crash")
    383 }
    384 
    385 // Supports returns whether the CPU supports all of the requested features.
    386 func (c CPUInfo) Supports(ids ...FeatureID) bool {
    387 	for _, id := range ids {
    388 		if !c.featureSet.inSet(id) {
    389 			return false
    390 		}
    391 	}
    392 	return true
    393 }
    394 
// Has reports whether the single feature id is present in the feature set.
// Should be inlined by the compiler.
func (c *CPUInfo) Has(id FeatureID) bool {
	return c.featureSet.inSet(id)
}
    400 
    401 // AnyOf returns whether the CPU supports one or more of the requested features.
    402 func (c CPUInfo) AnyOf(ids ...FeatureID) bool {
    403 	for _, id := range ids {
    404 		if c.featureSet.inSet(id) {
    405 			return true
    406 		}
    407 	}
    408 	return false
    409 }
    410 
// Features contains several features combined for a fast check using
// CPUInfo.HasAll. Values are created with CombineFeatures.
type Features *flagSet
    414 
    415 // CombineFeatures allows to combine several features for a close to constant time lookup.
    416 func CombineFeatures(ids ...FeatureID) Features {
    417 	var v flagSet
    418 	for _, id := range ids {
    419 		v.set(id)
    420 	}
    421 	return &v
    422 }
    423 
// HasAll reports whether every feature contained in f is present.
// f must not be nil: hasSetP dereferences it.
func (c *CPUInfo) HasAll(f Features) bool {
	return c.featureSet.hasSetP(f)
}
    427 
    428 // https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
    429 var oneOfLevel = CombineFeatures(SYSEE, SYSCALL)
    430 var level1Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2)
    431 var level2Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3)
    432 var level3Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE)
    433 var level4Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL)
    434 
    435 // X64Level returns the microarchitecture level detected on the CPU.
    436 // If features are lacking or non x64 mode, 0 is returned.
    437 // See https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
    438 func (c CPUInfo) X64Level() int {
    439 	if !c.featureSet.hasOneOf(oneOfLevel) {
    440 		return 0
    441 	}
    442 	if c.featureSet.hasSetP(level4Features) {
    443 		return 4
    444 	}
    445 	if c.featureSet.hasSetP(level3Features) {
    446 		return 3
    447 	}
    448 	if c.featureSet.hasSetP(level2Features) {
    449 		return 2
    450 	}
    451 	if c.featureSet.hasSetP(level1Features) {
    452 		return 1
    453 	}
    454 	return 0
    455 }
    456 
    457 // Disable will disable one or several features.
    458 func (c *CPUInfo) Disable(ids ...FeatureID) bool {
    459 	for _, id := range ids {
    460 		c.featureSet.unset(id)
    461 	}
    462 	return true
    463 }
    464 
    465 // Enable will disable one or several features even if they were undetected.
    466 // This is of course not recommended for obvious reasons.
    467 func (c *CPUInfo) Enable(ids ...FeatureID) bool {
    468 	for _, id := range ids {
    469 		c.featureSet.set(id)
    470 	}
    471 	return true
    472 }
    473 
// IsVendor reports whether the detected vendor ID equals v.
func (c CPUInfo) IsVendor(v Vendor) bool {
	return c.VendorID == v
}
    478 
    479 // FeatureSet returns all available features as strings.
    480 func (c CPUInfo) FeatureSet() []string {
    481 	s := make([]string, 0, c.featureSet.nEnabled())
    482 	s = append(s, c.featureSet.Strings()...)
    483 	return s
    484 }
    485 
    486 // RTCounter returns the 64-bit time-stamp counter
    487 // Uses the RDTSCP instruction. The value 0 is returned
    488 // if the CPU does not support the instruction.
    489 func (c CPUInfo) RTCounter() uint64 {
    490 	if !c.Supports(RDTSCP) {
    491 		return 0
    492 	}
    493 	a, _, _, d := rdtscpAsm()
    494 	return uint64(a) | (uint64(d) << 32)
    495 }
    496 
    497 // Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP.
    498 // This variable is OS dependent, but on Linux contains information
    499 // about the current cpu/core the code is running on.
    500 // If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned.
    501 func (c CPUInfo) Ia32TscAux() uint32 {
    502 	if !c.Supports(RDTSCP) {
    503 		return 0
    504 	}
    505 	_, _, ecx, _ := rdtscpAsm()
    506 	return ecx
    507 }
    508 
    509 // LogicalCPU will return the Logical CPU the code is currently executing on.
    510 // This is likely to change when the OS re-schedules the running thread
    511 // to another CPU.
    512 // If the current core cannot be detected, -1 will be returned.
    513 func (c CPUInfo) LogicalCPU() int {
    514 	if c.maxFunc < 1 {
    515 		return -1
    516 	}
    517 	_, ebx, _, _ := cpuid(1)
    518 	return int(ebx >> 24)
    519 }
    520 
    521 // frequencies tries to compute the clock speed of the CPU. If leaf 15 is
    522 // supported, use it, otherwise parse the brand string. Yes, really.
    523 func (c *CPUInfo) frequencies() {
    524 	c.Hz, c.BoostFreq = 0, 0
    525 	mfi := maxFunctionID()
    526 	if mfi >= 0x15 {
    527 		eax, ebx, ecx, _ := cpuid(0x15)
    528 		if eax != 0 && ebx != 0 && ecx != 0 {
    529 			c.Hz = (int64(ecx) * int64(ebx)) / int64(eax)
    530 		}
    531 	}
    532 	if mfi >= 0x16 {
    533 		a, b, _, _ := cpuid(0x16)
    534 		// Base...
    535 		if a&0xffff > 0 {
    536 			c.Hz = int64(a&0xffff) * 1_000_000
    537 		}
    538 		// Boost...
    539 		if b&0xffff > 0 {
    540 			c.BoostFreq = int64(b&0xffff) * 1_000_000
    541 		}
    542 	}
    543 	if c.Hz > 0 {
    544 		return
    545 	}
    546 
    547 	// computeHz determines the official rated speed of a CPU from its brand
    548 	// string. This insanity is *actually the official documented way to do
    549 	// this according to Intel*, prior to leaf 0x15 existing. The official
    550 	// documentation only shows this working for exactly `x.xx` or `xxxx`
    551 	// cases, e.g., `2.50GHz` or `1300MHz`; this parser will accept other
    552 	// sizes.
    553 	model := c.BrandName
    554 	hz := strings.LastIndex(model, "Hz")
    555 	if hz < 3 {
    556 		return
    557 	}
    558 	var multiplier int64
    559 	switch model[hz-1] {
    560 	case 'M':
    561 		multiplier = 1000 * 1000
    562 	case 'G':
    563 		multiplier = 1000 * 1000 * 1000
    564 	case 'T':
    565 		multiplier = 1000 * 1000 * 1000 * 1000
    566 	}
    567 	if multiplier == 0 {
    568 		return
    569 	}
    570 	freq := int64(0)
    571 	divisor := int64(0)
    572 	decimalShift := int64(1)
    573 	var i int
    574 	for i = hz - 2; i >= 0 && model[i] != ' '; i-- {
    575 		if model[i] >= '0' && model[i] <= '9' {
    576 			freq += int64(model[i]-'0') * decimalShift
    577 			decimalShift *= 10
    578 		} else if model[i] == '.' {
    579 			if divisor != 0 {
    580 				return
    581 			}
    582 			divisor = decimalShift
    583 		} else {
    584 			return
    585 		}
    586 	}
    587 	// we didn't find a space
    588 	if i < 0 {
    589 		return
    590 	}
    591 	if divisor != 0 {
    592 		c.Hz = (freq * multiplier) / divisor
    593 		return
    594 	}
    595 	c.Hz = freq * multiplier
    596 }
    597 
    598 // VM Will return true if the cpu id indicates we are in
    599 // a virtual machine.
    600 func (c CPUInfo) VM() bool {
    601 	return CPU.featureSet.inSet(HYPERVISOR)
    602 }
    603 
    604 // flags contains detected cpu features and characteristics
    605 type flags uint64
    606 
    607 // log2(bits_in_uint64)
    608 const flagBitsLog2 = 6
    609 const flagBits = 1 << flagBitsLog2
    610 const flagMask = flagBits - 1
    611 
// flagSet contains detected cpu features and characteristics in an array of flags.
// The array length is lastID divided by flagBits, rounded up, so every
// FeatureID below lastID has a bit: feature f is stored in word
// f>>flagBitsLog2 at bit position f&flagMask.
type flagSet [(lastID + flagMask) / flagBits]flags
    614 
    615 func (s *flagSet) inSet(feat FeatureID) bool {
    616 	return s[feat>>flagBitsLog2]&(1<<(feat&flagMask)) != 0
    617 }
    618 
    619 func (s *flagSet) set(feat FeatureID) {
    620 	s[feat>>flagBitsLog2] |= 1 << (feat & flagMask)
    621 }
    622 
    623 // setIf will set a feature if boolean is true.
    624 func (s *flagSet) setIf(cond bool, features ...FeatureID) {
    625 	if cond {
    626 		for _, offset := range features {
    627 			s[offset>>flagBitsLog2] |= 1 << (offset & flagMask)
    628 		}
    629 	}
    630 }
    631 
    632 func (s *flagSet) unset(offset FeatureID) {
    633 	bit := flags(1 << (offset & flagMask))
    634 	s[offset>>flagBitsLog2] = s[offset>>flagBitsLog2] & ^bit
    635 }
    636 
    637 // or with another flagset.
    638 func (s *flagSet) or(other flagSet) {
    639 	for i, v := range other[:] {
    640 		s[i] |= v
    641 	}
    642 }
    643 
    644 // hasSet returns whether all features are present.
    645 func (s *flagSet) hasSet(other flagSet) bool {
    646 	for i, v := range other[:] {
    647 		if s[i]&v != v {
    648 			return false
    649 		}
    650 	}
    651 	return true
    652 }
    653 
    654 // hasSet returns whether all features are present.
    655 func (s *flagSet) hasSetP(other *flagSet) bool {
    656 	for i, v := range other[:] {
    657 		if s[i]&v != v {
    658 			return false
    659 		}
    660 	}
    661 	return true
    662 }
    663 
    664 // hasOneOf returns whether one or more features are present.
    665 func (s *flagSet) hasOneOf(other *flagSet) bool {
    666 	for i, v := range other[:] {
    667 		if s[i]&v != 0 {
    668 			return true
    669 		}
    670 	}
    671 	return false
    672 }
    673 
    674 // nEnabled will return the number of enabled flags.
    675 func (s *flagSet) nEnabled() (n int) {
    676 	for _, v := range s[:] {
    677 		n += bits.OnesCount64(uint64(v))
    678 	}
    679 	return n
    680 }
    681 
    682 func flagSetWith(feat ...FeatureID) flagSet {
    683 	var res flagSet
    684 	for _, f := range feat {
    685 		res.set(f)
    686 	}
    687 	return res
    688 }
    689 
    690 // ParseFeature will parse the string and return the ID of the matching feature.
    691 // Will return UNKNOWN if not found.
    692 func ParseFeature(s string) FeatureID {
    693 	s = strings.ToUpper(s)
    694 	for i := firstID; i < lastID; i++ {
    695 		if i.String() == s {
    696 			return i
    697 		}
    698 	}
    699 	return UNKNOWN
    700 }
    701 
    702 // Strings returns an array of the detected features for FlagsSet.
    703 func (s flagSet) Strings() []string {
    704 	if len(s) == 0 {
    705 		return []string{""}
    706 	}
    707 	r := make([]string, 0)
    708 	for i := firstID; i < lastID; i++ {
    709 		if s.inSet(i) {
    710 			r = append(r, i.String())
    711 		}
    712 	}
    713 	return r
    714 }
    715 
    716 func maxExtendedFunction() uint32 {
    717 	eax, _, _, _ := cpuid(0x80000000)
    718 	return eax
    719 }
    720 
    721 func maxFunctionID() uint32 {
    722 	a, _, _, _ := cpuid(0)
    723 	return a
    724 }
    725 
    726 func brandName() string {
    727 	if maxExtendedFunction() >= 0x80000004 {
    728 		v := make([]uint32, 0, 48)
    729 		for i := uint32(0); i < 3; i++ {
    730 			a, b, c, d := cpuid(0x80000002 + i)
    731 			v = append(v, a, b, c, d)
    732 		}
    733 		return strings.Trim(string(valAsString(v...)), " ")
    734 	}
    735 	return "unknown"
    736 }
    737 
    738 func threadsPerCore() int {
    739 	mfi := maxFunctionID()
    740 	vend, _ := vendorID()
    741 
    742 	if mfi < 0x4 || (vend != Intel && vend != AMD) {
    743 		return 1
    744 	}
    745 
    746 	if mfi < 0xb {
    747 		if vend != Intel {
    748 			return 1
    749 		}
    750 		_, b, _, d := cpuid(1)
    751 		if (d & (1 << 28)) != 0 {
    752 			// v will contain logical core count
    753 			v := (b >> 16) & 255
    754 			if v > 1 {
    755 				a4, _, _, _ := cpuid(4)
    756 				// physical cores
    757 				v2 := (a4 >> 26) + 1
    758 				if v2 > 0 {
    759 					return int(v) / int(v2)
    760 				}
    761 			}
    762 		}
    763 		return 1
    764 	}
    765 	_, b, _, _ := cpuidex(0xb, 0)
    766 	if b&0xffff == 0 {
    767 		if vend == AMD {
    768 			// Workaround for AMD returning 0, assume 2 if >= Zen 2
    769 			// It will be more correct than not.
    770 			fam, _, _ := familyModel()
    771 			_, _, _, d := cpuid(1)
    772 			if (d&(1<<28)) != 0 && fam >= 23 {
    773 				return 2
    774 			}
    775 		}
    776 		return 1
    777 	}
    778 	return int(b & 0xffff)
    779 }
    780 
    781 func logicalCores() int {
    782 	mfi := maxFunctionID()
    783 	v, _ := vendorID()
    784 	switch v {
    785 	case Intel:
    786 		// Use this on old Intel processors
    787 		if mfi < 0xb {
    788 			if mfi < 1 {
    789 				return 0
    790 			}
    791 			// CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID)
    792 			// that can be assigned to logical processors in a physical package.
    793 			// The value may not be the same as the number of logical processors that are present in the hardware of a physical package.
    794 			_, ebx, _, _ := cpuid(1)
    795 			logical := (ebx >> 16) & 0xff
    796 			return int(logical)
    797 		}
    798 		_, b, _, _ := cpuidex(0xb, 1)
    799 		return int(b & 0xffff)
    800 	case AMD, Hygon:
    801 		_, b, _, _ := cpuid(1)
    802 		return int((b >> 16) & 0xff)
    803 	default:
    804 		return 0
    805 	}
    806 }
    807 
    808 func familyModel() (family, model, stepping int) {
    809 	if maxFunctionID() < 0x1 {
    810 		return 0, 0, 0
    811 	}
    812 	eax, _, _, _ := cpuid(1)
    813 	// If BaseFamily[3:0] is less than Fh then ExtendedFamily[7:0] is reserved and Family is equal to BaseFamily[3:0].
    814 	family = int((eax >> 8) & 0xf)
    815 	extFam := family == 0x6 // Intel is 0x6, needs extended model.
    816 	if family == 0xf {
    817 		// Add ExtFamily
    818 		family += int((eax >> 20) & 0xff)
    819 		extFam = true
    820 	}
    821 	// If BaseFamily[3:0] is less than 0Fh then ExtendedModel[3:0] is reserved and Model is equal to BaseModel[3:0].
    822 	model = int((eax >> 4) & 0xf)
    823 	if extFam {
    824 		// Add ExtModel
    825 		model += int((eax >> 12) & 0xf0)
    826 	}
    827 	stepping = int(eax & 0xf)
    828 	return family, model, stepping
    829 }
    830 
    831 func physicalCores() int {
    832 	v, _ := vendorID()
    833 	switch v {
    834 	case Intel:
    835 		return logicalCores() / threadsPerCore()
    836 	case AMD, Hygon:
    837 		lc := logicalCores()
    838 		tpc := threadsPerCore()
    839 		if lc > 0 && tpc > 0 {
    840 			return lc / tpc
    841 		}
    842 
    843 		// The following is inaccurate on AMD EPYC 7742 64-Core Processor
    844 		if maxExtendedFunction() >= 0x80000008 {
    845 			_, _, c, _ := cpuid(0x80000008)
    846 			if c&0xff > 0 {
    847 				return int(c&0xff) + 1
    848 			}
    849 		}
    850 	}
    851 	return 0
    852 }
    853 
// vendorMapping maps the vendor identification string built from the
// registers of CPUID leaf 0 to the corresponding Vendor.
//
// Excerpt from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
var vendorMapping = map[string]Vendor{
	"AMDisbetter!": AMD,
	"AuthenticAMD": AMD,
	"CentaurHauls": VIA,
	"GenuineIntel": Intel,
	"TransmetaCPU": Transmeta,
	"GenuineTMx86": Transmeta,
	"Geode by NSC": NSC,
	"VIA VIA VIA ": VIA,
	"KVMKVMKVMKVM": KVM,
	"Microsoft Hv": MSVM,
	"VMwareVMware": VMware,
	"XenVMMXenVMM": XenHVM,
	"bhyve bhyve ": Bhyve,
	"HygonGenuine": Hygon,
	"Vortex86 SoC": SiS,
	"SiS SiS SiS ": SiS,
	"RiseRiseRise": SiS,
	"Genuine  RDC": RDC,
}
    875 
    876 func vendorID() (Vendor, string) {
    877 	_, b, c, d := cpuid(0)
    878 	v := string(valAsString(b, d, c))
    879 	vend, ok := vendorMapping[v]
    880 	if !ok {
    881 		return VendorUnknown, v
    882 	}
    883 	return vend, v
    884 }
    885 
    886 func cacheLine() int {
    887 	if maxFunctionID() < 0x1 {
    888 		return 0
    889 	}
    890 
    891 	_, ebx, _, _ := cpuid(1)
    892 	cache := (ebx & 0xff00) >> 5 // cflush size
    893 	if cache == 0 && maxExtendedFunction() >= 0x80000006 {
    894 		_, _, ecx, _ := cpuid(0x80000006)
    895 		cache = ecx & 0xff // cacheline size
    896 	}
    897 	// TODO: Read from Cache and TLB Information
    898 	return int(cache)
    899 }
    900 
    901 func (c *CPUInfo) cacheSize() {
    902 	c.Cache.L1D = -1
    903 	c.Cache.L1I = -1
    904 	c.Cache.L2 = -1
    905 	c.Cache.L3 = -1
    906 	vendor, _ := vendorID()
    907 	switch vendor {
    908 	case Intel:
    909 		if maxFunctionID() < 4 {
    910 			return
    911 		}
    912 		c.Cache.L1I, c.Cache.L1D, c.Cache.L2, c.Cache.L3 = 0, 0, 0, 0
    913 		for i := uint32(0); ; i++ {
    914 			eax, ebx, ecx, _ := cpuidex(4, i)
    915 			cacheType := eax & 15
    916 			if cacheType == 0 {
    917 				break
    918 			}
    919 			cacheLevel := (eax >> 5) & 7
    920 			coherency := int(ebx&0xfff) + 1
    921 			partitions := int((ebx>>12)&0x3ff) + 1
    922 			associativity := int((ebx>>22)&0x3ff) + 1
    923 			sets := int(ecx) + 1
    924 			size := associativity * partitions * coherency * sets
    925 			switch cacheLevel {
    926 			case 1:
    927 				if cacheType == 1 {
    928 					// 1 = Data Cache
    929 					c.Cache.L1D = size
    930 				} else if cacheType == 2 {
    931 					// 2 = Instruction Cache
    932 					c.Cache.L1I = size
    933 				} else {
    934 					if c.Cache.L1D < 0 {
    935 						c.Cache.L1I = size
    936 					}
    937 					if c.Cache.L1I < 0 {
    938 						c.Cache.L1I = size
    939 					}
    940 				}
    941 			case 2:
    942 				c.Cache.L2 = size
    943 			case 3:
    944 				c.Cache.L3 = size
    945 			}
    946 		}
    947 	case AMD, Hygon:
    948 		// Untested.
    949 		if maxExtendedFunction() < 0x80000005 {
    950 			return
    951 		}
    952 		_, _, ecx, edx := cpuid(0x80000005)
    953 		c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024)
    954 		c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024)
    955 
    956 		if maxExtendedFunction() < 0x80000006 {
    957 			return
    958 		}
    959 		_, _, ecx, _ = cpuid(0x80000006)
    960 		c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024)
    961 
    962 		// CPUID Fn8000_001D_EAX_x[N:0] Cache Properties
    963 		if maxExtendedFunction() < 0x8000001D || !c.Has(TOPEXT) {
    964 			return
    965 		}
    966 
    967 		// Xen Hypervisor is buggy and returns the same entry no matter ECX value.
    968 		// Hack: When we encounter the same entry 100 times we break.
    969 		nSame := 0
    970 		var last uint32
    971 		for i := uint32(0); i < math.MaxUint32; i++ {
    972 			eax, ebx, ecx, _ := cpuidex(0x8000001D, i)
    973 
    974 			level := (eax >> 5) & 7
    975 			cacheNumSets := ecx + 1
    976 			cacheLineSize := 1 + (ebx & 2047)
    977 			cachePhysPartitions := 1 + ((ebx >> 12) & 511)
    978 			cacheNumWays := 1 + ((ebx >> 22) & 511)
    979 
    980 			typ := eax & 15
    981 			size := int(cacheNumSets * cacheLineSize * cachePhysPartitions * cacheNumWays)
    982 			if typ == 0 {
    983 				return
    984 			}
    985 
    986 			// Check for the same value repeated.
    987 			comb := eax ^ ebx ^ ecx
    988 			if comb == last {
    989 				nSame++
    990 				if nSame == 100 {
    991 					return
    992 				}
    993 			}
    994 			last = comb
    995 
    996 			switch level {
    997 			case 1:
    998 				switch typ {
    999 				case 1:
   1000 					// Data cache
   1001 					c.Cache.L1D = size
   1002 				case 2:
   1003 					// Inst cache
   1004 					c.Cache.L1I = size
   1005 				default:
   1006 					if c.Cache.L1D < 0 {
   1007 						c.Cache.L1I = size
   1008 					}
   1009 					if c.Cache.L1I < 0 {
   1010 						c.Cache.L1I = size
   1011 					}
   1012 				}
   1013 			case 2:
   1014 				c.Cache.L2 = size
   1015 			case 3:
   1016 				c.Cache.L3 = size
   1017 			}
   1018 		}
   1019 	}
   1020 }
   1021 
// SGXEPCSection describes one EPC section as enumerated by hasSGX from the
// sub-leaves of CPUID leaf 0x12.
type SGXEPCSection struct {
	// BaseAddress is assembled from EAX[31:12] (low part) and EBX[19:0]
	// (bits 32 and up) of the sub-leaf.
	BaseAddress uint64
	// EPCSize is assembled the same way from ECX[31:12] and EDX[19:0].
	EPCSize     uint64
}
   1026 
// SGXSupport holds the SGX information gathered by hasSGX. When Available
// is false, hasSGX leaves all other fields at their zero values.
type SGXSupport struct {
	// Available and LaunchControl are copied from the arguments of hasSGX.
	Available           bool
	LaunchControl       bool
	// SGX1Supported and SGX2Supported are bits 0 and 1 of EAX from
	// CPUID leaf 0x12, sub-leaf 0.
	SGX1Supported       bool
	SGX2Supported       bool
	// MaxEnclaveSizeNot64 and MaxEnclaveSize64 are 1 shifted left by
	// EDX[7:0] and EDX[15:8] of the same sub-leaf, respectively.
	MaxEnclaveSizeNot64 int64
	MaxEnclaveSize64    int64
	// EPCSections lists the EPC sections found in sub-leaves 2 and up.
	EPCSections         []SGXEPCSection
}
   1036 
   1037 func hasSGX(available, lc bool) (rval SGXSupport) {
   1038 	rval.Available = available
   1039 
   1040 	if !available {
   1041 		return
   1042 	}
   1043 
   1044 	rval.LaunchControl = lc
   1045 
   1046 	a, _, _, d := cpuidex(0x12, 0)
   1047 	rval.SGX1Supported = a&0x01 != 0
   1048 	rval.SGX2Supported = a&0x02 != 0
   1049 	rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF)     // pow 2
   1050 	rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2
   1051 	rval.EPCSections = make([]SGXEPCSection, 0)
   1052 
   1053 	for subleaf := uint32(2); subleaf < 2+8; subleaf++ {
   1054 		eax, ebx, ecx, edx := cpuidex(0x12, subleaf)
   1055 		leafType := eax & 0xf
   1056 
   1057 		if leafType == 0 {
   1058 			// Invalid subleaf, stop iterating
   1059 			break
   1060 		} else if leafType == 1 {
   1061 			// EPC Section subleaf
   1062 			baseAddress := uint64(eax&0xfffff000) + (uint64(ebx&0x000fffff) << 32)
   1063 			size := uint64(ecx&0xfffff000) + (uint64(edx&0x000fffff) << 32)
   1064 
   1065 			section := SGXEPCSection{BaseAddress: baseAddress, EPCSize: size}
   1066 			rval.EPCSections = append(rval.EPCSections, section)
   1067 		}
   1068 	}
   1069 
   1070 	return
   1071 }
   1072 
   1073 func support() flagSet {
   1074 	var fs flagSet
   1075 	mfi := maxFunctionID()
   1076 	vend, _ := vendorID()
   1077 	if mfi < 0x1 {
   1078 		return fs
   1079 	}
   1080 	family, model, _ := familyModel()
   1081 
   1082 	_, _, c, d := cpuid(1)
   1083 	fs.setIf((d&(1<<0)) != 0, X87)
   1084 	fs.setIf((d&(1<<8)) != 0, CMPXCHG8)
   1085 	fs.setIf((d&(1<<11)) != 0, SYSEE)
   1086 	fs.setIf((d&(1<<15)) != 0, CMOV)
   1087 	fs.setIf((d&(1<<23)) != 0, MMX)
   1088 	fs.setIf((d&(1<<24)) != 0, FXSR)
   1089 	fs.setIf((d&(1<<25)) != 0, FXSROPT)
   1090 	fs.setIf((d&(1<<25)) != 0, SSE)
   1091 	fs.setIf((d&(1<<26)) != 0, SSE2)
   1092 	fs.setIf((c&1) != 0, SSE3)
   1093 	fs.setIf((c&(1<<5)) != 0, VMX)
   1094 	fs.setIf((c&(1<<9)) != 0, SSSE3)
   1095 	fs.setIf((c&(1<<19)) != 0, SSE4)
   1096 	fs.setIf((c&(1<<20)) != 0, SSE42)
   1097 	fs.setIf((c&(1<<25)) != 0, AESNI)
   1098 	fs.setIf((c&(1<<1)) != 0, CLMUL)
   1099 	fs.setIf(c&(1<<22) != 0, MOVBE)
   1100 	fs.setIf(c&(1<<23) != 0, POPCNT)
   1101 	fs.setIf(c&(1<<30) != 0, RDRAND)
   1102 
   1103 	// This bit has been reserved by Intel & AMD for use by hypervisors,
   1104 	// and indicates the presence of a hypervisor.
   1105 	fs.setIf(c&(1<<31) != 0, HYPERVISOR)
   1106 	fs.setIf(c&(1<<29) != 0, F16C)
   1107 	fs.setIf(c&(1<<13) != 0, CX16)
   1108 
   1109 	if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 {
   1110 		fs.setIf(threadsPerCore() > 1, HTT)
   1111 	}
   1112 	if vend == AMD && (d&(1<<28)) != 0 && mfi >= 4 {
   1113 		fs.setIf(threadsPerCore() > 1, HTT)
   1114 	}
   1115 	fs.setIf(c&1<<26 != 0, XSAVE)
   1116 	fs.setIf(c&1<<27 != 0, OSXSAVE)
   1117 	// Check XGETBV/XSAVE (26), OXSAVE (27) and AVX (28) bits
   1118 	const avxCheck = 1<<26 | 1<<27 | 1<<28
   1119 	if c&avxCheck == avxCheck {
   1120 		// Check for OS support
   1121 		eax, _ := xgetbv(0)
   1122 		if (eax & 0x6) == 0x6 {
   1123 			fs.set(AVX)
   1124 			switch vend {
   1125 			case Intel:
   1126 				// Older than Haswell.
   1127 				fs.setIf(family == 6 && model < 60, AVXSLOW)
   1128 			case AMD:
   1129 				// Older than Zen 2
   1130 				fs.setIf(family < 23 || (family == 23 && model < 49), AVXSLOW)
   1131 			}
   1132 		}
   1133 	}
   1134 	// FMA3 can be used with SSE registers, so no OS support is strictly needed.
   1135 	// fma3 and OSXSAVE needed.
   1136 	const fma3Check = 1<<12 | 1<<27
   1137 	fs.setIf(c&fma3Check == fma3Check, FMA3)
   1138 
   1139 	// Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
   1140 	if mfi >= 7 {
   1141 		_, ebx, ecx, edx := cpuidex(7, 0)
   1142 		if fs.inSet(AVX) && (ebx&0x00000020) != 0 {
   1143 			fs.set(AVX2)
   1144 		}
   1145 		// CPUID.(EAX=7, ECX=0).EBX
   1146 		if (ebx & 0x00000008) != 0 {
   1147 			fs.set(BMI1)
   1148 			fs.setIf((ebx&0x00000100) != 0, BMI2)
   1149 		}
   1150 		fs.setIf(ebx&(1<<2) != 0, SGX)
   1151 		fs.setIf(ebx&(1<<4) != 0, HLE)
   1152 		fs.setIf(ebx&(1<<9) != 0, ERMS)
   1153 		fs.setIf(ebx&(1<<11) != 0, RTM)
   1154 		fs.setIf(ebx&(1<<14) != 0, MPX)
   1155 		fs.setIf(ebx&(1<<18) != 0, RDSEED)
   1156 		fs.setIf(ebx&(1<<19) != 0, ADX)
   1157 		fs.setIf(ebx&(1<<29) != 0, SHA)
   1158 
   1159 		// CPUID.(EAX=7, ECX=0).ECX
   1160 		fs.setIf(ecx&(1<<5) != 0, WAITPKG)
   1161 		fs.setIf(ecx&(1<<7) != 0, CETSS)
   1162 		fs.setIf(ecx&(1<<8) != 0, GFNI)
   1163 		fs.setIf(ecx&(1<<9) != 0, VAES)
   1164 		fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ)
   1165 		fs.setIf(ecx&(1<<13) != 0, TME)
   1166 		fs.setIf(ecx&(1<<25) != 0, CLDEMOTE)
   1167 		fs.setIf(ecx&(1<<27) != 0, MOVDIRI)
   1168 		fs.setIf(ecx&(1<<28) != 0, MOVDIR64B)
   1169 		fs.setIf(ecx&(1<<29) != 0, ENQCMD)
   1170 		fs.setIf(ecx&(1<<30) != 0, SGXLC)
   1171 
   1172 		// CPUID.(EAX=7, ECX=0).EDX
   1173 		fs.setIf(edx&(1<<4) != 0, FSRM)
   1174 		fs.setIf(edx&(1<<9) != 0, SRBDS_CTRL)
   1175 		fs.setIf(edx&(1<<10) != 0, MD_CLEAR)
   1176 		fs.setIf(edx&(1<<11) != 0, RTM_ALWAYS_ABORT)
   1177 		fs.setIf(edx&(1<<14) != 0, SERIALIZE)
   1178 		fs.setIf(edx&(1<<15) != 0, HYBRID_CPU)
   1179 		fs.setIf(edx&(1<<16) != 0, TSXLDTRK)
   1180 		fs.setIf(edx&(1<<18) != 0, PCONFIG)
   1181 		fs.setIf(edx&(1<<20) != 0, CETIBT)
   1182 		fs.setIf(edx&(1<<26) != 0, IBPB)
   1183 		fs.setIf(edx&(1<<27) != 0, STIBP)
   1184 		fs.setIf(edx&(1<<28) != 0, FLUSH_L1D)
   1185 		fs.setIf(edx&(1<<29) != 0, IA32_ARCH_CAP)
   1186 		fs.setIf(edx&(1<<30) != 0, IA32_CORE_CAP)
   1187 		fs.setIf(edx&(1<<31) != 0, SPEC_CTRL_SSBD)
   1188 
   1189 		// CPUID.(EAX=7, ECX=1).EDX
   1190 		fs.setIf(edx&(1<<4) != 0, AVXVNNIINT8)
   1191 		fs.setIf(edx&(1<<5) != 0, AVXNECONVERT)
   1192 		fs.setIf(edx&(1<<14) != 0, PREFETCHI)
   1193 
   1194 		// CPUID.(EAX=7, ECX=1).EAX
   1195 		eax1, _, _, _ := cpuidex(7, 1)
   1196 		fs.setIf(fs.inSet(AVX) && eax1&(1<<4) != 0, AVXVNNI)
   1197 		fs.setIf(eax1&(1<<7) != 0, CMPCCXADD)
   1198 		fs.setIf(eax1&(1<<10) != 0, MOVSB_ZL)
   1199 		fs.setIf(eax1&(1<<11) != 0, STOSB_SHORT)
   1200 		fs.setIf(eax1&(1<<12) != 0, CMPSB_SCADBS_SHORT)
   1201 		fs.setIf(eax1&(1<<22) != 0, HRESET)
   1202 		fs.setIf(eax1&(1<<23) != 0, AVXIFMA)
   1203 		fs.setIf(eax1&(1<<26) != 0, LAM)
   1204 
   1205 		// Only detect AVX-512 features if XGETBV is supported
   1206 		if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
   1207 			// Check for OS support
   1208 			eax, _ := xgetbv(0)
   1209 
   1210 			// Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
   1211 			// ZMM16-ZMM31 state are enabled by OS)
   1212 			/// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS).
   1213 			hasAVX512 := (eax>>5)&7 == 7 && (eax>>1)&3 == 3
   1214 			if runtime.GOOS == "darwin" {
   1215 				hasAVX512 = fs.inSet(AVX) && darwinHasAVX512()
   1216 			}
   1217 			if hasAVX512 {
   1218 				fs.setIf(ebx&(1<<16) != 0, AVX512F)
   1219 				fs.setIf(ebx&(1<<17) != 0, AVX512DQ)
   1220 				fs.setIf(ebx&(1<<21) != 0, AVX512IFMA)
   1221 				fs.setIf(ebx&(1<<26) != 0, AVX512PF)
   1222 				fs.setIf(ebx&(1<<27) != 0, AVX512ER)
   1223 				fs.setIf(ebx&(1<<28) != 0, AVX512CD)
   1224 				fs.setIf(ebx&(1<<30) != 0, AVX512BW)
   1225 				fs.setIf(ebx&(1<<31) != 0, AVX512VL)
   1226 				// ecx
   1227 				fs.setIf(ecx&(1<<1) != 0, AVX512VBMI)
   1228 				fs.setIf(ecx&(1<<6) != 0, AVX512VBMI2)
   1229 				fs.setIf(ecx&(1<<11) != 0, AVX512VNNI)
   1230 				fs.setIf(ecx&(1<<12) != 0, AVX512BITALG)
   1231 				fs.setIf(ecx&(1<<14) != 0, AVX512VPOPCNTDQ)
   1232 				// edx
   1233 				fs.setIf(edx&(1<<8) != 0, AVX512VP2INTERSECT)
   1234 				fs.setIf(edx&(1<<22) != 0, AMXBF16)
   1235 				fs.setIf(edx&(1<<23) != 0, AVX512FP16)
   1236 				fs.setIf(edx&(1<<24) != 0, AMXTILE)
   1237 				fs.setIf(edx&(1<<25) != 0, AMXINT8)
   1238 				// eax1 = CPUID.(EAX=7, ECX=1).EAX
   1239 				fs.setIf(eax1&(1<<5) != 0, AVX512BF16)
   1240 				fs.setIf(eax1&(1<<19) != 0, WRMSRNS)
   1241 				fs.setIf(eax1&(1<<21) != 0, AMXFP16)
   1242 				fs.setIf(eax1&(1<<27) != 0, MSRLIST)
   1243 			}
   1244 		}
   1245 
   1246 		// CPUID.(EAX=7, ECX=2)
   1247 		_, _, _, edx = cpuidex(7, 2)
   1248 		fs.setIf(edx&(1<<0) != 0, PSFD)
   1249 		fs.setIf(edx&(1<<1) != 0, IDPRED_CTRL)
   1250 		fs.setIf(edx&(1<<2) != 0, RRSBA_CTRL)
   1251 		fs.setIf(edx&(1<<4) != 0, BHI_CTRL)
   1252 		fs.setIf(edx&(1<<5) != 0, MCDT_NO)
   1253 
   1254 	}
   1255 
   1256 	// Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = 1)
   1257 	// EAX
   1258 	// Bit 00: XSAVEOPT is available.
   1259 	// Bit 01: Supports XSAVEC and the compacted form of XRSTOR if set.
   1260 	// Bit 02: Supports XGETBV with ECX = 1 if set.
   1261 	// Bit 03: Supports XSAVES/XRSTORS and IA32_XSS if set.
   1262 	// Bits 31 - 04: Reserved.
   1263 	// EBX
   1264 	// Bits 31 - 00: The size in bytes of the XSAVE area containing all states enabled by XCRO | IA32_XSS.
   1265 	// ECX
   1266 	// Bits 31 - 00: Reports the supported bits of the lower 32 bits of the IA32_XSS MSR. IA32_XSS[n] can be set to 1 only if ECX[n] is 1.
   1267 	// EDX?
   1268 	// Bits 07 - 00: Used for XCR0. Bit 08: PT state. Bit 09: Used for XCR0. Bits 12 - 10: Reserved. Bit 13: HWP state. Bits 31 - 14: Reserved.
   1269 	if mfi >= 0xd {
   1270 		if fs.inSet(XSAVE) {
   1271 			eax, _, _, _ := cpuidex(0xd, 1)
   1272 			fs.setIf(eax&(1<<0) != 0, XSAVEOPT)
   1273 			fs.setIf(eax&(1<<1) != 0, XSAVEC)
   1274 			fs.setIf(eax&(1<<2) != 0, XGETBV1)
   1275 			fs.setIf(eax&(1<<3) != 0, XSAVES)
   1276 		}
   1277 	}
   1278 	if maxExtendedFunction() >= 0x80000001 {
   1279 		_, _, c, d := cpuid(0x80000001)
   1280 		if (c & (1 << 5)) != 0 {
   1281 			fs.set(LZCNT)
   1282 			fs.set(POPCNT)
   1283 		}
   1284 		// ECX
   1285 		fs.setIf((c&(1<<0)) != 0, LAHF)
   1286 		fs.setIf((c&(1<<2)) != 0, SVM)
   1287 		fs.setIf((c&(1<<6)) != 0, SSE4A)
   1288 		fs.setIf((c&(1<<10)) != 0, IBS)
   1289 		fs.setIf((c&(1<<22)) != 0, TOPEXT)
   1290 
   1291 		// EDX
   1292 		fs.setIf(d&(1<<11) != 0, SYSCALL)
   1293 		fs.setIf(d&(1<<20) != 0, NX)
   1294 		fs.setIf(d&(1<<22) != 0, MMXEXT)
   1295 		fs.setIf(d&(1<<23) != 0, MMX)
   1296 		fs.setIf(d&(1<<24) != 0, FXSR)
   1297 		fs.setIf(d&(1<<25) != 0, FXSROPT)
   1298 		fs.setIf(d&(1<<27) != 0, RDTSCP)
   1299 		fs.setIf(d&(1<<30) != 0, AMD3DNOWEXT)
   1300 		fs.setIf(d&(1<<31) != 0, AMD3DNOW)
   1301 
   1302 		/* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
   1303 		 * used unless the OS has AVX support. */
   1304 		if fs.inSet(AVX) {
   1305 			fs.setIf((c&(1<<11)) != 0, XOP)
   1306 			fs.setIf((c&(1<<16)) != 0, FMA4)
   1307 		}
   1308 
   1309 	}
   1310 	if maxExtendedFunction() >= 0x80000007 {
   1311 		_, b, _, d := cpuid(0x80000007)
   1312 		fs.setIf((b&(1<<0)) != 0, MCAOVERFLOW)
   1313 		fs.setIf((b&(1<<1)) != 0, SUCCOR)
   1314 		fs.setIf((b&(1<<2)) != 0, HWA)
   1315 		fs.setIf((d&(1<<9)) != 0, CPBOOST)
   1316 	}
   1317 
   1318 	if maxExtendedFunction() >= 0x80000008 {
   1319 		_, b, _, _ := cpuid(0x80000008)
   1320 		fs.setIf(b&(1<<28) != 0, PSFD)
   1321 		fs.setIf(b&(1<<27) != 0, CPPC)
   1322 		fs.setIf(b&(1<<24) != 0, SPEC_CTRL_SSBD)
   1323 		fs.setIf(b&(1<<23) != 0, PPIN)
   1324 		fs.setIf(b&(1<<21) != 0, TLB_FLUSH_NESTED)
   1325 		fs.setIf(b&(1<<20) != 0, EFER_LMSLE_UNS)
   1326 		fs.setIf(b&(1<<19) != 0, IBRS_PROVIDES_SMP)
   1327 		fs.setIf(b&(1<<18) != 0, IBRS_PREFERRED)
   1328 		fs.setIf(b&(1<<17) != 0, STIBP_ALWAYSON)
   1329 		fs.setIf(b&(1<<15) != 0, STIBP)
   1330 		fs.setIf(b&(1<<14) != 0, IBRS)
   1331 		fs.setIf((b&(1<<13)) != 0, INT_WBINVD)
   1332 		fs.setIf(b&(1<<12) != 0, IBPB)
   1333 		fs.setIf((b&(1<<9)) != 0, WBNOINVD)
   1334 		fs.setIf((b&(1<<8)) != 0, MCOMMIT)
   1335 		fs.setIf((b&(1<<4)) != 0, RDPRU)
   1336 		fs.setIf((b&(1<<3)) != 0, INVLPGB)
   1337 		fs.setIf((b&(1<<1)) != 0, MSRIRC)
   1338 		fs.setIf((b&(1<<0)) != 0, CLZERO)
   1339 	}
   1340 
   1341 	if fs.inSet(SVM) && maxExtendedFunction() >= 0x8000000A {
   1342 		_, _, _, edx := cpuid(0x8000000A)
   1343 		fs.setIf((edx>>0)&1 == 1, SVMNP)
   1344 		fs.setIf((edx>>1)&1 == 1, LBRVIRT)
   1345 		fs.setIf((edx>>2)&1 == 1, SVML)
   1346 		fs.setIf((edx>>3)&1 == 1, NRIPS)
   1347 		fs.setIf((edx>>4)&1 == 1, TSCRATEMSR)
   1348 		fs.setIf((edx>>5)&1 == 1, VMCBCLEAN)
   1349 		fs.setIf((edx>>6)&1 == 1, SVMFBASID)
   1350 		fs.setIf((edx>>7)&1 == 1, SVMDA)
   1351 		fs.setIf((edx>>10)&1 == 1, SVMPF)
   1352 		fs.setIf((edx>>12)&1 == 1, SVMPFT)
   1353 	}
   1354 
   1355 	if maxExtendedFunction() >= 0x8000001a {
   1356 		eax, _, _, _ := cpuid(0x8000001a)
   1357 		fs.setIf((eax>>0)&1 == 1, FP128)
   1358 		fs.setIf((eax>>1)&1 == 1, MOVU)
   1359 		fs.setIf((eax>>2)&1 == 1, FP256)
   1360 	}
   1361 
   1362 	if maxExtendedFunction() >= 0x8000001b && fs.inSet(IBS) {
   1363 		eax, _, _, _ := cpuid(0x8000001b)
   1364 		fs.setIf((eax>>0)&1 == 1, IBSFFV)
   1365 		fs.setIf((eax>>1)&1 == 1, IBSFETCHSAM)
   1366 		fs.setIf((eax>>2)&1 == 1, IBSOPSAM)
   1367 		fs.setIf((eax>>3)&1 == 1, IBSRDWROPCNT)
   1368 		fs.setIf((eax>>4)&1 == 1, IBSOPCNT)
   1369 		fs.setIf((eax>>5)&1 == 1, IBSBRNTRGT)
   1370 		fs.setIf((eax>>6)&1 == 1, IBSOPCNTEXT)
   1371 		fs.setIf((eax>>7)&1 == 1, IBSRIPINVALIDCHK)
   1372 		fs.setIf((eax>>8)&1 == 1, IBS_OPFUSE)
   1373 		fs.setIf((eax>>9)&1 == 1, IBS_FETCH_CTLX)
   1374 		fs.setIf((eax>>10)&1 == 1, IBS_OPDATA4) // Doc says "Fixed,0. IBS op data 4 MSR supported", but assuming they mean 1.
   1375 		fs.setIf((eax>>11)&1 == 1, IBS_ZEN4)
   1376 	}
   1377 
   1378 	if maxExtendedFunction() >= 0x8000001f && vend == AMD {
   1379 		a, _, _, _ := cpuid(0x8000001f)
   1380 		fs.setIf((a>>0)&1 == 1, SME)
   1381 		fs.setIf((a>>1)&1 == 1, SEV)
   1382 		fs.setIf((a>>2)&1 == 1, MSR_PAGEFLUSH)
   1383 		fs.setIf((a>>3)&1 == 1, SEV_ES)
   1384 		fs.setIf((a>>4)&1 == 1, SEV_SNP)
   1385 		fs.setIf((a>>5)&1 == 1, VMPL)
   1386 		fs.setIf((a>>10)&1 == 1, SME_COHERENT)
   1387 		fs.setIf((a>>11)&1 == 1, SEV_64BIT)
   1388 		fs.setIf((a>>12)&1 == 1, SEV_RESTRICTED)
   1389 		fs.setIf((a>>13)&1 == 1, SEV_ALTERNATIVE)
   1390 		fs.setIf((a>>14)&1 == 1, SEV_DEBUGSWAP)
   1391 		fs.setIf((a>>15)&1 == 1, IBS_PREVENTHOST)
   1392 		fs.setIf((a>>16)&1 == 1, VTE)
   1393 		fs.setIf((a>>24)&1 == 1, VMSA_REGPROT)
   1394 	}
   1395 
   1396 	return fs
   1397 }
   1398 
   1399 func valAsString(values ...uint32) []byte {
   1400 	r := make([]byte, 4*len(values))
   1401 	for i, v := range values {
   1402 		dst := r[i*4:]
   1403 		dst[0] = byte(v & 0xff)
   1404 		dst[1] = byte((v >> 8) & 0xff)
   1405 		dst[2] = byte((v >> 16) & 0xff)
   1406 		dst[3] = byte((v >> 24) & 0xff)
   1407 		switch {
   1408 		case dst[0] == 0:
   1409 			return r[:i*4]
   1410 		case dst[1] == 0:
   1411 			return r[:i*4+1]
   1412 		case dst[2] == 0:
   1413 			return r[:i*4+2]
   1414 		case dst[3] == 0:
   1415 			return r[:i*4+3]
   1416 		}
   1417 	}
   1418 	return r
   1419 }