cpuid.go (48515B)
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.

// Package cpuid provides information about the CPU running the current program.
//
// CPU features are detected on startup, and kept for fast access through the life of the application.
// Currently x86 / x64 (AMD64) as well as arm64 is supported.
//
// You can access the CPU information by accessing the shared CPU variable of the cpuid library.
//
// Package home: https://github.com/klauspost/cpuid
package cpuid

import (
	"flag"
	"fmt"
	"math"
	"math/bits"
	"os"
	"runtime"
	"strings"
)

// AMD reference: https://www.amd.com/system/files/TechDocs/25481.pdf
// and Processor Programming Reference (PPR)

// Vendor is a representation of a CPU vendor.
type Vendor int

// Known CPU vendors. VendorUnknown is the zero value.
const (
	VendorUnknown Vendor = iota
	Intel
	AMD
	VIA
	Transmeta
	NSC
	KVM  // Kernel-based Virtual Machine
	MSVM // Microsoft Hyper-V or Windows Virtual PC
	VMware
	XenHVM
	Bhyve
	Hygon
	SiS
	RDC

	Ampere
	ARM
	Broadcom
	Cavium
	DEC
	Fujitsu
	Infineon
	Motorola
	NVIDIA
	AMCC
	Qualcomm
	Marvell

	lastVendor
)

//go:generate stringer -type=FeatureID,Vendor

// FeatureID is the ID of a specific cpu feature.
type FeatureID int

const (
	// Keep index -1 as unknown
	UNKNOWN = -1

	// Add features
	ADX FeatureID = iota // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	AESNI // Advanced Encryption Standard New Instructions
	AMD3DNOW // AMD 3DNOW
	AMD3DNOWEXT // AMD 3DNowExt
	AMXBF16 // Tile computational operations on BFLOAT16 numbers
	AMXFP16 // Tile computational operations on FP16 numbers
	AMXINT8 // Tile computational operations on 8-bit integers
	AMXTILE // Tile architecture
	AVX // AVX functions
	AVX2 // AVX2 functions
	AVX512BF16 // AVX-512 BFLOAT16 Instructions
	AVX512BITALG // AVX-512 Bit Algorithms
	AVX512BW // AVX-512 Byte and Word Instructions
	AVX512CD // AVX-512 Conflict Detection Instructions
	AVX512DQ // AVX-512 Doubleword and Quadword Instructions
	AVX512ER // AVX-512 Exponential and Reciprocal Instructions
	AVX512F // AVX-512 Foundation
	AVX512FP16 // AVX-512 FP16 Instructions
	AVX512IFMA // AVX-512 Integer Fused Multiply-Add Instructions
	AVX512PF // AVX-512 Prefetch Instructions
	AVX512VBMI // AVX-512 Vector Bit Manipulation Instructions
	AVX512VBMI2 // AVX-512 Vector Bit Manipulation Instructions, Version 2
	AVX512VL // AVX-512 Vector Length Extensions
	AVX512VNNI // AVX-512 Vector Neural Network Instructions
	AVX512VP2INTERSECT // AVX-512 Intersect for D/Q
	AVX512VPOPCNTDQ // AVX-512 Vector Population Count Doubleword and Quadword
	AVXIFMA // AVX-IFMA instructions
	AVXNECONVERT // AVX-NE-CONVERT instructions
	AVXSLOW // Indicates the CPU performs 2 128 bit operations instead of one
	AVXVNNI // AVX (VEX encoded) VNNI neural network instructions
	AVXVNNIINT8 // AVX-VNNI-INT8 instructions
	BHI_CTRL // Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598
	BMI1 // Bit Manipulation Instruction Set 1
	BMI2 // Bit Manipulation Instruction Set 2
	CETIBT // Intel CET Indirect Branch Tracking
	CETSS // Intel CET Shadow Stack
	CLDEMOTE // Cache Line Demote
	CLMUL // Carry-less Multiplication
	CLZERO // CLZERO instruction supported
	CMOV // i686 CMOV
	CMPCCXADD // CMPCCXADD instructions
	CMPSB_SCADBS_SHORT // Fast short CMPSB and SCASB
	CMPXCHG8 // CMPXCHG8 instruction
	CPBOOST // Core Performance Boost
	CPPC // AMD: Collaborative Processor Performance Control
	CX16 // CMPXCHG16B Instruction
	EFER_LMSLE_UNS // AMD: =Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ
	ENQCMD // Enqueue Command
	ERMS // Enhanced REP MOVSB/STOSB
	F16C // Half-precision floating-point conversion
	FLUSH_L1D // Flush L1D cache
	FMA3 // Intel FMA 3. Does not imply AVX.
	FMA4 // Bulldozer FMA4 functions
	FP128 // AMD: When set, the internal FP/SIMD execution datapath is no more than 128-bits wide
	FP256 // AMD: When set, the internal FP/SIMD execution datapath is no more than 256-bits wide
	FSRM // Fast Short Rep Mov
	FXSR // FXSAVE, FXRESTOR instructions, CR4 bit 9
	FXSROPT // FXSAVE/FXRSTOR optimizations
	GFNI // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage.
	HLE // Hardware Lock Elision
	HRESET // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR
	HTT // Hyperthreading (enabled)
	HWA // Hardware assert supported. Indicates support for MSRC001_10
	HYBRID_CPU // This part has CPUs of more than one type.
	HYPERVISOR // This bit has been reserved by Intel & AMD for use by hypervisors
	IA32_ARCH_CAP // IA32_ARCH_CAPABILITIES MSR (Intel)
	IA32_CORE_CAP // IA32_CORE_CAPABILITIES MSR
	IBPB // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
	IBRS // AMD: Indirect Branch Restricted Speculation
	IBRS_PREFERRED // AMD: IBRS is preferred over software solution
	IBRS_PROVIDES_SMP // AMD: IBRS provides Same Mode Protection
	IBS // Instruction Based Sampling (AMD)
	IBSBRNTRGT // Instruction Based Sampling Feature (AMD)
	IBSFETCHSAM // Instruction Based Sampling Feature (AMD)
	IBSFFV // Instruction Based Sampling Feature (AMD)
	IBSOPCNT // Instruction Based Sampling Feature (AMD)
	IBSOPCNTEXT // Instruction Based Sampling Feature (AMD)
	IBSOPSAM // Instruction Based Sampling Feature (AMD)
	IBSRDWROPCNT // Instruction Based Sampling Feature (AMD)
	IBSRIPINVALIDCHK // Instruction Based Sampling Feature (AMD)
	IBS_FETCH_CTLX // AMD: IBS fetch control extended MSR supported
	IBS_OPDATA4 // AMD: IBS op data 4 MSR supported
	IBS_OPFUSE // AMD: Indicates support for IbsOpFuse
	IBS_PREVENTHOST // Disallowing IBS use by the host supported
	IBS_ZEN4 // AMD: Fetch and Op IBS support IBS extensions added with Zen4
	IDPRED_CTRL // IPRED_DIS
	INT_WBINVD // WBINVD/WBNOINVD are interruptible.
	INVLPGB // INVLPGB and TLBSYNC instructions supported
	LAHF // LAHF/SAHF in long mode
	LAM // If set, CPU supports Linear Address Masking
	LBRVIRT // LBR virtualization
	LZCNT // LZCNT instruction
	MCAOVERFLOW // MCA overflow recovery support.
	MCDT_NO // Processor does not exhibit MXCSR Configuration Dependent Timing behavior and does not need to mitigate it.
	MCOMMIT // MCOMMIT instruction supported
	MD_CLEAR // VERW clears CPU buffers
	MMX // standard MMX
	MMXEXT // SSE integer functions or AMD MMX ext
	MOVBE // MOVBE instruction (big-endian)
	MOVDIR64B // Move 64 Bytes as Direct Store
	MOVDIRI // Move Doubleword as Direct Store
	MOVSB_ZL // Fast Zero-Length MOVSB
	MOVU // AMD: MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD
	MPX // Intel MPX (Memory Protection Extensions)
	MSRIRC // Instruction Retired Counter MSR available
	MSRLIST // Read/Write List of Model Specific Registers
	MSR_PAGEFLUSH // Page Flush MSR available
	NRIPS // Indicates support for NRIP save on VMEXIT
	NX // NX (No-Execute) bit
	OSXSAVE // XSAVE enabled by OS
	PCONFIG // PCONFIG for Intel Multi-Key Total Memory Encryption
	POPCNT // POPCNT instruction
	PPIN // AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled
	PREFETCHI // PREFETCHIT0/1 instructions
	PSFD // Predictive Store Forward Disable
	RDPRU // RDPRU instruction supported
	RDRAND // RDRAND instruction is available
	RDSEED // RDSEED instruction is available
	RDTSCP // RDTSCP Instruction
	RRSBA_CTRL // Restricted RSB Alternate
	RTM // Restricted Transactional Memory
	RTM_ALWAYS_ABORT // Indicates that the loaded microcode is forcing RTM abort.
	SERIALIZE // Serialize Instruction Execution
	SEV // AMD Secure Encrypted Virtualization supported
	SEV_64BIT // AMD SEV guest execution only allowed from a 64-bit host
	SEV_ALTERNATIVE // AMD SEV Alternate Injection supported
	SEV_DEBUGSWAP // Full debug state swap supported for SEV-ES guests
	SEV_ES // AMD SEV Encrypted State supported
	SEV_RESTRICTED // AMD SEV Restricted Injection supported
	SEV_SNP // AMD SEV Secure Nested Paging supported
	SGX // Software Guard Extensions
	SGXLC // Software Guard Extensions Launch Control
	SHA // Intel SHA Extensions
	SME // AMD Secure Memory Encryption supported
	SME_COHERENT // AMD Hardware cache coherency across encryption domains enforced
	SPEC_CTRL_SSBD // Speculative Store Bypass Disable
	SRBDS_CTRL // SRBDS mitigation MSR available
	SSE // SSE functions
	SSE2 // P4 SSE functions
	SSE3 // Prescott SSE3 functions
	SSE4 // Penryn SSE4.1 functions
	SSE42 // Nehalem SSE4.2 functions
	SSE4A // AMD Barcelona microarchitecture SSE4a instructions
	SSSE3 // Conroe SSSE3 functions
	STIBP // Single Thread Indirect Branch Predictors
	STIBP_ALWAYSON // AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On
	STOSB_SHORT // Fast short STOSB
	SUCCOR // Software uncorrectable error containment and recovery capability.
	SVM // AMD Secure Virtual Machine
	SVMDA // Indicates support for the SVM decode assists.
	SVMFBASID // SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control
	SVML // AMD SVM lock. Indicates support for SVM-Lock.
	SVMNP // AMD SVM nested paging
	SVMPF // SVM pause intercept filter. Indicates support for the pause intercept filter
	SVMPFT // SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold
	SYSCALL // System-Call Extension (SCE): SYSCALL and SYSRET instructions.
	SYSEE // SYSENTER and SYSEXIT instructions
	TBM // AMD Trailing Bit Manipulation
	TLB_FLUSH_NESTED // AMD: Flushing includes all the nested translations for guest translations
	TME // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE.
	TOPEXT // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX.
	TSCRATEMSR // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104
	TSXLDTRK // Intel TSX Suspend Load Address Tracking
	VAES // Vector AES. AVX(512) versions require additional checks.
	VMCBCLEAN // VMCB clean bits. Indicates support for VMCB clean bits.
	VMPL // AMD VM Permission Levels supported
	VMSA_REGPROT // AMD VMSA Register Protection supported
	VMX // Virtual Machine Extensions
	VPCLMULQDQ // Carry-Less Multiplication Quadword. Requires AVX for 3 register versions.
	VTE // AMD Virtual Transparent Encryption supported
	WAITPKG // TPAUSE, UMONITOR, UMWAIT
	WBNOINVD // Write Back and Do Not Invalidate Cache
	WRMSRNS // Non-Serializing Write to Model Specific Register
	X87 // FPU
	XGETBV1 // Supports XGETBV with ECX = 1
	XOP // Bulldozer XOP functions
	XSAVE // XSAVE, XRESTOR, XSETBV, XGETBV
	XSAVEC // Supports XSAVEC and the compacted form of XRSTOR.
	XSAVEOPT // XSAVEOPT available
	XSAVES // Supports XSAVES/XRSTORS and IA32_XSS

	// ARM features:
	AESARM // AES instructions
	ARMCPUID // Some CPU ID registers readable at user-level
	ASIMD // Advanced SIMD
	ASIMDDP // SIMD Dot Product
	ASIMDHP // Advanced SIMD half-precision floating point
	ASIMDRDM // Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH)
	ATOMICS // Large System Extensions (LSE)
	CRC32 // CRC32/CRC32C instructions
	DCPOP // Data cache clean to Point of Persistence (DC CVAP)
	EVTSTRM // Generic timer
	FCMA // Floating point complex number addition and multiplication
	FP // Single-precision and double-precision floating point
	FPHP // Half-precision floating point
	GPA // Generic Pointer Authentication
	JSCVT // Javascript-style double->int convert (FJCVTZS)
	LRCPC // Weaker release consistency (LDAPR, etc)
	PMULL // Polynomial Multiply instructions (PMULL/PMULL2)
	SHA1 // SHA-1 instructions (SHA1C, etc)
	SHA2 // SHA-2 instructions (SHA256H, etc)
	SHA3 // SHA-3 instructions (EOR3, RAXI, XAR, BCAX)
	SHA512 // SHA512 instructions
	SM3 // SM3 instructions
	SM4 // SM4 instructions
	SVE // Scalable Vector Extension
	// Keep it last. It automatically defines the size of []flagSet
	lastID

	// firstID is the lowest valid feature ID; ParseFeature and
	// flagSet.Strings iterate over [firstID, lastID).
	firstID FeatureID = UNKNOWN + 1
)

// CPUInfo contains information about the detected system CPU.
type CPUInfo struct {
	BrandName string // Brand name reported by the CPU
	VendorID Vendor // Comparable CPU vendor ID
	VendorString string // Raw vendor string.
	featureSet flagSet // Features of the CPU
	PhysicalCores int // Number of physical processor cores in your CPU. Will be 0 if undetectable.
	ThreadsPerCore int // Number of threads per physical core. Will be 1 if undetectable.
	LogicalCores int // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
	Family int // CPU family number
	Model int // CPU model number
	Stepping int // CPU stepping info
	CacheLine int // Cache line size in bytes. Will be 0 if undetectable.
	Hz int64 // Clock speed, if known, 0 otherwise. Will attempt to contain base clock speed.
	BoostFreq int64 // Max clock speed, if known, 0 otherwise
	Cache struct {
		L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
		L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected
		L2 int // L2 Cache (per core or shared). Will be -1 if undetected
		L3 int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected
	}
	SGX SGXSupport
	maxFunc uint32
	maxExFunc uint32
}

// Low-level instruction hooks. They are declared without a value here and
// are not assigned in this part of the file (presumably initCPU installs the
// platform implementations — verify against the per-architecture files).
var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)
var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)
var xgetbv func(index uint32) (eax, edx uint32)
var rdtscpAsm func() (eax, ebx, ecx, edx uint32)

// darwinHasAVX512 reports false by default; being a variable, it can be
// replaced by another implementation.
var darwinHasAVX512 = func() bool { return false }

// CPU contains information about the CPU as detected on startup,
// or when Detect last was called.
//
// Use this as the primary entry point to your data.
var CPU CPUInfo

// init runs initCPU and then Detect once when the package is loaded.
func init() {
	initCPU()
	Detect()
}

// Detect will re-detect current CPU info.
// This will replace the content of the exported CPU variable.
//
// Unless you expect the CPU to change while you are running your program
// you should not need to call this function.
// If you call this, you must ensure that no other goroutine is accessing the
// exported CPU variable.
333 func Detect() { 334 // Set defaults 335 CPU.ThreadsPerCore = 1 336 CPU.Cache.L1I = -1 337 CPU.Cache.L1D = -1 338 CPU.Cache.L2 = -1 339 CPU.Cache.L3 = -1 340 safe := true 341 if detectArmFlag != nil { 342 safe = !*detectArmFlag 343 } 344 addInfo(&CPU, safe) 345 if displayFeats != nil && *displayFeats { 346 fmt.Println("cpu features:", strings.Join(CPU.FeatureSet(), ",")) 347 // Exit with non-zero so tests will print value. 348 os.Exit(1) 349 } 350 if disableFlag != nil { 351 s := strings.Split(*disableFlag, ",") 352 for _, feat := range s { 353 feat := ParseFeature(strings.TrimSpace(feat)) 354 if feat != UNKNOWN { 355 CPU.featureSet.unset(feat) 356 } 357 } 358 } 359 } 360 361 // DetectARM will detect ARM64 features. 362 // This is NOT done automatically since it can potentially crash 363 // if the OS does not handle the command. 364 // If in the future this can be done safely this function may not 365 // do anything. 366 func DetectARM() { 367 addInfo(&CPU, false) 368 } 369 370 var detectArmFlag *bool 371 var displayFeats *bool 372 var disableFlag *string 373 374 // Flags will enable flags. 375 // This must be called *before* flag.Parse AND 376 // Detect must be called after the flags have been parsed. 377 // Note that this means that any detection used in init() functions 378 // will not contain these flags. 379 func Flags() { 380 disableFlag = flag.String("cpu.disable", "", "disable cpu features; comma separated list") 381 displayFeats = flag.Bool("cpu.features", false, "lists cpu features and exits") 382 detectArmFlag = flag.Bool("cpu.arm", false, "allow ARM features to be detected; can potentially crash") 383 } 384 385 // Supports returns whether the CPU supports all of the requested features. 386 func (c CPUInfo) Supports(ids ...FeatureID) bool { 387 for _, id := range ids { 388 if !c.featureSet.inSet(id) { 389 return false 390 } 391 } 392 return true 393 } 394 395 // Has allows for checking a single feature. 396 // Should be inlined by the compiler. 
397 func (c *CPUInfo) Has(id FeatureID) bool { 398 return c.featureSet.inSet(id) 399 } 400 401 // AnyOf returns whether the CPU supports one or more of the requested features. 402 func (c CPUInfo) AnyOf(ids ...FeatureID) bool { 403 for _, id := range ids { 404 if c.featureSet.inSet(id) { 405 return true 406 } 407 } 408 return false 409 } 410 411 // Features contains several features combined for a fast check using 412 // CpuInfo.HasAll 413 type Features *flagSet 414 415 // CombineFeatures allows to combine several features for a close to constant time lookup. 416 func CombineFeatures(ids ...FeatureID) Features { 417 var v flagSet 418 for _, id := range ids { 419 v.set(id) 420 } 421 return &v 422 } 423 424 func (c *CPUInfo) HasAll(f Features) bool { 425 return c.featureSet.hasSetP(f) 426 } 427 428 // https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels 429 var oneOfLevel = CombineFeatures(SYSEE, SYSCALL) 430 var level1Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2) 431 var level2Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3) 432 var level3Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE) 433 var level4Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL) 434 435 // X64Level returns the microarchitecture level detected on the CPU. 436 // If features are lacking or non x64 mode, 0 is returned. 
437 // See https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels 438 func (c CPUInfo) X64Level() int { 439 if !c.featureSet.hasOneOf(oneOfLevel) { 440 return 0 441 } 442 if c.featureSet.hasSetP(level4Features) { 443 return 4 444 } 445 if c.featureSet.hasSetP(level3Features) { 446 return 3 447 } 448 if c.featureSet.hasSetP(level2Features) { 449 return 2 450 } 451 if c.featureSet.hasSetP(level1Features) { 452 return 1 453 } 454 return 0 455 } 456 457 // Disable will disable one or several features. 458 func (c *CPUInfo) Disable(ids ...FeatureID) bool { 459 for _, id := range ids { 460 c.featureSet.unset(id) 461 } 462 return true 463 } 464 465 // Enable will disable one or several features even if they were undetected. 466 // This is of course not recommended for obvious reasons. 467 func (c *CPUInfo) Enable(ids ...FeatureID) bool { 468 for _, id := range ids { 469 c.featureSet.set(id) 470 } 471 return true 472 } 473 474 // IsVendor returns true if vendor is recognized as Intel 475 func (c CPUInfo) IsVendor(v Vendor) bool { 476 return c.VendorID == v 477 } 478 479 // FeatureSet returns all available features as strings. 480 func (c CPUInfo) FeatureSet() []string { 481 s := make([]string, 0, c.featureSet.nEnabled()) 482 s = append(s, c.featureSet.Strings()...) 483 return s 484 } 485 486 // RTCounter returns the 64-bit time-stamp counter 487 // Uses the RDTSCP instruction. The value 0 is returned 488 // if the CPU does not support the instruction. 489 func (c CPUInfo) RTCounter() uint64 { 490 if !c.Supports(RDTSCP) { 491 return 0 492 } 493 a, _, _, d := rdtscpAsm() 494 return uint64(a) | (uint64(d) << 32) 495 } 496 497 // Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP. 498 // This variable is OS dependent, but on Linux contains information 499 // about the current cpu/core the code is running on. 500 // If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned. 
501 func (c CPUInfo) Ia32TscAux() uint32 { 502 if !c.Supports(RDTSCP) { 503 return 0 504 } 505 _, _, ecx, _ := rdtscpAsm() 506 return ecx 507 } 508 509 // LogicalCPU will return the Logical CPU the code is currently executing on. 510 // This is likely to change when the OS re-schedules the running thread 511 // to another CPU. 512 // If the current core cannot be detected, -1 will be returned. 513 func (c CPUInfo) LogicalCPU() int { 514 if c.maxFunc < 1 { 515 return -1 516 } 517 _, ebx, _, _ := cpuid(1) 518 return int(ebx >> 24) 519 } 520 521 // frequencies tries to compute the clock speed of the CPU. If leaf 15 is 522 // supported, use it, otherwise parse the brand string. Yes, really. 523 func (c *CPUInfo) frequencies() { 524 c.Hz, c.BoostFreq = 0, 0 525 mfi := maxFunctionID() 526 if mfi >= 0x15 { 527 eax, ebx, ecx, _ := cpuid(0x15) 528 if eax != 0 && ebx != 0 && ecx != 0 { 529 c.Hz = (int64(ecx) * int64(ebx)) / int64(eax) 530 } 531 } 532 if mfi >= 0x16 { 533 a, b, _, _ := cpuid(0x16) 534 // Base... 535 if a&0xffff > 0 { 536 c.Hz = int64(a&0xffff) * 1_000_000 537 } 538 // Boost... 539 if b&0xffff > 0 { 540 c.BoostFreq = int64(b&0xffff) * 1_000_000 541 } 542 } 543 if c.Hz > 0 { 544 return 545 } 546 547 // computeHz determines the official rated speed of a CPU from its brand 548 // string. This insanity is *actually the official documented way to do 549 // this according to Intel*, prior to leaf 0x15 existing. The official 550 // documentation only shows this working for exactly `x.xx` or `xxxx` 551 // cases, e.g., `2.50GHz` or `1300MHz`; this parser will accept other 552 // sizes. 
553 model := c.BrandName 554 hz := strings.LastIndex(model, "Hz") 555 if hz < 3 { 556 return 557 } 558 var multiplier int64 559 switch model[hz-1] { 560 case 'M': 561 multiplier = 1000 * 1000 562 case 'G': 563 multiplier = 1000 * 1000 * 1000 564 case 'T': 565 multiplier = 1000 * 1000 * 1000 * 1000 566 } 567 if multiplier == 0 { 568 return 569 } 570 freq := int64(0) 571 divisor := int64(0) 572 decimalShift := int64(1) 573 var i int 574 for i = hz - 2; i >= 0 && model[i] != ' '; i-- { 575 if model[i] >= '0' && model[i] <= '9' { 576 freq += int64(model[i]-'0') * decimalShift 577 decimalShift *= 10 578 } else if model[i] == '.' { 579 if divisor != 0 { 580 return 581 } 582 divisor = decimalShift 583 } else { 584 return 585 } 586 } 587 // we didn't find a space 588 if i < 0 { 589 return 590 } 591 if divisor != 0 { 592 c.Hz = (freq * multiplier) / divisor 593 return 594 } 595 c.Hz = freq * multiplier 596 } 597 598 // VM Will return true if the cpu id indicates we are in 599 // a virtual machine. 600 func (c CPUInfo) VM() bool { 601 return CPU.featureSet.inSet(HYPERVISOR) 602 } 603 604 // flags contains detected cpu features and characteristics 605 type flags uint64 606 607 // log2(bits_in_uint64) 608 const flagBitsLog2 = 6 609 const flagBits = 1 << flagBitsLog2 610 const flagMask = flagBits - 1 611 612 // flagSet contains detected cpu features and characteristics in an array of flags 613 type flagSet [(lastID + flagMask) / flagBits]flags 614 615 func (s *flagSet) inSet(feat FeatureID) bool { 616 return s[feat>>flagBitsLog2]&(1<<(feat&flagMask)) != 0 617 } 618 619 func (s *flagSet) set(feat FeatureID) { 620 s[feat>>flagBitsLog2] |= 1 << (feat & flagMask) 621 } 622 623 // setIf will set a feature if boolean is true. 
624 func (s *flagSet) setIf(cond bool, features ...FeatureID) { 625 if cond { 626 for _, offset := range features { 627 s[offset>>flagBitsLog2] |= 1 << (offset & flagMask) 628 } 629 } 630 } 631 632 func (s *flagSet) unset(offset FeatureID) { 633 bit := flags(1 << (offset & flagMask)) 634 s[offset>>flagBitsLog2] = s[offset>>flagBitsLog2] & ^bit 635 } 636 637 // or with another flagset. 638 func (s *flagSet) or(other flagSet) { 639 for i, v := range other[:] { 640 s[i] |= v 641 } 642 } 643 644 // hasSet returns whether all features are present. 645 func (s *flagSet) hasSet(other flagSet) bool { 646 for i, v := range other[:] { 647 if s[i]&v != v { 648 return false 649 } 650 } 651 return true 652 } 653 654 // hasSet returns whether all features are present. 655 func (s *flagSet) hasSetP(other *flagSet) bool { 656 for i, v := range other[:] { 657 if s[i]&v != v { 658 return false 659 } 660 } 661 return true 662 } 663 664 // hasOneOf returns whether one or more features are present. 665 func (s *flagSet) hasOneOf(other *flagSet) bool { 666 for i, v := range other[:] { 667 if s[i]&v != 0 { 668 return true 669 } 670 } 671 return false 672 } 673 674 // nEnabled will return the number of enabled flags. 675 func (s *flagSet) nEnabled() (n int) { 676 for _, v := range s[:] { 677 n += bits.OnesCount64(uint64(v)) 678 } 679 return n 680 } 681 682 func flagSetWith(feat ...FeatureID) flagSet { 683 var res flagSet 684 for _, f := range feat { 685 res.set(f) 686 } 687 return res 688 } 689 690 // ParseFeature will parse the string and return the ID of the matching feature. 691 // Will return UNKNOWN if not found. 692 func ParseFeature(s string) FeatureID { 693 s = strings.ToUpper(s) 694 for i := firstID; i < lastID; i++ { 695 if i.String() == s { 696 return i 697 } 698 } 699 return UNKNOWN 700 } 701 702 // Strings returns an array of the detected features for FlagsSet. 
703 func (s flagSet) Strings() []string { 704 if len(s) == 0 { 705 return []string{""} 706 } 707 r := make([]string, 0) 708 for i := firstID; i < lastID; i++ { 709 if s.inSet(i) { 710 r = append(r, i.String()) 711 } 712 } 713 return r 714 } 715 716 func maxExtendedFunction() uint32 { 717 eax, _, _, _ := cpuid(0x80000000) 718 return eax 719 } 720 721 func maxFunctionID() uint32 { 722 a, _, _, _ := cpuid(0) 723 return a 724 } 725 726 func brandName() string { 727 if maxExtendedFunction() >= 0x80000004 { 728 v := make([]uint32, 0, 48) 729 for i := uint32(0); i < 3; i++ { 730 a, b, c, d := cpuid(0x80000002 + i) 731 v = append(v, a, b, c, d) 732 } 733 return strings.Trim(string(valAsString(v...)), " ") 734 } 735 return "unknown" 736 } 737 738 func threadsPerCore() int { 739 mfi := maxFunctionID() 740 vend, _ := vendorID() 741 742 if mfi < 0x4 || (vend != Intel && vend != AMD) { 743 return 1 744 } 745 746 if mfi < 0xb { 747 if vend != Intel { 748 return 1 749 } 750 _, b, _, d := cpuid(1) 751 if (d & (1 << 28)) != 0 { 752 // v will contain logical core count 753 v := (b >> 16) & 255 754 if v > 1 { 755 a4, _, _, _ := cpuid(4) 756 // physical cores 757 v2 := (a4 >> 26) + 1 758 if v2 > 0 { 759 return int(v) / int(v2) 760 } 761 } 762 } 763 return 1 764 } 765 _, b, _, _ := cpuidex(0xb, 0) 766 if b&0xffff == 0 { 767 if vend == AMD { 768 // Workaround for AMD returning 0, assume 2 if >= Zen 2 769 // It will be more correct than not. 770 fam, _, _ := familyModel() 771 _, _, _, d := cpuid(1) 772 if (d&(1<<28)) != 0 && fam >= 23 { 773 return 2 774 } 775 } 776 return 1 777 } 778 return int(b & 0xffff) 779 } 780 781 func logicalCores() int { 782 mfi := maxFunctionID() 783 v, _ := vendorID() 784 switch v { 785 case Intel: 786 // Use this on old Intel processors 787 if mfi < 0xb { 788 if mfi < 1 { 789 return 0 790 } 791 // CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID) 792 // that can be assigned to logical processors in a physical package. 
793 // The value may not be the same as the number of logical processors that are present in the hardware of a physical package. 794 _, ebx, _, _ := cpuid(1) 795 logical := (ebx >> 16) & 0xff 796 return int(logical) 797 } 798 _, b, _, _ := cpuidex(0xb, 1) 799 return int(b & 0xffff) 800 case AMD, Hygon: 801 _, b, _, _ := cpuid(1) 802 return int((b >> 16) & 0xff) 803 default: 804 return 0 805 } 806 } 807 808 func familyModel() (family, model, stepping int) { 809 if maxFunctionID() < 0x1 { 810 return 0, 0, 0 811 } 812 eax, _, _, _ := cpuid(1) 813 // If BaseFamily[3:0] is less than Fh then ExtendedFamily[7:0] is reserved and Family is equal to BaseFamily[3:0]. 814 family = int((eax >> 8) & 0xf) 815 extFam := family == 0x6 // Intel is 0x6, needs extended model. 816 if family == 0xf { 817 // Add ExtFamily 818 family += int((eax >> 20) & 0xff) 819 extFam = true 820 } 821 // If BaseFamily[3:0] is less than 0Fh then ExtendedModel[3:0] is reserved and Model is equal to BaseModel[3:0]. 822 model = int((eax >> 4) & 0xf) 823 if extFam { 824 // Add ExtModel 825 model += int((eax >> 12) & 0xf0) 826 } 827 stepping = int(eax & 0xf) 828 return family, model, stepping 829 } 830 831 func physicalCores() int { 832 v, _ := vendorID() 833 switch v { 834 case Intel: 835 return logicalCores() / threadsPerCore() 836 case AMD, Hygon: 837 lc := logicalCores() 838 tpc := threadsPerCore() 839 if lc > 0 && tpc > 0 { 840 return lc / tpc 841 } 842 843 // The following is inaccurate on AMD EPYC 7742 64-Core Processor 844 if maxExtendedFunction() >= 0x80000008 { 845 _, _, c, _ := cpuid(0x80000008) 846 if c&0xff > 0 { 847 return int(c&0xff) + 1 848 } 849 } 850 } 851 return 0 852 } 853 854 // Except from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID 855 var vendorMapping = map[string]Vendor{ 856 "AMDisbetter!": AMD, 857 "AuthenticAMD": AMD, 858 "CentaurHauls": VIA, 859 "GenuineIntel": Intel, 860 "TransmetaCPU": Transmeta, 861 "GenuineTMx86": Transmeta, 862 "Geode by NSC": NSC, 863 "VIA VIA 
VIA ": VIA, 864 "KVMKVMKVMKVM": KVM, 865 "Microsoft Hv": MSVM, 866 "VMwareVMware": VMware, 867 "XenVMMXenVMM": XenHVM, 868 "bhyve bhyve ": Bhyve, 869 "HygonGenuine": Hygon, 870 "Vortex86 SoC": SiS, 871 "SiS SiS SiS ": SiS, 872 "RiseRiseRise": SiS, 873 "Genuine RDC": RDC, 874 } 875 876 func vendorID() (Vendor, string) { 877 _, b, c, d := cpuid(0) 878 v := string(valAsString(b, d, c)) 879 vend, ok := vendorMapping[v] 880 if !ok { 881 return VendorUnknown, v 882 } 883 return vend, v 884 } 885 886 func cacheLine() int { 887 if maxFunctionID() < 0x1 { 888 return 0 889 } 890 891 _, ebx, _, _ := cpuid(1) 892 cache := (ebx & 0xff00) >> 5 // cflush size 893 if cache == 0 && maxExtendedFunction() >= 0x80000006 { 894 _, _, ecx, _ := cpuid(0x80000006) 895 cache = ecx & 0xff // cacheline size 896 } 897 // TODO: Read from Cache and TLB Information 898 return int(cache) 899 } 900 901 func (c *CPUInfo) cacheSize() { 902 c.Cache.L1D = -1 903 c.Cache.L1I = -1 904 c.Cache.L2 = -1 905 c.Cache.L3 = -1 906 vendor, _ := vendorID() 907 switch vendor { 908 case Intel: 909 if maxFunctionID() < 4 { 910 return 911 } 912 c.Cache.L1I, c.Cache.L1D, c.Cache.L2, c.Cache.L3 = 0, 0, 0, 0 913 for i := uint32(0); ; i++ { 914 eax, ebx, ecx, _ := cpuidex(4, i) 915 cacheType := eax & 15 916 if cacheType == 0 { 917 break 918 } 919 cacheLevel := (eax >> 5) & 7 920 coherency := int(ebx&0xfff) + 1 921 partitions := int((ebx>>12)&0x3ff) + 1 922 associativity := int((ebx>>22)&0x3ff) + 1 923 sets := int(ecx) + 1 924 size := associativity * partitions * coherency * sets 925 switch cacheLevel { 926 case 1: 927 if cacheType == 1 { 928 // 1 = Data Cache 929 c.Cache.L1D = size 930 } else if cacheType == 2 { 931 // 2 = Instruction Cache 932 c.Cache.L1I = size 933 } else { 934 if c.Cache.L1D < 0 { 935 c.Cache.L1I = size 936 } 937 if c.Cache.L1I < 0 { 938 c.Cache.L1I = size 939 } 940 } 941 case 2: 942 c.Cache.L2 = size 943 case 3: 944 c.Cache.L3 = size 945 } 946 } 947 case AMD, Hygon: 948 // Untested. 
949 if maxExtendedFunction() < 0x80000005 { 950 return 951 } 952 _, _, ecx, edx := cpuid(0x80000005) 953 c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024) 954 c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024) 955 956 if maxExtendedFunction() < 0x80000006 { 957 return 958 } 959 _, _, ecx, _ = cpuid(0x80000006) 960 c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024) 961 962 // CPUID Fn8000_001D_EAX_x[N:0] Cache Properties 963 if maxExtendedFunction() < 0x8000001D || !c.Has(TOPEXT) { 964 return 965 } 966 967 // Xen Hypervisor is buggy and returns the same entry no matter ECX value. 968 // Hack: When we encounter the same entry 100 times we break. 969 nSame := 0 970 var last uint32 971 for i := uint32(0); i < math.MaxUint32; i++ { 972 eax, ebx, ecx, _ := cpuidex(0x8000001D, i) 973 974 level := (eax >> 5) & 7 975 cacheNumSets := ecx + 1 976 cacheLineSize := 1 + (ebx & 2047) 977 cachePhysPartitions := 1 + ((ebx >> 12) & 511) 978 cacheNumWays := 1 + ((ebx >> 22) & 511) 979 980 typ := eax & 15 981 size := int(cacheNumSets * cacheLineSize * cachePhysPartitions * cacheNumWays) 982 if typ == 0 { 983 return 984 } 985 986 // Check for the same value repeated. 
987 comb := eax ^ ebx ^ ecx 988 if comb == last { 989 nSame++ 990 if nSame == 100 { 991 return 992 } 993 } 994 last = comb 995 996 switch level { 997 case 1: 998 switch typ { 999 case 1: 1000 // Data cache 1001 c.Cache.L1D = size 1002 case 2: 1003 // Inst cache 1004 c.Cache.L1I = size 1005 default: 1006 if c.Cache.L1D < 0 { 1007 c.Cache.L1I = size 1008 } 1009 if c.Cache.L1I < 0 { 1010 c.Cache.L1I = size 1011 } 1012 } 1013 case 2: 1014 c.Cache.L2 = size 1015 case 3: 1016 c.Cache.L3 = size 1017 } 1018 } 1019 } 1020 } 1021 1022 type SGXEPCSection struct { 1023 BaseAddress uint64 1024 EPCSize uint64 1025 } 1026 1027 type SGXSupport struct { 1028 Available bool 1029 LaunchControl bool 1030 SGX1Supported bool 1031 SGX2Supported bool 1032 MaxEnclaveSizeNot64 int64 1033 MaxEnclaveSize64 int64 1034 EPCSections []SGXEPCSection 1035 } 1036 1037 func hasSGX(available, lc bool) (rval SGXSupport) { 1038 rval.Available = available 1039 1040 if !available { 1041 return 1042 } 1043 1044 rval.LaunchControl = lc 1045 1046 a, _, _, d := cpuidex(0x12, 0) 1047 rval.SGX1Supported = a&0x01 != 0 1048 rval.SGX2Supported = a&0x02 != 0 1049 rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF) // pow 2 1050 rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2 1051 rval.EPCSections = make([]SGXEPCSection, 0) 1052 1053 for subleaf := uint32(2); subleaf < 2+8; subleaf++ { 1054 eax, ebx, ecx, edx := cpuidex(0x12, subleaf) 1055 leafType := eax & 0xf 1056 1057 if leafType == 0 { 1058 // Invalid subleaf, stop iterating 1059 break 1060 } else if leafType == 1 { 1061 // EPC Section subleaf 1062 baseAddress := uint64(eax&0xfffff000) + (uint64(ebx&0x000fffff) << 32) 1063 size := uint64(ecx&0xfffff000) + (uint64(edx&0x000fffff) << 32) 1064 1065 section := SGXEPCSection{BaseAddress: baseAddress, EPCSize: size} 1066 rval.EPCSections = append(rval.EPCSections, section) 1067 } 1068 } 1069 1070 return 1071 } 1072 1073 func support() flagSet { 1074 var fs flagSet 1075 mfi := maxFunctionID() 1076 vend, _ := vendorID() 
1077 if mfi < 0x1 { 1078 return fs 1079 } 1080 family, model, _ := familyModel() 1081 1082 _, _, c, d := cpuid(1) 1083 fs.setIf((d&(1<<0)) != 0, X87) 1084 fs.setIf((d&(1<<8)) != 0, CMPXCHG8) 1085 fs.setIf((d&(1<<11)) != 0, SYSEE) 1086 fs.setIf((d&(1<<15)) != 0, CMOV) 1087 fs.setIf((d&(1<<23)) != 0, MMX) 1088 fs.setIf((d&(1<<24)) != 0, FXSR) 1089 fs.setIf((d&(1<<25)) != 0, FXSROPT) 1090 fs.setIf((d&(1<<25)) != 0, SSE) 1091 fs.setIf((d&(1<<26)) != 0, SSE2) 1092 fs.setIf((c&1) != 0, SSE3) 1093 fs.setIf((c&(1<<5)) != 0, VMX) 1094 fs.setIf((c&(1<<9)) != 0, SSSE3) 1095 fs.setIf((c&(1<<19)) != 0, SSE4) 1096 fs.setIf((c&(1<<20)) != 0, SSE42) 1097 fs.setIf((c&(1<<25)) != 0, AESNI) 1098 fs.setIf((c&(1<<1)) != 0, CLMUL) 1099 fs.setIf(c&(1<<22) != 0, MOVBE) 1100 fs.setIf(c&(1<<23) != 0, POPCNT) 1101 fs.setIf(c&(1<<30) != 0, RDRAND) 1102 1103 // This bit has been reserved by Intel & AMD for use by hypervisors, 1104 // and indicates the presence of a hypervisor. 1105 fs.setIf(c&(1<<31) != 0, HYPERVISOR) 1106 fs.setIf(c&(1<<29) != 0, F16C) 1107 fs.setIf(c&(1<<13) != 0, CX16) 1108 1109 if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 { 1110 fs.setIf(threadsPerCore() > 1, HTT) 1111 } 1112 if vend == AMD && (d&(1<<28)) != 0 && mfi >= 4 { 1113 fs.setIf(threadsPerCore() > 1, HTT) 1114 } 1115 fs.setIf(c&1<<26 != 0, XSAVE) 1116 fs.setIf(c&1<<27 != 0, OSXSAVE) 1117 // Check XGETBV/XSAVE (26), OXSAVE (27) and AVX (28) bits 1118 const avxCheck = 1<<26 | 1<<27 | 1<<28 1119 if c&avxCheck == avxCheck { 1120 // Check for OS support 1121 eax, _ := xgetbv(0) 1122 if (eax & 0x6) == 0x6 { 1123 fs.set(AVX) 1124 switch vend { 1125 case Intel: 1126 // Older than Haswell. 1127 fs.setIf(family == 6 && model < 60, AVXSLOW) 1128 case AMD: 1129 // Older than Zen 2 1130 fs.setIf(family < 23 || (family == 23 && model < 49), AVXSLOW) 1131 } 1132 } 1133 } 1134 // FMA3 can be used with SSE registers, so no OS support is strictly needed. 1135 // fma3 and OSXSAVE needed. 
1136 const fma3Check = 1<<12 | 1<<27 1137 fs.setIf(c&fma3Check == fma3Check, FMA3) 1138 1139 // Check AVX2, AVX2 requires OS support, but BMI1/2 don't. 1140 if mfi >= 7 { 1141 _, ebx, ecx, edx := cpuidex(7, 0) 1142 if fs.inSet(AVX) && (ebx&0x00000020) != 0 { 1143 fs.set(AVX2) 1144 } 1145 // CPUID.(EAX=7, ECX=0).EBX 1146 if (ebx & 0x00000008) != 0 { 1147 fs.set(BMI1) 1148 fs.setIf((ebx&0x00000100) != 0, BMI2) 1149 } 1150 fs.setIf(ebx&(1<<2) != 0, SGX) 1151 fs.setIf(ebx&(1<<4) != 0, HLE) 1152 fs.setIf(ebx&(1<<9) != 0, ERMS) 1153 fs.setIf(ebx&(1<<11) != 0, RTM) 1154 fs.setIf(ebx&(1<<14) != 0, MPX) 1155 fs.setIf(ebx&(1<<18) != 0, RDSEED) 1156 fs.setIf(ebx&(1<<19) != 0, ADX) 1157 fs.setIf(ebx&(1<<29) != 0, SHA) 1158 1159 // CPUID.(EAX=7, ECX=0).ECX 1160 fs.setIf(ecx&(1<<5) != 0, WAITPKG) 1161 fs.setIf(ecx&(1<<7) != 0, CETSS) 1162 fs.setIf(ecx&(1<<8) != 0, GFNI) 1163 fs.setIf(ecx&(1<<9) != 0, VAES) 1164 fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ) 1165 fs.setIf(ecx&(1<<13) != 0, TME) 1166 fs.setIf(ecx&(1<<25) != 0, CLDEMOTE) 1167 fs.setIf(ecx&(1<<27) != 0, MOVDIRI) 1168 fs.setIf(ecx&(1<<28) != 0, MOVDIR64B) 1169 fs.setIf(ecx&(1<<29) != 0, ENQCMD) 1170 fs.setIf(ecx&(1<<30) != 0, SGXLC) 1171 1172 // CPUID.(EAX=7, ECX=0).EDX 1173 fs.setIf(edx&(1<<4) != 0, FSRM) 1174 fs.setIf(edx&(1<<9) != 0, SRBDS_CTRL) 1175 fs.setIf(edx&(1<<10) != 0, MD_CLEAR) 1176 fs.setIf(edx&(1<<11) != 0, RTM_ALWAYS_ABORT) 1177 fs.setIf(edx&(1<<14) != 0, SERIALIZE) 1178 fs.setIf(edx&(1<<15) != 0, HYBRID_CPU) 1179 fs.setIf(edx&(1<<16) != 0, TSXLDTRK) 1180 fs.setIf(edx&(1<<18) != 0, PCONFIG) 1181 fs.setIf(edx&(1<<20) != 0, CETIBT) 1182 fs.setIf(edx&(1<<26) != 0, IBPB) 1183 fs.setIf(edx&(1<<27) != 0, STIBP) 1184 fs.setIf(edx&(1<<28) != 0, FLUSH_L1D) 1185 fs.setIf(edx&(1<<29) != 0, IA32_ARCH_CAP) 1186 fs.setIf(edx&(1<<30) != 0, IA32_CORE_CAP) 1187 fs.setIf(edx&(1<<31) != 0, SPEC_CTRL_SSBD) 1188 1189 // CPUID.(EAX=7, ECX=1).EDX 1190 fs.setIf(edx&(1<<4) != 0, AVXVNNIINT8) 1191 fs.setIf(edx&(1<<5) != 0, 
AVXNECONVERT) 1192 fs.setIf(edx&(1<<14) != 0, PREFETCHI) 1193 1194 // CPUID.(EAX=7, ECX=1).EAX 1195 eax1, _, _, _ := cpuidex(7, 1) 1196 fs.setIf(fs.inSet(AVX) && eax1&(1<<4) != 0, AVXVNNI) 1197 fs.setIf(eax1&(1<<7) != 0, CMPCCXADD) 1198 fs.setIf(eax1&(1<<10) != 0, MOVSB_ZL) 1199 fs.setIf(eax1&(1<<11) != 0, STOSB_SHORT) 1200 fs.setIf(eax1&(1<<12) != 0, CMPSB_SCADBS_SHORT) 1201 fs.setIf(eax1&(1<<22) != 0, HRESET) 1202 fs.setIf(eax1&(1<<23) != 0, AVXIFMA) 1203 fs.setIf(eax1&(1<<26) != 0, LAM) 1204 1205 // Only detect AVX-512 features if XGETBV is supported 1206 if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) { 1207 // Check for OS support 1208 eax, _ := xgetbv(0) 1209 1210 // Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and 1211 // ZMM16-ZMM31 state are enabled by OS) 1212 /// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS). 1213 hasAVX512 := (eax>>5)&7 == 7 && (eax>>1)&3 == 3 1214 if runtime.GOOS == "darwin" { 1215 hasAVX512 = fs.inSet(AVX) && darwinHasAVX512() 1216 } 1217 if hasAVX512 { 1218 fs.setIf(ebx&(1<<16) != 0, AVX512F) 1219 fs.setIf(ebx&(1<<17) != 0, AVX512DQ) 1220 fs.setIf(ebx&(1<<21) != 0, AVX512IFMA) 1221 fs.setIf(ebx&(1<<26) != 0, AVX512PF) 1222 fs.setIf(ebx&(1<<27) != 0, AVX512ER) 1223 fs.setIf(ebx&(1<<28) != 0, AVX512CD) 1224 fs.setIf(ebx&(1<<30) != 0, AVX512BW) 1225 fs.setIf(ebx&(1<<31) != 0, AVX512VL) 1226 // ecx 1227 fs.setIf(ecx&(1<<1) != 0, AVX512VBMI) 1228 fs.setIf(ecx&(1<<6) != 0, AVX512VBMI2) 1229 fs.setIf(ecx&(1<<11) != 0, AVX512VNNI) 1230 fs.setIf(ecx&(1<<12) != 0, AVX512BITALG) 1231 fs.setIf(ecx&(1<<14) != 0, AVX512VPOPCNTDQ) 1232 // edx 1233 fs.setIf(edx&(1<<8) != 0, AVX512VP2INTERSECT) 1234 fs.setIf(edx&(1<<22) != 0, AMXBF16) 1235 fs.setIf(edx&(1<<23) != 0, AVX512FP16) 1236 fs.setIf(edx&(1<<24) != 0, AMXTILE) 1237 fs.setIf(edx&(1<<25) != 0, AMXINT8) 1238 // eax1 = CPUID.(EAX=7, ECX=1).EAX 1239 fs.setIf(eax1&(1<<5) != 0, AVX512BF16) 1240 fs.setIf(eax1&(1<<19) != 0, WRMSRNS) 1241 
fs.setIf(eax1&(1<<21) != 0, AMXFP16) 1242 fs.setIf(eax1&(1<<27) != 0, MSRLIST) 1243 } 1244 } 1245 1246 // CPUID.(EAX=7, ECX=2) 1247 _, _, _, edx = cpuidex(7, 2) 1248 fs.setIf(edx&(1<<0) != 0, PSFD) 1249 fs.setIf(edx&(1<<1) != 0, IDPRED_CTRL) 1250 fs.setIf(edx&(1<<2) != 0, RRSBA_CTRL) 1251 fs.setIf(edx&(1<<4) != 0, BHI_CTRL) 1252 fs.setIf(edx&(1<<5) != 0, MCDT_NO) 1253 1254 } 1255 1256 // Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = 1) 1257 // EAX 1258 // Bit 00: XSAVEOPT is available. 1259 // Bit 01: Supports XSAVEC and the compacted form of XRSTOR if set. 1260 // Bit 02: Supports XGETBV with ECX = 1 if set. 1261 // Bit 03: Supports XSAVES/XRSTORS and IA32_XSS if set. 1262 // Bits 31 - 04: Reserved. 1263 // EBX 1264 // Bits 31 - 00: The size in bytes of the XSAVE area containing all states enabled by XCRO | IA32_XSS. 1265 // ECX 1266 // Bits 31 - 00: Reports the supported bits of the lower 32 bits of the IA32_XSS MSR. IA32_XSS[n] can be set to 1 only if ECX[n] is 1. 1267 // EDX? 1268 // Bits 07 - 00: Used for XCR0. Bit 08: PT state. Bit 09: Used for XCR0. Bits 12 - 10: Reserved. Bit 13: HWP state. Bits 31 - 14: Reserved. 
1269 if mfi >= 0xd { 1270 if fs.inSet(XSAVE) { 1271 eax, _, _, _ := cpuidex(0xd, 1) 1272 fs.setIf(eax&(1<<0) != 0, XSAVEOPT) 1273 fs.setIf(eax&(1<<1) != 0, XSAVEC) 1274 fs.setIf(eax&(1<<2) != 0, XGETBV1) 1275 fs.setIf(eax&(1<<3) != 0, XSAVES) 1276 } 1277 } 1278 if maxExtendedFunction() >= 0x80000001 { 1279 _, _, c, d := cpuid(0x80000001) 1280 if (c & (1 << 5)) != 0 { 1281 fs.set(LZCNT) 1282 fs.set(POPCNT) 1283 } 1284 // ECX 1285 fs.setIf((c&(1<<0)) != 0, LAHF) 1286 fs.setIf((c&(1<<2)) != 0, SVM) 1287 fs.setIf((c&(1<<6)) != 0, SSE4A) 1288 fs.setIf((c&(1<<10)) != 0, IBS) 1289 fs.setIf((c&(1<<22)) != 0, TOPEXT) 1290 1291 // EDX 1292 fs.setIf(d&(1<<11) != 0, SYSCALL) 1293 fs.setIf(d&(1<<20) != 0, NX) 1294 fs.setIf(d&(1<<22) != 0, MMXEXT) 1295 fs.setIf(d&(1<<23) != 0, MMX) 1296 fs.setIf(d&(1<<24) != 0, FXSR) 1297 fs.setIf(d&(1<<25) != 0, FXSROPT) 1298 fs.setIf(d&(1<<27) != 0, RDTSCP) 1299 fs.setIf(d&(1<<30) != 0, AMD3DNOWEXT) 1300 fs.setIf(d&(1<<31) != 0, AMD3DNOW) 1301 1302 /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be 1303 * used unless the OS has AVX support. 
*/ 1304 if fs.inSet(AVX) { 1305 fs.setIf((c&(1<<11)) != 0, XOP) 1306 fs.setIf((c&(1<<16)) != 0, FMA4) 1307 } 1308 1309 } 1310 if maxExtendedFunction() >= 0x80000007 { 1311 _, b, _, d := cpuid(0x80000007) 1312 fs.setIf((b&(1<<0)) != 0, MCAOVERFLOW) 1313 fs.setIf((b&(1<<1)) != 0, SUCCOR) 1314 fs.setIf((b&(1<<2)) != 0, HWA) 1315 fs.setIf((d&(1<<9)) != 0, CPBOOST) 1316 } 1317 1318 if maxExtendedFunction() >= 0x80000008 { 1319 _, b, _, _ := cpuid(0x80000008) 1320 fs.setIf(b&(1<<28) != 0, PSFD) 1321 fs.setIf(b&(1<<27) != 0, CPPC) 1322 fs.setIf(b&(1<<24) != 0, SPEC_CTRL_SSBD) 1323 fs.setIf(b&(1<<23) != 0, PPIN) 1324 fs.setIf(b&(1<<21) != 0, TLB_FLUSH_NESTED) 1325 fs.setIf(b&(1<<20) != 0, EFER_LMSLE_UNS) 1326 fs.setIf(b&(1<<19) != 0, IBRS_PROVIDES_SMP) 1327 fs.setIf(b&(1<<18) != 0, IBRS_PREFERRED) 1328 fs.setIf(b&(1<<17) != 0, STIBP_ALWAYSON) 1329 fs.setIf(b&(1<<15) != 0, STIBP) 1330 fs.setIf(b&(1<<14) != 0, IBRS) 1331 fs.setIf((b&(1<<13)) != 0, INT_WBINVD) 1332 fs.setIf(b&(1<<12) != 0, IBPB) 1333 fs.setIf((b&(1<<9)) != 0, WBNOINVD) 1334 fs.setIf((b&(1<<8)) != 0, MCOMMIT) 1335 fs.setIf((b&(1<<4)) != 0, RDPRU) 1336 fs.setIf((b&(1<<3)) != 0, INVLPGB) 1337 fs.setIf((b&(1<<1)) != 0, MSRIRC) 1338 fs.setIf((b&(1<<0)) != 0, CLZERO) 1339 } 1340 1341 if fs.inSet(SVM) && maxExtendedFunction() >= 0x8000000A { 1342 _, _, _, edx := cpuid(0x8000000A) 1343 fs.setIf((edx>>0)&1 == 1, SVMNP) 1344 fs.setIf((edx>>1)&1 == 1, LBRVIRT) 1345 fs.setIf((edx>>2)&1 == 1, SVML) 1346 fs.setIf((edx>>3)&1 == 1, NRIPS) 1347 fs.setIf((edx>>4)&1 == 1, TSCRATEMSR) 1348 fs.setIf((edx>>5)&1 == 1, VMCBCLEAN) 1349 fs.setIf((edx>>6)&1 == 1, SVMFBASID) 1350 fs.setIf((edx>>7)&1 == 1, SVMDA) 1351 fs.setIf((edx>>10)&1 == 1, SVMPF) 1352 fs.setIf((edx>>12)&1 == 1, SVMPFT) 1353 } 1354 1355 if maxExtendedFunction() >= 0x8000001a { 1356 eax, _, _, _ := cpuid(0x8000001a) 1357 fs.setIf((eax>>0)&1 == 1, FP128) 1358 fs.setIf((eax>>1)&1 == 1, MOVU) 1359 fs.setIf((eax>>2)&1 == 1, FP256) 1360 } 1361 1362 if maxExtendedFunction() 
>= 0x8000001b && fs.inSet(IBS) { 1363 eax, _, _, _ := cpuid(0x8000001b) 1364 fs.setIf((eax>>0)&1 == 1, IBSFFV) 1365 fs.setIf((eax>>1)&1 == 1, IBSFETCHSAM) 1366 fs.setIf((eax>>2)&1 == 1, IBSOPSAM) 1367 fs.setIf((eax>>3)&1 == 1, IBSRDWROPCNT) 1368 fs.setIf((eax>>4)&1 == 1, IBSOPCNT) 1369 fs.setIf((eax>>5)&1 == 1, IBSBRNTRGT) 1370 fs.setIf((eax>>6)&1 == 1, IBSOPCNTEXT) 1371 fs.setIf((eax>>7)&1 == 1, IBSRIPINVALIDCHK) 1372 fs.setIf((eax>>8)&1 == 1, IBS_OPFUSE) 1373 fs.setIf((eax>>9)&1 == 1, IBS_FETCH_CTLX) 1374 fs.setIf((eax>>10)&1 == 1, IBS_OPDATA4) // Doc says "Fixed,0. IBS op data 4 MSR supported", but assuming they mean 1. 1375 fs.setIf((eax>>11)&1 == 1, IBS_ZEN4) 1376 } 1377 1378 if maxExtendedFunction() >= 0x8000001f && vend == AMD { 1379 a, _, _, _ := cpuid(0x8000001f) 1380 fs.setIf((a>>0)&1 == 1, SME) 1381 fs.setIf((a>>1)&1 == 1, SEV) 1382 fs.setIf((a>>2)&1 == 1, MSR_PAGEFLUSH) 1383 fs.setIf((a>>3)&1 == 1, SEV_ES) 1384 fs.setIf((a>>4)&1 == 1, SEV_SNP) 1385 fs.setIf((a>>5)&1 == 1, VMPL) 1386 fs.setIf((a>>10)&1 == 1, SME_COHERENT) 1387 fs.setIf((a>>11)&1 == 1, SEV_64BIT) 1388 fs.setIf((a>>12)&1 == 1, SEV_RESTRICTED) 1389 fs.setIf((a>>13)&1 == 1, SEV_ALTERNATIVE) 1390 fs.setIf((a>>14)&1 == 1, SEV_DEBUGSWAP) 1391 fs.setIf((a>>15)&1 == 1, IBS_PREVENTHOST) 1392 fs.setIf((a>>16)&1 == 1, VTE) 1393 fs.setIf((a>>24)&1 == 1, VMSA_REGPROT) 1394 } 1395 1396 return fs 1397 } 1398 1399 func valAsString(values ...uint32) []byte { 1400 r := make([]byte, 4*len(values)) 1401 for i, v := range values { 1402 dst := r[i*4:] 1403 dst[0] = byte(v & 0xff) 1404 dst[1] = byte((v >> 8) & 0xff) 1405 dst[2] = byte((v >> 16) & 0xff) 1406 dst[3] = byte((v >> 24) & 0xff) 1407 switch { 1408 case dst[0] == 0: 1409 return r[:i*4] 1410 case dst[1] == 0: 1411 return r[:i*4+1] 1412 case dst[2] == 0: 1413 return r[:i*4+2] 1414 case dst[3] == 0: 1415 return r[:i*4+3] 1416 } 1417 } 1418 return r 1419 }