decode.go (46203B)
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Table-driven decoding of x86 instructions.

package x86asm

import (
	"encoding/binary"
	"errors"
	"fmt"
	"runtime"
)

// Set trace to true to cause the decoder to print the PC sequence
// of the executed instruction codes. This is typically only useful
// when you are running a test of a single input case.
// With trace set, instPrefix also prints the file and line of its caller.
const trace = false

// A decodeOp is a single instruction in the decoder bytecode program.
//
// The decodeOps correspond to consuming and conditionally branching
// on input bytes, consuming additional fields, and then interpreting
// consumed data as instruction arguments. The names of the xRead and xArg
// operations are taken from the Intel manual conventions, for example
// Volume 2, Section 3.1.1, page 487 of
// http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-manual-325462.pdf
//
// The actual decoding program is generated by ../x86map.
//
// TODO(rsc): We may be able to merge various of the memory operands
// since we don't care about, say, the distinction between m80dec and m80bcd.
// Similarly, mm and mm1 have identical meaning, as do xmm and xmm1.
type decodeOp uint16

const (
	// Terminate or redirect the decoder program.
	xFail  decodeOp = iota // invalid instruction (return)
	xMatch                 // completed match
	xJump                  // jump to pc

	// Conditional branches.
	xCondByte     // switch on instruction byte value
	xCondSlashR   // read and switch on instruction /r value
	xCondPrefix   // switch on presence of instruction prefix
	xCondIs64     // switch on 64-bit processor mode
	xCondDataSize // switch on operand size
	xCondAddrSize // switch on address size
	xCondIsMem    // switch on memory vs register argument

	// Record the matched opcode.
	xSetOp // set instruction opcode

	// Input: consume further instruction bytes.
	xReadSlashR // read /r
	xReadIb     // read ib
	xReadIw     // read iw
	xReadId     // read id
	xReadIo     // read io
	xReadCb     // read cb
	xReadCw     // read cw
	xReadCd     // read cd
	xReadCp     // read cp
	xReadCm     // read cm

	// Output: append an argument to the decoded instruction.
	xArg1            // arg 1
	xArg3            // arg 3
	xArgAL           // arg AL
	xArgAX           // arg AX
	xArgCL           // arg CL
	xArgCR0dashCR7   // arg CR0-CR7
	xArgCS           // arg CS
	xArgDR0dashDR7   // arg DR0-DR7
	xArgDS           // arg DS
	xArgDX           // arg DX
	xArgEAX          // arg EAX
	xArgEDX          // arg EDX
	xArgES           // arg ES
	xArgFS           // arg FS
	xArgGS           // arg GS
	xArgImm16        // arg imm16
	xArgImm32        // arg imm32
	xArgImm64        // arg imm64
	xArgImm8         // arg imm8
	xArgImm8u        // arg imm8 but record as unsigned
	xArgImm16u       // arg imm16 but record as unsigned
	xArgM            // arg m
	xArgM128         // arg m128
	xArgM256         // arg m256
	xArgM1428byte    // arg m14/28byte
	xArgM16          // arg m16
	xArgM16and16     // arg m16&16
	xArgM16and32     // arg m16&32
	xArgM16and64     // arg m16&64
	xArgM16colon16   // arg m16:16
	xArgM16colon32   // arg m16:32
	xArgM16colon64   // arg m16:64
	xArgM16int       // arg m16int
	xArgM2byte       // arg m2byte
	xArgM32          // arg m32
	xArgM32and32     // arg m32&32
	xArgM32fp        // arg m32fp
	xArgM32int       // arg m32int
	xArgM512byte     // arg m512byte
	xArgM64          // arg m64
	xArgM64fp        // arg m64fp
	xArgM64int       // arg m64int
	xArgM8           // arg m8
	xArgM80bcd       // arg m80bcd
	xArgM80dec       // arg m80dec
	xArgM80fp        // arg m80fp
	xArgM94108byte   // arg m94/108byte
	xArgMm           // arg mm
	xArgMm1          // arg mm1
	xArgMm2          // arg mm2
	xArgMm2M64       // arg mm2/m64
	xArgMmM32        // arg mm/m32
	xArgMmM64        // arg mm/m64
	xArgMem          // arg mem
	xArgMoffs16      // arg moffs16
	xArgMoffs32      // arg moffs32
	xArgMoffs64      // arg moffs64
	xArgMoffs8       // arg moffs8
	xArgPtr16colon16 // arg ptr16:16
	xArgPtr16colon32 // arg ptr16:32
	xArgR16          // arg r16
	xArgR16op        // arg r16 with +rw in opcode
	xArgR32          // arg r32
	xArgR32M16       // arg r32/m16
	xArgR32M8        // arg r32/m8
	xArgR32op        // arg r32 with +rd in opcode
	xArgR64          // arg r64
	xArgR64M16       // arg r64/m16
	xArgR64op        // arg r64 with +rd in opcode
	xArgR8           // arg r8
	xArgR8op         // arg r8 with +rb in opcode
	xArgRAX          // arg RAX
	xArgRDX          // arg RDX
	xArgRM           // arg r/m
	xArgRM16         // arg r/m16
	xArgRM32         // arg r/m32
	xArgRM64         // arg r/m64
	xArgRM8          // arg r/m8
	xArgReg          // arg reg
	xArgRegM16       // arg reg/m16
	xArgRegM32       // arg reg/m32
	xArgRegM8        // arg reg/m8
	xArgRel16        // arg rel16
	xArgRel32        // arg rel32
	xArgRel8         // arg rel8
	xArgSS           // arg SS
	xArgST           // arg ST, aka ST(0)
	xArgSTi          // arg ST(i) with +i in opcode
	xArgSreg         // arg Sreg
	xArgTR0dashTR7   // arg TR0-TR7
	xArgXmm          // arg xmm
	xArgXMM0         // arg <XMM0>
	xArgXmm1         // arg xmm1
	xArgXmm2         // arg xmm2
	xArgXmm2M128     // arg xmm2/m128
	xArgYmm2M256     // arg ymm2/m256
	xArgXmm2M16      // arg xmm2/m16
	xArgXmm2M32      // arg xmm2/m32
	xArgXmm2M64      // arg xmm2/m64
	xArgXmmM128      // arg xmm/m128
	xArgXmmM32       // arg xmm/m32
	xArgXmmM64       // arg xmm/m64
	xArgYmm1         // arg ymm1
	xArgRmf16        // arg r/m16 but force mod=3
	xArgRmf32        // arg r/m32 but force mod=3
	xArgRmf64        // arg r/m64 but force mod=3
)

// instPrefix returns an Inst describing just one prefix byte.
// It is only used if there is a prefix followed by an unintelligible
// or invalid instruction byte sequence.
175 func instPrefix(b byte, mode int) (Inst, error) { 176 // When tracing it is useful to see what called instPrefix to report an error. 177 if trace { 178 _, file, line, _ := runtime.Caller(1) 179 fmt.Printf("%s:%d\n", file, line) 180 } 181 p := Prefix(b) 182 switch p { 183 case PrefixDataSize: 184 if mode == 16 { 185 p = PrefixData32 186 } else { 187 p = PrefixData16 188 } 189 case PrefixAddrSize: 190 if mode == 32 { 191 p = PrefixAddr16 192 } else { 193 p = PrefixAddr32 194 } 195 } 196 // Note: using composite literal with Prefix key confuses 'bundle' tool. 197 inst := Inst{Len: 1} 198 inst.Prefix = Prefixes{p} 199 return inst, nil 200 } 201 202 // truncated reports a truncated instruction. 203 // For now we use instPrefix but perhaps later we will return 204 // a specific error here. 205 func truncated(src []byte, mode int) (Inst, error) { 206 if len(src) == 0 { 207 return Inst{}, ErrTruncated 208 } 209 return instPrefix(src[0], mode) // too long 210 } 211 212 // These are the errors returned by Decode. 213 var ( 214 ErrInvalidMode = errors.New("invalid x86 mode in Decode") 215 ErrTruncated = errors.New("truncated instruction") 216 ErrUnrecognized = errors.New("unrecognized instruction") 217 ) 218 219 // decoderCover records coverage information for which parts 220 // of the byte code have been executed. 221 var decoderCover []bool 222 223 // Decode decodes the leading bytes in src as a single instruction. 224 // The mode arguments specifies the assumed processor mode: 225 // 16, 32, or 64 for 16-, 32-, and 64-bit execution modes. 226 func Decode(src []byte, mode int) (inst Inst, err error) { 227 return decode1(src, mode, false) 228 } 229 230 // decode1 is the implementation of Decode but takes an extra 231 // gnuCompat flag to cause it to change its behavior to mimic 232 // bugs (or at least unique features) of GNU libopcodes as used 233 // by objdump. 
We don't believe that logic is the right thing to do 234 // in general, but when testing against libopcodes it simplifies the 235 // comparison if we adjust a few small pieces of logic. 236 // The affected logic is in the conditional branch for "mandatory" prefixes, 237 // case xCondPrefix. 238 func decode1(src []byte, mode int, gnuCompat bool) (Inst, error) { 239 switch mode { 240 case 16, 32, 64: 241 // ok 242 // TODO(rsc): 64-bit mode not tested, probably not working. 243 default: 244 return Inst{}, ErrInvalidMode 245 } 246 247 // Maximum instruction size is 15 bytes. 248 // If we need to read more, return 'truncated instruction. 249 if len(src) > 15 { 250 src = src[:15] 251 } 252 253 var ( 254 // prefix decoding information 255 pos = 0 // position reading src 256 nprefix = 0 // number of prefixes 257 lockIndex = -1 // index of LOCK prefix in src and inst.Prefix 258 repIndex = -1 // index of REP/REPN prefix in src and inst.Prefix 259 segIndex = -1 // index of Group 2 prefix in src and inst.Prefix 260 dataSizeIndex = -1 // index of Group 3 prefix in src and inst.Prefix 261 addrSizeIndex = -1 // index of Group 4 prefix in src and inst.Prefix 262 rex Prefix // rex byte if present (or 0) 263 rexUsed Prefix // bits used in rex byte 264 rexIndex = -1 // index of rex byte 265 vex Prefix // use vex encoding 266 vexIndex = -1 // index of vex prefix 267 268 addrMode = mode // address mode (width in bits) 269 dataMode = mode // operand mode (width in bits) 270 271 // decoded ModR/M fields 272 haveModrm bool 273 modrm int 274 mod int 275 regop int 276 rm int 277 278 // if ModR/M is memory reference, Mem form 279 mem Mem 280 haveMem bool 281 282 // decoded SIB fields 283 haveSIB bool 284 sib int 285 scale int 286 index int 287 base int 288 displen int 289 dispoff int 290 291 // decoded immediate values 292 imm int64 293 imm8 int8 294 immc int64 295 immcpos int 296 297 // output 298 opshift int 299 inst Inst 300 narg int // number of arguments written to inst 301 ) 302 303 if 
mode == 64 { 304 dataMode = 32 305 } 306 307 // Prefixes are certainly the most complex and underspecified part of 308 // decoding x86 instructions. Although the manuals say things like 309 // up to four prefixes, one from each group, nearly everyone seems to 310 // agree that in practice as many prefixes as possible, including multiple 311 // from a particular group or repetitions of a given prefix, can be used on 312 // an instruction, provided the total instruction length including prefixes 313 // does not exceed the agreed-upon maximum of 15 bytes. 314 // Everyone also agrees that if one of these prefixes is the LOCK prefix 315 // and the instruction is not one of the instructions that can be used with 316 // the LOCK prefix or if the destination is not a memory operand, 317 // then the instruction is invalid and produces the #UD exception. 318 // However, that is the end of any semblance of agreement. 319 // 320 // What happens if prefixes are given that conflict with other prefixes? 321 // For example, the memory segment overrides CS, DS, ES, FS, GS, SS 322 // conflict with each other: only one segment can be in effect. 323 // Disassemblers seem to agree that later prefixes take priority over 324 // earlier ones. I have not taken the time to write assembly programs 325 // to check to see if the hardware agrees. 326 // 327 // What happens if prefixes are given that have no meaning for the 328 // specific instruction to which they are attached? It depends. 329 // If they really have no meaning, they are ignored. However, a future 330 // processor may assign a different meaning. As a disassembler, we 331 // don't really know whether we're seeing a meaningless prefix or one 332 // whose meaning we simply haven't been told yet. 333 // 334 // Combining the two questions, what happens when conflicting 335 // extension prefixes are given? No one seems to know for sure. 336 // For example, MOVQ is 66 0F D6 /r, MOVDQ2Q is F2 0F D6 /r, 337 // and MOVQ2DQ is F3 0F D6 /r. 
What is '66 F2 F3 0F D6 /r'? 338 // Which prefix wins? See the xCondPrefix prefix for more. 339 // 340 // Writing assembly test cases to divine which interpretation the 341 // CPU uses might clarify the situation, but more likely it would 342 // make the situation even less clear. 343 344 // Read non-REX prefixes. 345 ReadPrefixes: 346 for ; pos < len(src); pos++ { 347 p := Prefix(src[pos]) 348 switch p { 349 default: 350 nprefix = pos 351 break ReadPrefixes 352 353 // Group 1 - lock and repeat prefixes 354 // According to Intel, there should only be one from this set, 355 // but according to AMD both can be present. 356 case 0xF0: 357 if lockIndex >= 0 { 358 inst.Prefix[lockIndex] |= PrefixIgnored 359 } 360 lockIndex = pos 361 case 0xF2, 0xF3: 362 if repIndex >= 0 { 363 inst.Prefix[repIndex] |= PrefixIgnored 364 } 365 repIndex = pos 366 367 // Group 2 - segment override / branch hints 368 case 0x26, 0x2E, 0x36, 0x3E: 369 if mode == 64 { 370 p |= PrefixIgnored 371 break 372 } 373 fallthrough 374 case 0x64, 0x65: 375 if segIndex >= 0 { 376 inst.Prefix[segIndex] |= PrefixIgnored 377 } 378 segIndex = pos 379 380 // Group 3 - operand size override 381 case 0x66: 382 if mode == 16 { 383 dataMode = 32 384 p = PrefixData32 385 } else { 386 dataMode = 16 387 p = PrefixData16 388 } 389 if dataSizeIndex >= 0 { 390 inst.Prefix[dataSizeIndex] |= PrefixIgnored 391 } 392 dataSizeIndex = pos 393 394 // Group 4 - address size override 395 case 0x67: 396 if mode == 32 { 397 addrMode = 16 398 p = PrefixAddr16 399 } else { 400 addrMode = 32 401 p = PrefixAddr32 402 } 403 if addrSizeIndex >= 0 { 404 inst.Prefix[addrSizeIndex] |= PrefixIgnored 405 } 406 addrSizeIndex = pos 407 408 //Group 5 - Vex encoding 409 case 0xC5: 410 if pos == 0 && pos+1 < len(src) && (mode == 64 || (mode == 32 && src[pos+1]&0xc0 == 0xc0)) { 411 vex = p 412 vexIndex = pos 413 inst.Prefix[pos] = p 414 inst.Prefix[pos+1] = Prefix(src[pos+1]) 415 pos += 1 416 continue 417 } else { 418 nprefix = pos 419 break 
ReadPrefixes 420 } 421 case 0xC4: 422 if pos == 0 && pos+2 < len(src) && (mode == 64 || (mode == 32 && src[pos+1]&0xc0 == 0xc0)) { 423 vex = p 424 vexIndex = pos 425 inst.Prefix[pos] = p 426 inst.Prefix[pos+1] = Prefix(src[pos+1]) 427 inst.Prefix[pos+2] = Prefix(src[pos+2]) 428 pos += 2 429 continue 430 } else { 431 nprefix = pos 432 break ReadPrefixes 433 } 434 } 435 436 if pos >= len(inst.Prefix) { 437 return instPrefix(src[0], mode) // too long 438 } 439 440 inst.Prefix[pos] = p 441 } 442 443 // Read REX prefix. 444 if pos < len(src) && mode == 64 && Prefix(src[pos]).IsREX() && vex == 0 { 445 rex = Prefix(src[pos]) 446 rexIndex = pos 447 if pos >= len(inst.Prefix) { 448 return instPrefix(src[0], mode) // too long 449 } 450 inst.Prefix[pos] = rex 451 pos++ 452 if rex&PrefixREXW != 0 { 453 dataMode = 64 454 if dataSizeIndex >= 0 { 455 inst.Prefix[dataSizeIndex] |= PrefixIgnored 456 } 457 } 458 } 459 460 // Decode instruction stream, interpreting decoding instructions. 461 // opshift gives the shift to use when saving the next 462 // opcode byte into inst.Opcode. 463 opshift = 24 464 465 // Decode loop, executing decoder program. 466 var oldPC, prevPC int 467 Decode: 468 for pc := 1; ; { // TODO uint 469 oldPC = prevPC 470 prevPC = pc 471 if trace { 472 println("run", pc) 473 } 474 x := decoder[pc] 475 if decoderCover != nil { 476 decoderCover[pc] = true 477 } 478 pc++ 479 480 // Read and decode ModR/M if needed by opcode. 
481 switch decodeOp(x) { 482 case xCondSlashR, xReadSlashR: 483 if haveModrm { 484 return Inst{Len: pos}, errInternal 485 } 486 haveModrm = true 487 if pos >= len(src) { 488 return truncated(src, mode) 489 } 490 modrm = int(src[pos]) 491 pos++ 492 if opshift >= 0 { 493 inst.Opcode |= uint32(modrm) << uint(opshift) 494 opshift -= 8 495 } 496 mod = modrm >> 6 497 regop = (modrm >> 3) & 07 498 rm = modrm & 07 499 if rex&PrefixREXR != 0 { 500 rexUsed |= PrefixREXR 501 regop |= 8 502 } 503 if addrMode == 16 { 504 // 16-bit modrm form 505 if mod != 3 { 506 haveMem = true 507 mem = addr16[rm] 508 if rm == 6 && mod == 0 { 509 mem.Base = 0 510 } 511 512 // Consume disp16 if present. 513 if mod == 0 && rm == 6 || mod == 2 { 514 if pos+2 > len(src) { 515 return truncated(src, mode) 516 } 517 mem.Disp = int64(binary.LittleEndian.Uint16(src[pos:])) 518 pos += 2 519 } 520 521 // Consume disp8 if present. 522 if mod == 1 { 523 if pos >= len(src) { 524 return truncated(src, mode) 525 } 526 mem.Disp = int64(int8(src[pos])) 527 pos++ 528 } 529 } 530 } else { 531 haveMem = mod != 3 532 533 // 32-bit or 64-bit form 534 // Consume SIB encoding if present. 
535 if rm == 4 && mod != 3 { 536 haveSIB = true 537 if pos >= len(src) { 538 return truncated(src, mode) 539 } 540 sib = int(src[pos]) 541 pos++ 542 if opshift >= 0 { 543 inst.Opcode |= uint32(sib) << uint(opshift) 544 opshift -= 8 545 } 546 scale = sib >> 6 547 index = (sib >> 3) & 07 548 base = sib & 07 549 if rex&PrefixREXB != 0 || vex == 0xC4 && inst.Prefix[vexIndex+1]&0x20 == 0 { 550 rexUsed |= PrefixREXB 551 base |= 8 552 } 553 if rex&PrefixREXX != 0 || vex == 0xC4 && inst.Prefix[vexIndex+1]&0x40 == 0 { 554 rexUsed |= PrefixREXX 555 index |= 8 556 } 557 558 mem.Scale = 1 << uint(scale) 559 if index == 4 { 560 // no mem.Index 561 } else { 562 mem.Index = baseRegForBits(addrMode) + Reg(index) 563 } 564 if base&7 == 5 && mod == 0 { 565 // no mem.Base 566 } else { 567 mem.Base = baseRegForBits(addrMode) + Reg(base) 568 } 569 } else { 570 if rex&PrefixREXB != 0 { 571 rexUsed |= PrefixREXB 572 rm |= 8 573 } 574 if mod == 0 && rm&7 == 5 || rm&7 == 4 { 575 // base omitted 576 } else if mod != 3 { 577 mem.Base = baseRegForBits(addrMode) + Reg(rm) 578 } 579 } 580 581 // Consume disp32 if present. 582 if mod == 0 && (rm&7 == 5 || haveSIB && base&7 == 5) || mod == 2 { 583 if pos+4 > len(src) { 584 return truncated(src, mode) 585 } 586 dispoff = pos 587 displen = 4 588 mem.Disp = int64(binary.LittleEndian.Uint32(src[pos:])) 589 pos += 4 590 } 591 592 // Consume disp8 if present. 593 if mod == 1 { 594 if pos >= len(src) { 595 return truncated(src, mode) 596 } 597 dispoff = pos 598 displen = 1 599 mem.Disp = int64(int8(src[pos])) 600 pos++ 601 } 602 603 // In 64-bit, mod=0 rm=5 is PC-relative instead of just disp. 604 // See Vol 2A. Table 2-7. 605 if mode == 64 && mod == 0 && rm&7 == 5 { 606 if addrMode == 32 { 607 mem.Base = EIP 608 } else { 609 mem.Base = RIP 610 } 611 } 612 } 613 614 if segIndex >= 0 { 615 mem.Segment = prefixToSegment(inst.Prefix[segIndex]) 616 } 617 } 618 619 // Execute single opcode. 
620 switch decodeOp(x) { 621 default: 622 println("bad op", x, "at", pc-1, "from", oldPC) 623 return Inst{Len: pos}, errInternal 624 625 case xFail: 626 inst.Op = 0 627 break Decode 628 629 case xMatch: 630 break Decode 631 632 case xJump: 633 pc = int(decoder[pc]) 634 635 // Conditional branches. 636 637 case xCondByte: 638 if pos >= len(src) { 639 return truncated(src, mode) 640 } 641 b := src[pos] 642 n := int(decoder[pc]) 643 pc++ 644 for i := 0; i < n; i++ { 645 xb, xpc := decoder[pc], int(decoder[pc+1]) 646 pc += 2 647 if b == byte(xb) { 648 pc = xpc 649 pos++ 650 if opshift >= 0 { 651 inst.Opcode |= uint32(b) << uint(opshift) 652 opshift -= 8 653 } 654 continue Decode 655 } 656 } 657 // xCondByte is the only conditional with a fall through, 658 // so that it can be used to pick off special cases before 659 // an xCondSlash. If the fallthrough instruction is xFail, 660 // advance the position so that the decoded instruction 661 // size includes the byte we just compared against. 662 if decodeOp(decoder[pc]) == xJump { 663 pc = int(decoder[pc+1]) 664 } 665 if decodeOp(decoder[pc]) == xFail { 666 pos++ 667 } 668 669 case xCondIs64: 670 if mode == 64 { 671 pc = int(decoder[pc+1]) 672 } else { 673 pc = int(decoder[pc]) 674 } 675 676 case xCondIsMem: 677 mem := haveMem 678 if !haveModrm { 679 if pos >= len(src) { 680 return instPrefix(src[0], mode) // too long 681 } 682 mem = src[pos]>>6 != 3 683 } 684 if mem { 685 pc = int(decoder[pc+1]) 686 } else { 687 pc = int(decoder[pc]) 688 } 689 690 case xCondDataSize: 691 switch dataMode { 692 case 16: 693 if dataSizeIndex >= 0 { 694 inst.Prefix[dataSizeIndex] |= PrefixImplicit 695 } 696 pc = int(decoder[pc]) 697 case 32: 698 if dataSizeIndex >= 0 { 699 inst.Prefix[dataSizeIndex] |= PrefixImplicit 700 } 701 pc = int(decoder[pc+1]) 702 case 64: 703 rexUsed |= PrefixREXW 704 pc = int(decoder[pc+2]) 705 } 706 707 case xCondAddrSize: 708 switch addrMode { 709 case 16: 710 if addrSizeIndex >= 0 { 711 inst.Prefix[addrSizeIndex] 
|= PrefixImplicit 712 } 713 pc = int(decoder[pc]) 714 case 32: 715 if addrSizeIndex >= 0 { 716 inst.Prefix[addrSizeIndex] |= PrefixImplicit 717 } 718 pc = int(decoder[pc+1]) 719 case 64: 720 pc = int(decoder[pc+2]) 721 } 722 723 case xCondPrefix: 724 // Conditional branch based on presence or absence of prefixes. 725 // The conflict cases here are completely undocumented and 726 // differ significantly between GNU libopcodes and Intel xed. 727 // I have not written assembly code to divine what various CPUs 728 // do, but it wouldn't surprise me if they are not consistent either. 729 // 730 // The basic idea is to switch on the presence of a prefix, so that 731 // for example: 732 // 733 // xCondPrefix, 4 734 // 0xF3, 123, 735 // 0xF2, 234, 736 // 0x66, 345, 737 // 0, 456 738 // 739 // branch to 123 if the F3 prefix is present, 234 if the F2 prefix 740 // is present, 66 if the 345 prefix is present, and 456 otherwise. 741 // The prefixes are given in descending order so that the 0 will be last. 742 // 743 // It is unclear what should happen if multiple conditions are 744 // satisfied: what if F2 and F3 are both present, or if 66 and F2 745 // are present, or if all three are present? The one chosen becomes 746 // part of the opcode and the others do not. Perhaps the answer 747 // depends on the specific opcodes in question. 748 // 749 // The only clear example is that CRC32 is F2 0F 38 F1 /r, and 750 // it comes in 16-bit and 32-bit forms based on the 66 prefix, 751 // so 66 F2 0F 38 F1 /r should be treated as F2 taking priority, 752 // with the 66 being only an operand size override, and probably 753 // F2 66 0F 38 F1 /r should be treated the same. 754 // Perhaps that rule is specific to the case of CRC32, since no 755 // 66 0F 38 F1 instruction is defined (today) (that we know of). 756 // However, both libopcodes and xed seem to generalize this 757 // example and choose F2/F3 in preference to 66, and we 758 // do the same. 
759 // 760 // Next, what if both F2 and F3 are present? Which wins? 761 // The Intel xed rule, and ours, is that the one that occurs last wins. 762 // The GNU libopcodes rule, which we implement only in gnuCompat mode, 763 // is that F3 beats F2 unless F3 has no special meaning, in which 764 // case F3 can be a modified on an F2 special meaning. 765 // 766 // Concretely, 767 // 66 0F D6 /r is MOVQ 768 // F2 0F D6 /r is MOVDQ2Q 769 // F3 0F D6 /r is MOVQ2DQ. 770 // 771 // F2 66 0F D6 /r is 66 + MOVDQ2Q always. 772 // 66 F2 0F D6 /r is 66 + MOVDQ2Q always. 773 // F3 66 0F D6 /r is 66 + MOVQ2DQ always. 774 // 66 F3 0F D6 /r is 66 + MOVQ2DQ always. 775 // F2 F3 0F D6 /r is F2 + MOVQ2DQ always. 776 // F3 F2 0F D6 /r is F3 + MOVQ2DQ in Intel xed, but F2 + MOVQ2DQ in GNU libopcodes. 777 // Adding 66 anywhere in the prefix section of the 778 // last two cases does not change the outcome. 779 // 780 // Finally, what if there is a variant in which 66 is a mandatory 781 // prefix rather than an operand size override, but we know of 782 // no corresponding F2/F3 form, and we see both F2/F3 and 66. 783 // Does F2/F3 still take priority, so that the result is an unknown 784 // instruction, or does the 66 take priority, so that the extended 785 // 66 instruction should be interpreted as having a REP/REPN prefix? 786 // Intel xed does the former and GNU libopcodes does the latter. 787 // We side with Intel xed, unless we are trying to match libopcodes 788 // more closely during the comparison-based test suite. 789 // 790 // In 64-bit mode REX.W is another valid prefix to test for, but 791 // there is less ambiguity about that. When present, REX.W is 792 // always the first entry in the table. 
793 n := int(decoder[pc]) 794 pc++ 795 sawF3 := false 796 for j := 0; j < n; j++ { 797 prefix := Prefix(decoder[pc+2*j]) 798 if prefix.IsREX() { 799 rexUsed |= prefix 800 if rex&prefix == prefix { 801 pc = int(decoder[pc+2*j+1]) 802 continue Decode 803 } 804 continue 805 } 806 ok := false 807 if prefix == 0 { 808 ok = true 809 } else if prefix.IsREX() { 810 rexUsed |= prefix 811 if rex&prefix == prefix { 812 ok = true 813 } 814 } else if prefix == 0xC5 || prefix == 0xC4 { 815 if vex == prefix { 816 ok = true 817 } 818 } else if vex != 0 && (prefix == 0x0F || prefix == 0x0F38 || prefix == 0x0F3A || 819 prefix == 0x66 || prefix == 0xF2 || prefix == 0xF3) { 820 var vexM, vexP Prefix 821 if vex == 0xC5 { 822 vexM = 1 // 2 byte vex always implies 0F 823 vexP = inst.Prefix[vexIndex+1] 824 } else { 825 vexM = inst.Prefix[vexIndex+1] 826 vexP = inst.Prefix[vexIndex+2] 827 } 828 switch prefix { 829 case 0x66: 830 ok = vexP&3 == 1 831 case 0xF3: 832 ok = vexP&3 == 2 833 case 0xF2: 834 ok = vexP&3 == 3 835 case 0x0F: 836 ok = vexM&3 == 1 837 case 0x0F38: 838 ok = vexM&3 == 2 839 case 0x0F3A: 840 ok = vexM&3 == 3 841 } 842 } else { 843 if prefix == 0xF3 { 844 sawF3 = true 845 } 846 switch prefix { 847 case PrefixLOCK: 848 if lockIndex >= 0 { 849 inst.Prefix[lockIndex] |= PrefixImplicit 850 ok = true 851 } 852 case PrefixREP, PrefixREPN: 853 if repIndex >= 0 && inst.Prefix[repIndex]&0xFF == prefix { 854 inst.Prefix[repIndex] |= PrefixImplicit 855 ok = true 856 } 857 if gnuCompat && !ok && prefix == 0xF3 && repIndex >= 0 && (j+1 >= n || decoder[pc+2*(j+1)] != 0xF2) { 858 // Check to see if earlier prefix F3 is present. 859 for i := repIndex - 1; i >= 0; i-- { 860 if inst.Prefix[i]&0xFF == prefix { 861 inst.Prefix[i] |= PrefixImplicit 862 ok = true 863 } 864 } 865 } 866 if gnuCompat && !ok && prefix == 0xF2 && repIndex >= 0 && !sawF3 && inst.Prefix[repIndex]&0xFF == 0xF3 { 867 // Check to see if earlier prefix F2 is present. 
868 for i := repIndex - 1; i >= 0; i-- { 869 if inst.Prefix[i]&0xFF == prefix { 870 inst.Prefix[i] |= PrefixImplicit 871 ok = true 872 } 873 } 874 } 875 case PrefixCS, PrefixDS, PrefixES, PrefixFS, PrefixGS, PrefixSS: 876 if segIndex >= 0 && inst.Prefix[segIndex]&0xFF == prefix { 877 inst.Prefix[segIndex] |= PrefixImplicit 878 ok = true 879 } 880 case PrefixDataSize: 881 // Looking for 66 mandatory prefix. 882 // The F2/F3 mandatory prefixes take priority when both are present. 883 // If we got this far in the xCondPrefix table and an F2/F3 is present, 884 // it means the table didn't have any entry for that prefix. But if 66 has 885 // special meaning, perhaps F2/F3 have special meaning that we don't know. 886 // Intel xed works this way, treating the F2/F3 as inhibiting the 66. 887 // GNU libopcodes allows the 66 to match. We do what Intel xed does 888 // except in gnuCompat mode. 889 if repIndex >= 0 && !gnuCompat { 890 inst.Op = 0 891 break Decode 892 } 893 if dataSizeIndex >= 0 { 894 inst.Prefix[dataSizeIndex] |= PrefixImplicit 895 ok = true 896 } 897 case PrefixAddrSize: 898 if addrSizeIndex >= 0 { 899 inst.Prefix[addrSizeIndex] |= PrefixImplicit 900 ok = true 901 } 902 } 903 } 904 if ok { 905 pc = int(decoder[pc+2*j+1]) 906 continue Decode 907 } 908 } 909 inst.Op = 0 910 break Decode 911 912 case xCondSlashR: 913 pc = int(decoder[pc+regop&7]) 914 915 // Input. 
916 917 case xReadSlashR: 918 // done above 919 920 case xReadIb: 921 if pos >= len(src) { 922 return truncated(src, mode) 923 } 924 imm8 = int8(src[pos]) 925 pos++ 926 927 case xReadIw: 928 if pos+2 > len(src) { 929 return truncated(src, mode) 930 } 931 imm = int64(binary.LittleEndian.Uint16(src[pos:])) 932 pos += 2 933 934 case xReadId: 935 if pos+4 > len(src) { 936 return truncated(src, mode) 937 } 938 imm = int64(binary.LittleEndian.Uint32(src[pos:])) 939 pos += 4 940 941 case xReadIo: 942 if pos+8 > len(src) { 943 return truncated(src, mode) 944 } 945 imm = int64(binary.LittleEndian.Uint64(src[pos:])) 946 pos += 8 947 948 case xReadCb: 949 if pos >= len(src) { 950 return truncated(src, mode) 951 } 952 immcpos = pos 953 immc = int64(src[pos]) 954 pos++ 955 956 case xReadCw: 957 if pos+2 > len(src) { 958 return truncated(src, mode) 959 } 960 immcpos = pos 961 immc = int64(binary.LittleEndian.Uint16(src[pos:])) 962 pos += 2 963 964 case xReadCm: 965 immcpos = pos 966 if addrMode == 16 { 967 if pos+2 > len(src) { 968 return truncated(src, mode) 969 } 970 immc = int64(binary.LittleEndian.Uint16(src[pos:])) 971 pos += 2 972 } else if addrMode == 32 { 973 if pos+4 > len(src) { 974 return truncated(src, mode) 975 } 976 immc = int64(binary.LittleEndian.Uint32(src[pos:])) 977 pos += 4 978 } else { 979 if pos+8 > len(src) { 980 return truncated(src, mode) 981 } 982 immc = int64(binary.LittleEndian.Uint64(src[pos:])) 983 pos += 8 984 } 985 case xReadCd: 986 immcpos = pos 987 if pos+4 > len(src) { 988 return truncated(src, mode) 989 } 990 immc = int64(binary.LittleEndian.Uint32(src[pos:])) 991 pos += 4 992 993 case xReadCp: 994 immcpos = pos 995 if pos+6 > len(src) { 996 return truncated(src, mode) 997 } 998 w := binary.LittleEndian.Uint32(src[pos:]) 999 w2 := binary.LittleEndian.Uint16(src[pos+4:]) 1000 immc = int64(w2)<<32 | int64(w) 1001 pos += 6 1002 1003 // Output. 
1004 1005 case xSetOp: 1006 inst.Op = Op(decoder[pc]) 1007 pc++ 1008 1009 case xArg1, 1010 xArg3, 1011 xArgAL, 1012 xArgAX, 1013 xArgCL, 1014 xArgCS, 1015 xArgDS, 1016 xArgDX, 1017 xArgEAX, 1018 xArgEDX, 1019 xArgES, 1020 xArgFS, 1021 xArgGS, 1022 xArgRAX, 1023 xArgRDX, 1024 xArgSS, 1025 xArgST, 1026 xArgXMM0: 1027 inst.Args[narg] = fixedArg[x] 1028 narg++ 1029 1030 case xArgImm8: 1031 inst.Args[narg] = Imm(imm8) 1032 narg++ 1033 1034 case xArgImm8u: 1035 inst.Args[narg] = Imm(uint8(imm8)) 1036 narg++ 1037 1038 case xArgImm16: 1039 inst.Args[narg] = Imm(int16(imm)) 1040 narg++ 1041 1042 case xArgImm16u: 1043 inst.Args[narg] = Imm(uint16(imm)) 1044 narg++ 1045 1046 case xArgImm32: 1047 inst.Args[narg] = Imm(int32(imm)) 1048 narg++ 1049 1050 case xArgImm64: 1051 inst.Args[narg] = Imm(imm) 1052 narg++ 1053 1054 case xArgM, 1055 xArgM128, 1056 xArgM256, 1057 xArgM1428byte, 1058 xArgM16, 1059 xArgM16and16, 1060 xArgM16and32, 1061 xArgM16and64, 1062 xArgM16colon16, 1063 xArgM16colon32, 1064 xArgM16colon64, 1065 xArgM16int, 1066 xArgM2byte, 1067 xArgM32, 1068 xArgM32and32, 1069 xArgM32fp, 1070 xArgM32int, 1071 xArgM512byte, 1072 xArgM64, 1073 xArgM64fp, 1074 xArgM64int, 1075 xArgM8, 1076 xArgM80bcd, 1077 xArgM80dec, 1078 xArgM80fp, 1079 xArgM94108byte, 1080 xArgMem: 1081 if !haveMem { 1082 inst.Op = 0 1083 break Decode 1084 } 1085 inst.Args[narg] = mem 1086 inst.MemBytes = int(memBytes[decodeOp(x)]) 1087 if mem.Base == RIP { 1088 inst.PCRel = displen 1089 inst.PCRelOff = dispoff 1090 } 1091 narg++ 1092 1093 case xArgPtr16colon16: 1094 inst.Args[narg] = Imm(immc >> 16) 1095 inst.Args[narg+1] = Imm(immc & (1<<16 - 1)) 1096 narg += 2 1097 1098 case xArgPtr16colon32: 1099 inst.Args[narg] = Imm(immc >> 32) 1100 inst.Args[narg+1] = Imm(immc & (1<<32 - 1)) 1101 narg += 2 1102 1103 case xArgMoffs8, xArgMoffs16, xArgMoffs32, xArgMoffs64: 1104 // TODO(rsc): Can address be 64 bits? 
1105 mem = Mem{Disp: int64(immc)} 1106 if segIndex >= 0 { 1107 mem.Segment = prefixToSegment(inst.Prefix[segIndex]) 1108 inst.Prefix[segIndex] |= PrefixImplicit 1109 } 1110 inst.Args[narg] = mem 1111 inst.MemBytes = int(memBytes[decodeOp(x)]) 1112 if mem.Base == RIP { 1113 inst.PCRel = displen 1114 inst.PCRelOff = dispoff 1115 } 1116 narg++ 1117 1118 case xArgYmm1: 1119 base := baseReg[x] 1120 index := Reg(regop) 1121 if inst.Prefix[vexIndex+1]&0x80 == 0 { 1122 index += 8 1123 } 1124 inst.Args[narg] = base + index 1125 narg++ 1126 1127 case xArgR8, xArgR16, xArgR32, xArgR64, xArgXmm, xArgXmm1, xArgDR0dashDR7: 1128 base := baseReg[x] 1129 index := Reg(regop) 1130 if rex != 0 && base == AL && index >= 4 { 1131 rexUsed |= PrefixREX 1132 index -= 4 1133 base = SPB 1134 } 1135 inst.Args[narg] = base + index 1136 narg++ 1137 1138 case xArgMm, xArgMm1, xArgTR0dashTR7: 1139 inst.Args[narg] = baseReg[x] + Reg(regop&7) 1140 narg++ 1141 1142 case xArgCR0dashCR7: 1143 // AMD documents an extension that the LOCK prefix 1144 // can be used in place of a REX prefix in order to access 1145 // CR8 from 32-bit mode. The LOCK prefix is allowed in 1146 // all modes, provided the corresponding CPUID bit is set. 
1147 if lockIndex >= 0 { 1148 inst.Prefix[lockIndex] |= PrefixImplicit 1149 regop += 8 1150 } 1151 inst.Args[narg] = CR0 + Reg(regop) 1152 narg++ 1153 1154 case xArgSreg: 1155 regop &= 7 1156 if regop >= 6 { 1157 inst.Op = 0 1158 break Decode 1159 } 1160 inst.Args[narg] = ES + Reg(regop) 1161 narg++ 1162 1163 case xArgRmf16, xArgRmf32, xArgRmf64: 1164 base := baseReg[x] 1165 index := Reg(modrm & 07) 1166 if rex&PrefixREXB != 0 { 1167 rexUsed |= PrefixREXB 1168 index += 8 1169 } 1170 inst.Args[narg] = base + index 1171 narg++ 1172 1173 case xArgR8op, xArgR16op, xArgR32op, xArgR64op, xArgSTi: 1174 n := inst.Opcode >> uint(opshift+8) & 07 1175 base := baseReg[x] 1176 index := Reg(n) 1177 if rex&PrefixREXB != 0 && decodeOp(x) != xArgSTi { 1178 rexUsed |= PrefixREXB 1179 index += 8 1180 } 1181 if rex != 0 && base == AL && index >= 4 { 1182 rexUsed |= PrefixREX 1183 index -= 4 1184 base = SPB 1185 } 1186 inst.Args[narg] = base + index 1187 narg++ 1188 case xArgRM8, xArgRM16, xArgRM32, xArgRM64, xArgR32M16, xArgR32M8, xArgR64M16, 1189 xArgMmM32, xArgMmM64, xArgMm2M64, 1190 xArgXmm2M16, xArgXmm2M32, xArgXmm2M64, xArgXmmM64, xArgXmmM128, xArgXmmM32, xArgXmm2M128, 1191 xArgYmm2M256: 1192 if haveMem { 1193 inst.Args[narg] = mem 1194 inst.MemBytes = int(memBytes[decodeOp(x)]) 1195 if mem.Base == RIP { 1196 inst.PCRel = displen 1197 inst.PCRelOff = dispoff 1198 } 1199 } else { 1200 base := baseReg[x] 1201 index := Reg(rm) 1202 switch decodeOp(x) { 1203 case xArgMmM32, xArgMmM64, xArgMm2M64: 1204 // There are only 8 MMX registers, so these ignore the REX.X bit. 
1205 index &= 7 1206 case xArgRM8: 1207 if rex != 0 && index >= 4 { 1208 rexUsed |= PrefixREX 1209 index -= 4 1210 base = SPB 1211 } 1212 case xArgYmm2M256: 1213 if vex == 0xC4 && inst.Prefix[vexIndex+1]&0x40 == 0x40 { 1214 index += 8 1215 } 1216 } 1217 inst.Args[narg] = base + index 1218 } 1219 narg++ 1220 1221 case xArgMm2: // register only; TODO(rsc): Handle with tag modrm_regonly tag 1222 if haveMem { 1223 inst.Op = 0 1224 break Decode 1225 } 1226 inst.Args[narg] = baseReg[x] + Reg(rm&7) 1227 narg++ 1228 1229 case xArgXmm2: // register only; TODO(rsc): Handle with tag modrm_regonly tag 1230 if haveMem { 1231 inst.Op = 0 1232 break Decode 1233 } 1234 inst.Args[narg] = baseReg[x] + Reg(rm) 1235 narg++ 1236 1237 case xArgRel8: 1238 inst.PCRelOff = immcpos 1239 inst.PCRel = 1 1240 inst.Args[narg] = Rel(int8(immc)) 1241 narg++ 1242 1243 case xArgRel16: 1244 inst.PCRelOff = immcpos 1245 inst.PCRel = 2 1246 inst.Args[narg] = Rel(int16(immc)) 1247 narg++ 1248 1249 case xArgRel32: 1250 inst.PCRelOff = immcpos 1251 inst.PCRel = 4 1252 inst.Args[narg] = Rel(int32(immc)) 1253 narg++ 1254 } 1255 } 1256 1257 if inst.Op == 0 { 1258 // Invalid instruction. 1259 if nprefix > 0 { 1260 return instPrefix(src[0], mode) // invalid instruction 1261 } 1262 return Inst{Len: pos}, ErrUnrecognized 1263 } 1264 1265 // Matched! Hooray! 1266 1267 // 90 decodes as XCHG EAX, EAX but is NOP. 1268 // 66 90 decodes as XCHG AX, AX and is NOP too. 1269 // 48 90 decodes as XCHG RAX, RAX and is NOP too. 1270 // 43 90 decodes as XCHG R8D, EAX and is *not* NOP. 1271 // F3 90 decodes as REP XCHG EAX, EAX but is PAUSE. 1272 // It's all too special to handle in the decoding tables, at least for now. 
1273 if inst.Op == XCHG && inst.Opcode>>24 == 0x90 { 1274 if inst.Args[0] == RAX || inst.Args[0] == EAX || inst.Args[0] == AX { 1275 inst.Op = NOP 1276 if dataSizeIndex >= 0 { 1277 inst.Prefix[dataSizeIndex] &^= PrefixImplicit 1278 } 1279 inst.Args[0] = nil 1280 inst.Args[1] = nil 1281 } 1282 if repIndex >= 0 && inst.Prefix[repIndex] == 0xF3 { 1283 inst.Prefix[repIndex] |= PrefixImplicit 1284 inst.Op = PAUSE 1285 inst.Args[0] = nil 1286 inst.Args[1] = nil 1287 } else if gnuCompat { 1288 for i := nprefix - 1; i >= 0; i-- { 1289 if inst.Prefix[i]&0xFF == 0xF3 { 1290 inst.Prefix[i] |= PrefixImplicit 1291 inst.Op = PAUSE 1292 inst.Args[0] = nil 1293 inst.Args[1] = nil 1294 break 1295 } 1296 } 1297 } 1298 } 1299 1300 // defaultSeg returns the default segment for an implicit 1301 // memory reference: the final override if present, or else DS. 1302 defaultSeg := func() Reg { 1303 if segIndex >= 0 { 1304 inst.Prefix[segIndex] |= PrefixImplicit 1305 return prefixToSegment(inst.Prefix[segIndex]) 1306 } 1307 return DS 1308 } 1309 1310 // Add implicit arguments not present in the tables. 1311 // Normally we shy away from making implicit arguments explicit, 1312 // following the Intel manuals, but adding the arguments seems 1313 // the best way to express the effect of the segment override prefixes. 1314 // TODO(rsc): Perhaps add these to the tables and 1315 // create bytecode instructions for them. 
1316 usedAddrSize := false 1317 switch inst.Op { 1318 case INSB, INSW, INSD: 1319 inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX} 1320 inst.Args[1] = DX 1321 usedAddrSize = true 1322 1323 case OUTSB, OUTSW, OUTSD: 1324 inst.Args[0] = DX 1325 inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX} 1326 usedAddrSize = true 1327 1328 case MOVSB, MOVSW, MOVSD, MOVSQ: 1329 inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX} 1330 inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX} 1331 usedAddrSize = true 1332 1333 case CMPSB, CMPSW, CMPSD, CMPSQ: 1334 inst.Args[0] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX} 1335 inst.Args[1] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX} 1336 usedAddrSize = true 1337 1338 case LODSB, LODSW, LODSD, LODSQ: 1339 switch inst.Op { 1340 case LODSB: 1341 inst.Args[0] = AL 1342 case LODSW: 1343 inst.Args[0] = AX 1344 case LODSD: 1345 inst.Args[0] = EAX 1346 case LODSQ: 1347 inst.Args[0] = RAX 1348 } 1349 inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX} 1350 usedAddrSize = true 1351 1352 case STOSB, STOSW, STOSD, STOSQ: 1353 inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX} 1354 switch inst.Op { 1355 case STOSB: 1356 inst.Args[1] = AL 1357 case STOSW: 1358 inst.Args[1] = AX 1359 case STOSD: 1360 inst.Args[1] = EAX 1361 case STOSQ: 1362 inst.Args[1] = RAX 1363 } 1364 usedAddrSize = true 1365 1366 case SCASB, SCASW, SCASD, SCASQ: 1367 inst.Args[1] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX} 1368 switch inst.Op { 1369 case SCASB: 1370 inst.Args[0] = AL 1371 case SCASW: 1372 inst.Args[0] = AX 1373 case SCASD: 1374 inst.Args[0] = EAX 1375 case SCASQ: 1376 inst.Args[0] = RAX 1377 } 1378 usedAddrSize = true 1379 1380 case XLATB: 1381 inst.Args[0] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + BX - AX} 1382 
usedAddrSize = true 1383 } 1384 1385 // If we used the address size annotation to construct the 1386 // argument list, mark that prefix as implicit: it doesn't need 1387 // to be shown when printing the instruction. 1388 if haveMem || usedAddrSize { 1389 if addrSizeIndex >= 0 { 1390 inst.Prefix[addrSizeIndex] |= PrefixImplicit 1391 } 1392 } 1393 1394 // Similarly, if there's some memory operand, the segment 1395 // will be shown there and doesn't need to be shown as an 1396 // explicit prefix. 1397 if haveMem { 1398 if segIndex >= 0 { 1399 inst.Prefix[segIndex] |= PrefixImplicit 1400 } 1401 } 1402 1403 // Branch predict prefixes are overloaded segment prefixes, 1404 // since segment prefixes don't make sense on conditional jumps. 1405 // Rewrite final instance to prediction prefix. 1406 // The set of instructions to which the prefixes apply (other then the 1407 // Jcc conditional jumps) is not 100% clear from the manuals, but 1408 // the disassemblers seem to agree about the LOOP and JCXZ instructions, 1409 // so we'll follow along. 1410 // TODO(rsc): Perhaps this instruction class should be derived from the CSV. 1411 if isCondJmp[inst.Op] || isLoop[inst.Op] || inst.Op == JCXZ || inst.Op == JECXZ || inst.Op == JRCXZ { 1412 PredictLoop: 1413 for i := nprefix - 1; i >= 0; i-- { 1414 p := inst.Prefix[i] 1415 switch p & 0xFF { 1416 case PrefixCS: 1417 inst.Prefix[i] = PrefixPN 1418 break PredictLoop 1419 case PrefixDS: 1420 inst.Prefix[i] = PrefixPT 1421 break PredictLoop 1422 } 1423 } 1424 } 1425 1426 // The BND prefix is part of the Intel Memory Protection Extensions (MPX). 1427 // A REPN applied to certain control transfers is a BND prefix to bound 1428 // the range of possible destinations. There's surprisingly little documentation 1429 // about this, so we just do what libopcodes and xed agree on. 1430 // In particular, it's unclear why a REPN applied to LOOP or JCXZ instructions 1431 // does not turn into a BND. 
1432 // TODO(rsc): Perhaps this instruction class should be derived from the CSV. 1433 if isCondJmp[inst.Op] || inst.Op == JMP || inst.Op == CALL || inst.Op == RET { 1434 for i := nprefix - 1; i >= 0; i-- { 1435 p := inst.Prefix[i] 1436 if p&^PrefixIgnored == PrefixREPN { 1437 inst.Prefix[i] = PrefixBND 1438 break 1439 } 1440 } 1441 } 1442 1443 // The LOCK prefix only applies to certain instructions, and then only 1444 // to instances of the instruction with a memory destination. 1445 // Other uses of LOCK are invalid and cause a processor exception, 1446 // in contrast to the "just ignore it" spirit applied to all other prefixes. 1447 // Mark invalid lock prefixes. 1448 hasLock := false 1449 if lockIndex >= 0 && inst.Prefix[lockIndex]&PrefixImplicit == 0 { 1450 switch inst.Op { 1451 // TODO(rsc): Perhaps this instruction class should be derived from the CSV. 1452 case ADD, ADC, AND, BTC, BTR, BTS, CMPXCHG, CMPXCHG8B, CMPXCHG16B, DEC, INC, NEG, NOT, OR, SBB, SUB, XOR, XADD, XCHG: 1453 if isMem(inst.Args[0]) { 1454 hasLock = true 1455 break 1456 } 1457 fallthrough 1458 default: 1459 inst.Prefix[lockIndex] |= PrefixInvalid 1460 } 1461 } 1462 1463 // In certain cases, all of which require a memory destination, 1464 // the REPN and REP prefixes are interpreted as XACQUIRE and XRELEASE 1465 // from the Intel Transactional Synchroniation Extensions (TSX). 1466 // 1467 // The specific rules are: 1468 // (1) Any instruction with a valid LOCK prefix can have XACQUIRE or XRELEASE. 1469 // (2) Any XCHG, which always has an implicit LOCK, can have XACQUIRE or XRELEASE. 1470 // (3) Any 0x88-, 0x89-, 0xC6-, or 0xC7-opcode MOV can have XRELEASE. 
1471 if isMem(inst.Args[0]) { 1472 if inst.Op == XCHG { 1473 hasLock = true 1474 } 1475 1476 for i := len(inst.Prefix) - 1; i >= 0; i-- { 1477 p := inst.Prefix[i] &^ PrefixIgnored 1478 switch p { 1479 case PrefixREPN: 1480 if hasLock { 1481 inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXACQUIRE 1482 } 1483 1484 case PrefixREP: 1485 if hasLock { 1486 inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXRELEASE 1487 } 1488 1489 if inst.Op == MOV { 1490 op := (inst.Opcode >> 24) &^ 1 1491 if op == 0x88 || op == 0xC6 { 1492 inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXRELEASE 1493 } 1494 } 1495 } 1496 } 1497 } 1498 1499 // If REP is used on a non-REP-able instruction, mark the prefix as ignored. 1500 if repIndex >= 0 { 1501 switch inst.Prefix[repIndex] { 1502 case PrefixREP, PrefixREPN: 1503 switch inst.Op { 1504 // According to the manuals, the REP/REPE prefix applies to all of these, 1505 // while the REPN applies only to some of them. However, both libopcodes 1506 // and xed show both prefixes explicitly for all instructions, so we do the same. 1507 // TODO(rsc): Perhaps this instruction class should be derived from the CSV. 1508 case INSB, INSW, INSD, 1509 MOVSB, MOVSW, MOVSD, MOVSQ, 1510 OUTSB, OUTSW, OUTSD, 1511 LODSB, LODSW, LODSD, LODSQ, 1512 CMPSB, CMPSW, CMPSD, CMPSQ, 1513 SCASB, SCASW, SCASD, SCASQ, 1514 STOSB, STOSW, STOSD, STOSQ: 1515 // ok 1516 default: 1517 inst.Prefix[repIndex] |= PrefixIgnored 1518 } 1519 } 1520 } 1521 1522 // If REX was present, mark implicit if all the 1 bits were consumed. 1523 if rexIndex >= 0 { 1524 if rexUsed != 0 { 1525 rexUsed |= PrefixREX 1526 } 1527 if rex&^rexUsed == 0 { 1528 inst.Prefix[rexIndex] |= PrefixImplicit 1529 } 1530 } 1531 1532 inst.DataSize = dataMode 1533 inst.AddrSize = addrMode 1534 inst.Mode = mode 1535 inst.Len = pos 1536 return inst, nil 1537 } 1538 1539 var errInternal = errors.New("internal error") 1540 1541 // addr16 records the eight 16-bit addressing modes. 
1542 var addr16 = [8]Mem{ 1543 {Base: BX, Scale: 1, Index: SI}, 1544 {Base: BX, Scale: 1, Index: DI}, 1545 {Base: BP, Scale: 1, Index: SI}, 1546 {Base: BP, Scale: 1, Index: DI}, 1547 {Base: SI}, 1548 {Base: DI}, 1549 {Base: BP}, 1550 {Base: BX}, 1551 } 1552 1553 // baseRegForBits returns the base register for a given register size in bits. 1554 func baseRegForBits(bits int) Reg { 1555 switch bits { 1556 case 8: 1557 return AL 1558 case 16: 1559 return AX 1560 case 32: 1561 return EAX 1562 case 64: 1563 return RAX 1564 } 1565 return 0 1566 } 1567 1568 // baseReg records the base register for argument types that specify 1569 // a range of registers indexed by op, regop, or rm. 1570 var baseReg = [...]Reg{ 1571 xArgDR0dashDR7: DR0, 1572 xArgMm1: M0, 1573 xArgMm2: M0, 1574 xArgMm2M64: M0, 1575 xArgMm: M0, 1576 xArgMmM32: M0, 1577 xArgMmM64: M0, 1578 xArgR16: AX, 1579 xArgR16op: AX, 1580 xArgR32: EAX, 1581 xArgR32M16: EAX, 1582 xArgR32M8: EAX, 1583 xArgR32op: EAX, 1584 xArgR64: RAX, 1585 xArgR64M16: RAX, 1586 xArgR64op: RAX, 1587 xArgR8: AL, 1588 xArgR8op: AL, 1589 xArgRM16: AX, 1590 xArgRM32: EAX, 1591 xArgRM64: RAX, 1592 xArgRM8: AL, 1593 xArgRmf16: AX, 1594 xArgRmf32: EAX, 1595 xArgRmf64: RAX, 1596 xArgSTi: F0, 1597 xArgTR0dashTR7: TR0, 1598 xArgXmm1: X0, 1599 xArgYmm1: X0, 1600 xArgXmm2: X0, 1601 xArgXmm2M128: X0, 1602 xArgYmm2M256: X0, 1603 xArgXmm2M16: X0, 1604 xArgXmm2M32: X0, 1605 xArgXmm2M64: X0, 1606 xArgXmm: X0, 1607 xArgXmmM128: X0, 1608 xArgXmmM32: X0, 1609 xArgXmmM64: X0, 1610 } 1611 1612 // prefixToSegment returns the segment register 1613 // corresponding to a particular segment prefix. 
1614 func prefixToSegment(p Prefix) Reg { 1615 switch p &^ PrefixImplicit { 1616 case PrefixCS: 1617 return CS 1618 case PrefixDS: 1619 return DS 1620 case PrefixES: 1621 return ES 1622 case PrefixFS: 1623 return FS 1624 case PrefixGS: 1625 return GS 1626 case PrefixSS: 1627 return SS 1628 } 1629 return 0 1630 } 1631 1632 // fixedArg records the fixed arguments corresponding to the given bytecodes. 1633 var fixedArg = [...]Arg{ 1634 xArg1: Imm(1), 1635 xArg3: Imm(3), 1636 xArgAL: AL, 1637 xArgAX: AX, 1638 xArgDX: DX, 1639 xArgEAX: EAX, 1640 xArgEDX: EDX, 1641 xArgRAX: RAX, 1642 xArgRDX: RDX, 1643 xArgCL: CL, 1644 xArgCS: CS, 1645 xArgDS: DS, 1646 xArgES: ES, 1647 xArgFS: FS, 1648 xArgGS: GS, 1649 xArgSS: SS, 1650 xArgST: F0, 1651 xArgXMM0: X0, 1652 } 1653 1654 // memBytes records the size of the memory pointed at 1655 // by a memory argument of the given form. 1656 var memBytes = [...]int8{ 1657 xArgM128: 128 / 8, 1658 xArgM256: 256 / 8, 1659 xArgM16: 16 / 8, 1660 xArgM16and16: (16 + 16) / 8, 1661 xArgM16colon16: (16 + 16) / 8, 1662 xArgM16colon32: (16 + 32) / 8, 1663 xArgM16int: 16 / 8, 1664 xArgM2byte: 2, 1665 xArgM32: 32 / 8, 1666 xArgM32and32: (32 + 32) / 8, 1667 xArgM32fp: 32 / 8, 1668 xArgM32int: 32 / 8, 1669 xArgM64: 64 / 8, 1670 xArgM64fp: 64 / 8, 1671 xArgM64int: 64 / 8, 1672 xArgMm2M64: 64 / 8, 1673 xArgMmM32: 32 / 8, 1674 xArgMmM64: 64 / 8, 1675 xArgMoffs16: 16 / 8, 1676 xArgMoffs32: 32 / 8, 1677 xArgMoffs64: 64 / 8, 1678 xArgMoffs8: 8 / 8, 1679 xArgR32M16: 16 / 8, 1680 xArgR32M8: 8 / 8, 1681 xArgR64M16: 16 / 8, 1682 xArgRM16: 16 / 8, 1683 xArgRM32: 32 / 8, 1684 xArgRM64: 64 / 8, 1685 xArgRM8: 8 / 8, 1686 xArgXmm2M128: 128 / 8, 1687 xArgYmm2M256: 256 / 8, 1688 xArgXmm2M16: 16 / 8, 1689 xArgXmm2M32: 32 / 8, 1690 xArgXmm2M64: 64 / 8, 1691 xArgXmm: 128 / 8, 1692 xArgXmmM128: 128 / 8, 1693 xArgXmmM32: 32 / 8, 1694 xArgXmmM64: 64 / 8, 1695 } 1696 1697 // isCondJmp records the conditional jumps. 
1698 var isCondJmp = [maxOp + 1]bool{ 1699 JA: true, 1700 JAE: true, 1701 JB: true, 1702 JBE: true, 1703 JE: true, 1704 JG: true, 1705 JGE: true, 1706 JL: true, 1707 JLE: true, 1708 JNE: true, 1709 JNO: true, 1710 JNP: true, 1711 JNS: true, 1712 JO: true, 1713 JP: true, 1714 JS: true, 1715 } 1716 1717 // isLoop records the loop operators. 1718 var isLoop = [maxOp + 1]bool{ 1719 LOOP: true, 1720 LOOPE: true, 1721 LOOPNE: true, 1722 JECXZ: true, 1723 JRCXZ: true, 1724 }