asm6.go (149461B)
1 // Inferno utils/6l/span.c 2 // https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/span.c 3 // 4 // Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. 5 // Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) 6 // Portions Copyright © 1997-1999 Vita Nuova Limited 7 // Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) 8 // Portions Copyright © 2004,2006 Bruce Ellis 9 // Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) 10 // Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others 11 // Portions Copyright © 2009 The Go Authors. All rights reserved. 12 // 13 // Permission is hereby granted, free of charge, to any person obtaining a copy 14 // of this software and associated documentation files (the "Software"), to deal 15 // in the Software without restriction, including without limitation the rights 16 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 17 // copies of the Software, and to permit persons to whom the Software is 18 // furnished to do so, subject to the following conditions: 19 // 20 // The above copyright notice and this permission notice shall be included in 21 // all copies or substantial portions of the Software. 22 // 23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 29 // THE SOFTWARE. 
package x86

import (
	"github.com/twitchyliquid64/golang-asm/obj"
	"github.com/twitchyliquid64/golang-asm/objabi"
	"github.com/twitchyliquid64/golang-asm/sys"
	"encoding/binary"
	"fmt"
	"log"
	"strings"
)

// Package-level symbol references.
// NOTE(review): where these are initialized and used is outside this chunk.
var (
	plan9privates *obj.LSym
	deferreturn   *obj.LSym
)

// Instruction layout.

// Loop alignment constants:
// want to align loop entry to loopAlign-byte boundary,
// and willing to insert at most maxLoopPad bytes of NOP to do so.
// We define a loop entry as the target of a backward jump.
//
// gcc uses maxLoopPad = 10 for its 'generic x86-64' config,
// and it aligns all jump targets, not just backward jump targets.
//
// As of 6/1/2012, the effect of setting maxLoopPad = 10 here
// is very slight but negative, so the alignment is disabled by
// setting maxLoopPad = 0. The code is here for reference and
// for future experiments.
//
const (
	loopAlign  = 16
	maxLoopPad = 0
)

// Bit flags that are used to express jump target properties.
const (
	// branchBackwards marks targets that are located behind.
	// Used to express jumps to loop headers.
	branchBackwards = (1 << iota)
	// branchShort marks branches whose target is close,
	// with an offset in the -128..127 range.
	branchShort
	// branchLoopHead marks loop entry.
	// Used to insert padding for misaligned loops.
	branchLoopHead
)

// opBytes holds optab encoding bytes.
// Each ytab reserves a fixed amount of bytes in this array.
//
// The size should be the minimal number of bytes that
// are enough to hold the biggest optab op lines.
type opBytes [31]uint8

// Optab is one row of the instruction table (optab).
// as is the instruction, ytab lists the operand forms the instruction
// accepts, prefix is one of the P* constants that controls which prefix
// bytes are emitted, and op holds the opcode bytes that the ytab rows
// index into.
type Optab struct {
	as     obj.As
	ytab   []ytab
	prefix uint8
	op     opBytes
}

// movtab is a table row holding an instruction (as), three uint8 selector
// fields (ft, f3t, tt), a code byte and up to four op bytes.
// NOTE(review): the table built from movtab and the exact meaning of its
// fields are outside this chunk; confirm before relying on them.
type movtab struct {
	as   obj.As
	ft   uint8
	f3t  uint8
	tt   uint8
	code uint8
	op   [4]uint8
}

// Operand classes (Ytypes), used in the argList of every ytab row.
// Values are assigned by iota; Ymax is the number of classes and
// determines the size of ycover.
const (
	Yxxx = iota
	Ynone
	Yi0 // $0
	Yi1 // $1
	Yu2 // $x, x fits in uint2
	Yi8 // $x, x fits in int8
	Yu8 // $x, x fits in uint8
	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
	Ys32
	Yi32
	Yi64
	Yiauto
	Yal
	Ycl
	Yax
	Ycx
	Yrb
	Yrl
	Yrl32 // Yrl on 32-bit system
	Yrf
	Yf0
	Yrx
	Ymb
	Yml
	Ym
	Ybr
	Ycs
	Yss
	Yds
	Yes
	Yfs
	Ygs
	Ygdtr
	Yidtr
	Yldtr
	Ymsw
	Ytask
	Ycr0
	Ycr1
	Ycr2
	Ycr3
	Ycr4
	Ycr5
	Ycr6
	Ycr7
	Ycr8
	Ydr0
	Ydr1
	Ydr2
	Ydr3
	Ydr4
	Ydr5
	Ydr6
	Ydr7
	Ytr0
	Ytr1
	Ytr2
	Ytr3
	Ytr4
	Ytr5
	Ytr6
	Ytr7
	Ymr
	Ymm
	Yxr0          // X0 only. "<XMM0>" notation in Intel manual.
	YxrEvexMulti4 // [ X<n> - X<n+3> ]; multisource YxrEvex
	Yxr           // X0..X15
	YxrEvex       // X0..X31
	Yxm
	YxmEvex       // YxrEvex+Ym
	Yxvm          // VSIB vector array; vm32x/vm64x
	YxvmEvex      // Yxvm which permits High-16 X register as index.
	YyrEvexMulti4 // [ Y<n> - Y<n+3> ]; multisource YyrEvex
	Yyr           // Y0..Y15
	YyrEvex       // Y0..Y31
	Yym
	YymEvex   // YyrEvex+Ym
	Yyvm      // VSIB vector array; vm32y/vm64y
	YyvmEvex  // Yyvm which permits High-16 Y register as index.
	YzrMulti4 // [ Z<n> - Z<n+3> ]; multisource YzrEvex
	Yzr       // Z0..Z31
	Yzm       // Yzr+Ym
	Yzvm      // VSIB vector array; vm32z/vm64z
	Yk0       // K0
	Yknot0    // K1..K7; write mask
	Yk        // K0..K7; used for KOP
	Ykm       // Yk+Ym; used for KOP
	Ytls
	Ytextsize
	Yindir
	Ymax
)

// Encoding cases (Ztypes): the first field of every ytab row.
// Values are assigned by iota. The constants are laid out in three groups
// separated by blank lines: the legacy cases, the Zvex_* cases, and the
// Zevex_* cases bracketed by Zevex_first and Zevex_last.
const (
	Zxxx = iota
	Zlit
	Zlitm_r
	Zlitr_m
	Zlit_m_r
	Z_rp
	Zbr
	Zcall
	Zcallcon
	Zcallduff
	Zcallind
	Zcallindreg
	Zib_
	Zib_rp
	Zibo_m
	Zibo_m_xm
	Zil_
	Zil_rp
	Ziq_rp
	Zilo_m
	Zjmp
	Zjmpcon
	Zloop
	Zo_iw
	Zm_o
	Zm_r
	Z_m_r
	Zm2_r
	Zm_r_xm
	Zm_r_i_xm
	Zm_r_xm_nr
	Zr_m_xm_nr
	Zibm_r // mmx1,mmx2/mem64,imm8
	Zibr_m
	Zmb_r
	Zaut_r
	Zo_m
	Zo_m64
	Zpseudo
	Zr_m
	Zr_m_xm
	Zrp_
	Z_ib
	Z_il
	Zm_ibo
	Zm_ilo
	Zib_rr
	Zil_rr
	Zbyte

	Zvex_rm_v_r
	Zvex_rm_v_ro
	Zvex_r_v_rm
	Zvex_i_rm_vo
	Zvex_v_rm_r
	Zvex_i_rm_r
	Zvex_i_r_v
	Zvex_i_rm_v_r
	Zvex
	Zvex_rm_r_vo
	Zvex_i_r_rm
	Zvex_hr_rm_v_r

	Zevex_first
	Zevex_i_r_k_rm
	Zevex_i_r_rm
	Zevex_i_rm_k_r
	Zevex_i_rm_k_vo
	Zevex_i_rm_r
	Zevex_i_rm_v_k_r
	Zevex_i_rm_v_r
	Zevex_i_rm_vo
	Zevex_k_rmo
	Zevex_r_k_rm
	Zevex_r_v_k_rm
	Zevex_r_v_rm
	Zevex_rm_k_r
	Zevex_rm_v_k_r
	Zevex_rm_v_r
	Zevex_last

	Zmax
)

// Prefix selectors (P*) used as the third field of optab rows and, for some
// instructions, inside opBytes; followed by the REX bit masks (Rx*).
const (
	Px   = 0
	Px1  = 1    // symbolic; exact value doesn't matter
	P32  = 0x32 // 32-bit only
	Pe   = 0x66 // operand escape
	Pm   = 0x0f // 2byte opcode escape
	Pq   = 0xff // both escapes: 66 0f
	Pb   = 0xfe // byte operands
	Pf2  = 0xf2 // xmm escape 1: f2 0f
	Pf3  = 0xf3 // xmm escape 2: f3 0f
	Pef3 = 0xf5 // xmm escape 2 with 16-bit prefix: 66 f3 0f
	Pq3  = 0x67 // xmm escape 3: 66 48 0f
	Pq4  = 0x68 // xmm escape 4: 66 0F 38
	Pq4w = 0x69 // Pq4 with Rex.w 66 0F 38
	Pq5  = 0x6a // xmm escape 5: F3 0F 38
	Pq5w = 0x6b // Pq5 with Rex.w F3 0F 38
	Pfw  = 0xf4 // Pf3 with Rex.w: f3 48 0f
	Pw   = 0x48 // Rex.w
	Pw8  = 0x90 // symbolic; exact value doesn't matter
	Py   = 0x80 // defaults to 64-bit mode
	Py1  = 0x81 // symbolic; exact value doesn't matter
	Py3  = 0x83 // symbolic; exact value doesn't matter
	Pavx = 0x84 // symbolic; exact value doesn't matter

	RxrEvex = 1 << 4 // AVX512 extension to REX.R/VEX.R
	Rxw     = 1 << 3 // =1, 64-bit operand size
	Rxr     = 1 << 2 // extend modrm reg
	Rxx     = 1 << 1 // extend sib index
	Rxb     = 1 << 0 // extend modrm r/m, sib base, or opcode reg
)

const (
	// Encoding for VEX prefix in tables.
	// The P, L, and W fields are chosen to match
	// their eventual locations in the VEX prefix bytes.

	// Using spare bit to make leading [E]VEX encoding byte different from
	// 0x0f even if all other VEX fields are 0.
	avxEscape = 1 << 6

	// P field - 2 bits
	vex66 = 1 << 0
	vexF3 = 2 << 0
	vexF2 = 3 << 0
	// L field - 1 bit
	vexLZ  = 0 << 2
	vexLIG = 0 << 2
	vex128 = 0 << 2
	vex256 = 1 << 2
	// W field - 1 bit
	vexWIG = 0 << 7
	vexW0  = 0 << 7
	vexW1  = 1 << 7
	// M field - 5 bits, but mostly reserved; we can store up to 3
	vex0F   = 1 << 3
	vex0F38 = 2 << 3
	vex0F3A = 3 << 3
)

// ycover[from*Ymax+to] is nonzero when an operand of class from also counts
// as the more general class to. It is set up in instinit, which is outside
// this chunk.
var ycover [Ymax * Ymax]uint8

// NOTE(review): reg and regrex are sized by MAXREG and populated outside
// this chunk; presumably per-register encoding tables - confirm.
var reg [MAXREG]int

var regrex [MAXREG + 1]int

// Operand-form tables (ytabs).
//
// Each row is {Ztype, zoffset, argList{operand Ytypes...}}: the Ztype selects
// the encoding case, zoffset is how far the opcode-byte pointer advances past
// this row when the row does not match, and the argList gives the operand
// classes the row accepts. Row order matters because the opcode bytes of an
// optab entry are laid out in the same order as the rows of its ytab.

var ynone = []ytab{
	{Zlit, 1, argList{}},
}

var ytext = []ytab{
	{Zpseudo, 0, argList{Ymb, Ytextsize}},
	{Zpseudo, 1, argList{Ymb, Yi32, Ytextsize}},
}

var ynop = []ytab{
	{Zpseudo, 0, argList{}},
	{Zpseudo, 0, argList{Yiauto}},
	{Zpseudo, 0, argList{Yml}},
	{Zpseudo, 0, argList{Yrf}},
	{Zpseudo, 0, argList{Yxr}},
	{Zpseudo, 0, argList{Yiauto}},
	{Zpseudo, 0, argList{Yml}},
	{Zpseudo, 0, argList{Yrf}},
	{Zpseudo, 1, argList{Yxr}},
}

var yfuncdata = []ytab{
	{Zpseudo, 0, argList{Yi32, Ym}},
}

var ypcdata = []ytab{
	{Zpseudo, 0, argList{Yi32, Yi32}},
}

var yxorb = []ytab{
	{Zib_, 1, argList{Yi32, Yal}},
	{Zibo_m, 2, argList{Yi32, Ymb}},
	{Zr_m, 1, argList{Yrb, Ymb}},
	{Zm_r, 1, argList{Ymb, Yrb}},
}

var yaddl = []ytab{
	{Zibo_m, 2, argList{Yi8, Yml}},
	{Zil_, 1, argList{Yi32, Yax}},
	{Zilo_m, 2, argList{Yi32, Yml}},
	{Zr_m, 1, argList{Yrl, Yml}},
	{Zm_r, 1, argList{Yml, Yrl}},
}

var yincl = []ytab{
	{Z_rp, 1, argList{Yrl}},
	{Zo_m, 2, argList{Yml}},
}

var yincq = []ytab{
	{Zo_m, 2, argList{Yml}},
}

var ycmpb = []ytab{
	{Z_ib, 1, argList{Yal, Yi32}},
	{Zm_ibo, 2, argList{Ymb, Yi32}},
	{Zm_r, 1, argList{Ymb, Yrb}},
	{Zr_m, 1, argList{Yrb, Ymb}},
}

var ycmpl = []ytab{
	{Zm_ibo, 2, argList{Yml, Yi8}},
	{Z_il, 1, argList{Yax, Yi32}},
	{Zm_ilo, 2, argList{Yml, Yi32}},
	{Zm_r, 1, argList{Yml, Yrl}},
	{Zr_m, 1, argList{Yrl, Yml}},
}

var yshb = []ytab{
	{Zo_m, 2, argList{Yi1, Ymb}},
	{Zibo_m, 2, argList{Yu8, Ymb}},
	{Zo_m, 2, argList{Ycx, Ymb}},
}

var yshl = []ytab{
	{Zo_m, 2, argList{Yi1, Yml}},
	{Zibo_m, 2, argList{Yu8, Yml}},
	{Zo_m, 2, argList{Ycl, Yml}},
	{Zo_m, 2, argList{Ycx, Yml}},
}

var ytestl = []ytab{
	{Zil_, 1, argList{Yi32, Yax}},
	{Zilo_m, 2, argList{Yi32, Yml}},
	{Zr_m, 1, argList{Yrl, Yml}},
	{Zm_r, 1, argList{Yml, Yrl}},
}

var ymovb = []ytab{
	{Zr_m, 1, argList{Yrb, Ymb}},
	{Zm_r, 1, argList{Ymb, Yrb}},
	{Zib_rp, 1, argList{Yi32, Yrb}},
	{Zibo_m, 2, argList{Yi32, Ymb}},
}

var ybtl = []ytab{
	{Zibo_m, 2, argList{Yi8, Yml}},
	{Zr_m, 1, argList{Yrl, Yml}},
}

var ymovw = []ytab{
	{Zr_m, 1, argList{Yrl, Yml}},
	{Zm_r, 1, argList{Yml, Yrl}},
	{Zil_rp, 1, argList{Yi32, Yrl}},
	{Zilo_m, 2, argList{Yi32, Yml}},
	{Zaut_r, 2, argList{Yiauto, Yrl}},
}

var ymovl = []ytab{
	{Zr_m, 1, argList{Yrl, Yml}},
	{Zm_r, 1, argList{Yml, Yrl}},
	{Zil_rp, 1, argList{Yi32, Yrl}},
	{Zilo_m, 2, argList{Yi32, Yml}},
	{Zm_r_xm, 1, argList{Yml, Ymr}}, // MMX MOVD
	{Zr_m_xm, 1, argList{Ymr, Yml}}, // MMX MOVD
	{Zm_r_xm, 2, argList{Yml, Yxr}}, // XMM MOVD (32 bit)
	{Zr_m_xm, 2, argList{Yxr, Yml}}, // XMM MOVD (32 bit)
	{Zaut_r, 2, argList{Yiauto, Yrl}},
}

var yret = []ytab{
	{Zo_iw, 1, argList{}},
	{Zo_iw, 1, argList{Yi32}},
}

var ymovq = []ytab{
	// valid in 32-bit mode
	{Zm_r_xm_nr, 1, argList{Ym, Ymr}},  // 0x6f MMX MOVQ (shorter encoding)
	{Zr_m_xm_nr, 1, argList{Ymr, Ym}},  // 0x7f MMX MOVQ
	{Zm_r_xm_nr, 2, argList{Yxr, Ymr}}, // Pf2, 0xd6 MOVDQ2Q
	{Zm_r_xm_nr, 2, argList{Yxm, Yxr}}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
	{Zr_m_xm_nr, 2, argList{Yxr, Yxm}}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64

	// valid only in 64-bit mode, usually with 64-bit prefix
	{Zr_m, 1, argList{Yrl, Yml}},      // 0x89
	{Zm_r, 1, argList{Yml, Yrl}},      // 0x8b
	{Zilo_m, 2, argList{Ys32, Yrl}},   // 32 bit signed 0xc7,(0)
	{Ziq_rp, 1, argList{Yi64, Yrl}},   // 0xb8 -- 32/64 bit immediate
	{Zilo_m, 2, argList{Yi32, Yml}},   // 0xc7,(0)
	{Zm_r_xm, 1, argList{Ymm, Ymr}},   // 0x6e MMX MOVD
	{Zr_m_xm, 1, argList{Ymr, Ymm}},   // 0x7e MMX MOVD
	{Zm_r_xm, 2, argList{Yml, Yxr}},   // Pe, 0x6e MOVD xmm load
	{Zr_m_xm, 2, argList{Yxr, Yml}},   // Pe, 0x7e MOVD xmm store
	{Zaut_r, 1, argList{Yiauto, Yrl}}, // 0 built-in LEAQ
}

var ymovbe = []ytab{
	{Zlitm_r, 3, argList{Ym, Yrl}},
	{Zlitr_m, 3, argList{Yrl, Ym}},
}

var ym_rl = []ytab{
	{Zm_r, 1, argList{Ym, Yrl}},
}

var yrl_m = []ytab{
	{Zr_m, 1, argList{Yrl, Ym}},
}

var ymb_rl = []ytab{
	{Zmb_r, 1, argList{Ymb, Yrl}},
}

var yml_rl = []ytab{
	{Zm_r, 1, argList{Yml, Yrl}},
}

var yrl_ml = []ytab{
	{Zr_m, 1, argList{Yrl, Yml}},
}

var yml_mb = []ytab{
	{Zr_m, 1, argList{Yrb, Ymb}},
	{Zm_r, 1, argList{Ymb, Yrb}},
}

var yrb_mb = []ytab{
	{Zr_m, 1, argList{Yrb, Ymb}},
}

var yxchg = []ytab{
	{Z_rp, 1, argList{Yax, Yrl}},
	{Zrp_, 1, argList{Yrl, Yax}},
	{Zr_m, 1, argList{Yrl, Yml}},
	{Zm_r, 1, argList{Yml, Yrl}},
}

var ydivl = []ytab{
	{Zm_o, 2, argList{Yml}},
}

var ydivb = []ytab{
	{Zm_o, 2, argList{Ymb}},
}

var yimul = []ytab{
	{Zm_o, 2, argList{Yml}},
	{Zib_rr, 1, argList{Yi8, Yrl}},
	{Zil_rr, 1, argList{Yi32, Yrl}},
	{Zm_r, 2, argList{Yml, Yrl}},
}

var yimul3 = []ytab{
	{Zibm_r, 2, argList{Yi8, Yml, Yrl}},
	{Zibm_r, 2, argList{Yi32, Yml, Yrl}},
}

var ybyte = []ytab{
	{Zbyte, 1, argList{Yi64}},
}

var yin = []ytab{
	{Zib_, 1, argList{Yi32}},
	{Zlit, 1, argList{}},
}

var yint = []ytab{
	{Zib_, 1, argList{Yi32}},
}

var ypushl = []ytab{
	{Zrp_, 1, argList{Yrl}},
	{Zm_o, 2, argList{Ym}},
	{Zib_, 1, argList{Yi8}},
	{Zil_, 1, argList{Yi32}},
}

var ypopl = []ytab{
	{Z_rp, 1, argList{Yrl}},
	{Zo_m, 2, argList{Ym}},
}

var ywrfsbase = []ytab{
	{Zm_o, 2, argList{Yrl}},
}

var yrdrand = []ytab{
	{Zo_m, 2, argList{Yrl}},
}

var yclflush = []ytab{
	{Zo_m, 2, argList{Ym}},
}

var ybswap = []ytab{
	{Z_rp, 2, argList{Yrl}},
}

var yscond = []ytab{
	{Zo_m, 2, argList{Ymb}},
}

var yjcond = []ytab{
	{Zbr, 0, argList{Ybr}},
	{Zbr, 0, argList{Yi0, Ybr}},
	{Zbr, 1, argList{Yi1, Ybr}},
}

var yloop = []ytab{
	{Zloop, 1, argList{Ybr}},
}

var ycall = []ytab{
	{Zcallindreg, 0, argList{Yml}},
	{Zcallindreg, 2, argList{Yrx, Yrx}},
	{Zcallind, 2, argList{Yindir}},
	{Zcall, 0, argList{Ybr}},
	{Zcallcon, 1, argList{Yi32}},
}

var yduff = []ytab{
	{Zcallduff, 1, argList{Yi32}},
}

var yjmp = []ytab{
	{Zo_m64, 2, argList{Yml}},
	{Zjmp, 0, argList{Ybr}},
	{Zjmpcon, 1, argList{Yi32}},
}

var yfmvd = []ytab{
	{Zm_o, 2, argList{Ym, Yf0}},
	{Zo_m, 2, argList{Yf0, Ym}},
	{Zm_o, 2, argList{Yrf, Yf0}},
	{Zo_m, 2, argList{Yf0, Yrf}},
}

var yfmvdp = []ytab{
	{Zo_m, 2, argList{Yf0, Ym}},
	{Zo_m, 2, argList{Yf0, Yrf}},
}

var yfmvf = []ytab{
	{Zm_o, 2, argList{Ym, Yf0}},
	{Zo_m, 2, argList{Yf0, Ym}},
}

var yfmvx = []ytab{
	{Zm_o, 2, argList{Ym, Yf0}},
}

var yfmvp = []ytab{
	{Zo_m, 2, argList{Yf0, Ym}},
}

var yfcmv = []ytab{
	{Zm_o, 2, argList{Yrf, Yf0}},
}

var yfadd = []ytab{
	{Zm_o, 2, argList{Ym, Yf0}},
	{Zm_o, 2, argList{Yrf, Yf0}},
	{Zo_m, 2, argList{Yf0, Yrf}},
}

var yfxch = []ytab{
	{Zo_m, 2, argList{Yf0, Yrf}},
	{Zm_o, 2, argList{Yrf, Yf0}},
}

var ycompp = []ytab{
	{Zo_m, 2, argList{Yf0, Yrf}}, // botch is really f0,f1
}

var ystsw = []ytab{
	{Zo_m, 2, argList{Ym}},
	{Zlit, 1, argList{Yax}},
}

var ysvrs_mo = []ytab{
	{Zm_o, 2, argList{Ym}},
}

// unaryDst version of "ysvrs_mo".
var ysvrs_om = []ytab{
	{Zo_m, 2, argList{Ym}},
}

var ymm = []ytab{
	{Zm_r_xm, 1, argList{Ymm, Ymr}},
	{Zm_r_xm, 2, argList{Yxm, Yxr}},
}

var yxm = []ytab{
	{Zm_r_xm, 1, argList{Yxm, Yxr}},
}

var yxm_q4 = []ytab{
	{Zm_r, 1, argList{Yxm, Yxr}},
}

var yxcvm1 = []ytab{
	{Zm_r_xm, 2, argList{Yxm, Yxr}},
	{Zm_r_xm, 2, argList{Yxm, Ymr}},
}

var yxcvm2 = []ytab{
	{Zm_r_xm, 2, argList{Yxm, Yxr}},
	{Zm_r_xm, 2, argList{Ymm, Yxr}},
}

var yxr = []ytab{
	{Zm_r_xm, 1, argList{Yxr, Yxr}},
}

var yxr_ml = []ytab{
	{Zr_m_xm, 1, argList{Yxr, Yml}},
}

var ymr = []ytab{
	{Zm_r, 1, argList{Ymr, Ymr}},
}

var ymr_ml = []ytab{
	{Zr_m_xm, 1, argList{Ymr, Yml}},
}

var yxcmpi = []ytab{
	{Zm_r_i_xm, 2, argList{Yxm, Yxr, Yi8}},
}

var yxmov = []ytab{
	{Zm_r_xm, 1, argList{Yxm, Yxr}},
	{Zr_m_xm, 1, argList{Yxr, Yxm}},
}

var yxcvfl = []ytab{
	{Zm_r_xm, 1, argList{Yxm, Yrl}},
}

var yxcvlf = []ytab{
	{Zm_r_xm, 1, argList{Yml, Yxr}},
}

var yxcvfq = []ytab{
	{Zm_r_xm, 2, argList{Yxm, Yrl}},
}

var yxcvqf = []ytab{
	{Zm_r_xm, 2, argList{Yml, Yxr}},
}

var yps = []ytab{
	{Zm_r_xm, 1, argList{Ymm, Ymr}},
	{Zibo_m_xm, 2, argList{Yi8, Ymr}},
	{Zm_r_xm, 2, argList{Yxm, Yxr}},
	{Zibo_m_xm, 3, argList{Yi8, Yxr}},
}

var yxrrl = []ytab{
	{Zm_r, 1, argList{Yxr, Yrl}},
}

var ymrxr = []ytab{
	{Zm_r, 1, argList{Ymr, Yxr}},
	{Zm_r_xm, 1, argList{Yxm, Yxr}},
}

var ymshuf = []ytab{
	{Zibm_r, 2, argList{Yi8, Ymm, Ymr}},
}

var ymshufb = []ytab{
	{Zm2_r, 2, argList{Yxm, Yxr}},
}

// It should never have more than 1 entry,
// because some optab entries use opcode sequences that
// are longer than 2 bytes (zoffset=2 here),
// ROUNDPD and ROUNDPS and recently added BLENDPD,
// to name a few.
var yxshuf = []ytab{
	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
}

var yextrw = []ytab{
	{Zibm_r, 2, argList{Yu8, Yxr, Yrl}},
	{Zibr_m, 2, argList{Yu8, Yxr, Yml}},
}

var yextr = []ytab{
	{Zibr_m, 3, argList{Yu8, Yxr, Ymm}},
}

var yinsrw = []ytab{
	{Zibm_r, 2, argList{Yu8, Yml, Yxr}},
}

var yinsr = []ytab{
	{Zibm_r, 3, argList{Yu8, Ymm, Yxr}},
}

var ypsdq = []ytab{
	{Zibo_m, 2, argList{Yi8, Yxr}},
}

var ymskb = []ytab{
	{Zm_r_xm, 2, argList{Yxr, Yrl}},
	{Zm_r_xm, 1, argList{Ymr, Yrl}},
}

var ycrc32l = []ytab{
	{Zlitm_r, 0, argList{Yml, Yrl}},
}

var ycrc32b = []ytab{
	{Zlitm_r, 0, argList{Ymb, Yrl}},
}

var yprefetch = []ytab{
	{Zm_o, 2, argList{Ym}},
}

var yaes = []ytab{
	{Zlitm_r, 2, argList{Yxm, Yxr}},
}

var yxbegin = []ytab{
	{Zjmp, 1, argList{Ybr}},
}

var yxabort = []ytab{
	{Zib_, 1, argList{Yu8}},
}

var ylddqu = []ytab{
	{Zm_r, 1, argList{Ym, Yxr}},
}

var ypalignr = []ytab{
	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
}

var ysha256rnds2 = []ytab{
	{Zlit_m_r, 0, argList{Yxr0, Yxm, Yxr}},
}

var yblendvpd = []ytab{
	{Z_m_r, 1, argList{Yxr0, Yxm, Yxr}},
}

var ymmxmm0f38 = []ytab{
	{Zlitm_r, 3, argList{Ymm, Ymr}},
	{Zlitm_r, 5, argList{Yxm, Yxr}},
}

var yextractps = []ytab{
	{Zibr_m, 2, argList{Yu2, Yxr, Yml}},
}

var ysha1rnds4 = []ytab{
	{Zibm_r, 2, argList{Yu2, Yxm, Yxr}},
}

// You are doasm, holding in your hand a *obj.Prog with p.As set to, say,
// ACRC32, and p.From and p.To as operands (obj.Addr). The linker scans optab
// to find the entry with the given p.As and then looks through the ytable for
// that instruction (the second field in the optab struct) for a line whose
// argList matches the Ytypes of the operands (such as p.From and p.To). The
// function oclass computes the specific Ytype of an operand and then the set
// of more general Ytypes that it satisfies is implied by the ycover table, set
// up in instinit. For example, oclass distinguishes the constants 0 and 1
// from the more general 8-bit constants, but instinit says
//
//	ycover[Yi0*Ymax+Ys32] = 1
//	ycover[Yi1*Ymax+Ys32] = 1
//	ycover[Yi8*Ymax+Ys32] = 1
//
// which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
// if that's what an instruction can handle.
//
// In parallel with the scan through the ytable for the appropriate line, there
// is a z pointer that starts out pointing at the strange magic byte list in
// the Optab struct. With each step past a non-matching ytable line, z
// advances by the second entry in the line (the zoffset). When a matching
// line is found, that z pointer has the extra data to use in laying down the
// instruction bytes. The actual bytes laid down are a function of the first
// entry in the line (that is, the Ztype) and the z bytes.
//
// For example, let's look at AADDL. The optab line says:
//	{AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
//
// and yaddl says
//	var yaddl = []ytab{
//		{Zibo_m, 2, argList{Yi8, Yml}},
//		{Zil_, 1, argList{Yi32, Yax}},
//		{Zilo_m, 2, argList{Yi32, Yml}},
//		{Zr_m, 1, argList{Yrl, Yml}},
//		{Zm_r, 1, argList{Yml, Yrl}},
//	}
//
// so there are 5 possible types of ADDL instruction that can be laid down, and
// possible states used to lay them down (Ztype and z pointer, assuming z
// points at opBytes{0x83, 00, 0x05,0x81, 00, 0x01, 0x03}) are:
//
//	Yi8, Yml -> Zibo_m, z (0x83, 00)
//	Yi32, Yax -> Zil_, z+2 (0x05)
//	Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
//	Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
//	Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
//
// The Pconstant in the optab line controls the prefix bytes to emit. That's
// relatively straightforward as this program goes.
//
// The switch on yt.zcase in doasm implements the various Z cases. Zibo_m, for
// example, is an opcode byte (z[0]) then an asmando (which is some kind of
// encoded addressing mode for the Yml arg), and then a single immediate byte.
// Zilo_m is the same but a long (32-bit) immediate.
920 var optab = 921 // as, ytab, andproto, opcode 922 [...]Optab{ 923 {obj.AXXX, nil, 0, opBytes{}}, 924 {AAAA, ynone, P32, opBytes{0x37}}, 925 {AAAD, ynone, P32, opBytes{0xd5, 0x0a}}, 926 {AAAM, ynone, P32, opBytes{0xd4, 0x0a}}, 927 {AAAS, ynone, P32, opBytes{0x3f}}, 928 {AADCB, yxorb, Pb, opBytes{0x14, 0x80, 02, 0x10, 0x12}}, 929 {AADCL, yaddl, Px, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}}, 930 {AADCQ, yaddl, Pw, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}}, 931 {AADCW, yaddl, Pe, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}}, 932 {AADCXL, yml_rl, Pq4, opBytes{0xf6}}, 933 {AADCXQ, yml_rl, Pq4w, opBytes{0xf6}}, 934 {AADDB, yxorb, Pb, opBytes{0x04, 0x80, 00, 0x00, 0x02}}, 935 {AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}}, 936 {AADDPD, yxm, Pq, opBytes{0x58}}, 937 {AADDPS, yxm, Pm, opBytes{0x58}}, 938 {AADDQ, yaddl, Pw, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}}, 939 {AADDSD, yxm, Pf2, opBytes{0x58}}, 940 {AADDSS, yxm, Pf3, opBytes{0x58}}, 941 {AADDSUBPD, yxm, Pq, opBytes{0xd0}}, 942 {AADDSUBPS, yxm, Pf2, opBytes{0xd0}}, 943 {AADDW, yaddl, Pe, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}}, 944 {AADOXL, yml_rl, Pq5, opBytes{0xf6}}, 945 {AADOXQ, yml_rl, Pq5w, opBytes{0xf6}}, 946 {AADJSP, nil, 0, opBytes{}}, 947 {AANDB, yxorb, Pb, opBytes{0x24, 0x80, 04, 0x20, 0x22}}, 948 {AANDL, yaddl, Px, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}}, 949 {AANDNPD, yxm, Pq, opBytes{0x55}}, 950 {AANDNPS, yxm, Pm, opBytes{0x55}}, 951 {AANDPD, yxm, Pq, opBytes{0x54}}, 952 {AANDPS, yxm, Pm, opBytes{0x54}}, 953 {AANDQ, yaddl, Pw, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}}, 954 {AANDW, yaddl, Pe, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}}, 955 {AARPL, yrl_ml, P32, opBytes{0x63}}, 956 {ABOUNDL, yrl_m, P32, opBytes{0x62}}, 957 {ABOUNDW, yrl_m, Pe, opBytes{0x62}}, 958 {ABSFL, yml_rl, Pm, opBytes{0xbc}}, 959 {ABSFQ, yml_rl, Pw, opBytes{0x0f, 0xbc}}, 960 {ABSFW, yml_rl, Pq, opBytes{0xbc}}, 961 {ABSRL, yml_rl, Pm, opBytes{0xbd}}, 962 {ABSRQ, yml_rl, Pw, 
opBytes{0x0f, 0xbd}}, 963 {ABSRW, yml_rl, Pq, opBytes{0xbd}}, 964 {ABSWAPL, ybswap, Px, opBytes{0x0f, 0xc8}}, 965 {ABSWAPQ, ybswap, Pw, opBytes{0x0f, 0xc8}}, 966 {ABTCL, ybtl, Pm, opBytes{0xba, 07, 0xbb}}, 967 {ABTCQ, ybtl, Pw, opBytes{0x0f, 0xba, 07, 0x0f, 0xbb}}, 968 {ABTCW, ybtl, Pq, opBytes{0xba, 07, 0xbb}}, 969 {ABTL, ybtl, Pm, opBytes{0xba, 04, 0xa3}}, 970 {ABTQ, ybtl, Pw, opBytes{0x0f, 0xba, 04, 0x0f, 0xa3}}, 971 {ABTRL, ybtl, Pm, opBytes{0xba, 06, 0xb3}}, 972 {ABTRQ, ybtl, Pw, opBytes{0x0f, 0xba, 06, 0x0f, 0xb3}}, 973 {ABTRW, ybtl, Pq, opBytes{0xba, 06, 0xb3}}, 974 {ABTSL, ybtl, Pm, opBytes{0xba, 05, 0xab}}, 975 {ABTSQ, ybtl, Pw, opBytes{0x0f, 0xba, 05, 0x0f, 0xab}}, 976 {ABTSW, ybtl, Pq, opBytes{0xba, 05, 0xab}}, 977 {ABTW, ybtl, Pq, opBytes{0xba, 04, 0xa3}}, 978 {ABYTE, ybyte, Px, opBytes{1}}, 979 {obj.ACALL, ycall, Px, opBytes{0xff, 02, 0xff, 0x15, 0xe8}}, 980 {ACBW, ynone, Pe, opBytes{0x98}}, 981 {ACDQ, ynone, Px, opBytes{0x99}}, 982 {ACDQE, ynone, Pw, opBytes{0x98}}, 983 {ACLAC, ynone, Pm, opBytes{01, 0xca}}, 984 {ACLC, ynone, Px, opBytes{0xf8}}, 985 {ACLD, ynone, Px, opBytes{0xfc}}, 986 {ACLDEMOTE, yclflush, Pm, opBytes{0x1c, 00}}, 987 {ACLFLUSH, yclflush, Pm, opBytes{0xae, 07}}, 988 {ACLFLUSHOPT, yclflush, Pq, opBytes{0xae, 07}}, 989 {ACLI, ynone, Px, opBytes{0xfa}}, 990 {ACLTS, ynone, Pm, opBytes{0x06}}, 991 {ACLWB, yclflush, Pq, opBytes{0xae, 06}}, 992 {ACMC, ynone, Px, opBytes{0xf5}}, 993 {ACMOVLCC, yml_rl, Pm, opBytes{0x43}}, 994 {ACMOVLCS, yml_rl, Pm, opBytes{0x42}}, 995 {ACMOVLEQ, yml_rl, Pm, opBytes{0x44}}, 996 {ACMOVLGE, yml_rl, Pm, opBytes{0x4d}}, 997 {ACMOVLGT, yml_rl, Pm, opBytes{0x4f}}, 998 {ACMOVLHI, yml_rl, Pm, opBytes{0x47}}, 999 {ACMOVLLE, yml_rl, Pm, opBytes{0x4e}}, 1000 {ACMOVLLS, yml_rl, Pm, opBytes{0x46}}, 1001 {ACMOVLLT, yml_rl, Pm, opBytes{0x4c}}, 1002 {ACMOVLMI, yml_rl, Pm, opBytes{0x48}}, 1003 {ACMOVLNE, yml_rl, Pm, opBytes{0x45}}, 1004 {ACMOVLOC, yml_rl, Pm, opBytes{0x41}}, 1005 {ACMOVLOS, yml_rl, Pm, opBytes{0x40}}, 1006 
{ACMOVLPC, yml_rl, Pm, opBytes{0x4b}}, 1007 {ACMOVLPL, yml_rl, Pm, opBytes{0x49}}, 1008 {ACMOVLPS, yml_rl, Pm, opBytes{0x4a}}, 1009 {ACMOVQCC, yml_rl, Pw, opBytes{0x0f, 0x43}}, 1010 {ACMOVQCS, yml_rl, Pw, opBytes{0x0f, 0x42}}, 1011 {ACMOVQEQ, yml_rl, Pw, opBytes{0x0f, 0x44}}, 1012 {ACMOVQGE, yml_rl, Pw, opBytes{0x0f, 0x4d}}, 1013 {ACMOVQGT, yml_rl, Pw, opBytes{0x0f, 0x4f}}, 1014 {ACMOVQHI, yml_rl, Pw, opBytes{0x0f, 0x47}}, 1015 {ACMOVQLE, yml_rl, Pw, opBytes{0x0f, 0x4e}}, 1016 {ACMOVQLS, yml_rl, Pw, opBytes{0x0f, 0x46}}, 1017 {ACMOVQLT, yml_rl, Pw, opBytes{0x0f, 0x4c}}, 1018 {ACMOVQMI, yml_rl, Pw, opBytes{0x0f, 0x48}}, 1019 {ACMOVQNE, yml_rl, Pw, opBytes{0x0f, 0x45}}, 1020 {ACMOVQOC, yml_rl, Pw, opBytes{0x0f, 0x41}}, 1021 {ACMOVQOS, yml_rl, Pw, opBytes{0x0f, 0x40}}, 1022 {ACMOVQPC, yml_rl, Pw, opBytes{0x0f, 0x4b}}, 1023 {ACMOVQPL, yml_rl, Pw, opBytes{0x0f, 0x49}}, 1024 {ACMOVQPS, yml_rl, Pw, opBytes{0x0f, 0x4a}}, 1025 {ACMOVWCC, yml_rl, Pq, opBytes{0x43}}, 1026 {ACMOVWCS, yml_rl, Pq, opBytes{0x42}}, 1027 {ACMOVWEQ, yml_rl, Pq, opBytes{0x44}}, 1028 {ACMOVWGE, yml_rl, Pq, opBytes{0x4d}}, 1029 {ACMOVWGT, yml_rl, Pq, opBytes{0x4f}}, 1030 {ACMOVWHI, yml_rl, Pq, opBytes{0x47}}, 1031 {ACMOVWLE, yml_rl, Pq, opBytes{0x4e}}, 1032 {ACMOVWLS, yml_rl, Pq, opBytes{0x46}}, 1033 {ACMOVWLT, yml_rl, Pq, opBytes{0x4c}}, 1034 {ACMOVWMI, yml_rl, Pq, opBytes{0x48}}, 1035 {ACMOVWNE, yml_rl, Pq, opBytes{0x45}}, 1036 {ACMOVWOC, yml_rl, Pq, opBytes{0x41}}, 1037 {ACMOVWOS, yml_rl, Pq, opBytes{0x40}}, 1038 {ACMOVWPC, yml_rl, Pq, opBytes{0x4b}}, 1039 {ACMOVWPL, yml_rl, Pq, opBytes{0x49}}, 1040 {ACMOVWPS, yml_rl, Pq, opBytes{0x4a}}, 1041 {ACMPB, ycmpb, Pb, opBytes{0x3c, 0x80, 07, 0x38, 0x3a}}, 1042 {ACMPL, ycmpl, Px, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}}, 1043 {ACMPPD, yxcmpi, Px, opBytes{Pe, 0xc2}}, 1044 {ACMPPS, yxcmpi, Pm, opBytes{0xc2, 0}}, 1045 {ACMPQ, ycmpl, Pw, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}}, 1046 {ACMPSB, ynone, Pb, opBytes{0xa6}}, 1047 {ACMPSD, yxcmpi, Px, 
opBytes{Pf2, 0xc2}}, 1048 {ACMPSL, ynone, Px, opBytes{0xa7}}, 1049 {ACMPSQ, ynone, Pw, opBytes{0xa7}}, 1050 {ACMPSS, yxcmpi, Px, opBytes{Pf3, 0xc2}}, 1051 {ACMPSW, ynone, Pe, opBytes{0xa7}}, 1052 {ACMPW, ycmpl, Pe, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}}, 1053 {ACOMISD, yxm, Pe, opBytes{0x2f}}, 1054 {ACOMISS, yxm, Pm, opBytes{0x2f}}, 1055 {ACPUID, ynone, Pm, opBytes{0xa2}}, 1056 {ACVTPL2PD, yxcvm2, Px, opBytes{Pf3, 0xe6, Pe, 0x2a}}, 1057 {ACVTPL2PS, yxcvm2, Pm, opBytes{0x5b, 0, 0x2a, 0}}, 1058 {ACVTPD2PL, yxcvm1, Px, opBytes{Pf2, 0xe6, Pe, 0x2d}}, 1059 {ACVTPD2PS, yxm, Pe, opBytes{0x5a}}, 1060 {ACVTPS2PL, yxcvm1, Px, opBytes{Pe, 0x5b, Pm, 0x2d}}, 1061 {ACVTPS2PD, yxm, Pm, opBytes{0x5a}}, 1062 {ACVTSD2SL, yxcvfl, Pf2, opBytes{0x2d}}, 1063 {ACVTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2d}}, 1064 {ACVTSD2SS, yxm, Pf2, opBytes{0x5a}}, 1065 {ACVTSL2SD, yxcvlf, Pf2, opBytes{0x2a}}, 1066 {ACVTSQ2SD, yxcvqf, Pw, opBytes{Pf2, 0x2a}}, 1067 {ACVTSL2SS, yxcvlf, Pf3, opBytes{0x2a}}, 1068 {ACVTSQ2SS, yxcvqf, Pw, opBytes{Pf3, 0x2a}}, 1069 {ACVTSS2SD, yxm, Pf3, opBytes{0x5a}}, 1070 {ACVTSS2SL, yxcvfl, Pf3, opBytes{0x2d}}, 1071 {ACVTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2d}}, 1072 {ACVTTPD2PL, yxcvm1, Px, opBytes{Pe, 0xe6, Pe, 0x2c}}, 1073 {ACVTTPS2PL, yxcvm1, Px, opBytes{Pf3, 0x5b, Pm, 0x2c}}, 1074 {ACVTTSD2SL, yxcvfl, Pf2, opBytes{0x2c}}, 1075 {ACVTTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2c}}, 1076 {ACVTTSS2SL, yxcvfl, Pf3, opBytes{0x2c}}, 1077 {ACVTTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2c}}, 1078 {ACWD, ynone, Pe, opBytes{0x99}}, 1079 {ACWDE, ynone, Px, opBytes{0x98}}, 1080 {ACQO, ynone, Pw, opBytes{0x99}}, 1081 {ADAA, ynone, P32, opBytes{0x27}}, 1082 {ADAS, ynone, P32, opBytes{0x2f}}, 1083 {ADECB, yscond, Pb, opBytes{0xfe, 01}}, 1084 {ADECL, yincl, Px1, opBytes{0x48, 0xff, 01}}, 1085 {ADECQ, yincq, Pw, opBytes{0xff, 01}}, 1086 {ADECW, yincq, Pe, opBytes{0xff, 01}}, 1087 {ADIVB, ydivb, Pb, opBytes{0xf6, 06}}, 1088 {ADIVL, ydivl, Px, opBytes{0xf7, 06}}, 1089 {ADIVPD, yxm, Pe, opBytes{0x5e}}, 
1090 {ADIVPS, yxm, Pm, opBytes{0x5e}}, 1091 {ADIVQ, ydivl, Pw, opBytes{0xf7, 06}}, 1092 {ADIVSD, yxm, Pf2, opBytes{0x5e}}, 1093 {ADIVSS, yxm, Pf3, opBytes{0x5e}}, 1094 {ADIVW, ydivl, Pe, opBytes{0xf7, 06}}, 1095 {ADPPD, yxshuf, Pq, opBytes{0x3a, 0x41, 0}}, 1096 {ADPPS, yxshuf, Pq, opBytes{0x3a, 0x40, 0}}, 1097 {AEMMS, ynone, Pm, opBytes{0x77}}, 1098 {AEXTRACTPS, yextractps, Pq, opBytes{0x3a, 0x17, 0}}, 1099 {AENTER, nil, 0, opBytes{}}, // botch 1100 {AFXRSTOR, ysvrs_mo, Pm, opBytes{0xae, 01, 0xae, 01}}, 1101 {AFXSAVE, ysvrs_om, Pm, opBytes{0xae, 00, 0xae, 00}}, 1102 {AFXRSTOR64, ysvrs_mo, Pw, opBytes{0x0f, 0xae, 01, 0x0f, 0xae, 01}}, 1103 {AFXSAVE64, ysvrs_om, Pw, opBytes{0x0f, 0xae, 00, 0x0f, 0xae, 00}}, 1104 {AHLT, ynone, Px, opBytes{0xf4}}, 1105 {AIDIVB, ydivb, Pb, opBytes{0xf6, 07}}, 1106 {AIDIVL, ydivl, Px, opBytes{0xf7, 07}}, 1107 {AIDIVQ, ydivl, Pw, opBytes{0xf7, 07}}, 1108 {AIDIVW, ydivl, Pe, opBytes{0xf7, 07}}, 1109 {AIMULB, ydivb, Pb, opBytes{0xf6, 05}}, 1110 {AIMULL, yimul, Px, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}}, 1111 {AIMULQ, yimul, Pw, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}}, 1112 {AIMULW, yimul, Pe, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}}, 1113 {AIMUL3W, yimul3, Pe, opBytes{0x6b, 00, 0x69, 00}}, 1114 {AIMUL3L, yimul3, Px, opBytes{0x6b, 00, 0x69, 00}}, 1115 {AIMUL3Q, yimul3, Pw, opBytes{0x6b, 00, 0x69, 00}}, 1116 {AINB, yin, Pb, opBytes{0xe4, 0xec}}, 1117 {AINW, yin, Pe, opBytes{0xe5, 0xed}}, 1118 {AINL, yin, Px, opBytes{0xe5, 0xed}}, 1119 {AINCB, yscond, Pb, opBytes{0xfe, 00}}, 1120 {AINCL, yincl, Px1, opBytes{0x40, 0xff, 00}}, 1121 {AINCQ, yincq, Pw, opBytes{0xff, 00}}, 1122 {AINCW, yincq, Pe, opBytes{0xff, 00}}, 1123 {AINSB, ynone, Pb, opBytes{0x6c}}, 1124 {AINSL, ynone, Px, opBytes{0x6d}}, 1125 {AINSERTPS, yxshuf, Pq, opBytes{0x3a, 0x21, 0}}, 1126 {AINSW, ynone, Pe, opBytes{0x6d}}, 1127 {AICEBP, ynone, Px, opBytes{0xf1}}, 1128 {AINT, yint, Px, opBytes{0xcd}}, 1129 {AINTO, ynone, P32, opBytes{0xce}}, 1130 {AIRETL, ynone, Px, opBytes{0xcf}}, 
1131 {AIRETQ, ynone, Pw, opBytes{0xcf}}, 1132 {AIRETW, ynone, Pe, opBytes{0xcf}}, 1133 {AJCC, yjcond, Px, opBytes{0x73, 0x83, 00}}, 1134 {AJCS, yjcond, Px, opBytes{0x72, 0x82}}, 1135 {AJCXZL, yloop, Px, opBytes{0xe3}}, 1136 {AJCXZW, yloop, Px, opBytes{0xe3}}, 1137 {AJCXZQ, yloop, Px, opBytes{0xe3}}, 1138 {AJEQ, yjcond, Px, opBytes{0x74, 0x84}}, 1139 {AJGE, yjcond, Px, opBytes{0x7d, 0x8d}}, 1140 {AJGT, yjcond, Px, opBytes{0x7f, 0x8f}}, 1141 {AJHI, yjcond, Px, opBytes{0x77, 0x87}}, 1142 {AJLE, yjcond, Px, opBytes{0x7e, 0x8e}}, 1143 {AJLS, yjcond, Px, opBytes{0x76, 0x86}}, 1144 {AJLT, yjcond, Px, opBytes{0x7c, 0x8c}}, 1145 {AJMI, yjcond, Px, opBytes{0x78, 0x88}}, 1146 {obj.AJMP, yjmp, Px, opBytes{0xff, 04, 0xeb, 0xe9}}, 1147 {AJNE, yjcond, Px, opBytes{0x75, 0x85}}, 1148 {AJOC, yjcond, Px, opBytes{0x71, 0x81, 00}}, 1149 {AJOS, yjcond, Px, opBytes{0x70, 0x80, 00}}, 1150 {AJPC, yjcond, Px, opBytes{0x7b, 0x8b}}, 1151 {AJPL, yjcond, Px, opBytes{0x79, 0x89}}, 1152 {AJPS, yjcond, Px, opBytes{0x7a, 0x8a}}, 1153 {AHADDPD, yxm, Pq, opBytes{0x7c}}, 1154 {AHADDPS, yxm, Pf2, opBytes{0x7c}}, 1155 {AHSUBPD, yxm, Pq, opBytes{0x7d}}, 1156 {AHSUBPS, yxm, Pf2, opBytes{0x7d}}, 1157 {ALAHF, ynone, Px, opBytes{0x9f}}, 1158 {ALARL, yml_rl, Pm, opBytes{0x02}}, 1159 {ALARQ, yml_rl, Pw, opBytes{0x0f, 0x02}}, 1160 {ALARW, yml_rl, Pq, opBytes{0x02}}, 1161 {ALDDQU, ylddqu, Pf2, opBytes{0xf0}}, 1162 {ALDMXCSR, ysvrs_mo, Pm, opBytes{0xae, 02, 0xae, 02}}, 1163 {ALEAL, ym_rl, Px, opBytes{0x8d}}, 1164 {ALEAQ, ym_rl, Pw, opBytes{0x8d}}, 1165 {ALEAVEL, ynone, P32, opBytes{0xc9}}, 1166 {ALEAVEQ, ynone, Py, opBytes{0xc9}}, 1167 {ALEAVEW, ynone, Pe, opBytes{0xc9}}, 1168 {ALEAW, ym_rl, Pe, opBytes{0x8d}}, 1169 {ALOCK, ynone, Px, opBytes{0xf0}}, 1170 {ALODSB, ynone, Pb, opBytes{0xac}}, 1171 {ALODSL, ynone, Px, opBytes{0xad}}, 1172 {ALODSQ, ynone, Pw, opBytes{0xad}}, 1173 {ALODSW, ynone, Pe, opBytes{0xad}}, 1174 {ALONG, ybyte, Px, opBytes{4}}, 1175 {ALOOP, yloop, Px, opBytes{0xe2}}, 1176 {ALOOPEQ, yloop, Px, 
opBytes{0xe1}}, 1177 {ALOOPNE, yloop, Px, opBytes{0xe0}}, 1178 {ALTR, ydivl, Pm, opBytes{0x00, 03}}, 1179 {ALZCNTL, yml_rl, Pf3, opBytes{0xbd}}, 1180 {ALZCNTQ, yml_rl, Pfw, opBytes{0xbd}}, 1181 {ALZCNTW, yml_rl, Pef3, opBytes{0xbd}}, 1182 {ALSLL, yml_rl, Pm, opBytes{0x03}}, 1183 {ALSLW, yml_rl, Pq, opBytes{0x03}}, 1184 {ALSLQ, yml_rl, Pw, opBytes{0x0f, 0x03}}, 1185 {AMASKMOVOU, yxr, Pe, opBytes{0xf7}}, 1186 {AMASKMOVQ, ymr, Pm, opBytes{0xf7}}, 1187 {AMAXPD, yxm, Pe, opBytes{0x5f}}, 1188 {AMAXPS, yxm, Pm, opBytes{0x5f}}, 1189 {AMAXSD, yxm, Pf2, opBytes{0x5f}}, 1190 {AMAXSS, yxm, Pf3, opBytes{0x5f}}, 1191 {AMINPD, yxm, Pe, opBytes{0x5d}}, 1192 {AMINPS, yxm, Pm, opBytes{0x5d}}, 1193 {AMINSD, yxm, Pf2, opBytes{0x5d}}, 1194 {AMINSS, yxm, Pf3, opBytes{0x5d}}, 1195 {AMONITOR, ynone, Px, opBytes{0x0f, 0x01, 0xc8, 0}}, 1196 {AMWAIT, ynone, Px, opBytes{0x0f, 0x01, 0xc9, 0}}, 1197 {AMOVAPD, yxmov, Pe, opBytes{0x28, 0x29}}, 1198 {AMOVAPS, yxmov, Pm, opBytes{0x28, 0x29}}, 1199 {AMOVB, ymovb, Pb, opBytes{0x88, 0x8a, 0xb0, 0xc6, 00}}, 1200 {AMOVBLSX, ymb_rl, Pm, opBytes{0xbe}}, 1201 {AMOVBLZX, ymb_rl, Pm, opBytes{0xb6}}, 1202 {AMOVBQSX, ymb_rl, Pw, opBytes{0x0f, 0xbe}}, 1203 {AMOVBQZX, ymb_rl, Pw, opBytes{0x0f, 0xb6}}, 1204 {AMOVBWSX, ymb_rl, Pq, opBytes{0xbe}}, 1205 {AMOVSWW, ymb_rl, Pe, opBytes{0x0f, 0xbf}}, 1206 {AMOVBWZX, ymb_rl, Pq, opBytes{0xb6}}, 1207 {AMOVZWW, ymb_rl, Pe, opBytes{0x0f, 0xb7}}, 1208 {AMOVO, yxmov, Pe, opBytes{0x6f, 0x7f}}, 1209 {AMOVOU, yxmov, Pf3, opBytes{0x6f, 0x7f}}, 1210 {AMOVHLPS, yxr, Pm, opBytes{0x12}}, 1211 {AMOVHPD, yxmov, Pe, opBytes{0x16, 0x17}}, 1212 {AMOVHPS, yxmov, Pm, opBytes{0x16, 0x17}}, 1213 {AMOVL, ymovl, Px, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}}, 1214 {AMOVLHPS, yxr, Pm, opBytes{0x16}}, 1215 {AMOVLPD, yxmov, Pe, opBytes{0x12, 0x13}}, 1216 {AMOVLPS, yxmov, Pm, opBytes{0x12, 0x13}}, 1217 {AMOVLQSX, yml_rl, Pw, opBytes{0x63}}, 1218 {AMOVLQZX, yml_rl, Px, opBytes{0x8b}}, 1219 {AMOVMSKPD, yxrrl, Pq, 
opBytes{0x50}}, 1220 {AMOVMSKPS, yxrrl, Pm, opBytes{0x50}}, 1221 {AMOVNTO, yxr_ml, Pe, opBytes{0xe7}}, 1222 {AMOVNTDQA, ylddqu, Pq4, opBytes{0x2a}}, 1223 {AMOVNTPD, yxr_ml, Pe, opBytes{0x2b}}, 1224 {AMOVNTPS, yxr_ml, Pm, opBytes{0x2b}}, 1225 {AMOVNTQ, ymr_ml, Pm, opBytes{0xe7}}, 1226 {AMOVQ, ymovq, Pw8, opBytes{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}}, 1227 {AMOVQOZX, ymrxr, Pf3, opBytes{0xd6, 0x7e}}, 1228 {AMOVSB, ynone, Pb, opBytes{0xa4}}, 1229 {AMOVSD, yxmov, Pf2, opBytes{0x10, 0x11}}, 1230 {AMOVSL, ynone, Px, opBytes{0xa5}}, 1231 {AMOVSQ, ynone, Pw, opBytes{0xa5}}, 1232 {AMOVSS, yxmov, Pf3, opBytes{0x10, 0x11}}, 1233 {AMOVSW, ynone, Pe, opBytes{0xa5}}, 1234 {AMOVUPD, yxmov, Pe, opBytes{0x10, 0x11}}, 1235 {AMOVUPS, yxmov, Pm, opBytes{0x10, 0x11}}, 1236 {AMOVW, ymovw, Pe, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0}}, 1237 {AMOVWLSX, yml_rl, Pm, opBytes{0xbf}}, 1238 {AMOVWLZX, yml_rl, Pm, opBytes{0xb7}}, 1239 {AMOVWQSX, yml_rl, Pw, opBytes{0x0f, 0xbf}}, 1240 {AMOVWQZX, yml_rl, Pw, opBytes{0x0f, 0xb7}}, 1241 {AMPSADBW, yxshuf, Pq, opBytes{0x3a, 0x42, 0}}, 1242 {AMULB, ydivb, Pb, opBytes{0xf6, 04}}, 1243 {AMULL, ydivl, Px, opBytes{0xf7, 04}}, 1244 {AMULPD, yxm, Pe, opBytes{0x59}}, 1245 {AMULPS, yxm, Ym, opBytes{0x59}}, 1246 {AMULQ, ydivl, Pw, opBytes{0xf7, 04}}, 1247 {AMULSD, yxm, Pf2, opBytes{0x59}}, 1248 {AMULSS, yxm, Pf3, opBytes{0x59}}, 1249 {AMULW, ydivl, Pe, opBytes{0xf7, 04}}, 1250 {ANEGB, yscond, Pb, opBytes{0xf6, 03}}, 1251 {ANEGL, yscond, Px, opBytes{0xf7, 03}}, 1252 {ANEGQ, yscond, Pw, opBytes{0xf7, 03}}, 1253 {ANEGW, yscond, Pe, opBytes{0xf7, 03}}, 1254 {obj.ANOP, ynop, Px, opBytes{0, 0}}, 1255 {ANOTB, yscond, Pb, opBytes{0xf6, 02}}, 1256 {ANOTL, yscond, Px, opBytes{0xf7, 02}}, // TODO(rsc): yscond is wrong here. 
1257 {ANOTQ, yscond, Pw, opBytes{0xf7, 02}}, 1258 {ANOTW, yscond, Pe, opBytes{0xf7, 02}}, 1259 {AORB, yxorb, Pb, opBytes{0x0c, 0x80, 01, 0x08, 0x0a}}, 1260 {AORL, yaddl, Px, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}}, 1261 {AORPD, yxm, Pq, opBytes{0x56}}, 1262 {AORPS, yxm, Pm, opBytes{0x56}}, 1263 {AORQ, yaddl, Pw, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}}, 1264 {AORW, yaddl, Pe, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}}, 1265 {AOUTB, yin, Pb, opBytes{0xe6, 0xee}}, 1266 {AOUTL, yin, Px, opBytes{0xe7, 0xef}}, 1267 {AOUTW, yin, Pe, opBytes{0xe7, 0xef}}, 1268 {AOUTSB, ynone, Pb, opBytes{0x6e}}, 1269 {AOUTSL, ynone, Px, opBytes{0x6f}}, 1270 {AOUTSW, ynone, Pe, opBytes{0x6f}}, 1271 {APABSB, yxm_q4, Pq4, opBytes{0x1c}}, 1272 {APABSD, yxm_q4, Pq4, opBytes{0x1e}}, 1273 {APABSW, yxm_q4, Pq4, opBytes{0x1d}}, 1274 {APACKSSLW, ymm, Py1, opBytes{0x6b, Pe, 0x6b}}, 1275 {APACKSSWB, ymm, Py1, opBytes{0x63, Pe, 0x63}}, 1276 {APACKUSDW, yxm_q4, Pq4, opBytes{0x2b}}, 1277 {APACKUSWB, ymm, Py1, opBytes{0x67, Pe, 0x67}}, 1278 {APADDB, ymm, Py1, opBytes{0xfc, Pe, 0xfc}}, 1279 {APADDL, ymm, Py1, opBytes{0xfe, Pe, 0xfe}}, 1280 {APADDQ, yxm, Pe, opBytes{0xd4}}, 1281 {APADDSB, ymm, Py1, opBytes{0xec, Pe, 0xec}}, 1282 {APADDSW, ymm, Py1, opBytes{0xed, Pe, 0xed}}, 1283 {APADDUSB, ymm, Py1, opBytes{0xdc, Pe, 0xdc}}, 1284 {APADDUSW, ymm, Py1, opBytes{0xdd, Pe, 0xdd}}, 1285 {APADDW, ymm, Py1, opBytes{0xfd, Pe, 0xfd}}, 1286 {APALIGNR, ypalignr, Pq, opBytes{0x3a, 0x0f}}, 1287 {APAND, ymm, Py1, opBytes{0xdb, Pe, 0xdb}}, 1288 {APANDN, ymm, Py1, opBytes{0xdf, Pe, 0xdf}}, 1289 {APAUSE, ynone, Px, opBytes{0xf3, 0x90}}, 1290 {APAVGB, ymm, Py1, opBytes{0xe0, Pe, 0xe0}}, 1291 {APAVGW, ymm, Py1, opBytes{0xe3, Pe, 0xe3}}, 1292 {APBLENDW, yxshuf, Pq, opBytes{0x3a, 0x0e, 0}}, 1293 {APCMPEQB, ymm, Py1, opBytes{0x74, Pe, 0x74}}, 1294 {APCMPEQL, ymm, Py1, opBytes{0x76, Pe, 0x76}}, 1295 {APCMPEQQ, yxm_q4, Pq4, opBytes{0x29}}, 1296 {APCMPEQW, ymm, Py1, opBytes{0x75, Pe, 0x75}}, 1297 {APCMPGTB, ymm, 
Py1, opBytes{0x64, Pe, 0x64}}, 1298 {APCMPGTL, ymm, Py1, opBytes{0x66, Pe, 0x66}}, 1299 {APCMPGTQ, yxm_q4, Pq4, opBytes{0x37}}, 1300 {APCMPGTW, ymm, Py1, opBytes{0x65, Pe, 0x65}}, 1301 {APCMPISTRI, yxshuf, Pq, opBytes{0x3a, 0x63, 0}}, 1302 {APCMPISTRM, yxshuf, Pq, opBytes{0x3a, 0x62, 0}}, 1303 {APEXTRW, yextrw, Pq, opBytes{0xc5, 0, 0x3a, 0x15, 0}}, 1304 {APEXTRB, yextr, Pq, opBytes{0x3a, 0x14, 00}}, 1305 {APEXTRD, yextr, Pq, opBytes{0x3a, 0x16, 00}}, 1306 {APEXTRQ, yextr, Pq3, opBytes{0x3a, 0x16, 00}}, 1307 {APHADDD, ymmxmm0f38, Px, opBytes{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}}, 1308 {APHADDSW, yxm_q4, Pq4, opBytes{0x03}}, 1309 {APHADDW, yxm_q4, Pq4, opBytes{0x01}}, 1310 {APHMINPOSUW, yxm_q4, Pq4, opBytes{0x41}}, 1311 {APHSUBD, yxm_q4, Pq4, opBytes{0x06}}, 1312 {APHSUBSW, yxm_q4, Pq4, opBytes{0x07}}, 1313 {APHSUBW, yxm_q4, Pq4, opBytes{0x05}}, 1314 {APINSRW, yinsrw, Pq, opBytes{0xc4, 00}}, 1315 {APINSRB, yinsr, Pq, opBytes{0x3a, 0x20, 00}}, 1316 {APINSRD, yinsr, Pq, opBytes{0x3a, 0x22, 00}}, 1317 {APINSRQ, yinsr, Pq3, opBytes{0x3a, 0x22, 00}}, 1318 {APMADDUBSW, yxm_q4, Pq4, opBytes{0x04}}, 1319 {APMADDWL, ymm, Py1, opBytes{0xf5, Pe, 0xf5}}, 1320 {APMAXSB, yxm_q4, Pq4, opBytes{0x3c}}, 1321 {APMAXSD, yxm_q4, Pq4, opBytes{0x3d}}, 1322 {APMAXSW, yxm, Pe, opBytes{0xee}}, 1323 {APMAXUB, yxm, Pe, opBytes{0xde}}, 1324 {APMAXUD, yxm_q4, Pq4, opBytes{0x3f}}, 1325 {APMAXUW, yxm_q4, Pq4, opBytes{0x3e}}, 1326 {APMINSB, yxm_q4, Pq4, opBytes{0x38}}, 1327 {APMINSD, yxm_q4, Pq4, opBytes{0x39}}, 1328 {APMINSW, yxm, Pe, opBytes{0xea}}, 1329 {APMINUB, yxm, Pe, opBytes{0xda}}, 1330 {APMINUD, yxm_q4, Pq4, opBytes{0x3b}}, 1331 {APMINUW, yxm_q4, Pq4, opBytes{0x3a}}, 1332 {APMOVMSKB, ymskb, Px, opBytes{Pe, 0xd7, 0xd7}}, 1333 {APMOVSXBD, yxm_q4, Pq4, opBytes{0x21}}, 1334 {APMOVSXBQ, yxm_q4, Pq4, opBytes{0x22}}, 1335 {APMOVSXBW, yxm_q4, Pq4, opBytes{0x20}}, 1336 {APMOVSXDQ, yxm_q4, Pq4, opBytes{0x25}}, 1337 {APMOVSXWD, yxm_q4, Pq4, opBytes{0x23}}, 1338 {APMOVSXWQ, yxm_q4, Pq4, 
opBytes{0x24}}, 1339 {APMOVZXBD, yxm_q4, Pq4, opBytes{0x31}}, 1340 {APMOVZXBQ, yxm_q4, Pq4, opBytes{0x32}}, 1341 {APMOVZXBW, yxm_q4, Pq4, opBytes{0x30}}, 1342 {APMOVZXDQ, yxm_q4, Pq4, opBytes{0x35}}, 1343 {APMOVZXWD, yxm_q4, Pq4, opBytes{0x33}}, 1344 {APMOVZXWQ, yxm_q4, Pq4, opBytes{0x34}}, 1345 {APMULDQ, yxm_q4, Pq4, opBytes{0x28}}, 1346 {APMULHRSW, yxm_q4, Pq4, opBytes{0x0b}}, 1347 {APMULHUW, ymm, Py1, opBytes{0xe4, Pe, 0xe4}}, 1348 {APMULHW, ymm, Py1, opBytes{0xe5, Pe, 0xe5}}, 1349 {APMULLD, yxm_q4, Pq4, opBytes{0x40}}, 1350 {APMULLW, ymm, Py1, opBytes{0xd5, Pe, 0xd5}}, 1351 {APMULULQ, ymm, Py1, opBytes{0xf4, Pe, 0xf4}}, 1352 {APOPAL, ynone, P32, opBytes{0x61}}, 1353 {APOPAW, ynone, Pe, opBytes{0x61}}, 1354 {APOPCNTW, yml_rl, Pef3, opBytes{0xb8}}, 1355 {APOPCNTL, yml_rl, Pf3, opBytes{0xb8}}, 1356 {APOPCNTQ, yml_rl, Pfw, opBytes{0xb8}}, 1357 {APOPFL, ynone, P32, opBytes{0x9d}}, 1358 {APOPFQ, ynone, Py, opBytes{0x9d}}, 1359 {APOPFW, ynone, Pe, opBytes{0x9d}}, 1360 {APOPL, ypopl, P32, opBytes{0x58, 0x8f, 00}}, 1361 {APOPQ, ypopl, Py, opBytes{0x58, 0x8f, 00}}, 1362 {APOPW, ypopl, Pe, opBytes{0x58, 0x8f, 00}}, 1363 {APOR, ymm, Py1, opBytes{0xeb, Pe, 0xeb}}, 1364 {APSADBW, yxm, Pq, opBytes{0xf6}}, 1365 {APSHUFHW, yxshuf, Pf3, opBytes{0x70, 00}}, 1366 {APSHUFL, yxshuf, Pq, opBytes{0x70, 00}}, 1367 {APSHUFLW, yxshuf, Pf2, opBytes{0x70, 00}}, 1368 {APSHUFW, ymshuf, Pm, opBytes{0x70, 00}}, 1369 {APSHUFB, ymshufb, Pq, opBytes{0x38, 0x00}}, 1370 {APSIGNB, yxm_q4, Pq4, opBytes{0x08}}, 1371 {APSIGND, yxm_q4, Pq4, opBytes{0x0a}}, 1372 {APSIGNW, yxm_q4, Pq4, opBytes{0x09}}, 1373 {APSLLO, ypsdq, Pq, opBytes{0x73, 07}}, 1374 {APSLLL, yps, Py3, opBytes{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}}, 1375 {APSLLQ, yps, Py3, opBytes{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}}, 1376 {APSLLW, yps, Py3, opBytes{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}}, 1377 {APSRAL, yps, Py3, opBytes{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}}, 1378 {APSRAW, yps, Py3, opBytes{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 
04}}, 1379 {APSRLO, ypsdq, Pq, opBytes{0x73, 03}}, 1380 {APSRLL, yps, Py3, opBytes{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}}, 1381 {APSRLQ, yps, Py3, opBytes{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}}, 1382 {APSRLW, yps, Py3, opBytes{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}}, 1383 {APSUBB, yxm, Pe, opBytes{0xf8}}, 1384 {APSUBL, yxm, Pe, opBytes{0xfa}}, 1385 {APSUBQ, yxm, Pe, opBytes{0xfb}}, 1386 {APSUBSB, yxm, Pe, opBytes{0xe8}}, 1387 {APSUBSW, yxm, Pe, opBytes{0xe9}}, 1388 {APSUBUSB, yxm, Pe, opBytes{0xd8}}, 1389 {APSUBUSW, yxm, Pe, opBytes{0xd9}}, 1390 {APSUBW, yxm, Pe, opBytes{0xf9}}, 1391 {APTEST, yxm_q4, Pq4, opBytes{0x17}}, 1392 {APUNPCKHBW, ymm, Py1, opBytes{0x68, Pe, 0x68}}, 1393 {APUNPCKHLQ, ymm, Py1, opBytes{0x6a, Pe, 0x6a}}, 1394 {APUNPCKHQDQ, yxm, Pe, opBytes{0x6d}}, 1395 {APUNPCKHWL, ymm, Py1, opBytes{0x69, Pe, 0x69}}, 1396 {APUNPCKLBW, ymm, Py1, opBytes{0x60, Pe, 0x60}}, 1397 {APUNPCKLLQ, ymm, Py1, opBytes{0x62, Pe, 0x62}}, 1398 {APUNPCKLQDQ, yxm, Pe, opBytes{0x6c}}, 1399 {APUNPCKLWL, ymm, Py1, opBytes{0x61, Pe, 0x61}}, 1400 {APUSHAL, ynone, P32, opBytes{0x60}}, 1401 {APUSHAW, ynone, Pe, opBytes{0x60}}, 1402 {APUSHFL, ynone, P32, opBytes{0x9c}}, 1403 {APUSHFQ, ynone, Py, opBytes{0x9c}}, 1404 {APUSHFW, ynone, Pe, opBytes{0x9c}}, 1405 {APUSHL, ypushl, P32, opBytes{0x50, 0xff, 06, 0x6a, 0x68}}, 1406 {APUSHQ, ypushl, Py, opBytes{0x50, 0xff, 06, 0x6a, 0x68}}, 1407 {APUSHW, ypushl, Pe, opBytes{0x50, 0xff, 06, 0x6a, 0x68}}, 1408 {APXOR, ymm, Py1, opBytes{0xef, Pe, 0xef}}, 1409 {AQUAD, ybyte, Px, opBytes{8}}, 1410 {ARCLB, yshb, Pb, opBytes{0xd0, 02, 0xc0, 02, 0xd2, 02}}, 1411 {ARCLL, yshl, Px, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}}, 1412 {ARCLQ, yshl, Pw, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}}, 1413 {ARCLW, yshl, Pe, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}}, 1414 {ARCPPS, yxm, Pm, opBytes{0x53}}, 1415 {ARCPSS, yxm, Pf3, opBytes{0x53}}, 1416 {ARCRB, yshb, Pb, opBytes{0xd0, 03, 0xc0, 03, 0xd2, 03}}, 1417 {ARCRL, yshl, Px, opBytes{0xd1, 
03, 0xc1, 03, 0xd3, 03, 0xd3, 03}}, 1418 {ARCRQ, yshl, Pw, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}}, 1419 {ARCRW, yshl, Pe, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}}, 1420 {AREP, ynone, Px, opBytes{0xf3}}, 1421 {AREPN, ynone, Px, opBytes{0xf2}}, 1422 {obj.ARET, ynone, Px, opBytes{0xc3}}, 1423 {ARETFW, yret, Pe, opBytes{0xcb, 0xca}}, 1424 {ARETFL, yret, Px, opBytes{0xcb, 0xca}}, 1425 {ARETFQ, yret, Pw, opBytes{0xcb, 0xca}}, 1426 {AROLB, yshb, Pb, opBytes{0xd0, 00, 0xc0, 00, 0xd2, 00}}, 1427 {AROLL, yshl, Px, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}}, 1428 {AROLQ, yshl, Pw, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}}, 1429 {AROLW, yshl, Pe, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}}, 1430 {ARORB, yshb, Pb, opBytes{0xd0, 01, 0xc0, 01, 0xd2, 01}}, 1431 {ARORL, yshl, Px, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}}, 1432 {ARORQ, yshl, Pw, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}}, 1433 {ARORW, yshl, Pe, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}}, 1434 {ARSQRTPS, yxm, Pm, opBytes{0x52}}, 1435 {ARSQRTSS, yxm, Pf3, opBytes{0x52}}, 1436 {ASAHF, ynone, Px, opBytes{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, // XCHGB AH,AL; PUSH AX; POPFL 1437 {ASALB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}}, 1438 {ASALL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1439 {ASALQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1440 {ASALW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1441 {ASARB, yshb, Pb, opBytes{0xd0, 07, 0xc0, 07, 0xd2, 07}}, 1442 {ASARL, yshl, Px, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}}, 1443 {ASARQ, yshl, Pw, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}}, 1444 {ASARW, yshl, Pe, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}}, 1445 {ASBBB, yxorb, Pb, opBytes{0x1c, 0x80, 03, 0x18, 0x1a}}, 1446 {ASBBL, yaddl, Px, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}}, 1447 {ASBBQ, yaddl, Pw, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}}, 1448 {ASBBW, yaddl, Pe, opBytes{0x83, 03, 
0x1d, 0x81, 03, 0x19, 0x1b}}, 1449 {ASCASB, ynone, Pb, opBytes{0xae}}, 1450 {ASCASL, ynone, Px, opBytes{0xaf}}, 1451 {ASCASQ, ynone, Pw, opBytes{0xaf}}, 1452 {ASCASW, ynone, Pe, opBytes{0xaf}}, 1453 {ASETCC, yscond, Pb, opBytes{0x0f, 0x93, 00}}, 1454 {ASETCS, yscond, Pb, opBytes{0x0f, 0x92, 00}}, 1455 {ASETEQ, yscond, Pb, opBytes{0x0f, 0x94, 00}}, 1456 {ASETGE, yscond, Pb, opBytes{0x0f, 0x9d, 00}}, 1457 {ASETGT, yscond, Pb, opBytes{0x0f, 0x9f, 00}}, 1458 {ASETHI, yscond, Pb, opBytes{0x0f, 0x97, 00}}, 1459 {ASETLE, yscond, Pb, opBytes{0x0f, 0x9e, 00}}, 1460 {ASETLS, yscond, Pb, opBytes{0x0f, 0x96, 00}}, 1461 {ASETLT, yscond, Pb, opBytes{0x0f, 0x9c, 00}}, 1462 {ASETMI, yscond, Pb, opBytes{0x0f, 0x98, 00}}, 1463 {ASETNE, yscond, Pb, opBytes{0x0f, 0x95, 00}}, 1464 {ASETOC, yscond, Pb, opBytes{0x0f, 0x91, 00}}, 1465 {ASETOS, yscond, Pb, opBytes{0x0f, 0x90, 00}}, 1466 {ASETPC, yscond, Pb, opBytes{0x0f, 0x9b, 00}}, 1467 {ASETPL, yscond, Pb, opBytes{0x0f, 0x99, 00}}, 1468 {ASETPS, yscond, Pb, opBytes{0x0f, 0x9a, 00}}, 1469 {ASHLB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}}, 1470 {ASHLL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1471 {ASHLQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1472 {ASHLW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1473 {ASHRB, yshb, Pb, opBytes{0xd0, 05, 0xc0, 05, 0xd2, 05}}, 1474 {ASHRL, yshl, Px, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}}, 1475 {ASHRQ, yshl, Pw, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}}, 1476 {ASHRW, yshl, Pe, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}}, 1477 {ASHUFPD, yxshuf, Pq, opBytes{0xc6, 00}}, 1478 {ASHUFPS, yxshuf, Pm, opBytes{0xc6, 00}}, 1479 {ASQRTPD, yxm, Pe, opBytes{0x51}}, 1480 {ASQRTPS, yxm, Pm, opBytes{0x51}}, 1481 {ASQRTSD, yxm, Pf2, opBytes{0x51}}, 1482 {ASQRTSS, yxm, Pf3, opBytes{0x51}}, 1483 {ASTC, ynone, Px, opBytes{0xf9}}, 1484 {ASTD, ynone, Px, opBytes{0xfd}}, 1485 {ASTI, ynone, Px, opBytes{0xfb}}, 1486 {ASTMXCSR, ysvrs_om, Pm, 
opBytes{0xae, 03, 0xae, 03}}, 1487 {ASTOSB, ynone, Pb, opBytes{0xaa}}, 1488 {ASTOSL, ynone, Px, opBytes{0xab}}, 1489 {ASTOSQ, ynone, Pw, opBytes{0xab}}, 1490 {ASTOSW, ynone, Pe, opBytes{0xab}}, 1491 {ASUBB, yxorb, Pb, opBytes{0x2c, 0x80, 05, 0x28, 0x2a}}, 1492 {ASUBL, yaddl, Px, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}}, 1493 {ASUBPD, yxm, Pe, opBytes{0x5c}}, 1494 {ASUBPS, yxm, Pm, opBytes{0x5c}}, 1495 {ASUBQ, yaddl, Pw, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}}, 1496 {ASUBSD, yxm, Pf2, opBytes{0x5c}}, 1497 {ASUBSS, yxm, Pf3, opBytes{0x5c}}, 1498 {ASUBW, yaddl, Pe, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}}, 1499 {ASWAPGS, ynone, Pm, opBytes{0x01, 0xf8}}, 1500 {ASYSCALL, ynone, Px, opBytes{0x0f, 0x05}}, // fast syscall 1501 {ATESTB, yxorb, Pb, opBytes{0xa8, 0xf6, 00, 0x84, 0x84}}, 1502 {ATESTL, ytestl, Px, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}}, 1503 {ATESTQ, ytestl, Pw, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}}, 1504 {ATESTW, ytestl, Pe, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}}, 1505 {ATPAUSE, ywrfsbase, Pq, opBytes{0xae, 06}}, 1506 {obj.ATEXT, ytext, Px, opBytes{}}, 1507 {AUCOMISD, yxm, Pe, opBytes{0x2e}}, 1508 {AUCOMISS, yxm, Pm, opBytes{0x2e}}, 1509 {AUNPCKHPD, yxm, Pe, opBytes{0x15}}, 1510 {AUNPCKHPS, yxm, Pm, opBytes{0x15}}, 1511 {AUNPCKLPD, yxm, Pe, opBytes{0x14}}, 1512 {AUNPCKLPS, yxm, Pm, opBytes{0x14}}, 1513 {AUMONITOR, ywrfsbase, Pf3, opBytes{0xae, 06}}, 1514 {AVERR, ydivl, Pm, opBytes{0x00, 04}}, 1515 {AVERW, ydivl, Pm, opBytes{0x00, 05}}, 1516 {AWAIT, ynone, Px, opBytes{0x9b}}, 1517 {AWORD, ybyte, Px, opBytes{2}}, 1518 {AXCHGB, yml_mb, Pb, opBytes{0x86, 0x86}}, 1519 {AXCHGL, yxchg, Px, opBytes{0x90, 0x90, 0x87, 0x87}}, 1520 {AXCHGQ, yxchg, Pw, opBytes{0x90, 0x90, 0x87, 0x87}}, 1521 {AXCHGW, yxchg, Pe, opBytes{0x90, 0x90, 0x87, 0x87}}, 1522 {AXLAT, ynone, Px, opBytes{0xd7}}, 1523 {AXORB, yxorb, Pb, opBytes{0x34, 0x80, 06, 0x30, 0x32}}, 1524 {AXORL, yaddl, Px, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}}, 1525 {AXORPD, yxm, Pe, opBytes{0x57}}, 1526 
{AXORPS, yxm, Pm, opBytes{0x57}}, 1527 {AXORQ, yaddl, Pw, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}}, 1528 {AXORW, yaddl, Pe, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}}, 1529 {AFMOVB, yfmvx, Px, opBytes{0xdf, 04}}, 1530 {AFMOVBP, yfmvp, Px, opBytes{0xdf, 06}}, 1531 {AFMOVD, yfmvd, Px, opBytes{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}}, 1532 {AFMOVDP, yfmvdp, Px, opBytes{0xdd, 03, 0xdd, 03}}, 1533 {AFMOVF, yfmvf, Px, opBytes{0xd9, 00, 0xd9, 02}}, 1534 {AFMOVFP, yfmvp, Px, opBytes{0xd9, 03}}, 1535 {AFMOVL, yfmvf, Px, opBytes{0xdb, 00, 0xdb, 02}}, 1536 {AFMOVLP, yfmvp, Px, opBytes{0xdb, 03}}, 1537 {AFMOVV, yfmvx, Px, opBytes{0xdf, 05}}, 1538 {AFMOVVP, yfmvp, Px, opBytes{0xdf, 07}}, 1539 {AFMOVW, yfmvf, Px, opBytes{0xdf, 00, 0xdf, 02}}, 1540 {AFMOVWP, yfmvp, Px, opBytes{0xdf, 03}}, 1541 {AFMOVX, yfmvx, Px, opBytes{0xdb, 05}}, 1542 {AFMOVXP, yfmvp, Px, opBytes{0xdb, 07}}, 1543 {AFCMOVCC, yfcmv, Px, opBytes{0xdb, 00}}, 1544 {AFCMOVCS, yfcmv, Px, opBytes{0xda, 00}}, 1545 {AFCMOVEQ, yfcmv, Px, opBytes{0xda, 01}}, 1546 {AFCMOVHI, yfcmv, Px, opBytes{0xdb, 02}}, 1547 {AFCMOVLS, yfcmv, Px, opBytes{0xda, 02}}, 1548 {AFCMOVB, yfcmv, Px, opBytes{0xda, 00}}, 1549 {AFCMOVBE, yfcmv, Px, opBytes{0xda, 02}}, 1550 {AFCMOVNB, yfcmv, Px, opBytes{0xdb, 00}}, 1551 {AFCMOVNBE, yfcmv, Px, opBytes{0xdb, 02}}, 1552 {AFCMOVE, yfcmv, Px, opBytes{0xda, 01}}, 1553 {AFCMOVNE, yfcmv, Px, opBytes{0xdb, 01}}, 1554 {AFCMOVNU, yfcmv, Px, opBytes{0xdb, 03}}, 1555 {AFCMOVU, yfcmv, Px, opBytes{0xda, 03}}, 1556 {AFCMOVUN, yfcmv, Px, opBytes{0xda, 03}}, 1557 {AFCOMD, yfadd, Px, opBytes{0xdc, 02, 0xd8, 02, 0xdc, 02}}, // botch 1558 {AFCOMDP, yfadd, Px, opBytes{0xdc, 03, 0xd8, 03, 0xdc, 03}}, // botch 1559 {AFCOMDPP, ycompp, Px, opBytes{0xde, 03}}, 1560 {AFCOMF, yfmvx, Px, opBytes{0xd8, 02}}, 1561 {AFCOMFP, yfmvx, Px, opBytes{0xd8, 03}}, 1562 {AFCOMI, yfcmv, Px, opBytes{0xdb, 06}}, 1563 {AFCOMIP, yfcmv, Px, opBytes{0xdf, 06}}, 1564 {AFCOML, yfmvx, Px, opBytes{0xda, 02}}, 1565 {AFCOMLP, yfmvx, Px, 
opBytes{0xda, 03}}, 1566 {AFCOMW, yfmvx, Px, opBytes{0xde, 02}}, 1567 {AFCOMWP, yfmvx, Px, opBytes{0xde, 03}}, 1568 {AFUCOM, ycompp, Px, opBytes{0xdd, 04}}, 1569 {AFUCOMI, ycompp, Px, opBytes{0xdb, 05}}, 1570 {AFUCOMIP, ycompp, Px, opBytes{0xdf, 05}}, 1571 {AFUCOMP, ycompp, Px, opBytes{0xdd, 05}}, 1572 {AFUCOMPP, ycompp, Px, opBytes{0xda, 13}}, 1573 {AFADDDP, ycompp, Px, opBytes{0xde, 00}}, 1574 {AFADDW, yfmvx, Px, opBytes{0xde, 00}}, 1575 {AFADDL, yfmvx, Px, opBytes{0xda, 00}}, 1576 {AFADDF, yfmvx, Px, opBytes{0xd8, 00}}, 1577 {AFADDD, yfadd, Px, opBytes{0xdc, 00, 0xd8, 00, 0xdc, 00}}, 1578 {AFMULDP, ycompp, Px, opBytes{0xde, 01}}, 1579 {AFMULW, yfmvx, Px, opBytes{0xde, 01}}, 1580 {AFMULL, yfmvx, Px, opBytes{0xda, 01}}, 1581 {AFMULF, yfmvx, Px, opBytes{0xd8, 01}}, 1582 {AFMULD, yfadd, Px, opBytes{0xdc, 01, 0xd8, 01, 0xdc, 01}}, 1583 {AFSUBDP, ycompp, Px, opBytes{0xde, 05}}, 1584 {AFSUBW, yfmvx, Px, opBytes{0xde, 04}}, 1585 {AFSUBL, yfmvx, Px, opBytes{0xda, 04}}, 1586 {AFSUBF, yfmvx, Px, opBytes{0xd8, 04}}, 1587 {AFSUBD, yfadd, Px, opBytes{0xdc, 04, 0xd8, 04, 0xdc, 05}}, 1588 {AFSUBRDP, ycompp, Px, opBytes{0xde, 04}}, 1589 {AFSUBRW, yfmvx, Px, opBytes{0xde, 05}}, 1590 {AFSUBRL, yfmvx, Px, opBytes{0xda, 05}}, 1591 {AFSUBRF, yfmvx, Px, opBytes{0xd8, 05}}, 1592 {AFSUBRD, yfadd, Px, opBytes{0xdc, 05, 0xd8, 05, 0xdc, 04}}, 1593 {AFDIVDP, ycompp, Px, opBytes{0xde, 07}}, 1594 {AFDIVW, yfmvx, Px, opBytes{0xde, 06}}, 1595 {AFDIVL, yfmvx, Px, opBytes{0xda, 06}}, 1596 {AFDIVF, yfmvx, Px, opBytes{0xd8, 06}}, 1597 {AFDIVD, yfadd, Px, opBytes{0xdc, 06, 0xd8, 06, 0xdc, 07}}, 1598 {AFDIVRDP, ycompp, Px, opBytes{0xde, 06}}, 1599 {AFDIVRW, yfmvx, Px, opBytes{0xde, 07}}, 1600 {AFDIVRL, yfmvx, Px, opBytes{0xda, 07}}, 1601 {AFDIVRF, yfmvx, Px, opBytes{0xd8, 07}}, 1602 {AFDIVRD, yfadd, Px, opBytes{0xdc, 07, 0xd8, 07, 0xdc, 06}}, 1603 {AFXCHD, yfxch, Px, opBytes{0xd9, 01, 0xd9, 01}}, 1604 {AFFREE, nil, 0, opBytes{}}, 1605 {AFLDCW, ysvrs_mo, Px, opBytes{0xd9, 05, 0xd9, 05}}, 1606 
{AFLDENV, ysvrs_mo, Px, opBytes{0xd9, 04, 0xd9, 04}}, 1607 {AFRSTOR, ysvrs_mo, Px, opBytes{0xdd, 04, 0xdd, 04}}, 1608 {AFSAVE, ysvrs_om, Px, opBytes{0xdd, 06, 0xdd, 06}}, 1609 {AFSTCW, ysvrs_om, Px, opBytes{0xd9, 07, 0xd9, 07}}, 1610 {AFSTENV, ysvrs_om, Px, opBytes{0xd9, 06, 0xd9, 06}}, 1611 {AFSTSW, ystsw, Px, opBytes{0xdd, 07, 0xdf, 0xe0}}, 1612 {AF2XM1, ynone, Px, opBytes{0xd9, 0xf0}}, 1613 {AFABS, ynone, Px, opBytes{0xd9, 0xe1}}, 1614 {AFBLD, ysvrs_mo, Px, opBytes{0xdf, 04}}, 1615 {AFBSTP, yclflush, Px, opBytes{0xdf, 06}}, 1616 {AFCHS, ynone, Px, opBytes{0xd9, 0xe0}}, 1617 {AFCLEX, ynone, Px, opBytes{0xdb, 0xe2}}, 1618 {AFCOS, ynone, Px, opBytes{0xd9, 0xff}}, 1619 {AFDECSTP, ynone, Px, opBytes{0xd9, 0xf6}}, 1620 {AFINCSTP, ynone, Px, opBytes{0xd9, 0xf7}}, 1621 {AFINIT, ynone, Px, opBytes{0xdb, 0xe3}}, 1622 {AFLD1, ynone, Px, opBytes{0xd9, 0xe8}}, 1623 {AFLDL2E, ynone, Px, opBytes{0xd9, 0xea}}, 1624 {AFLDL2T, ynone, Px, opBytes{0xd9, 0xe9}}, 1625 {AFLDLG2, ynone, Px, opBytes{0xd9, 0xec}}, 1626 {AFLDLN2, ynone, Px, opBytes{0xd9, 0xed}}, 1627 {AFLDPI, ynone, Px, opBytes{0xd9, 0xeb}}, 1628 {AFLDZ, ynone, Px, opBytes{0xd9, 0xee}}, 1629 {AFNOP, ynone, Px, opBytes{0xd9, 0xd0}}, 1630 {AFPATAN, ynone, Px, opBytes{0xd9, 0xf3}}, 1631 {AFPREM, ynone, Px, opBytes{0xd9, 0xf8}}, 1632 {AFPREM1, ynone, Px, opBytes{0xd9, 0xf5}}, 1633 {AFPTAN, ynone, Px, opBytes{0xd9, 0xf2}}, 1634 {AFRNDINT, ynone, Px, opBytes{0xd9, 0xfc}}, 1635 {AFSCALE, ynone, Px, opBytes{0xd9, 0xfd}}, 1636 {AFSIN, ynone, Px, opBytes{0xd9, 0xfe}}, 1637 {AFSINCOS, ynone, Px, opBytes{0xd9, 0xfb}}, 1638 {AFSQRT, ynone, Px, opBytes{0xd9, 0xfa}}, 1639 {AFTST, ynone, Px, opBytes{0xd9, 0xe4}}, 1640 {AFXAM, ynone, Px, opBytes{0xd9, 0xe5}}, 1641 {AFXTRACT, ynone, Px, opBytes{0xd9, 0xf4}}, 1642 {AFYL2X, ynone, Px, opBytes{0xd9, 0xf1}}, 1643 {AFYL2XP1, ynone, Px, opBytes{0xd9, 0xf9}}, 1644 {ACMPXCHGB, yrb_mb, Pb, opBytes{0x0f, 0xb0}}, 1645 {ACMPXCHGL, yrl_ml, Px, opBytes{0x0f, 0xb1}}, 1646 {ACMPXCHGW, yrl_ml, Pe, 
opBytes{0x0f, 0xb1}}, 1647 {ACMPXCHGQ, yrl_ml, Pw, opBytes{0x0f, 0xb1}}, 1648 {ACMPXCHG8B, yscond, Pm, opBytes{0xc7, 01}}, 1649 {ACMPXCHG16B, yscond, Pw, opBytes{0x0f, 0xc7, 01}}, 1650 {AINVD, ynone, Pm, opBytes{0x08}}, 1651 {AINVLPG, ydivb, Pm, opBytes{0x01, 07}}, 1652 {AINVPCID, ycrc32l, Pe, opBytes{0x0f, 0x38, 0x82, 0}}, 1653 {ALFENCE, ynone, Pm, opBytes{0xae, 0xe8}}, 1654 {AMFENCE, ynone, Pm, opBytes{0xae, 0xf0}}, 1655 {AMOVNTIL, yrl_ml, Pm, opBytes{0xc3}}, 1656 {AMOVNTIQ, yrl_ml, Pw, opBytes{0x0f, 0xc3}}, 1657 {ARDPKRU, ynone, Pm, opBytes{0x01, 0xee, 0}}, 1658 {ARDMSR, ynone, Pm, opBytes{0x32}}, 1659 {ARDPMC, ynone, Pm, opBytes{0x33}}, 1660 {ARDTSC, ynone, Pm, opBytes{0x31}}, 1661 {ARSM, ynone, Pm, opBytes{0xaa}}, 1662 {ASFENCE, ynone, Pm, opBytes{0xae, 0xf8}}, 1663 {ASYSRET, ynone, Pm, opBytes{0x07}}, 1664 {AWBINVD, ynone, Pm, opBytes{0x09}}, 1665 {AWRMSR, ynone, Pm, opBytes{0x30}}, 1666 {AWRPKRU, ynone, Pm, opBytes{0x01, 0xef, 0}}, 1667 {AXADDB, yrb_mb, Pb, opBytes{0x0f, 0xc0}}, 1668 {AXADDL, yrl_ml, Px, opBytes{0x0f, 0xc1}}, 1669 {AXADDQ, yrl_ml, Pw, opBytes{0x0f, 0xc1}}, 1670 {AXADDW, yrl_ml, Pe, opBytes{0x0f, 0xc1}}, 1671 {ACRC32B, ycrc32b, Px, opBytes{0xf2, 0x0f, 0x38, 0xf0, 0}}, 1672 {ACRC32L, ycrc32l, Px, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}}, 1673 {ACRC32Q, ycrc32l, Pw, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}}, 1674 {ACRC32W, ycrc32l, Pe, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}}, 1675 {APREFETCHT0, yprefetch, Pm, opBytes{0x18, 01}}, 1676 {APREFETCHT1, yprefetch, Pm, opBytes{0x18, 02}}, 1677 {APREFETCHT2, yprefetch, Pm, opBytes{0x18, 03}}, 1678 {APREFETCHNTA, yprefetch, Pm, opBytes{0x18, 00}}, 1679 {AMOVQL, yrl_ml, Px, opBytes{0x89}}, 1680 {obj.AUNDEF, ynone, Px, opBytes{0x0f, 0x0b}}, 1681 {AAESENC, yaes, Pq, opBytes{0x38, 0xdc, 0}}, 1682 {AAESENCLAST, yaes, Pq, opBytes{0x38, 0xdd, 0}}, 1683 {AAESDEC, yaes, Pq, opBytes{0x38, 0xde, 0}}, 1684 {AAESDECLAST, yaes, Pq, opBytes{0x38, 0xdf, 0}}, 1685 {AAESIMC, yaes, Pq, opBytes{0x38, 0xdb, 0}}, 1686 {AAESKEYGENASSIST, 
yxshuf, Pq, opBytes{0x3a, 0xdf, 0}}, 1687 {AROUNDPD, yxshuf, Pq, opBytes{0x3a, 0x09, 0}}, 1688 {AROUNDPS, yxshuf, Pq, opBytes{0x3a, 0x08, 0}}, 1689 {AROUNDSD, yxshuf, Pq, opBytes{0x3a, 0x0b, 0}}, 1690 {AROUNDSS, yxshuf, Pq, opBytes{0x3a, 0x0a, 0}}, 1691 {APSHUFD, yxshuf, Pq, opBytes{0x70, 0}}, 1692 {APCLMULQDQ, yxshuf, Pq, opBytes{0x3a, 0x44, 0}}, 1693 {APCMPESTRI, yxshuf, Pq, opBytes{0x3a, 0x61, 0}}, 1694 {APCMPESTRM, yxshuf, Pq, opBytes{0x3a, 0x60, 0}}, 1695 {AMOVDDUP, yxm, Pf2, opBytes{0x12}}, 1696 {AMOVSHDUP, yxm, Pf3, opBytes{0x16}}, 1697 {AMOVSLDUP, yxm, Pf3, opBytes{0x12}}, 1698 {ARDTSCP, ynone, Pm, opBytes{0x01, 0xf9, 0}}, 1699 {ASTAC, ynone, Pm, opBytes{0x01, 0xcb, 0}}, 1700 {AUD1, ynone, Pm, opBytes{0xb9, 0}}, 1701 {AUD2, ynone, Pm, opBytes{0x0b, 0}}, 1702 {AUMWAIT, ywrfsbase, Pf2, opBytes{0xae, 06}}, 1703 {ASYSENTER, ynone, Px, opBytes{0x0f, 0x34, 0}}, 1704 {ASYSENTER64, ynone, Pw, opBytes{0x0f, 0x34, 0}}, 1705 {ASYSEXIT, ynone, Px, opBytes{0x0f, 0x35, 0}}, 1706 {ASYSEXIT64, ynone, Pw, opBytes{0x0f, 0x35, 0}}, 1707 {ALMSW, ydivl, Pm, opBytes{0x01, 06}}, 1708 {ALLDT, ydivl, Pm, opBytes{0x00, 02}}, 1709 {ALIDT, ysvrs_mo, Pm, opBytes{0x01, 03}}, 1710 {ALGDT, ysvrs_mo, Pm, opBytes{0x01, 02}}, 1711 {ATZCNTW, ycrc32l, Pe, opBytes{0xf3, 0x0f, 0xbc, 0}}, 1712 {ATZCNTL, ycrc32l, Px, opBytes{0xf3, 0x0f, 0xbc, 0}}, 1713 {ATZCNTQ, ycrc32l, Pw, opBytes{0xf3, 0x0f, 0xbc, 0}}, 1714 {AXRSTOR, ydivl, Px, opBytes{0x0f, 0xae, 05}}, 1715 {AXRSTOR64, ydivl, Pw, opBytes{0x0f, 0xae, 05}}, 1716 {AXRSTORS, ydivl, Px, opBytes{0x0f, 0xc7, 03}}, 1717 {AXRSTORS64, ydivl, Pw, opBytes{0x0f, 0xc7, 03}}, 1718 {AXSAVE, yclflush, Px, opBytes{0x0f, 0xae, 04}}, 1719 {AXSAVE64, yclflush, Pw, opBytes{0x0f, 0xae, 04}}, 1720 {AXSAVEOPT, yclflush, Px, opBytes{0x0f, 0xae, 06}}, 1721 {AXSAVEOPT64, yclflush, Pw, opBytes{0x0f, 0xae, 06}}, 1722 {AXSAVEC, yclflush, Px, opBytes{0x0f, 0xc7, 04}}, 1723 {AXSAVEC64, yclflush, Pw, opBytes{0x0f, 0xc7, 04}}, 1724 {AXSAVES, yclflush, Px, opBytes{0x0f, 0xc7, 
05}}, 1725 {AXSAVES64, yclflush, Pw, opBytes{0x0f, 0xc7, 05}}, 1726 {ASGDT, yclflush, Pm, opBytes{0x01, 00}}, 1727 {ASIDT, yclflush, Pm, opBytes{0x01, 01}}, 1728 {ARDRANDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 06}}, 1729 {ARDRANDL, yrdrand, Px, opBytes{0x0f, 0xc7, 06}}, 1730 {ARDRANDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 06}}, 1731 {ARDSEEDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 07}}, 1732 {ARDSEEDL, yrdrand, Px, opBytes{0x0f, 0xc7, 07}}, 1733 {ARDSEEDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 07}}, 1734 {ASTRW, yincq, Pe, opBytes{0x0f, 0x00, 01}}, 1735 {ASTRL, yincq, Px, opBytes{0x0f, 0x00, 01}}, 1736 {ASTRQ, yincq, Pw, opBytes{0x0f, 0x00, 01}}, 1737 {AXSETBV, ynone, Pm, opBytes{0x01, 0xd1, 0}}, 1738 {AMOVBEWW, ymovbe, Pq, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}}, 1739 {AMOVBELL, ymovbe, Pm, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}}, 1740 {AMOVBEQQ, ymovbe, Pw, opBytes{0x0f, 0x38, 0xf0, 0, 0x0f, 0x38, 0xf1, 0}}, 1741 {ANOPW, ydivl, Pe, opBytes{0x0f, 0x1f, 00}}, 1742 {ANOPL, ydivl, Px, opBytes{0x0f, 0x1f, 00}}, 1743 {ASLDTW, yincq, Pe, opBytes{0x0f, 0x00, 00}}, 1744 {ASLDTL, yincq, Px, opBytes{0x0f, 0x00, 00}}, 1745 {ASLDTQ, yincq, Pw, opBytes{0x0f, 0x00, 00}}, 1746 {ASMSWW, yincq, Pe, opBytes{0x0f, 0x01, 04}}, 1747 {ASMSWL, yincq, Px, opBytes{0x0f, 0x01, 04}}, 1748 {ASMSWQ, yincq, Pw, opBytes{0x0f, 0x01, 04}}, 1749 {ABLENDVPS, yblendvpd, Pq4, opBytes{0x14}}, 1750 {ABLENDVPD, yblendvpd, Pq4, opBytes{0x15}}, 1751 {APBLENDVB, yblendvpd, Pq4, opBytes{0x10}}, 1752 {ASHA1MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xc9, 0}}, 1753 {ASHA1MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xca, 0}}, 1754 {ASHA1NEXTE, yaes, Px, opBytes{0x0f, 0x38, 0xc8, 0}}, 1755 {ASHA256MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xcc, 0}}, 1756 {ASHA256MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xcd, 0}}, 1757 {ASHA1RNDS4, ysha1rnds4, Pm, opBytes{0x3a, 0xcc, 0}}, 1758 {ASHA256RNDS2, ysha256rnds2, Px, opBytes{0x0f, 0x38, 0xcb, 0}}, 1759 {ARDFSBASEL, yrdrand, Pf3, opBytes{0xae, 00}}, 1760 {ARDFSBASEQ, yrdrand, Pfw, opBytes{0xae, 00}}, 1761 
{ARDGSBASEL, yrdrand, Pf3, opBytes{0xae, 01}}, 1762 {ARDGSBASEQ, yrdrand, Pfw, opBytes{0xae, 01}}, 1763 {AWRFSBASEL, ywrfsbase, Pf3, opBytes{0xae, 02}}, 1764 {AWRFSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 02}}, 1765 {AWRGSBASEL, ywrfsbase, Pf3, opBytes{0xae, 03}}, 1766 {AWRGSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 03}}, 1767 {ALFSW, ym_rl, Pe, opBytes{0x0f, 0xb4}}, 1768 {ALFSL, ym_rl, Px, opBytes{0x0f, 0xb4}}, 1769 {ALFSQ, ym_rl, Pw, opBytes{0x0f, 0xb4}}, 1770 {ALGSW, ym_rl, Pe, opBytes{0x0f, 0xb5}}, 1771 {ALGSL, ym_rl, Px, opBytes{0x0f, 0xb5}}, 1772 {ALGSQ, ym_rl, Pw, opBytes{0x0f, 0xb5}}, 1773 {ALSSW, ym_rl, Pe, opBytes{0x0f, 0xb2}}, 1774 {ALSSL, ym_rl, Px, opBytes{0x0f, 0xb2}}, 1775 {ALSSQ, ym_rl, Pw, opBytes{0x0f, 0xb2}}, 1776 1777 {ABLENDPD, yxshuf, Pq, opBytes{0x3a, 0x0d, 0}}, 1778 {ABLENDPS, yxshuf, Pq, opBytes{0x3a, 0x0c, 0}}, 1779 {AXACQUIRE, ynone, Px, opBytes{0xf2}}, 1780 {AXRELEASE, ynone, Px, opBytes{0xf3}}, 1781 {AXBEGIN, yxbegin, Px, opBytes{0xc7, 0xf8}}, 1782 {AXABORT, yxabort, Px, opBytes{0xc6, 0xf8}}, 1783 {AXEND, ynone, Px, opBytes{0x0f, 01, 0xd5}}, 1784 {AXTEST, ynone, Px, opBytes{0x0f, 01, 0xd6}}, 1785 {AXGETBV, ynone, Pm, opBytes{01, 0xd0}}, 1786 {obj.AFUNCDATA, yfuncdata, Px, opBytes{0, 0}}, 1787 {obj.APCDATA, ypcdata, Px, opBytes{0, 0}}, 1788 {obj.ADUFFCOPY, yduff, Px, opBytes{0xe8}}, 1789 {obj.ADUFFZERO, yduff, Px, opBytes{0xe8}}, 1790 1791 {obj.AEND, nil, 0, opBytes{}}, 1792 {0, nil, 0, opBytes{}}, 1793 } 1794 1795 var opindex [(ALAST + 1) & obj.AMask]*Optab 1796 1797 // useAbs reports whether s describes a symbol that must avoid pc-relative addressing. 1798 // This happens on systems like Solaris that call .so functions instead of system calls. 1799 // It does not seem to be necessary for any other systems. This is probably working 1800 // around a Solaris-specific bug that should be fixed differently, but we don't know 1801 // what that bug is. And this does fix it. 
1802 func useAbs(ctxt *obj.Link, s *obj.LSym) bool { 1803 if ctxt.Headtype == objabi.Hsolaris { 1804 // All the Solaris dynamic imports from libc.so begin with "libc_". 1805 return strings.HasPrefix(s.Name, "libc_") 1806 } 1807 return ctxt.Arch.Family == sys.I386 && !ctxt.Flag_shared 1808 } 1809 1810 // single-instruction no-ops of various lengths. 1811 // constructed by hand and disassembled with gdb to verify. 1812 // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion. 1813 var nop = [][16]uint8{ 1814 {0x90}, 1815 {0x66, 0x90}, 1816 {0x0F, 0x1F, 0x00}, 1817 {0x0F, 0x1F, 0x40, 0x00}, 1818 {0x0F, 0x1F, 0x44, 0x00, 0x00}, 1819 {0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00}, 1820 {0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00}, 1821 {0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, 1822 {0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, 1823 } 1824 1825 // Native Client rejects the repeated 0x66 prefix. 1826 // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, 1827 func fillnop(p []byte, n int) { 1828 var m int 1829 1830 for n > 0 { 1831 m = n 1832 if m > len(nop) { 1833 m = len(nop) 1834 } 1835 copy(p[:m], nop[m-1][:m]) 1836 p = p[m:] 1837 n -= m 1838 } 1839 } 1840 1841 func noppad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 { 1842 s.Grow(int64(c) + int64(pad)) 1843 fillnop(s.P[c:], int(pad)) 1844 return c + pad 1845 } 1846 1847 func spadjop(ctxt *obj.Link, l, q obj.As) obj.As { 1848 if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 { 1849 return l 1850 } 1851 return q 1852 } 1853 1854 // If the environment variable GOAMD64=alignedjumps the assembler will ensure that 1855 // no standalone or macro-fused jump will straddle or end on a 32 byte boundary 1856 // by inserting NOPs before the jumps 1857 func isJump(p *obj.Prog) bool { 1858 return p.To.Target() != nil || p.As == obj.AJMP || p.As == obj.ACALL || 1859 p.As == obj.ARET || p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO 1860 } 1861 1862 // lookForJCC returns the first 
real instruction starting from p, if that instruction is a conditional 1863 // jump. Otherwise, nil is returned. 1864 func lookForJCC(p *obj.Prog) *obj.Prog { 1865 // Skip any PCDATA, FUNCDATA or NOP instructions 1866 var q *obj.Prog 1867 for q = p.Link; q != nil && (q.As == obj.APCDATA || q.As == obj.AFUNCDATA || q.As == obj.ANOP); q = q.Link { 1868 } 1869 1870 if q == nil || q.To.Target() == nil || p.As == obj.AJMP || p.As == obj.ACALL { 1871 return nil 1872 } 1873 1874 switch q.As { 1875 case AJOS, AJOC, AJCS, AJCC, AJEQ, AJNE, AJLS, AJHI, 1876 AJMI, AJPL, AJPS, AJPC, AJLT, AJGE, AJLE, AJGT: 1877 default: 1878 return nil 1879 } 1880 1881 return q 1882 } 1883 1884 // fusedJump determines whether p can be fused with a subsequent conditional jump instruction. 1885 // If it can, we return true followed by the total size of the fused jump. If it can't, we return false. 1886 // Macro fusion rules are derived from the Intel Optimization Manual (April 2019) section 3.4.2.2. 1887 func fusedJump(p *obj.Prog) (bool, uint8) { 1888 var fusedSize uint8 1889 1890 // The first instruction in a macro fused pair may be preceeded by the LOCK prefix, 1891 // or possibly an XACQUIRE/XRELEASE prefix followed by a LOCK prefix. If it is, we 1892 // need to be careful to insert any padding before the locks rather than directly after them. 
1893 1894 if p.As == AXRELEASE || p.As == AXACQUIRE { 1895 fusedSize += p.Isize 1896 for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link { 1897 } 1898 if p == nil { 1899 return false, 0 1900 } 1901 } 1902 if p.As == ALOCK { 1903 fusedSize += p.Isize 1904 for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link { 1905 } 1906 if p == nil { 1907 return false, 0 1908 } 1909 } 1910 cmp := p.As == ACMPB || p.As == ACMPL || p.As == ACMPQ || p.As == ACMPW 1911 1912 cmpAddSub := p.As == AADDB || p.As == AADDL || p.As == AADDW || p.As == AADDQ || 1913 p.As == ASUBB || p.As == ASUBL || p.As == ASUBW || p.As == ASUBQ || cmp 1914 1915 testAnd := p.As == ATESTB || p.As == ATESTL || p.As == ATESTQ || p.As == ATESTW || 1916 p.As == AANDB || p.As == AANDL || p.As == AANDQ || p.As == AANDW 1917 1918 incDec := p.As == AINCB || p.As == AINCL || p.As == AINCQ || p.As == AINCW || 1919 p.As == ADECB || p.As == ADECL || p.As == ADECQ || p.As == ADECW 1920 1921 if !cmpAddSub && !testAnd && !incDec { 1922 return false, 0 1923 } 1924 1925 if !incDec { 1926 var argOne obj.AddrType 1927 var argTwo obj.AddrType 1928 if cmp { 1929 argOne = p.From.Type 1930 argTwo = p.To.Type 1931 } else { 1932 argOne = p.To.Type 1933 argTwo = p.From.Type 1934 } 1935 if argOne == obj.TYPE_REG { 1936 if argTwo != obj.TYPE_REG && argTwo != obj.TYPE_CONST && argTwo != obj.TYPE_MEM { 1937 return false, 0 1938 } 1939 } else if argOne == obj.TYPE_MEM { 1940 if argTwo != obj.TYPE_REG { 1941 return false, 0 1942 } 1943 } else { 1944 return false, 0 1945 } 1946 } 1947 1948 fusedSize += p.Isize 1949 jmp := lookForJCC(p) 1950 if jmp == nil { 1951 return false, 0 1952 } 1953 1954 fusedSize += jmp.Isize 1955 1956 if testAnd { 1957 return true, fusedSize 1958 } 1959 1960 if jmp.As == AJOC || jmp.As == AJOS || jmp.As == AJMI || 1961 jmp.As == AJPL || jmp.As == AJPS || jmp.As == AJPC { 1962 return false, 0 1963 } 1964 1965 if cmpAddSub { 1966 return true, fusedSize 
1967 } 1968 1969 if jmp.As == AJCS || jmp.As == AJCC || jmp.As == AJHI || jmp.As == AJLS { 1970 return false, 0 1971 } 1972 1973 return true, fusedSize 1974 } 1975 1976 type padJumpsCtx int32 1977 1978 func makePjcCtx(ctxt *obj.Link) padJumpsCtx { 1979 // Disable jump padding on 32 bit builds by settting 1980 // padJumps to 0. 1981 if ctxt.Arch.Family == sys.I386 { 1982 return padJumpsCtx(0) 1983 } 1984 1985 // Disable jump padding for hand written assembly code. 1986 if ctxt.IsAsm { 1987 return padJumpsCtx(0) 1988 } 1989 1990 if objabi.GOAMD64 != "alignedjumps" { 1991 return padJumpsCtx(0) 1992 1993 } 1994 1995 return padJumpsCtx(32) 1996 } 1997 1998 // padJump detects whether the instruction being assembled is a standalone or a macro-fused 1999 // jump that needs to be padded. If it is, NOPs are inserted to ensure that the jump does 2000 // not cross or end on a 32 byte boundary. 2001 func (pjc padJumpsCtx) padJump(ctxt *obj.Link, s *obj.LSym, p *obj.Prog, c int32) int32 { 2002 if pjc == 0 { 2003 return c 2004 } 2005 2006 var toPad int32 2007 fj, fjSize := fusedJump(p) 2008 mask := int32(pjc - 1) 2009 if fj { 2010 if (c&mask)+int32(fjSize) >= int32(pjc) { 2011 toPad = int32(pjc) - (c & mask) 2012 } 2013 } else if isJump(p) { 2014 if (c&mask)+int32(p.Isize) >= int32(pjc) { 2015 toPad = int32(pjc) - (c & mask) 2016 } 2017 } 2018 if toPad <= 0 { 2019 return c 2020 } 2021 2022 return noppad(ctxt, s, c, toPad) 2023 } 2024 2025 // reAssemble is called if an instruction's size changes during assembly. If 2026 // it does and the instruction is a standalone or a macro-fused jump we need to 2027 // reassemble. 
// reAssemble is called if an instruction's size changes during assembly. If
// it does and the instruction is a standalone or a macro-fused jump we need to
// reassemble.
//
// It always reports false when jump padding is disabled (pjc == 0).
func (pjc padJumpsCtx) reAssemble(p *obj.Prog) bool {
	if pjc == 0 {
		return false
	}

	fj, _ := fusedJump(p)
	return fj || isJump(p)
}

// nopPad records padding that was emitted in front of an instruction, so that
// span6 can later splice a matching NOP Prog into the instruction list.
type nopPad struct {
	p *obj.Prog // Instruction before the pad
	n int32     // Size of the pad
}

// span6 assembles the function held in s: it assigns a Pc to every Prog
// reachable from s.Func.Text, writes the machine code into s.P and sets
// s.Size.
//
// It does nothing if s.P is already non-nil. Assembly is repeated until a
// pass finishes without requesting another one (a short forward branch that
// turned out to be too far, or a padded jump whose size changed); more than
// 20 passes is treated as a fatal error.
func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
	pjc := makePjcCtx(ctxt)

	// Already assembled.
	if s.P != nil {
		return
	}

	if ycover[0] == 0 {
		ctxt.Diag("x86 tables not initialized, call x86.instinit first")
	}

	// Pass 1: rewrite instructions into the form the encoder expects.
	for p := s.Func.Text; p != nil; p = p.Link {
		// A branch without a target is pointed at itself.
		if p.To.Type == obj.TYPE_BRANCH && p.To.Target() == nil {
			p.To.SetTarget(p)
		}
		if p.As == AADJSP {
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REG_SP
			// Generate 'ADDQ $x, SP' or 'SUBQ $x, SP', with x positive.
			// One exception: It is smaller to encode $-0x80 than $0x80.
			// For that case, flip the sign and the op:
			// Instead of 'ADDQ $0x80, SP', generate 'SUBQ $-0x80, SP'.
			switch v := p.From.Offset; {
			case v == 0:
				p.As = obj.ANOP
			case v == 0x80 || (v < 0 && v != -0x80):
				p.As = spadjop(ctxt, AADDL, AADDQ)
				p.From.Offset *= -1
			default:
				p.As = spadjop(ctxt, ASUBL, ASUBQ)
			}
		}
		// With retpolines enabled, an indirect CALL/JMP through a register is
		// turned into a direct branch to the per-register
		// runtime.retpoline<REG> symbol. Memory operands cannot be converted
		// and are reported as errors.
		if ctxt.Retpoline && (p.As == obj.ACALL || p.As == obj.AJMP) && (p.To.Type == obj.TYPE_REG || p.To.Type == obj.TYPE_MEM) {
			if p.To.Type != obj.TYPE_REG {
				ctxt.Diag("non-retpoline-compatible: %v", p)
				continue
			}
			p.To.Type = obj.TYPE_BRANCH
			p.To.Name = obj.NAME_EXTERN
			p.To.Sym = ctxt.Lookup("runtime.retpoline" + obj.Rconv(int(p.To.Reg)))
			p.To.Reg = 0
			p.To.Offset = 0
		}
	}

	// Pass 2: count instructions and initialise the branch flags in Back.
	var count int64 // rough count of number of instructions
	for p := s.Func.Text; p != nil; p = p.Link {
		count++
		p.Back = branchShort // use short branches first time through
		// NOTE(review): a target that already carries branchShort has
		// presumably been visited earlier in this loop, i.e. the branch goes
		// backwards and the target is a loop head - confirm that Back holds
		// no stale bits on entry.
		if q := p.To.Target(); q != nil && (q.Back&branchShort != 0) {
			p.Back |= branchBackwards
			q.Back |= branchLoopHead
		}
	}
	s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction

	var ab AsmBuf
	var n int   // number of assembly passes so far
	var c int32 // current offset into s.P
	errors := ctxt.Errors
	var nops []nopPad // Padding for a particular assembly (reuse slice storage if multiple assemblies)
	for {
		// This loop continues while there are reasons to re-assemble
		// whole block, like the presence of long forward jumps.
		reAssemble := false
		// Drop the relocations and code of the previous pass while keeping
		// the backing storage.
		for i := range s.R {
			s.R[i] = obj.Reloc{}
		}
		s.R = s.R[:0]
		s.P = s.P[:0]
		c = 0
		var pPrev *obj.Prog
		nops = nops[:0]
		for p := s.Func.Text; p != nil; p = p.Link {
			c0 := c // offset before any padding for p
			c = pjc.padJump(ctxt, s, p, c)

			// Loop alignment; compiled out while maxLoopPad is 0.
			if maxLoopPad > 0 && p.Back&branchLoopHead != 0 && c&(loopAlign-1) != 0 {
				// pad with NOPs
				v := -c & (loopAlign - 1)

				if v <= maxLoopPad {
					s.Grow(int64(c) + int64(v))
					fillnop(s.P[c:], int(v))
					c += v
				}
			}

			p.Pc = int64(c)

			// process forward jumps to p
			// NOTE(review): p.Rel/q.Forwd is presumably the list of earlier
			// branches waiting for p's address, built while those branches
			// were encoded - confirm in asmins.
			for q := p.Rel; q != nil; q = q.Forwd {
				// Displacement measured from the end of the branch q.
				v := int32(p.Pc - (q.Pc + int64(q.Isize)))
				if q.Back&branchShort != 0 {
					// Does not fit in a signed byte: switch q to the long
					// form and request another pass.
					if v > 127 {
						reAssemble = true
						q.Back ^= branchShort
					}

					// The 8-bit displacement sits at byte 2 for JCXZL and
					// XBEGIN and at byte 1 for every other short branch.
					if q.As == AJCXZL || q.As == AXBEGIN {
						s.P[q.Pc+2] = byte(v)
					} else {
						s.P[q.Pc+1] = byte(v)
					}
				} else {
					// Long form: the 32-bit displacement is the last four
					// bytes of the instruction.
					binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v))
				}
			}

			p.Rel = nil

			p.Pc = int64(c)
			ab.asmins(ctxt, s, p)
			m := ab.Len()
			if int(p.Isize) != m {
				p.Isize = uint8(m)
				if pjc.reAssemble(p) {
					// We need to re-assemble here to check for jumps and fused jumps
					// that span or end on 32 byte boundaries.
					reAssemble = true
				}
			}

			s.Grow(p.Pc + int64(m))
			copy(s.P[p.Pc:], ab.Bytes())
			// If there was padding, remember it.
			if pPrev != nil && !ctxt.IsAsm && c > c0 {
				nops = append(nops, nopPad{p: pPrev, n: c - c0})
			}
			c += int32(m)
			pPrev = p
		}

		n++
		if n > 20 {
			ctxt.Diag("span must be looping")
			log.Fatalf("loop")
		}
		if !reAssemble {
			break
		}
		// Give up on further passes once this function has reported errors.
		if ctxt.Errors > errors {
			return
		}
	}
	// splice padding nops into Progs
	for _, n := range nops {
		pp := n.p
		np := &obj.Prog{Link: pp.Link, Ctxt: pp.Ctxt, As: obj.ANOP, Pos: pp.Pos.WithNotStmt(), Pc: pp.Pc + int64(pp.Isize), Isize: uint8(n.n)}
		pp.Link = np
	}

	s.Size = int64(c)

	// Disabled debug dump of the generated bytes and relocations.
	if false { /* debug['a'] > 1 */
		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
		var i int
		for i = 0; i < len(s.P); i++ {
			fmt.Printf(" %.2x", s.P[i])
			if i%16 == 15 {
				fmt.Printf("\n %.6x", uint(i+1))
			}
		}

		if i%16 != 0 {
			fmt.Printf("\n")
		}

		for i := 0; i < len(s.R); i++ {
			r := &s.R[i]
			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
		}
	}

	// Mark nonpreemptible instruction sequences.
	// The 2-instruction TLS access sequence
	//	MOVQ TLS, BX
	//	MOVQ 0(BX)(TLS*1), BX
	// is not async preemptible, as if it is preempted and resumed on
	// a different thread, the TLS address may become invalid.
	if !CanUse1InsnTLS(ctxt) {
		useTLS := func(p *obj.Prog) bool {
			// Only need to mark the second instruction, which has
			// REG_TLS as Index. (It is okay to interrupt and restart
			// the first instruction.)
			return p.From.Index == REG_TLS
		}
		obj.MarkUnsafePoints(ctxt, s.Func.Text, newprog, useTLS, nil)
	}
}
2238 return 2239 } 2240 2241 switch ctxt.Headtype { 2242 case objabi.Hplan9: 2243 plan9privates = ctxt.Lookup("_privates") 2244 } 2245 2246 for i := range avxOptab { 2247 c := avxOptab[i].as 2248 if opindex[c&obj.AMask] != nil { 2249 ctxt.Diag("phase error in avxOptab: %d (%v)", i, c) 2250 } 2251 opindex[c&obj.AMask] = &avxOptab[i] 2252 } 2253 for i := 1; optab[i].as != 0; i++ { 2254 c := optab[i].as 2255 if opindex[c&obj.AMask] != nil { 2256 ctxt.Diag("phase error in optab: %d (%v)", i, c) 2257 } 2258 opindex[c&obj.AMask] = &optab[i] 2259 } 2260 2261 for i := 0; i < Ymax; i++ { 2262 ycover[i*Ymax+i] = 1 2263 } 2264 2265 ycover[Yi0*Ymax+Yu2] = 1 2266 ycover[Yi1*Ymax+Yu2] = 1 2267 2268 ycover[Yi0*Ymax+Yi8] = 1 2269 ycover[Yi1*Ymax+Yi8] = 1 2270 ycover[Yu2*Ymax+Yi8] = 1 2271 ycover[Yu7*Ymax+Yi8] = 1 2272 2273 ycover[Yi0*Ymax+Yu7] = 1 2274 ycover[Yi1*Ymax+Yu7] = 1 2275 ycover[Yu2*Ymax+Yu7] = 1 2276 2277 ycover[Yi0*Ymax+Yu8] = 1 2278 ycover[Yi1*Ymax+Yu8] = 1 2279 ycover[Yu2*Ymax+Yu8] = 1 2280 ycover[Yu7*Ymax+Yu8] = 1 2281 2282 ycover[Yi0*Ymax+Ys32] = 1 2283 ycover[Yi1*Ymax+Ys32] = 1 2284 ycover[Yu2*Ymax+Ys32] = 1 2285 ycover[Yu7*Ymax+Ys32] = 1 2286 ycover[Yu8*Ymax+Ys32] = 1 2287 ycover[Yi8*Ymax+Ys32] = 1 2288 2289 ycover[Yi0*Ymax+Yi32] = 1 2290 ycover[Yi1*Ymax+Yi32] = 1 2291 ycover[Yu2*Ymax+Yi32] = 1 2292 ycover[Yu7*Ymax+Yi32] = 1 2293 ycover[Yu8*Ymax+Yi32] = 1 2294 ycover[Yi8*Ymax+Yi32] = 1 2295 ycover[Ys32*Ymax+Yi32] = 1 2296 2297 ycover[Yi0*Ymax+Yi64] = 1 2298 ycover[Yi1*Ymax+Yi64] = 1 2299 ycover[Yu7*Ymax+Yi64] = 1 2300 ycover[Yu2*Ymax+Yi64] = 1 2301 ycover[Yu8*Ymax+Yi64] = 1 2302 ycover[Yi8*Ymax+Yi64] = 1 2303 ycover[Ys32*Ymax+Yi64] = 1 2304 ycover[Yi32*Ymax+Yi64] = 1 2305 2306 ycover[Yal*Ymax+Yrb] = 1 2307 ycover[Ycl*Ymax+Yrb] = 1 2308 ycover[Yax*Ymax+Yrb] = 1 2309 ycover[Ycx*Ymax+Yrb] = 1 2310 ycover[Yrx*Ymax+Yrb] = 1 2311 ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32 2312 2313 ycover[Ycl*Ymax+Ycx] = 1 2314 2315 ycover[Yax*Ymax+Yrx] = 1 2316 ycover[Ycx*Ymax+Yrx] = 1 
2317 2318 ycover[Yax*Ymax+Yrl] = 1 2319 ycover[Ycx*Ymax+Yrl] = 1 2320 ycover[Yrx*Ymax+Yrl] = 1 2321 ycover[Yrl32*Ymax+Yrl] = 1 2322 2323 ycover[Yf0*Ymax+Yrf] = 1 2324 2325 ycover[Yal*Ymax+Ymb] = 1 2326 ycover[Ycl*Ymax+Ymb] = 1 2327 ycover[Yax*Ymax+Ymb] = 1 2328 ycover[Ycx*Ymax+Ymb] = 1 2329 ycover[Yrx*Ymax+Ymb] = 1 2330 ycover[Yrb*Ymax+Ymb] = 1 2331 ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32 2332 ycover[Ym*Ymax+Ymb] = 1 2333 2334 ycover[Yax*Ymax+Yml] = 1 2335 ycover[Ycx*Ymax+Yml] = 1 2336 ycover[Yrx*Ymax+Yml] = 1 2337 ycover[Yrl*Ymax+Yml] = 1 2338 ycover[Yrl32*Ymax+Yml] = 1 2339 ycover[Ym*Ymax+Yml] = 1 2340 2341 ycover[Yax*Ymax+Ymm] = 1 2342 ycover[Ycx*Ymax+Ymm] = 1 2343 ycover[Yrx*Ymax+Ymm] = 1 2344 ycover[Yrl*Ymax+Ymm] = 1 2345 ycover[Yrl32*Ymax+Ymm] = 1 2346 ycover[Ym*Ymax+Ymm] = 1 2347 ycover[Ymr*Ymax+Ymm] = 1 2348 2349 ycover[Yxr0*Ymax+Yxr] = 1 2350 2351 ycover[Ym*Ymax+Yxm] = 1 2352 ycover[Yxr0*Ymax+Yxm] = 1 2353 ycover[Yxr*Ymax+Yxm] = 1 2354 2355 ycover[Ym*Ymax+Yym] = 1 2356 ycover[Yyr*Ymax+Yym] = 1 2357 2358 ycover[Yxr0*Ymax+YxrEvex] = 1 2359 ycover[Yxr*Ymax+YxrEvex] = 1 2360 2361 ycover[Ym*Ymax+YxmEvex] = 1 2362 ycover[Yxr0*Ymax+YxmEvex] = 1 2363 ycover[Yxr*Ymax+YxmEvex] = 1 2364 ycover[YxrEvex*Ymax+YxmEvex] = 1 2365 2366 ycover[Yyr*Ymax+YyrEvex] = 1 2367 2368 ycover[Ym*Ymax+YymEvex] = 1 2369 ycover[Yyr*Ymax+YymEvex] = 1 2370 ycover[YyrEvex*Ymax+YymEvex] = 1 2371 2372 ycover[Ym*Ymax+Yzm] = 1 2373 ycover[Yzr*Ymax+Yzm] = 1 2374 2375 ycover[Yk0*Ymax+Yk] = 1 2376 ycover[Yknot0*Ymax+Yk] = 1 2377 2378 ycover[Yk0*Ymax+Ykm] = 1 2379 ycover[Yknot0*Ymax+Ykm] = 1 2380 ycover[Yk*Ymax+Ykm] = 1 2381 ycover[Ym*Ymax+Ykm] = 1 2382 2383 ycover[Yxvm*Ymax+YxvmEvex] = 1 2384 2385 ycover[Yyvm*Ymax+YyvmEvex] = 1 2386 2387 for i := 0; i < MAXREG; i++ { 2388 reg[i] = -1 2389 if i >= REG_AL && i <= REG_R15B { 2390 reg[i] = (i - REG_AL) & 7 2391 if i >= REG_SPB && i <= REG_DIB { 2392 regrex[i] = 0x40 2393 } 2394 if i >= REG_R8B && i <= REG_R15B { 2395 regrex[i] = Rxr | Rxx | Rxb 2396 
} 2397 } 2398 2399 if i >= REG_AH && i <= REG_BH { 2400 reg[i] = 4 + ((i - REG_AH) & 7) 2401 } 2402 if i >= REG_AX && i <= REG_R15 { 2403 reg[i] = (i - REG_AX) & 7 2404 if i >= REG_R8 { 2405 regrex[i] = Rxr | Rxx | Rxb 2406 } 2407 } 2408 2409 if i >= REG_F0 && i <= REG_F0+7 { 2410 reg[i] = (i - REG_F0) & 7 2411 } 2412 if i >= REG_M0 && i <= REG_M0+7 { 2413 reg[i] = (i - REG_M0) & 7 2414 } 2415 if i >= REG_K0 && i <= REG_K0+7 { 2416 reg[i] = (i - REG_K0) & 7 2417 } 2418 if i >= REG_X0 && i <= REG_X0+15 { 2419 reg[i] = (i - REG_X0) & 7 2420 if i >= REG_X0+8 { 2421 regrex[i] = Rxr | Rxx | Rxb 2422 } 2423 } 2424 if i >= REG_X16 && i <= REG_X16+15 { 2425 reg[i] = (i - REG_X16) & 7 2426 if i >= REG_X16+8 { 2427 regrex[i] = Rxr | Rxx | Rxb | RxrEvex 2428 } else { 2429 regrex[i] = RxrEvex 2430 } 2431 } 2432 if i >= REG_Y0 && i <= REG_Y0+15 { 2433 reg[i] = (i - REG_Y0) & 7 2434 if i >= REG_Y0+8 { 2435 regrex[i] = Rxr | Rxx | Rxb 2436 } 2437 } 2438 if i >= REG_Y16 && i <= REG_Y16+15 { 2439 reg[i] = (i - REG_Y16) & 7 2440 if i >= REG_Y16+8 { 2441 regrex[i] = Rxr | Rxx | Rxb | RxrEvex 2442 } else { 2443 regrex[i] = RxrEvex 2444 } 2445 } 2446 if i >= REG_Z0 && i <= REG_Z0+15 { 2447 reg[i] = (i - REG_Z0) & 7 2448 if i > REG_Z0+7 { 2449 regrex[i] = Rxr | Rxx | Rxb 2450 } 2451 } 2452 if i >= REG_Z16 && i <= REG_Z16+15 { 2453 reg[i] = (i - REG_Z16) & 7 2454 if i >= REG_Z16+8 { 2455 regrex[i] = Rxr | Rxx | Rxb | RxrEvex 2456 } else { 2457 regrex[i] = RxrEvex 2458 } 2459 } 2460 2461 if i >= REG_CR+8 && i <= REG_CR+15 { 2462 regrex[i] = Rxr 2463 } 2464 } 2465 } 2466 2467 var isAndroid = objabi.GOOS == "android" 2468 2469 func prefixof(ctxt *obj.Link, a *obj.Addr) int { 2470 if a.Reg < REG_CS && a.Index < REG_CS { // fast path 2471 return 0 2472 } 2473 if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE { 2474 switch a.Reg { 2475 case REG_CS: 2476 return 0x2e 2477 2478 case REG_DS: 2479 return 0x3e 2480 2481 case REG_ES: 2482 return 0x26 2483 2484 case REG_FS: 2485 return 0x64 2486 
2487 case REG_GS: 2488 return 0x65 2489 2490 case REG_TLS: 2491 // NOTE: Systems listed here should be only systems that 2492 // support direct TLS references like 8(TLS) implemented as 2493 // direct references from FS or GS. Systems that require 2494 // the initial-exec model, where you load the TLS base into 2495 // a register and then index from that register, do not reach 2496 // this code and should not be listed. 2497 if ctxt.Arch.Family == sys.I386 { 2498 switch ctxt.Headtype { 2499 default: 2500 if isAndroid { 2501 return 0x65 // GS 2502 } 2503 log.Fatalf("unknown TLS base register for %v", ctxt.Headtype) 2504 2505 case objabi.Hdarwin, 2506 objabi.Hdragonfly, 2507 objabi.Hfreebsd, 2508 objabi.Hnetbsd, 2509 objabi.Hopenbsd: 2510 return 0x65 // GS 2511 } 2512 } 2513 2514 switch ctxt.Headtype { 2515 default: 2516 log.Fatalf("unknown TLS base register for %v", ctxt.Headtype) 2517 2518 case objabi.Hlinux: 2519 if isAndroid { 2520 return 0x64 // FS 2521 } 2522 2523 if ctxt.Flag_shared { 2524 log.Fatalf("unknown TLS base register for linux with -shared") 2525 } else { 2526 return 0x64 // FS 2527 } 2528 2529 case objabi.Hdragonfly, 2530 objabi.Hfreebsd, 2531 objabi.Hnetbsd, 2532 objabi.Hopenbsd, 2533 objabi.Hsolaris: 2534 return 0x64 // FS 2535 2536 case objabi.Hdarwin: 2537 return 0x65 // GS 2538 } 2539 } 2540 } 2541 2542 if ctxt.Arch.Family == sys.I386 { 2543 if a.Index == REG_TLS && ctxt.Flag_shared { 2544 // When building for inclusion into a shared library, an instruction of the form 2545 // MOVL off(CX)(TLS*1), AX 2546 // becomes 2547 // mov %gs:off(%ecx), %eax 2548 // which assumes that the correct TLS offset has been loaded into %ecx (today 2549 // there is only one TLS variable -- g -- so this is OK). When not building for 2550 // a shared library the instruction it becomes 2551 // mov 0x0(%ecx), %eax 2552 // and a R_TLS_LE relocation, and so does not require a prefix. 
2553 return 0x65 // GS 2554 } 2555 return 0 2556 } 2557 2558 switch a.Index { 2559 case REG_CS: 2560 return 0x2e 2561 2562 case REG_DS: 2563 return 0x3e 2564 2565 case REG_ES: 2566 return 0x26 2567 2568 case REG_TLS: 2569 if ctxt.Flag_shared && ctxt.Headtype != objabi.Hwindows { 2570 // When building for inclusion into a shared library, an instruction of the form 2571 // MOV off(CX)(TLS*1), AX 2572 // becomes 2573 // mov %fs:off(%rcx), %rax 2574 // which assumes that the correct TLS offset has been loaded into %rcx (today 2575 // there is only one TLS variable -- g -- so this is OK). When not building for 2576 // a shared library the instruction does not require a prefix. 2577 return 0x64 2578 } 2579 2580 case REG_FS: 2581 return 0x64 2582 2583 case REG_GS: 2584 return 0x65 2585 } 2586 2587 return 0 2588 } 2589 2590 // oclassRegList returns multisource operand class for addr. 2591 func oclassRegList(ctxt *obj.Link, addr *obj.Addr) int { 2592 // TODO(quasilyte): when oclass register case is refactored into 2593 // lookup table, use it here to get register kind more easily. 2594 // Helper functions like regIsXmm should go away too (they will become redundant). 
2595 2596 regIsXmm := func(r int) bool { return r >= REG_X0 && r <= REG_X31 } 2597 regIsYmm := func(r int) bool { return r >= REG_Y0 && r <= REG_Y31 } 2598 regIsZmm := func(r int) bool { return r >= REG_Z0 && r <= REG_Z31 } 2599 2600 reg0, reg1 := decodeRegisterRange(addr.Offset) 2601 low := regIndex(int16(reg0)) 2602 high := regIndex(int16(reg1)) 2603 2604 if ctxt.Arch.Family == sys.I386 { 2605 if low >= 8 || high >= 8 { 2606 return Yxxx 2607 } 2608 } 2609 2610 switch high - low { 2611 case 3: 2612 switch { 2613 case regIsXmm(reg0) && regIsXmm(reg1): 2614 return YxrEvexMulti4 2615 case regIsYmm(reg0) && regIsYmm(reg1): 2616 return YyrEvexMulti4 2617 case regIsZmm(reg0) && regIsZmm(reg1): 2618 return YzrMulti4 2619 default: 2620 return Yxxx 2621 } 2622 default: 2623 return Yxxx 2624 } 2625 } 2626 2627 // oclassVMem returns V-mem (vector memory with VSIB) operand class. 2628 // For addr that is not V-mem returns (Yxxx, false). 2629 func oclassVMem(ctxt *obj.Link, addr *obj.Addr) (int, bool) { 2630 switch addr.Index { 2631 case REG_X0 + 0, 2632 REG_X0 + 1, 2633 REG_X0 + 2, 2634 REG_X0 + 3, 2635 REG_X0 + 4, 2636 REG_X0 + 5, 2637 REG_X0 + 6, 2638 REG_X0 + 7: 2639 return Yxvm, true 2640 case REG_X8 + 0, 2641 REG_X8 + 1, 2642 REG_X8 + 2, 2643 REG_X8 + 3, 2644 REG_X8 + 4, 2645 REG_X8 + 5, 2646 REG_X8 + 6, 2647 REG_X8 + 7: 2648 if ctxt.Arch.Family == sys.I386 { 2649 return Yxxx, true 2650 } 2651 return Yxvm, true 2652 case REG_X16 + 0, 2653 REG_X16 + 1, 2654 REG_X16 + 2, 2655 REG_X16 + 3, 2656 REG_X16 + 4, 2657 REG_X16 + 5, 2658 REG_X16 + 6, 2659 REG_X16 + 7, 2660 REG_X16 + 8, 2661 REG_X16 + 9, 2662 REG_X16 + 10, 2663 REG_X16 + 11, 2664 REG_X16 + 12, 2665 REG_X16 + 13, 2666 REG_X16 + 14, 2667 REG_X16 + 15: 2668 if ctxt.Arch.Family == sys.I386 { 2669 return Yxxx, true 2670 } 2671 return YxvmEvex, true 2672 2673 case REG_Y0 + 0, 2674 REG_Y0 + 1, 2675 REG_Y0 + 2, 2676 REG_Y0 + 3, 2677 REG_Y0 + 4, 2678 REG_Y0 + 5, 2679 REG_Y0 + 6, 2680 REG_Y0 + 7: 2681 return Yyvm, true 2682 
case REG_Y8 + 0, 2683 REG_Y8 + 1, 2684 REG_Y8 + 2, 2685 REG_Y8 + 3, 2686 REG_Y8 + 4, 2687 REG_Y8 + 5, 2688 REG_Y8 + 6, 2689 REG_Y8 + 7: 2690 if ctxt.Arch.Family == sys.I386 { 2691 return Yxxx, true 2692 } 2693 return Yyvm, true 2694 case REG_Y16 + 0, 2695 REG_Y16 + 1, 2696 REG_Y16 + 2, 2697 REG_Y16 + 3, 2698 REG_Y16 + 4, 2699 REG_Y16 + 5, 2700 REG_Y16 + 6, 2701 REG_Y16 + 7, 2702 REG_Y16 + 8, 2703 REG_Y16 + 9, 2704 REG_Y16 + 10, 2705 REG_Y16 + 11, 2706 REG_Y16 + 12, 2707 REG_Y16 + 13, 2708 REG_Y16 + 14, 2709 REG_Y16 + 15: 2710 if ctxt.Arch.Family == sys.I386 { 2711 return Yxxx, true 2712 } 2713 return YyvmEvex, true 2714 2715 case REG_Z0 + 0, 2716 REG_Z0 + 1, 2717 REG_Z0 + 2, 2718 REG_Z0 + 3, 2719 REG_Z0 + 4, 2720 REG_Z0 + 5, 2721 REG_Z0 + 6, 2722 REG_Z0 + 7: 2723 return Yzvm, true 2724 case REG_Z8 + 0, 2725 REG_Z8 + 1, 2726 REG_Z8 + 2, 2727 REG_Z8 + 3, 2728 REG_Z8 + 4, 2729 REG_Z8 + 5, 2730 REG_Z8 + 6, 2731 REG_Z8 + 7, 2732 REG_Z8 + 8, 2733 REG_Z8 + 9, 2734 REG_Z8 + 10, 2735 REG_Z8 + 11, 2736 REG_Z8 + 12, 2737 REG_Z8 + 13, 2738 REG_Z8 + 14, 2739 REG_Z8 + 15, 2740 REG_Z8 + 16, 2741 REG_Z8 + 17, 2742 REG_Z8 + 18, 2743 REG_Z8 + 19, 2744 REG_Z8 + 20, 2745 REG_Z8 + 21, 2746 REG_Z8 + 22, 2747 REG_Z8 + 23: 2748 if ctxt.Arch.Family == sys.I386 { 2749 return Yxxx, true 2750 } 2751 return Yzvm, true 2752 } 2753 2754 return Yxxx, false 2755 } 2756 2757 func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int { 2758 switch a.Type { 2759 case obj.TYPE_REGLIST: 2760 return oclassRegList(ctxt, a) 2761 2762 case obj.TYPE_NONE: 2763 return Ynone 2764 2765 case obj.TYPE_BRANCH: 2766 return Ybr 2767 2768 case obj.TYPE_INDIR: 2769 if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 { 2770 return Yindir 2771 } 2772 return Yxxx 2773 2774 case obj.TYPE_MEM: 2775 // Pseudo registers have negative index, but SP is 2776 // not pseudo on x86, hence REG_SP check is not redundant. 
2777 if a.Index == REG_SP || a.Index < 0 { 2778 // Can't use FP/SB/PC/SP as the index register. 2779 return Yxxx 2780 } 2781 2782 if vmem, ok := oclassVMem(ctxt, a); ok { 2783 return vmem 2784 } 2785 2786 if ctxt.Arch.Family == sys.AMD64 { 2787 switch a.Name { 2788 case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF: 2789 // Global variables can't use index registers and their 2790 // base register is %rip (%rip is encoded as REG_NONE). 2791 if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 { 2792 return Yxxx 2793 } 2794 case obj.NAME_AUTO, obj.NAME_PARAM: 2795 // These names must have a base of SP. The old compiler 2796 // uses 0 for the base register. SSA uses REG_SP. 2797 if a.Reg != REG_SP && a.Reg != 0 { 2798 return Yxxx 2799 } 2800 case obj.NAME_NONE: 2801 // everything is ok 2802 default: 2803 // unknown name 2804 return Yxxx 2805 } 2806 } 2807 return Ym 2808 2809 case obj.TYPE_ADDR: 2810 switch a.Name { 2811 case obj.NAME_GOTREF: 2812 ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF") 2813 return Yxxx 2814 2815 case obj.NAME_EXTERN, 2816 obj.NAME_STATIC: 2817 if a.Sym != nil && useAbs(ctxt, a.Sym) { 2818 return Yi32 2819 } 2820 return Yiauto // use pc-relative addressing 2821 2822 case obj.NAME_AUTO, 2823 obj.NAME_PARAM: 2824 return Yiauto 2825 } 2826 2827 // TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index 2828 // and got Yi32 in an earlier version of this code. 2829 // Keep doing that until we fix yduff etc. 
2830 if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") { 2831 return Yi32 2832 } 2833 2834 if a.Sym != nil || a.Name != obj.NAME_NONE { 2835 ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a)) 2836 } 2837 fallthrough 2838 2839 case obj.TYPE_CONST: 2840 if a.Sym != nil { 2841 ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a)) 2842 } 2843 2844 v := a.Offset 2845 if ctxt.Arch.Family == sys.I386 { 2846 v = int64(int32(v)) 2847 } 2848 switch { 2849 case v == 0: 2850 return Yi0 2851 case v == 1: 2852 return Yi1 2853 case v >= 0 && v <= 3: 2854 return Yu2 2855 case v >= 0 && v <= 127: 2856 return Yu7 2857 case v >= 0 && v <= 255: 2858 return Yu8 2859 case v >= -128 && v <= 127: 2860 return Yi8 2861 } 2862 if ctxt.Arch.Family == sys.I386 { 2863 return Yi32 2864 } 2865 l := int32(v) 2866 if int64(l) == v { 2867 return Ys32 // can sign extend 2868 } 2869 if v>>32 == 0 { 2870 return Yi32 // unsigned 2871 } 2872 return Yi64 2873 2874 case obj.TYPE_TEXTSIZE: 2875 return Ytextsize 2876 } 2877 2878 if a.Type != obj.TYPE_REG { 2879 ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a)) 2880 return Yxxx 2881 } 2882 2883 switch a.Reg { 2884 case REG_AL: 2885 return Yal 2886 2887 case REG_AX: 2888 return Yax 2889 2890 /* 2891 case REG_SPB: 2892 */ 2893 case REG_BPB, 2894 REG_SIB, 2895 REG_DIB, 2896 REG_R8B, 2897 REG_R9B, 2898 REG_R10B, 2899 REG_R11B, 2900 REG_R12B, 2901 REG_R13B, 2902 REG_R14B, 2903 REG_R15B: 2904 if ctxt.Arch.Family == sys.I386 { 2905 return Yxxx 2906 } 2907 fallthrough 2908 2909 case REG_DL, 2910 REG_BL, 2911 REG_AH, 2912 REG_CH, 2913 REG_DH, 2914 REG_BH: 2915 return Yrb 2916 2917 case REG_CL: 2918 return Ycl 2919 2920 case REG_CX: 2921 return Ycx 2922 2923 case REG_DX, REG_BX: 2924 return Yrx 2925 2926 case REG_R8, // not really Yrl 2927 REG_R9, 2928 REG_R10, 2929 REG_R11, 2930 REG_R12, 2931 REG_R13, 2932 REG_R14, 2933 REG_R15: 2934 if ctxt.Arch.Family == sys.I386 { 2935 return Yxxx 2936 } 2937 fallthrough 2938 2939 case REG_SP, 
REG_BP, REG_SI, REG_DI: 2940 if ctxt.Arch.Family == sys.I386 { 2941 return Yrl32 2942 } 2943 return Yrl 2944 2945 case REG_F0 + 0: 2946 return Yf0 2947 2948 case REG_F0 + 1, 2949 REG_F0 + 2, 2950 REG_F0 + 3, 2951 REG_F0 + 4, 2952 REG_F0 + 5, 2953 REG_F0 + 6, 2954 REG_F0 + 7: 2955 return Yrf 2956 2957 case REG_M0 + 0, 2958 REG_M0 + 1, 2959 REG_M0 + 2, 2960 REG_M0 + 3, 2961 REG_M0 + 4, 2962 REG_M0 + 5, 2963 REG_M0 + 6, 2964 REG_M0 + 7: 2965 return Ymr 2966 2967 case REG_X0: 2968 return Yxr0 2969 2970 case REG_X0 + 1, 2971 REG_X0 + 2, 2972 REG_X0 + 3, 2973 REG_X0 + 4, 2974 REG_X0 + 5, 2975 REG_X0 + 6, 2976 REG_X0 + 7, 2977 REG_X0 + 8, 2978 REG_X0 + 9, 2979 REG_X0 + 10, 2980 REG_X0 + 11, 2981 REG_X0 + 12, 2982 REG_X0 + 13, 2983 REG_X0 + 14, 2984 REG_X0 + 15: 2985 return Yxr 2986 2987 case REG_X0 + 16, 2988 REG_X0 + 17, 2989 REG_X0 + 18, 2990 REG_X0 + 19, 2991 REG_X0 + 20, 2992 REG_X0 + 21, 2993 REG_X0 + 22, 2994 REG_X0 + 23, 2995 REG_X0 + 24, 2996 REG_X0 + 25, 2997 REG_X0 + 26, 2998 REG_X0 + 27, 2999 REG_X0 + 28, 3000 REG_X0 + 29, 3001 REG_X0 + 30, 3002 REG_X0 + 31: 3003 return YxrEvex 3004 3005 case REG_Y0 + 0, 3006 REG_Y0 + 1, 3007 REG_Y0 + 2, 3008 REG_Y0 + 3, 3009 REG_Y0 + 4, 3010 REG_Y0 + 5, 3011 REG_Y0 + 6, 3012 REG_Y0 + 7, 3013 REG_Y0 + 8, 3014 REG_Y0 + 9, 3015 REG_Y0 + 10, 3016 REG_Y0 + 11, 3017 REG_Y0 + 12, 3018 REG_Y0 + 13, 3019 REG_Y0 + 14, 3020 REG_Y0 + 15: 3021 return Yyr 3022 3023 case REG_Y0 + 16, 3024 REG_Y0 + 17, 3025 REG_Y0 + 18, 3026 REG_Y0 + 19, 3027 REG_Y0 + 20, 3028 REG_Y0 + 21, 3029 REG_Y0 + 22, 3030 REG_Y0 + 23, 3031 REG_Y0 + 24, 3032 REG_Y0 + 25, 3033 REG_Y0 + 26, 3034 REG_Y0 + 27, 3035 REG_Y0 + 28, 3036 REG_Y0 + 29, 3037 REG_Y0 + 30, 3038 REG_Y0 + 31: 3039 return YyrEvex 3040 3041 case REG_Z0 + 0, 3042 REG_Z0 + 1, 3043 REG_Z0 + 2, 3044 REG_Z0 + 3, 3045 REG_Z0 + 4, 3046 REG_Z0 + 5, 3047 REG_Z0 + 6, 3048 REG_Z0 + 7: 3049 return Yzr 3050 3051 case REG_Z0 + 8, 3052 REG_Z0 + 9, 3053 REG_Z0 + 10, 3054 REG_Z0 + 11, 3055 REG_Z0 + 12, 3056 REG_Z0 + 13, 
3057 REG_Z0 + 14, 3058 REG_Z0 + 15, 3059 REG_Z0 + 16, 3060 REG_Z0 + 17, 3061 REG_Z0 + 18, 3062 REG_Z0 + 19, 3063 REG_Z0 + 20, 3064 REG_Z0 + 21, 3065 REG_Z0 + 22, 3066 REG_Z0 + 23, 3067 REG_Z0 + 24, 3068 REG_Z0 + 25, 3069 REG_Z0 + 26, 3070 REG_Z0 + 27, 3071 REG_Z0 + 28, 3072 REG_Z0 + 29, 3073 REG_Z0 + 30, 3074 REG_Z0 + 31: 3075 if ctxt.Arch.Family == sys.I386 { 3076 return Yxxx 3077 } 3078 return Yzr 3079 3080 case REG_K0: 3081 return Yk0 3082 3083 case REG_K0 + 1, 3084 REG_K0 + 2, 3085 REG_K0 + 3, 3086 REG_K0 + 4, 3087 REG_K0 + 5, 3088 REG_K0 + 6, 3089 REG_K0 + 7: 3090 return Yknot0 3091 3092 case REG_CS: 3093 return Ycs 3094 case REG_SS: 3095 return Yss 3096 case REG_DS: 3097 return Yds 3098 case REG_ES: 3099 return Yes 3100 case REG_FS: 3101 return Yfs 3102 case REG_GS: 3103 return Ygs 3104 case REG_TLS: 3105 return Ytls 3106 3107 case REG_GDTR: 3108 return Ygdtr 3109 case REG_IDTR: 3110 return Yidtr 3111 case REG_LDTR: 3112 return Yldtr 3113 case REG_MSW: 3114 return Ymsw 3115 case REG_TASK: 3116 return Ytask 3117 3118 case REG_CR + 0: 3119 return Ycr0 3120 case REG_CR + 1: 3121 return Ycr1 3122 case REG_CR + 2: 3123 return Ycr2 3124 case REG_CR + 3: 3125 return Ycr3 3126 case REG_CR + 4: 3127 return Ycr4 3128 case REG_CR + 5: 3129 return Ycr5 3130 case REG_CR + 6: 3131 return Ycr6 3132 case REG_CR + 7: 3133 return Ycr7 3134 case REG_CR + 8: 3135 return Ycr8 3136 3137 case REG_DR + 0: 3138 return Ydr0 3139 case REG_DR + 1: 3140 return Ydr1 3141 case REG_DR + 2: 3142 return Ydr2 3143 case REG_DR + 3: 3144 return Ydr3 3145 case REG_DR + 4: 3146 return Ydr4 3147 case REG_DR + 5: 3148 return Ydr5 3149 case REG_DR + 6: 3150 return Ydr6 3151 case REG_DR + 7: 3152 return Ydr7 3153 3154 case REG_TR + 0: 3155 return Ytr0 3156 case REG_TR + 1: 3157 return Ytr1 3158 case REG_TR + 2: 3159 return Ytr2 3160 case REG_TR + 3: 3161 return Ytr3 3162 case REG_TR + 4: 3163 return Ytr4 3164 case REG_TR + 5: 3165 return Ytr5 3166 case REG_TR + 6: 3167 return Ytr6 3168 case REG_TR + 
7: 3169 return Ytr7 3170 } 3171 3172 return Yxxx 3173 } 3174 3175 // AsmBuf is a simple buffer to assemble variable-length x86 instructions into 3176 // and hold assembly state. 3177 type AsmBuf struct { 3178 buf [100]byte 3179 off int 3180 rexflag int 3181 vexflag bool // Per inst: true for VEX-encoded 3182 evexflag bool // Per inst: true for EVEX-encoded 3183 rep bool 3184 repn bool 3185 lock bool 3186 3187 evex evexBits // Initialized when evexflag is true 3188 } 3189 3190 // Put1 appends one byte to the end of the buffer. 3191 func (ab *AsmBuf) Put1(x byte) { 3192 ab.buf[ab.off] = x 3193 ab.off++ 3194 } 3195 3196 // Put2 appends two bytes to the end of the buffer. 3197 func (ab *AsmBuf) Put2(x, y byte) { 3198 ab.buf[ab.off+0] = x 3199 ab.buf[ab.off+1] = y 3200 ab.off += 2 3201 } 3202 3203 // Put3 appends three bytes to the end of the buffer. 3204 func (ab *AsmBuf) Put3(x, y, z byte) { 3205 ab.buf[ab.off+0] = x 3206 ab.buf[ab.off+1] = y 3207 ab.buf[ab.off+2] = z 3208 ab.off += 3 3209 } 3210 3211 // Put4 appends four bytes to the end of the buffer. 3212 func (ab *AsmBuf) Put4(x, y, z, w byte) { 3213 ab.buf[ab.off+0] = x 3214 ab.buf[ab.off+1] = y 3215 ab.buf[ab.off+2] = z 3216 ab.buf[ab.off+3] = w 3217 ab.off += 4 3218 } 3219 3220 // PutInt16 writes v into the buffer using little-endian encoding. 3221 func (ab *AsmBuf) PutInt16(v int16) { 3222 ab.buf[ab.off+0] = byte(v) 3223 ab.buf[ab.off+1] = byte(v >> 8) 3224 ab.off += 2 3225 } 3226 3227 // PutInt32 writes v into the buffer using little-endian encoding. 3228 func (ab *AsmBuf) PutInt32(v int32) { 3229 ab.buf[ab.off+0] = byte(v) 3230 ab.buf[ab.off+1] = byte(v >> 8) 3231 ab.buf[ab.off+2] = byte(v >> 16) 3232 ab.buf[ab.off+3] = byte(v >> 24) 3233 ab.off += 4 3234 } 3235 3236 // PutInt64 writes v into the buffer using little-endian encoding. 
func (ab *AsmBuf) PutInt64(v int64) {
	// Bytes are stored least-significant first.
	ab.buf[ab.off+0] = byte(v)
	ab.buf[ab.off+1] = byte(v >> 8)
	ab.buf[ab.off+2] = byte(v >> 16)
	ab.buf[ab.off+3] = byte(v >> 24)
	ab.buf[ab.off+4] = byte(v >> 32)
	ab.buf[ab.off+5] = byte(v >> 40)
	ab.buf[ab.off+6] = byte(v >> 48)
	ab.buf[ab.off+7] = byte(v >> 56)
	ab.off += 8
}

// Put copies b into the buffer.
//
// copy stops at the end of the backing array, but the write offset is
// always advanced by len(b), so callers must not pass more bytes than
// the buffer has room for.
func (ab *AsmBuf) Put(b []byte) {
	copy(ab.buf[ab.off:], b)
	ab.off += len(b)
}

// PutOpBytesLit writes zero terminated sequence of bytes from op,
// starting at specified offset (e.g. z counter value).
// Trailing 0 is not written.
//
// Intended to be used for literal Z cases.
// Literal Z cases usually have "Zlit" in their name (Zlit, Zlitr_m, Zlitm_r).
func (ab *AsmBuf) PutOpBytesLit(offset int, op *opBytes) {
	// The terminator is tested before each write, so a zero at
	// op[offset] emits nothing.
	for int(op[offset]) != 0 {
		ab.Put1(byte(op[offset]))
		offset++
	}
}

// Insert inserts b at offset i.
// The bytes previously at [i, Len()) are shifted one position towards the
// end and the buffer grows by one byte.
func (ab *AsmBuf) Insert(i int, b byte) {
	ab.off++
	copy(ab.buf[i+1:ab.off], ab.buf[i:ab.off-1])
	ab.buf[i] = b
}

// Last returns the byte at the end of the buffer.
// It reads buf[off-1], so it must not be called on an empty buffer.
func (ab *AsmBuf) Last() byte { return ab.buf[ab.off-1] }

// Len returns the length of the buffer.
func (ab *AsmBuf) Len() int { return ab.off }

// Bytes returns the contents of the buffer.
// The result is a slice of the internal array, not a copy.
func (ab *AsmBuf) Bytes() []byte { return ab.buf[:ab.off] }

// Reset empties the buffer.
// Only the write offset is rewound; the other AsmBuf fields are untouched.
func (ab *AsmBuf) Reset() { ab.off = 0 }

// At returns the byte at offset i.
func (ab *AsmBuf) At(i int) byte { return ab.buf[i] }

// asmidx emits SIB byte.
//
// The byte is assembled as scale (bits 7-6), index (bits 5-3) and
// base (bits 2-0):
//   - index REG_NONE is encoded as 4 in the index field and the scale
//     is left at zero (the scale switch is skipped);
//   - base REG_NONE is encoded as 5 in the base field;
//   - scale must be 1, 2, 4 or 8.
//
// Registers numbered 8 and above are rejected when assembling for 386.
// Any index, scale or base not listed below is reported through
// ctxt.Diag and a zero byte is emitted in place of the SIB byte.
func (ab *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) {
	var i int

	// X/Y index register is used in VSIB.
	switch index {
	default:
		// Includes REG_SP, which is not listed as a valid index.
		goto bad

	case REG_NONE:
		i = 4 << 3
		goto bas

	case REG_R8,
		REG_R9,
		REG_R10,
		REG_R11,
		REG_R12,
		REG_R13,
		REG_R14,
		REG_R15,
		REG_X8,
		REG_X9,
		REG_X10,
		REG_X11,
		REG_X12,
		REG_X13,
		REG_X14,
		REG_X15,
		REG_X16,
		REG_X17,
		REG_X18,
		REG_X19,
		REG_X20,
		REG_X21,
		REG_X22,
		REG_X23,
		REG_X24,
		REG_X25,
		REG_X26,
		REG_X27,
		REG_X28,
		REG_X29,
		REG_X30,
		REG_X31,
		REG_Y8,
		REG_Y9,
		REG_Y10,
		REG_Y11,
		REG_Y12,
		REG_Y13,
		REG_Y14,
		REG_Y15,
		REG_Y16,
		REG_Y17,
		REG_Y18,
		REG_Y19,
		REG_Y20,
		REG_Y21,
		REG_Y22,
		REG_Y23,
		REG_Y24,
		REG_Y25,
		REG_Y26,
		REG_Y27,
		REG_Y28,
		REG_Y29,
		REG_Y30,
		REG_Y31,
		REG_Z8,
		REG_Z9,
		REG_Z10,
		REG_Z11,
		REG_Z12,
		REG_Z13,
		REG_Z14,
		REG_Z15,
		REG_Z16,
		REG_Z17,
		REG_Z18,
		REG_Z19,
		REG_Z20,
		REG_Z21,
		REG_Z22,
		REG_Z23,
		REG_Z24,
		REG_Z25,
		REG_Z26,
		REG_Z27,
		REG_Z28,
		REG_Z29,
		REG_Z30,
		REG_Z31:
		// Extended registers are not available on 386.
		if ctxt.Arch.Family == sys.I386 {
			goto bad
		}
		fallthrough

	case REG_AX,
		REG_CX,
		REG_DX,
		REG_BX,
		REG_BP,
		REG_SI,
		REG_DI,
		REG_X0,
		REG_X1,
		REG_X2,
		REG_X3,
		REG_X4,
		REG_X5,
		REG_X6,
		REG_X7,
		REG_Y0,
		REG_Y1,
		REG_Y2,
		REG_Y3,
		REG_Y4,
		REG_Y5,
		REG_Y6,
		REG_Y7,
		REG_Z0,
		REG_Z1,
		REG_Z2,
		REG_Z3,
		REG_Z4,
		REG_Z5,
		REG_Z6,
		REG_Z7:
		i = reg[index] << 3
	}

	// Scale factor goes into the top two bits; 1 leaves them zero.
	switch scale {
	default:
		goto bad

	case 1:
		break

	case 2:
		i |= 1 << 6

	case 4:
		i |= 2 << 6

	case 8:
		i |= 3 << 6
	}

bas:
	switch base {
	default:
		goto bad

	case REG_NONE: // must be mod=00
		i |= 5

	case REG_R8,
		REG_R9,
		REG_R10,
		REG_R11,
		REG_R12,
		REG_R13,
		REG_R14,
		REG_R15:
		if ctxt.Arch.Family == sys.I386 {
			goto bad
		}
		fallthrough

	case REG_AX,
		REG_CX,
		REG_DX,
		REG_BX,
		REG_SP,
		REG_BP,
		REG_SI,
		REG_DI:
		i |= reg[base]
	}

	ab.Put1(byte(i))
	return

bad:
	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
	// Still emit one byte so the instruction keeps its expected length.
	ab.Put1(0)
}

// relput4 emits the 32-bit value of a, as computed by vaddr.
// If vaddr produced a relocation, a copy of it is added to cursym with its
// offset set to the position at which the four bytes are written
// (p.Pc plus the current buffer length). A relocation size other than 4
// is reported as "bad reloc".
func (ab *AsmBuf) relput4(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr) {
	var rel obj.Reloc

	v := vaddr(ctxt, p, a, &rel)
	if rel.Siz != 0 {
		if rel.Siz != 4 {
			ctxt.Diag("bad reloc")
		}
		r := obj.Addrel(cursym)
		*r = rel
		// Must be computed before PutInt32 advances the buffer.
		r.Off = int32(p.Pc + int64(ab.Len()))
	}

	ab.PutInt32(int32(v))
}

// vaddr returns the constant to encode for a and, through r, describes the
// relocation that must accompany it.
//
// If r is non-nil it is first reset to the zero Reloc. Then:
//   - for NAME_STATIC, NAME_GOTREF and NAME_EXTERN operands, r is filled
//     with a 4-byte relocation against a.Sym (R_GOTPCREL, R_ADDR or
//     R_PCREL), a.Offset becomes the addend, and 0 is returned;
//   - for TYPE_MEM/TYPE_ADDR operands based on REG_TLS, 0 is returned and,
//     unless building shared code for a target other than Android or
//     Darwin, r is filled with a 4-byte R_TLS_LE relocation;
//   - otherwise a.Offset is returned and r stays zeroed.
//
// In the two relocation cases r.Off is set to -1 and must be filled in by
// the caller. Passing a nil r for an operand that needs a relocation is
// fatal (ctxt.Diag followed by log.Fatalf).
func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
	if r != nil {
		*r = obj.Reloc{}
	}

	switch a.Name {
	case obj.NAME_STATIC,
		obj.NAME_GOTREF,
		obj.NAME_EXTERN:
		s := a.Sym
		if r == nil {
			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
			log.Fatalf("reloc")
		}

		if a.Name == obj.NAME_GOTREF {
			r.Siz = 4
			r.Type = objabi.R_GOTPCREL
		} else if useAbs(ctxt, s) {
			r.Siz = 4
			r.Type = objabi.R_ADDR
		} else {
			r.Siz = 4
			r.Type = objabi.R_PCREL
		}

		r.Off = -1 // caller must fill in
		r.Sym = s
		r.Add = a.Offset

		return 0
	}

	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
		if r == nil {
			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
			log.Fatalf("reloc")
		}

		// When this condition is false r is left as the zero Reloc
		// (Siz == 0), i.e. no relocation is requested here.
		if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == objabi.Hdarwin {
			r.Type = objabi.R_TLS_LE
			r.Siz = 4
			r.Off = -1 // caller must fill in
			r.Add = a.Offset
		}
		return 0
	}

	return a.Offset
}

// asmandsz emits the ModR/M byte for operand a, followed by the SIB byte
// and displacement when the addressing form requires them.
//
// r is placed in bits 5-3 of the ModR/M byte. rex is reduced to its
// 0x40 and Rxr bits and, on every path that emits bytes, OR-ed into
// ab.rexflag together with the extension bits required by a's own base
// and index registers. m64 is not referenced in the function body.
//
// When the operand needs a relocation (see vaddr) a copy is added to
// cursym at the position of the 32-bit displacement. Operands that cannot
// be encoded are reported through ctxt.Diag as "asmand: bad address".
func (ab *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
	var base int
	var rel obj.Reloc

	rex &= 0x40 | Rxr
	if a.Offset != int64(int32(a.Offset)) {
		// The rules are slightly different for 386 and AMD64,
		// mostly for historical reasons. We may unify them later,
		// but it must be discussed beforehand.
		//
		// For 64bit mode only LEAL is allowed to overflow.
		// It's how https://golang.org/cl/59630 made it.
		// crypto/sha1/sha1block_amd64.s depends on this feature.
		//
		// For 32bit mode rules are more permissive.
		// If offset fits uint32, it's permitted.
		// This is allowed for assembly that wants to use 32-bit hex
		// constants, e.g. LEAL 0x99999999(AX), AX.
		overflowOK := (ctxt.Arch.Family == sys.AMD64 && p.As == ALEAL) ||
			(ctxt.Arch.Family != sys.AMD64 &&
				int64(uint32(a.Offset)) == a.Offset &&
				ab.rexflag&Rxw == 0)
		if !overflowOK {
			ctxt.Diag("offset too large in %s", p)
		}
	}
	// From here on the displacement is handled as 32 bits.
	v := int32(a.Offset)
	rel.Siz = 0

	switch a.Type {
	case obj.TYPE_ADDR:
		// Never encodable here; the extra diagnostics only refine the
		// error message before falling into the generic one.
		if a.Name == obj.NAME_NONE {
			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
		}
		if a.Index == REG_TLS {
			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
		}
		goto bad

	case obj.TYPE_REG:
		const regFirst = REG_AL
		const regLast = REG_Z31
		if a.Reg < regFirst || regLast < a.Reg {
			goto bad
		}
		if v != 0 {
			goto bad
		}
		// mod=3: register-direct operand, no SIB or displacement.
		ab.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
		ab.rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
		return
	}

	if a.Type != obj.TYPE_MEM {
		goto bad
	}

	// Memory operand with a real index register: always uses a SIB byte
	// (rm field = 4).
	if a.Index != REG_NONE && a.Index != REG_TLS {
		// NOTE: this declaration shadows the function-level base for
		// the rest of this block; every path below leaves the block
		// via return or goto.
		base := int(a.Reg)
		switch a.Name {
		case obj.NAME_EXTERN,
			obj.NAME_GOTREF,
			obj.NAME_STATIC:
			if !useAbs(ctxt, a.Sym) && ctxt.Arch.Family == sys.AMD64 {
				goto bad
			}
			if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
				// The base register has already been set. It holds the PC
				// of this instruction returned by a PC-reading thunk.
				// See obj6.go:rewriteToPcrel.
			} else {
				base = REG_NONE
			}
			v = int32(vaddr(ctxt, p, a, &rel))

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			base = REG_SP
		}

		ab.rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
		if base == REG_NONE {
			// mod=0 with no base: SIB followed by a 32-bit displacement.
			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			goto putrelv
		}

		// No displacement at all. BP and R13 are excluded; presumably
		// because their low register bits coincide with the "no base"
		// value asmidx uses under mod=00 -- confirm against the reg table.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			return
		}

		// mod=1: one-byte displacement, only when no relocation is needed.
		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
			ab.Put1(byte(1<<6 | 4<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			ab.Put1(disp8)
			return
		}

		// mod=2: four-byte displacement.
		ab.Put1(byte(2<<6 | 4<<0 | r<<3))
		ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
		goto putrelv
	}

	// Memory operand without an index register (or with the REG_TLS
	// pseudo-index).
	base = int(a.Reg)
	switch a.Name {
	case obj.NAME_STATIC,
		obj.NAME_GOTREF,
		obj.NAME_EXTERN:
		if a.Sym == nil {
			ctxt.Diag("bad addr: %v", p)
		}
		if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
			// The base register has already been set. It holds the PC
			// of this instruction returned by a PC-reading thunk.
			// See obj6.go:rewriteToPcrel.
		} else {
			base = REG_NONE
		}
		v = int32(vaddr(ctxt, p, a, &rel))

	case obj.NAME_AUTO,
		obj.NAME_PARAM:
		base = REG_SP
	}

	if base == REG_TLS {
		v = int32(vaddr(ctxt, p, a, &rel))
	}

	ab.rexflag |= regrex[base]&Rxb | rex
	// Absolute / segment-relative / TLS-relative address: no base register
	// is encoded, only a 32-bit displacement.
	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
		// Note the precedence: the && chain is evaluated first, then
		// OR-ed with the "not AMD64" test, so every non-AMD64 target
		// takes this short form.
		if (a.Sym == nil || !useAbs(ctxt, a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || ctxt.Arch.Family != sys.AMD64 {
			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
				ctxt.Diag("%v has offset against gotref", p)
			}
			// mod=0, rm=5 followed by a 32-bit displacement.
			ab.Put1(byte(0<<6 | 5<<0 | r<<3))
			goto putrelv
		}

		// temporary
		ab.Put2(
			byte(0<<6|4<<0|r<<3), // sib present
			0<<6|4<<3|5<<0, // DS:d32
		)
		goto putrelv
	}

	// SP and R12 as base always go through a SIB byte with no index.
	if base == REG_SP || base == REG_R12 {
		if v == 0 {
			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			return
		}

		if disp8, ok := toDisp8(v, p, ab); ok {
			ab.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			ab.Put1(disp8)
			return
		}

		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
		goto putrelv
	}

	// Any other general-purpose base register: plain ModR/M, no SIB.
	if REG_AX <= base && base <= REG_R15 {
		if a.Index == REG_TLS && !ctxt.Flag_shared && !isAndroid {
			// The displacement moves into the addend of a TLS
			// relocation, which also forces the 32-bit form below.
			rel = obj.Reloc{}
			rel.Type = objabi.R_TLS_LE
			rel.Siz = 4
			rel.Sym = nil
			rel.Add = int64(v)
			v = 0
		}

		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			return
		}

		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
			ab.Put2(byte(1<<6|reg[base]<<0|r<<3), disp8)
			return
		}

		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		goto putrelv
	}

	goto bad

putrelv:
	// Emit the 32-bit displacement, attaching the pending relocation (if
	// any) at the offset where those four bytes are written.
	if rel.Siz != 0 {
		if rel.Siz != 4 {
			ctxt.Diag("bad rel")
			goto bad
		}

		r := obj.Addrel(cursym)
		*r = rel
		r.Off = int32(p.Pc + int64(ab.Len()))
	}

	ab.PutInt32(v)
	return

bad:
	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
}

// asmand encodes operand a with register operand ra in the ModR/M reg
// field: it calls asmandsz with reg[ra.Reg] and regrex[ra.Reg].
func (ab *AsmBuf) asmand(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
	ab.asmandsz(ctxt, cursym, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
}

// asmando encodes operand a with the literal value o in the ModR/M reg
// field and no extra REX bits.
func (ab *AsmBuf) asmando(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, o int) {
	ab.asmandsz(ctxt, cursym, p, a, o, 0, 0)
}

// bytereg rewrites a in place from a register in the REG_AX..REG_R15 range
// to the register at the same distance from REG_AL, and zeroes *t.
// Operands that are not plain registers (or that have an index) are left
// alone.
//
// doasm recomputes p.Ft / p.Tt when they are zero, so clearing *t forces
// the operand to be classified again after the rewrite.
func bytereg(a *obj.Addr, t *uint8) {
	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
		a.Reg += REG_AL - REG_AX
		*t = 0
	}
}

// unbytereg is the inverse of bytereg: a register in the
// REG_AL..REG_R15B range is rewritten in place to the register at the same
// distance from REG_AX, and *t is zeroed.
func unbytereg(a *obj.Addr, t *uint8) {
	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
		a.Reg += REG_AX - REG_AL
		*t = 0
	}
}

// Encoding kinds used in the fifth field of ymovtab entries.
// NOTE(review): how each kind is emitted is decided by the consumer of
// ymovtab, which is outside this excerpt.
const (
	movLit uint8 = iota // Like Zlit
	movRegMem
	movMemReg
	movRegMem2op
	movMemReg2op
	movFullPtr // Load full pointer, trash heap (unsupported)
	movDoubleShift
	movTLSReg
)

// ymovtab lists special-case encodings, grouped by the comments below.
// Each entry holds an instruction, three operand classes, one of the mov*
// kinds above and up to four encoding bytes. The table ends with an
// all-zero entry (presumably the end marker for the lookup loop -- confirm
// against the code that scans it).
var ymovtab = []movtab{
	// push
	{APUSHL, Ycs, Ynone, Ynone, movLit, [4]uint8{0x0e, 0}},
	{APUSHL, Yss, Ynone, Ynone, movLit, [4]uint8{0x16, 0}},
	{APUSHL, Yds, Ynone, Ynone, movLit, [4]uint8{0x1e, 0}},
	{APUSHL, Yes, Ynone, Ynone, movLit, [4]uint8{0x06, 0}},
	{APUSHL, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHL, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHQ, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHQ, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHW, Ycs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0e, 0}},
	{APUSHW, Yss, Ynone, Ynone, movLit, [4]uint8{Pe, 0x16, 0}},
	{APUSHW, Yds, Ynone, Ynone, movLit, [4]uint8{Pe, 0x1e, 0}},
	{APUSHW, Yes, Ynone, Ynone, movLit, [4]uint8{Pe, 0x06, 0}},
	{APUSHW, Yfs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa0, 0}},
	{APUSHW, Ygs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa8, 0}},

	// pop
	{APOPL, Ynone, Ynone, Yds, movLit, [4]uint8{0x1f, 0}},
	{APOPL, Ynone, Ynone, Yes, movLit, [4]uint8{0x07, 0}},
	{APOPL, Ynone, Ynone, Yss, movLit, [4]uint8{0x17, 0}},
	{APOPL, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPL, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPQ, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPQ, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPW, Ynone, Ynone, Yds, movLit, [4]uint8{Pe, 0x1f, 0}},
	{APOPW, Ynone, Ynone, Yes, movLit, [4]uint8{Pe, 0x07, 0}},
	{APOPW, Ynone, Ynone, Yss, movLit, [4]uint8{Pe, 0x17, 0}},
	{APOPW, Ynone, Ynone, Yfs, movLit, [4]uint8{Pe, 0x0f, 0xa1, 0}},
	{APOPW, Ynone, Ynone, Ygs, movLit, [4]uint8{Pe, 0x0f, 0xa9, 0}},

	// mov seg
	{AMOVW, Yes, Ynone, Yml, movRegMem, [4]uint8{0x8c, 0, 0, 0}},
	{AMOVW, Ycs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 1, 0, 0}},
	{AMOVW, Yss, Ynone, Yml, movRegMem, [4]uint8{0x8c, 2, 0, 0}},
	{AMOVW, Yds, Ynone, Yml, movRegMem, [4]uint8{0x8c, 3, 0, 0}},
	{AMOVW, Yfs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 4, 0, 0}},
	{AMOVW, Ygs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 5, 0, 0}},
	{AMOVW, Yml, Ynone, Yes, movMemReg, [4]uint8{0x8e, 0, 0, 0}},
	{AMOVW, Yml, Ynone, Ycs, movMemReg, [4]uint8{0x8e, 1, 0, 0}},
	{AMOVW, Yml, Ynone, Yss, movMemReg, [4]uint8{0x8e, 2, 0, 0}},
	{AMOVW, Yml, Ynone, Yds, movMemReg, [4]uint8{0x8e, 3, 0, 0}},
	{AMOVW, Yml, Ynone, Yfs, movMemReg, [4]uint8{0x8e, 4, 0, 0}},
	{AMOVW, Yml, Ynone, Ygs, movMemReg, [4]uint8{0x8e, 5, 0, 0}},

	// mov cr
	{AMOVL, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVL, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVL, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVL, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVL, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVQ, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVQ, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVQ, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVQ, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVQ, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVL, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVL, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVL, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVL, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVL, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},
	{AMOVQ, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVQ, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},

	// mov dr
	{AMOVL, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVL, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVL, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVQ, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVQ, Ydr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 2, 0}},
	{AMOVQ, Ydr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 3, 0}},
	{AMOVQ, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVQ, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVL, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVL, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVL, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},
	{AMOVQ, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ydr2, movMemReg2op, [4]uint8{0x0f, 0x23, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ydr3, movMemReg2op, [4]uint8{0x0f, 0x23, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVQ, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},

	// mov tr
	{AMOVL, Ytr6, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 6, 0}},
	{AMOVL, Ytr7, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 7, 0}},
	{AMOVL, Yml, Ynone, Ytr6, movMemReg2op, [4]uint8{0x0f, 0x26, 6, 0xff}},
	{AMOVL, Yml, Ynone, Ytr7, movMemReg2op, [4]uint8{0x0f, 0x26, 7, 0xff}},

	// lgdt, sgdt, lidt, sidt
	{AMOVL, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVL, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVL, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVL, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},
	{AMOVQ, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVQ, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVQ, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVQ, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},

	// lldt, sldt
	{AMOVW, Yml, Ynone, Yldtr, movMemReg2op, [4]uint8{0x0f, 0x00, 2, 0}},
	{AMOVW, Yldtr, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 0, 0}},

	// lmsw, smsw
	{AMOVW, Yml, Ynone, Ymsw, movMemReg2op, [4]uint8{0x0f, 0x01, 6, 0}},
	{AMOVW, Ymsw, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x01, 4, 0}},

	// ltr, str
	{AMOVW, Yml, Ynone, Ytask, movMemReg2op, [4]uint8{0x0f, 0x00, 3, 0}},
	{AMOVW, Ytask, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 1, 0}},

	/* load full pointer - unsupported
	{AMOVL, Yml, Ycol, movFullPtr, [4]uint8{0, 0, 0, 0}},
	{AMOVW, Yml, Ycol, movFullPtr, [4]uint8{Pe, 0, 0, 0}},
	*/

	// double shift
	{ASHLL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHRL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHLQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHRQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHLW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHRW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},

	// load TLS base
	{AMOVL, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
	{AMOVQ, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
	{0, 0, 0, 0, 0, [4]uint8{}},
}

// isax reports whether a refers to the AX register: either a.Reg is one of
// AX, AL or AH, or a.Index is AX.
func isax(a *obj.Addr) bool {
	switch a.Reg {
	case REG_AX, REG_AL, REG_AH:
		return true
	}

	if a.Index == REG_AX {
		return true
	}
	return false
}

// subreg replaces register from with register to wherever it appears as
// the Reg or Index of p.From or p.To. The cached operand class (p.Ft or
// p.Tt) of each operand that is changed is reset to 0 so that it gets
// recomputed.
func subreg(p *obj.Prog, from int, to int) {
	if false { /* debug['Q'] */
		fmt.Printf("\n%v\ts/%v/%v/\n", p, rconv(from), rconv(to))
	}

	if int(p.From.Reg) == from {
		p.From.Reg = int16(to)
		p.Ft = 0
	}

	if int(p.To.Reg) == from {
		p.To.Reg = int16(to)
		p.Tt = 0
	}

	if int(p.From.Index) == from {
		p.From.Index = int16(to)
		p.Ft = 0
	}

	if int(p.To.Index) == from {
		p.To.Index = int16(to)
		p.Tt = 0
	}

	if false { /* debug['Q'] */
		fmt.Printf("%v\n", p)
	}
}

// mediaop emits the escape/prefix bytes and the opcode byte op, and
// returns the (possibly advanced) index z into o.op.
//
// When op is itself one of Pm, Pe, Pf2 or Pf3 and osize != 1, op is
// treated as a prefix: it is emitted (unless it is Pm), followed by Pm,
// and the real opcode is taken from the next table byte, o.op[z+1].
// In every other case Pm is emitted first unless the buffer already ends
// with Pm, and op is emitted as given.
func (ab *AsmBuf) mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
	switch op {
	case Pm, Pe, Pf2, Pf3:
		if osize != 1 {
			if op != Pm {
				ab.Put1(byte(op))
			}
			ab.Put1(Pm)
			z++
			op = int(o.op[z])
			break
		}
		fallthrough

	default:
		// Avoid emitting the Pm escape twice in a row.
		if ab.Len() == 0 || ab.Last() != Pm {
			ab.Put1(Pm)
		}
	}

	ab.Put1(byte(op))
	return z
}

// bpduff1 holds the encoded bytes of the two instructions annotated below.
var bpduff1 = []byte{
	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
}

// bpduff2 holds the encoded bytes of the instruction annotated below.
var bpduff2 = []byte{
	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
}

// asmevex emits EVEX prefix and opcode byte.
// In addition to asmvex r/m, vvvv and reg fields also requires optional
// K-masking register.
//
// Any of rm, v, r and k may be nil, in which case the corresponding
// fields keep their default values. The register-extension bits of the
// prefix start out as 1 and are cleared when the register needs the
// extension; the vvvv field is likewise complemented before being stored.
// Suffix-related problems (zeroing, rounding, broadcast, SAE) are reported
// through ctxt.Diag but the prefix is still emitted.
//
// Expects ab.evex to be properly initialized.
func (ab *AsmBuf) asmevex(ctxt *obj.Link, p *obj.Prog, rm, v, r, k *obj.Addr) {
	ab.evexflag = true
	evex := ab.evex

	rexR := byte(1)
	evexR := byte(1)
	rexX := byte(1)
	rexB := byte(1)
	if r != nil {
		if regrex[r.Reg]&Rxr != 0 {
			rexR = 0 // "ModR/M.reg" selector 4th bit.
		}
		if regrex[r.Reg]&RxrEvex != 0 {
			evexR = 0 // "ModR/M.reg" selector 5th bit.
		}
	}
	if rm != nil {
		// With no index register, the X bit carries the RxrEvex bit
		// of rm.Reg; otherwise it carries the index register's Rxx bit.
		if rm.Index == REG_NONE && regrex[rm.Reg]&RxrEvex != 0 {
			rexX = 0
		} else if regrex[rm.Index]&Rxx != 0 {
			rexX = 0
		}
		if regrex[rm.Reg]&Rxb != 0 {
			rexB = 0
		}
	}
	// P0 = [R][X][B][R'][00][mm]
	p0 := (rexR << 7) |
		(rexX << 6) |
		(rexB << 5) |
		(evexR << 4) |
		(0 << 2) |
		(evex.M() << 0)

	vexV := byte(0)
	if v != nil {
		// 4bit-wide reg index.
		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
	}
	// The field is stored complemented.
	vexV ^= 0x0F
	// P1 = [W][vvvv][1][pp]
	p1 := (evex.W() << 7) |
		(vexV << 3) |
		(1 << 2) |
		(evex.P() << 0)

	suffix := evexSuffixMap[p.Scond]
	evexZ := byte(0)
	evexLL := evex.L()
	evexB := byte(0)
	evexV := byte(1)
	evexA := byte(0)
	if suffix.zeroing {
		if !evex.ZeroingEnabled() {
			ctxt.Diag("unsupported zeroing: %v", p)
		}
		evexZ = 1
	}
	// Rounding, broadcast and SAE all set the b bit; they are checked in
	// this order and only the first matching case applies.
	switch {
	case suffix.rounding != rcUnset:
		if rm != nil && rm.Type == obj.TYPE_MEM {
			ctxt.Diag("illegal rounding with memory argument: %v", p)
		} else if !evex.RoundingEnabled() {
			ctxt.Diag("unsupported rounding: %v", p)
		}
		evexB = 1
		// The rounding mode replaces the vector-length bits.
		evexLL = suffix.rounding
	case suffix.broadcast:
		if rm == nil || rm.Type != obj.TYPE_MEM {
			ctxt.Diag("illegal broadcast without memory argument: %v", p)
		} else if !evex.BroadcastEnabled() {
			ctxt.Diag("unsupported broadcast: %v", p)
		}
		evexB = 1
	case suffix.sae:
		if rm != nil && rm.Type == obj.TYPE_MEM {
			ctxt.Diag("illegal SAE with memory argument: %v", p)
		} else if !evex.SaeEnabled() {
			ctxt.Diag("unsupported SAE: %v", p)
		}
		evexB = 1
	}
	if rm != nil && regrex[rm.Index]&RxrEvex != 0 {
		evexV = 0
	} else if v != nil && regrex[v.Reg]&RxrEvex != 0 {
		evexV = 0 // VSR selector 5th bit.
	}
	if k != nil {
		evexA = byte(reg[k.Reg])
	}
	// P2 = [z][L'L][b][V'][aaa]
	p2 := (evexZ << 7) |
		(evexLL << 5) |
		(evexB << 4) |
		(evexV << 3) |
		(evexA << 0)

	const evexEscapeByte = 0x62
	ab.Put4(evexEscapeByte, p0, p1, p2)
	ab.Put1(evex.opcode)
}

// Emit VEX prefix and opcode byte.
// The three addresses are the r/m, vvvv, and reg fields.
// The reg and rm arguments appear in the same order as the
// arguments to asmand, which typically follows the call to asmvex.
// The final two arguments are the VEX prefix (see encoding above)
// and the opcode byte.
// For details about vex prefix see:
// https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
func (ab *AsmBuf) asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
	ab.vexflag = true
	rexR := 0
	if r != nil {
		rexR = regrex[r.Reg] & Rxr
	}
	rexB := 0
	rexX := 0
	if rm != nil {
		rexB = regrex[rm.Reg] & Rxb
		rexX = regrex[rm.Index] & Rxx
	}
	// Split the packed vex argument: bits 5-3 go into vexM,
	// bit 7 and bits 2-0 are kept in place in vexWLP.
	vexM := (vex >> 3) & 0x7
	vexWLP := vex & 0x87
	vexV := byte(0)
	if v != nil {
		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
	}
	// The vvvv field is stored complemented.
	vexV ^= 0xF
	if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
		// Can use 2-byte encoding.
		ab.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
	} else {
		// Must use 3-byte encoding.
		ab.Put3(0xc4,
			(byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
			vexV<<3|vexWLP,
		)
	}
	ab.Put1(opcode)
}

// regIndex returns register index that fits in 5 bits.
4159 // 4160 // R : 3 bit | legacy instructions | N/A 4161 // [R/V]EX.R : 1 bit | REX / VEX extension bit | Rxr 4162 // EVEX.R : 1 bit | EVEX extension bit | RxrEvex 4163 // 4164 // Examples: 4165 // REG_Z30 => 30 4166 // REG_X15 => 15 4167 // REG_R9 => 9 4168 // REG_AX => 0 4169 // 4170 func regIndex(r int16) int { 4171 lower3bits := reg[r] 4172 high4bit := regrex[r] & Rxr << 1 4173 high5bit := regrex[r] & RxrEvex << 0 4174 return lower3bits | high4bit | high5bit 4175 } 4176 4177 // avx2gatherValid reports whether p satisfies AVX2 gather constraints. 4178 // Reports errors via ctxt. 4179 func avx2gatherValid(ctxt *obj.Link, p *obj.Prog) bool { 4180 // If any pair of the index, mask, or destination registers 4181 // are the same, illegal instruction trap (#UD) is triggered. 4182 index := regIndex(p.GetFrom3().Index) 4183 mask := regIndex(p.From.Reg) 4184 dest := regIndex(p.To.Reg) 4185 if dest == mask || dest == index || mask == index { 4186 ctxt.Diag("mask, index, and destination registers should be distinct: %v", p) 4187 return false 4188 } 4189 4190 return true 4191 } 4192 4193 // avx512gatherValid reports whether p satisfies AVX512 gather constraints. 4194 // Reports errors via ctxt. 4195 func avx512gatherValid(ctxt *obj.Link, p *obj.Prog) bool { 4196 // Illegal instruction trap (#UD) is triggered if the destination vector 4197 // register is the same as index vector in VSIB. 
4198 index := regIndex(p.From.Index) 4199 dest := regIndex(p.To.Reg) 4200 if dest == index { 4201 ctxt.Diag("index and destination registers should be distinct: %v", p) 4202 return false 4203 } 4204 4205 return true 4206 } 4207 4208 func (ab *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) { 4209 o := opindex[p.As&obj.AMask] 4210 4211 if o == nil { 4212 ctxt.Diag("asmins: missing op %v", p) 4213 return 4214 } 4215 4216 if pre := prefixof(ctxt, &p.From); pre != 0 { 4217 ab.Put1(byte(pre)) 4218 } 4219 if pre := prefixof(ctxt, &p.To); pre != 0 { 4220 ab.Put1(byte(pre)) 4221 } 4222 4223 // Checks to warn about instruction/arguments combinations that 4224 // will unconditionally trigger illegal instruction trap (#UD). 4225 switch p.As { 4226 case AVGATHERDPD, 4227 AVGATHERQPD, 4228 AVGATHERDPS, 4229 AVGATHERQPS, 4230 AVPGATHERDD, 4231 AVPGATHERQD, 4232 AVPGATHERDQ, 4233 AVPGATHERQQ: 4234 // AVX512 gather requires explicit K mask. 4235 if p.GetFrom3().Reg >= REG_K0 && p.GetFrom3().Reg <= REG_K7 { 4236 if !avx512gatherValid(ctxt, p) { 4237 return 4238 } 4239 } else { 4240 if !avx2gatherValid(ctxt, p) { 4241 return 4242 } 4243 } 4244 } 4245 4246 if p.Ft == 0 { 4247 p.Ft = uint8(oclass(ctxt, p, &p.From)) 4248 } 4249 if p.Tt == 0 { 4250 p.Tt = uint8(oclass(ctxt, p, &p.To)) 4251 } 4252 4253 ft := int(p.Ft) * Ymax 4254 var f3t int 4255 tt := int(p.Tt) * Ymax 4256 4257 xo := obj.Bool2int(o.op[0] == 0x0f) 4258 z := 0 4259 var a *obj.Addr 4260 var l int 4261 var op int 4262 var q *obj.Prog 4263 var r *obj.Reloc 4264 var rel obj.Reloc 4265 var v int64 4266 4267 args := make([]int, 0, argListMax) 4268 if ft != Ynone*Ymax { 4269 args = append(args, ft) 4270 } 4271 for i := range p.RestArgs { 4272 args = append(args, oclass(ctxt, p, &p.RestArgs[i])*Ymax) 4273 } 4274 if tt != Ynone*Ymax { 4275 args = append(args, tt) 4276 } 4277 4278 for _, yt := range o.ytab { 4279 // ytab matching is purely args-based, 4280 // but AVX512 suffixes like "Z" or "RU_SAE" will 4281 // add 
EVEX-only filter that will reject non-EVEX matches. 4282 // 4283 // Consider "VADDPD.BCST 2032(DX), X0, X0". 4284 // Without this rule, operands will lead to VEX-encoded form 4285 // and produce "c5b15813" encoding. 4286 if !yt.match(args) { 4287 // "xo" is always zero for VEX/EVEX encoded insts. 4288 z += int(yt.zoffset) + xo 4289 } else { 4290 if p.Scond != 0 && !evexZcase(yt.zcase) { 4291 // Do not signal error and continue to search 4292 // for matching EVEX-encoded form. 4293 z += int(yt.zoffset) 4294 continue 4295 } 4296 4297 switch o.prefix { 4298 case Px1: // first option valid only in 32-bit mode 4299 if ctxt.Arch.Family == sys.AMD64 && z == 0 { 4300 z += int(yt.zoffset) + xo 4301 continue 4302 } 4303 case Pq: // 16 bit escape and opcode escape 4304 ab.Put2(Pe, Pm) 4305 4306 case Pq3: // 16 bit escape and opcode escape + REX.W 4307 ab.rexflag |= Pw 4308 ab.Put2(Pe, Pm) 4309 4310 case Pq4: // 66 0F 38 4311 ab.Put3(0x66, 0x0F, 0x38) 4312 4313 case Pq4w: // 66 0F 38 + REX.W 4314 ab.rexflag |= Pw 4315 ab.Put3(0x66, 0x0F, 0x38) 4316 4317 case Pq5: // F3 0F 38 4318 ab.Put3(0xF3, 0x0F, 0x38) 4319 4320 case Pq5w: // F3 0F 38 + REX.W 4321 ab.rexflag |= Pw 4322 ab.Put3(0xF3, 0x0F, 0x38) 4323 4324 case Pf2, // xmm opcode escape 4325 Pf3: 4326 ab.Put2(o.prefix, Pm) 4327 4328 case Pef3: 4329 ab.Put3(Pe, Pf3, Pm) 4330 4331 case Pfw: // xmm opcode escape + REX.W 4332 ab.rexflag |= Pw 4333 ab.Put2(Pf3, Pm) 4334 4335 case Pm: // opcode escape 4336 ab.Put1(Pm) 4337 4338 case Pe: // 16 bit escape 4339 ab.Put1(Pe) 4340 4341 case Pw: // 64-bit escape 4342 if ctxt.Arch.Family != sys.AMD64 { 4343 ctxt.Diag("asmins: illegal 64: %v", p) 4344 } 4345 ab.rexflag |= Pw 4346 4347 case Pw8: // 64-bit escape if z >= 8 4348 if z >= 8 { 4349 if ctxt.Arch.Family != sys.AMD64 { 4350 ctxt.Diag("asmins: illegal 64: %v", p) 4351 } 4352 ab.rexflag |= Pw 4353 } 4354 4355 case Pb: // botch 4356 if ctxt.Arch.Family != sys.AMD64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) { 4357 goto bad 4358 } 4359 
// NOTE(rsc): This is probably safe to do always, 4360 // but when enabled it chooses different encodings 4361 // than the old cmd/internal/obj/i386 code did, 4362 // which breaks our "same bits out" checks. 4363 // In particular, CMPB AX, $0 encodes as 80 f8 00 4364 // in the original obj/i386, and it would encode 4365 // (using a valid, shorter form) as 3c 00 if we enabled 4366 // the call to bytereg here. 4367 if ctxt.Arch.Family == sys.AMD64 { 4368 bytereg(&p.From, &p.Ft) 4369 bytereg(&p.To, &p.Tt) 4370 } 4371 4372 case P32: // 32 bit but illegal if 64-bit mode 4373 if ctxt.Arch.Family == sys.AMD64 { 4374 ctxt.Diag("asmins: illegal in 64-bit mode: %v", p) 4375 } 4376 4377 case Py: // 64-bit only, no prefix 4378 if ctxt.Arch.Family != sys.AMD64 { 4379 ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p) 4380 } 4381 4382 case Py1: // 64-bit only if z < 1, no prefix 4383 if z < 1 && ctxt.Arch.Family != sys.AMD64 { 4384 ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p) 4385 } 4386 4387 case Py3: // 64-bit only if z < 3, no prefix 4388 if z < 3 && ctxt.Arch.Family != sys.AMD64 { 4389 ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p) 4390 } 4391 } 4392 4393 if z >= len(o.op) { 4394 log.Fatalf("asmins bad table %v", p) 4395 } 4396 op = int(o.op[z]) 4397 if op == 0x0f { 4398 ab.Put1(byte(op)) 4399 z++ 4400 op = int(o.op[z]) 4401 } 4402 4403 switch yt.zcase { 4404 default: 4405 ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p) 4406 return 4407 4408 case Zpseudo: 4409 break 4410 4411 case Zlit: 4412 ab.PutOpBytesLit(z, &o.op) 4413 4414 case Zlitr_m: 4415 ab.PutOpBytesLit(z, &o.op) 4416 ab.asmand(ctxt, cursym, p, &p.To, &p.From) 4417 4418 case Zlitm_r: 4419 ab.PutOpBytesLit(z, &o.op) 4420 ab.asmand(ctxt, cursym, p, &p.From, &p.To) 4421 4422 case Zlit_m_r: 4423 ab.PutOpBytesLit(z, &o.op) 4424 ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To) 4425 4426 case Zmb_r: 4427 bytereg(&p.From, &p.Ft) 4428 fallthrough 4429 
4430 case Zm_r: 4431 ab.Put1(byte(op)) 4432 ab.asmand(ctxt, cursym, p, &p.From, &p.To) 4433 4434 case Z_m_r: 4435 ab.Put1(byte(op)) 4436 ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To) 4437 4438 case Zm2_r: 4439 ab.Put2(byte(op), o.op[z+1]) 4440 ab.asmand(ctxt, cursym, p, &p.From, &p.To) 4441 4442 case Zm_r_xm: 4443 ab.mediaop(ctxt, o, op, int(yt.zoffset), z) 4444 ab.asmand(ctxt, cursym, p, &p.From, &p.To) 4445 4446 case Zm_r_xm_nr: 4447 ab.rexflag = 0 4448 ab.mediaop(ctxt, o, op, int(yt.zoffset), z) 4449 ab.asmand(ctxt, cursym, p, &p.From, &p.To) 4450 4451 case Zm_r_i_xm: 4452 ab.mediaop(ctxt, o, op, int(yt.zoffset), z) 4453 ab.asmand(ctxt, cursym, p, &p.From, p.GetFrom3()) 4454 ab.Put1(byte(p.To.Offset)) 4455 4456 case Zibm_r, Zibr_m: 4457 ab.PutOpBytesLit(z, &o.op) 4458 if yt.zcase == Zibr_m { 4459 ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3()) 4460 } else { 4461 ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To) 4462 } 4463 switch { 4464 default: 4465 ab.Put1(byte(p.From.Offset)) 4466 case yt.args[0] == Yi32 && o.prefix == Pe: 4467 ab.PutInt16(int16(p.From.Offset)) 4468 case yt.args[0] == Yi32: 4469 ab.PutInt32(int32(p.From.Offset)) 4470 } 4471 4472 case Zaut_r: 4473 ab.Put1(0x8d) // leal 4474 if p.From.Type != obj.TYPE_ADDR { 4475 ctxt.Diag("asmins: Zaut sb type ADDR") 4476 } 4477 p.From.Type = obj.TYPE_MEM 4478 ab.asmand(ctxt, cursym, p, &p.From, &p.To) 4479 p.From.Type = obj.TYPE_ADDR 4480 4481 case Zm_o: 4482 ab.Put1(byte(op)) 4483 ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1])) 4484 4485 case Zr_m: 4486 ab.Put1(byte(op)) 4487 ab.asmand(ctxt, cursym, p, &p.To, &p.From) 4488 4489 case Zvex: 4490 ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1]) 4491 4492 case Zvex_rm_v_r: 4493 ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1]) 4494 ab.asmand(ctxt, cursym, p, &p.From, &p.To) 4495 4496 case Zvex_rm_v_ro: 4497 ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1]) 4498 ab.asmando(ctxt, cursym, p, &p.From, 
int(o.op[z+2])) 4499 4500 case Zvex_i_rm_vo: 4501 ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1]) 4502 ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+2])) 4503 ab.Put1(byte(p.From.Offset)) 4504 4505 case Zvex_i_r_v: 4506 ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1]) 4507 regnum := byte(0x7) 4508 if p.GetFrom3().Reg >= REG_X0 && p.GetFrom3().Reg <= REG_X15 { 4509 regnum &= byte(p.GetFrom3().Reg - REG_X0) 4510 } else { 4511 regnum &= byte(p.GetFrom3().Reg - REG_Y0) 4512 } 4513 ab.Put1(o.op[z+2] | regnum) 4514 ab.Put1(byte(p.From.Offset)) 4515 4516 case Zvex_i_rm_v_r: 4517 imm, from, from3, to := unpackOps4(p) 4518 ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1]) 4519 ab.asmand(ctxt, cursym, p, from, to) 4520 ab.Put1(byte(imm.Offset)) 4521 4522 case Zvex_i_rm_r: 4523 ab.asmvex(ctxt, p.GetFrom3(), nil, &p.To, o.op[z], o.op[z+1]) 4524 ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To) 4525 ab.Put1(byte(p.From.Offset)) 4526 4527 case Zvex_v_rm_r: 4528 ab.asmvex(ctxt, p.GetFrom3(), &p.From, &p.To, o.op[z], o.op[z+1]) 4529 ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To) 4530 4531 case Zvex_r_v_rm: 4532 ab.asmvex(ctxt, &p.To, p.GetFrom3(), &p.From, o.op[z], o.op[z+1]) 4533 ab.asmand(ctxt, cursym, p, &p.To, &p.From) 4534 4535 case Zvex_rm_r_vo: 4536 ab.asmvex(ctxt, &p.From, &p.To, p.GetFrom3(), o.op[z], o.op[z+1]) 4537 ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2])) 4538 4539 case Zvex_i_r_rm: 4540 ab.asmvex(ctxt, &p.To, nil, p.GetFrom3(), o.op[z], o.op[z+1]) 4541 ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3()) 4542 ab.Put1(byte(p.From.Offset)) 4543 4544 case Zvex_hr_rm_v_r: 4545 hr, from, from3, to := unpackOps4(p) 4546 ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1]) 4547 ab.asmand(ctxt, cursym, p, from, to) 4548 ab.Put1(byte(regIndex(hr.Reg) << 4)) 4549 4550 case Zevex_k_rmo: 4551 ab.evex = newEVEXBits(z, &o.op) 4552 ab.asmevex(ctxt, p, &p.To, nil, nil, &p.From) 4553 ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+3])) 4554 4555 
case Zevex_i_rm_vo: 4556 ab.evex = newEVEXBits(z, &o.op) 4557 ab.asmevex(ctxt, p, p.GetFrom3(), &p.To, nil, nil) 4558 ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+3])) 4559 ab.Put1(byte(p.From.Offset)) 4560 4561 case Zevex_i_rm_k_vo: 4562 imm, from, kmask, to := unpackOps4(p) 4563 ab.evex = newEVEXBits(z, &o.op) 4564 ab.asmevex(ctxt, p, from, to, nil, kmask) 4565 ab.asmando(ctxt, cursym, p, from, int(o.op[z+3])) 4566 ab.Put1(byte(imm.Offset)) 4567 4568 case Zevex_i_r_rm: 4569 ab.evex = newEVEXBits(z, &o.op) 4570 ab.asmevex(ctxt, p, &p.To, nil, p.GetFrom3(), nil) 4571 ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3()) 4572 ab.Put1(byte(p.From.Offset)) 4573 4574 case Zevex_i_r_k_rm: 4575 imm, from, kmask, to := unpackOps4(p) 4576 ab.evex = newEVEXBits(z, &o.op) 4577 ab.asmevex(ctxt, p, to, nil, from, kmask) 4578 ab.asmand(ctxt, cursym, p, to, from) 4579 ab.Put1(byte(imm.Offset)) 4580 4581 case Zevex_i_rm_r: 4582 ab.evex = newEVEXBits(z, &o.op) 4583 ab.asmevex(ctxt, p, p.GetFrom3(), nil, &p.To, nil) 4584 ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To) 4585 ab.Put1(byte(p.From.Offset)) 4586 4587 case Zevex_i_rm_k_r: 4588 imm, from, kmask, to := unpackOps4(p) 4589 ab.evex = newEVEXBits(z, &o.op) 4590 ab.asmevex(ctxt, p, from, nil, to, kmask) 4591 ab.asmand(ctxt, cursym, p, from, to) 4592 ab.Put1(byte(imm.Offset)) 4593 4594 case Zevex_i_rm_v_r: 4595 imm, from, from3, to := unpackOps4(p) 4596 ab.evex = newEVEXBits(z, &o.op) 4597 ab.asmevex(ctxt, p, from, from3, to, nil) 4598 ab.asmand(ctxt, cursym, p, from, to) 4599 ab.Put1(byte(imm.Offset)) 4600 4601 case Zevex_i_rm_v_k_r: 4602 imm, from, from3, kmask, to := unpackOps5(p) 4603 ab.evex = newEVEXBits(z, &o.op) 4604 ab.asmevex(ctxt, p, from, from3, to, kmask) 4605 ab.asmand(ctxt, cursym, p, from, to) 4606 ab.Put1(byte(imm.Offset)) 4607 4608 case Zevex_r_v_rm: 4609 ab.evex = newEVEXBits(z, &o.op) 4610 ab.asmevex(ctxt, p, &p.To, p.GetFrom3(), &p.From, nil) 4611 ab.asmand(ctxt, cursym, p, &p.To, &p.From) 4612 4613 case 
Zevex_rm_v_r: 4614 ab.evex = newEVEXBits(z, &o.op) 4615 ab.asmevex(ctxt, p, &p.From, p.GetFrom3(), &p.To, nil) 4616 ab.asmand(ctxt, cursym, p, &p.From, &p.To) 4617 4618 case Zevex_rm_k_r: 4619 ab.evex = newEVEXBits(z, &o.op) 4620 ab.asmevex(ctxt, p, &p.From, nil, &p.To, p.GetFrom3()) 4621 ab.asmand(ctxt, cursym, p, &p.From, &p.To) 4622 4623 case Zevex_r_k_rm: 4624 ab.evex = newEVEXBits(z, &o.op) 4625 ab.asmevex(ctxt, p, &p.To, nil, &p.From, p.GetFrom3()) 4626 ab.asmand(ctxt, cursym, p, &p.To, &p.From) 4627 4628 case Zevex_rm_v_k_r: 4629 from, from3, kmask, to := unpackOps4(p) 4630 ab.evex = newEVEXBits(z, &o.op) 4631 ab.asmevex(ctxt, p, from, from3, to, kmask) 4632 ab.asmand(ctxt, cursym, p, from, to) 4633 4634 case Zevex_r_v_k_rm: 4635 from, from3, kmask, to := unpackOps4(p) 4636 ab.evex = newEVEXBits(z, &o.op) 4637 ab.asmevex(ctxt, p, to, from3, from, kmask) 4638 ab.asmand(ctxt, cursym, p, to, from) 4639 4640 case Zr_m_xm: 4641 ab.mediaop(ctxt, o, op, int(yt.zoffset), z) 4642 ab.asmand(ctxt, cursym, p, &p.To, &p.From) 4643 4644 case Zr_m_xm_nr: 4645 ab.rexflag = 0 4646 ab.mediaop(ctxt, o, op, int(yt.zoffset), z) 4647 ab.asmand(ctxt, cursym, p, &p.To, &p.From) 4648 4649 case Zo_m: 4650 ab.Put1(byte(op)) 4651 ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1])) 4652 4653 case Zcallindreg: 4654 r = obj.Addrel(cursym) 4655 r.Off = int32(p.Pc) 4656 r.Type = objabi.R_CALLIND 4657 r.Siz = 0 4658 fallthrough 4659 4660 case Zo_m64: 4661 ab.Put1(byte(op)) 4662 ab.asmandsz(ctxt, cursym, p, &p.To, int(o.op[z+1]), 0, 1) 4663 4664 case Zm_ibo: 4665 ab.Put1(byte(op)) 4666 ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1])) 4667 ab.Put1(byte(vaddr(ctxt, p, &p.To, nil))) 4668 4669 case Zibo_m: 4670 ab.Put1(byte(op)) 4671 ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1])) 4672 ab.Put1(byte(vaddr(ctxt, p, &p.From, nil))) 4673 4674 case Zibo_m_xm: 4675 z = ab.mediaop(ctxt, o, op, int(yt.zoffset), z) 4676 ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1])) 4677 ab.Put1(byte(vaddr(ctxt, 
p, &p.From, nil))) 4678 4679 case Z_ib, Zib_: 4680 if yt.zcase == Zib_ { 4681 a = &p.From 4682 } else { 4683 a = &p.To 4684 } 4685 ab.Put1(byte(op)) 4686 if p.As == AXABORT { 4687 ab.Put1(o.op[z+1]) 4688 } 4689 ab.Put1(byte(vaddr(ctxt, p, a, nil))) 4690 4691 case Zib_rp: 4692 ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40) 4693 ab.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil))) 4694 4695 case Zil_rp: 4696 ab.rexflag |= regrex[p.To.Reg] & Rxb 4697 ab.Put1(byte(op + reg[p.To.Reg])) 4698 if o.prefix == Pe { 4699 v = vaddr(ctxt, p, &p.From, nil) 4700 ab.PutInt16(int16(v)) 4701 } else { 4702 ab.relput4(ctxt, cursym, p, &p.From) 4703 } 4704 4705 case Zo_iw: 4706 ab.Put1(byte(op)) 4707 if p.From.Type != obj.TYPE_NONE { 4708 v = vaddr(ctxt, p, &p.From, nil) 4709 ab.PutInt16(int16(v)) 4710 } 4711 4712 case Ziq_rp: 4713 v = vaddr(ctxt, p, &p.From, &rel) 4714 l = int(v >> 32) 4715 if l == 0 && rel.Siz != 8 { 4716 ab.rexflag &^= (0x40 | Rxw) 4717 4718 ab.rexflag |= regrex[p.To.Reg] & Rxb 4719 ab.Put1(byte(0xb8 + reg[p.To.Reg])) 4720 if rel.Type != 0 { 4721 r = obj.Addrel(cursym) 4722 *r = rel 4723 r.Off = int32(p.Pc + int64(ab.Len())) 4724 } 4725 4726 ab.PutInt32(int32(v)) 4727 } else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { // sign extend 4728 ab.Put1(0xc7) 4729 ab.asmando(ctxt, cursym, p, &p.To, 0) 4730 4731 ab.PutInt32(int32(v)) // need all 8 4732 } else { 4733 ab.rexflag |= regrex[p.To.Reg] & Rxb 4734 ab.Put1(byte(op + reg[p.To.Reg])) 4735 if rel.Type != 0 { 4736 r = obj.Addrel(cursym) 4737 *r = rel 4738 r.Off = int32(p.Pc + int64(ab.Len())) 4739 } 4740 4741 ab.PutInt64(v) 4742 } 4743 4744 case Zib_rr: 4745 ab.Put1(byte(op)) 4746 ab.asmand(ctxt, cursym, p, &p.To, &p.To) 4747 ab.Put1(byte(vaddr(ctxt, p, &p.From, nil))) 4748 4749 case Z_il, Zil_: 4750 if yt.zcase == Zil_ { 4751 a = &p.From 4752 } else { 4753 a = &p.To 4754 } 4755 ab.Put1(byte(op)) 4756 if o.prefix == Pe { 4757 v = vaddr(ctxt, p, a, nil) 4758 ab.PutInt16(int16(v)) 4759 } else { 4760 
ab.relput4(ctxt, cursym, p, a) 4761 } 4762 4763 case Zm_ilo, Zilo_m: 4764 ab.Put1(byte(op)) 4765 if yt.zcase == Zilo_m { 4766 a = &p.From 4767 ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1])) 4768 } else { 4769 a = &p.To 4770 ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1])) 4771 } 4772 4773 if o.prefix == Pe { 4774 v = vaddr(ctxt, p, a, nil) 4775 ab.PutInt16(int16(v)) 4776 } else { 4777 ab.relput4(ctxt, cursym, p, a) 4778 } 4779 4780 case Zil_rr: 4781 ab.Put1(byte(op)) 4782 ab.asmand(ctxt, cursym, p, &p.To, &p.To) 4783 if o.prefix == Pe { 4784 v = vaddr(ctxt, p, &p.From, nil) 4785 ab.PutInt16(int16(v)) 4786 } else { 4787 ab.relput4(ctxt, cursym, p, &p.From) 4788 } 4789 4790 case Z_rp: 4791 ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40) 4792 ab.Put1(byte(op + reg[p.To.Reg])) 4793 4794 case Zrp_: 4795 ab.rexflag |= regrex[p.From.Reg] & (Rxb | 0x40) 4796 ab.Put1(byte(op + reg[p.From.Reg])) 4797 4798 case Zcallcon, Zjmpcon: 4799 if yt.zcase == Zcallcon { 4800 ab.Put1(byte(op)) 4801 } else { 4802 ab.Put1(o.op[z+1]) 4803 } 4804 r = obj.Addrel(cursym) 4805 r.Off = int32(p.Pc + int64(ab.Len())) 4806 r.Type = objabi.R_PCREL 4807 r.Siz = 4 4808 r.Add = p.To.Offset 4809 ab.PutInt32(0) 4810 4811 case Zcallind: 4812 ab.Put2(byte(op), o.op[z+1]) 4813 r = obj.Addrel(cursym) 4814 r.Off = int32(p.Pc + int64(ab.Len())) 4815 if ctxt.Arch.Family == sys.AMD64 { 4816 r.Type = objabi.R_PCREL 4817 } else { 4818 r.Type = objabi.R_ADDR 4819 } 4820 r.Siz = 4 4821 r.Add = p.To.Offset 4822 r.Sym = p.To.Sym 4823 ab.PutInt32(0) 4824 4825 case Zcall, Zcallduff: 4826 if p.To.Sym == nil { 4827 ctxt.Diag("call without target") 4828 ctxt.DiagFlush() 4829 log.Fatalf("bad code") 4830 } 4831 4832 if yt.zcase == Zcallduff && ctxt.Flag_dynlink { 4833 ctxt.Diag("directly calling duff when dynamically linking Go") 4834 } 4835 4836 if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 { 4837 // Maintain BP around call, since duffcopy/duffzero can't do it 4838 // (the call jumps into the middle of 
the function). 4839 // This makes it possible to see call sites for duffcopy/duffzero in 4840 // BP-based profiling tools like Linux perf (which is the 4841 // whole point of maintaining frame pointers in Go). 4842 // MOVQ BP, -16(SP) 4843 // LEAQ -16(SP), BP 4844 ab.Put(bpduff1) 4845 } 4846 ab.Put1(byte(op)) 4847 r = obj.Addrel(cursym) 4848 r.Off = int32(p.Pc + int64(ab.Len())) 4849 r.Sym = p.To.Sym 4850 r.Add = p.To.Offset 4851 r.Type = objabi.R_CALL 4852 r.Siz = 4 4853 ab.PutInt32(0) 4854 4855 if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 { 4856 // Pop BP pushed above. 4857 // MOVQ 0(BP), BP 4858 ab.Put(bpduff2) 4859 } 4860 4861 // TODO: jump across functions needs reloc 4862 case Zbr, Zjmp, Zloop: 4863 if p.As == AXBEGIN { 4864 ab.Put1(byte(op)) 4865 } 4866 if p.To.Sym != nil { 4867 if yt.zcase != Zjmp { 4868 ctxt.Diag("branch to ATEXT") 4869 ctxt.DiagFlush() 4870 log.Fatalf("bad code") 4871 } 4872 4873 ab.Put1(o.op[z+1]) 4874 r = obj.Addrel(cursym) 4875 r.Off = int32(p.Pc + int64(ab.Len())) 4876 r.Sym = p.To.Sym 4877 // Note: R_CALL instead of R_PCREL. R_CALL is more permissive in that 4878 // it can point to a trampoline instead of the destination itself. 4879 r.Type = objabi.R_CALL 4880 r.Siz = 4 4881 ab.PutInt32(0) 4882 break 4883 } 4884 4885 // Assumes q is in this function. 4886 // TODO: Check in input, preserve in brchain. 4887 4888 // Fill in backward jump now. 
4889 q = p.To.Target() 4890 4891 if q == nil { 4892 ctxt.Diag("jmp/branch/loop without target") 4893 ctxt.DiagFlush() 4894 log.Fatalf("bad code") 4895 } 4896 4897 if p.Back&branchBackwards != 0 { 4898 v = q.Pc - (p.Pc + 2) 4899 if v >= -128 && p.As != AXBEGIN { 4900 if p.As == AJCXZL { 4901 ab.Put1(0x67) 4902 } 4903 ab.Put2(byte(op), byte(v)) 4904 } else if yt.zcase == Zloop { 4905 ctxt.Diag("loop too far: %v", p) 4906 } else { 4907 v -= 5 - 2 4908 if p.As == AXBEGIN { 4909 v-- 4910 } 4911 if yt.zcase == Zbr { 4912 ab.Put1(0x0f) 4913 v-- 4914 } 4915 4916 ab.Put1(o.op[z+1]) 4917 ab.PutInt32(int32(v)) 4918 } 4919 4920 break 4921 } 4922 4923 // Annotate target; will fill in later. 4924 p.Forwd = q.Rel 4925 4926 q.Rel = p 4927 if p.Back&branchShort != 0 && p.As != AXBEGIN { 4928 if p.As == AJCXZL { 4929 ab.Put1(0x67) 4930 } 4931 ab.Put2(byte(op), 0) 4932 } else if yt.zcase == Zloop { 4933 ctxt.Diag("loop too far: %v", p) 4934 } else { 4935 if yt.zcase == Zbr { 4936 ab.Put1(0x0f) 4937 } 4938 ab.Put1(o.op[z+1]) 4939 ab.PutInt32(0) 4940 } 4941 4942 case Zbyte: 4943 v = vaddr(ctxt, p, &p.From, &rel) 4944 if rel.Siz != 0 { 4945 rel.Siz = uint8(op) 4946 r = obj.Addrel(cursym) 4947 *r = rel 4948 r.Off = int32(p.Pc + int64(ab.Len())) 4949 } 4950 4951 ab.Put1(byte(v)) 4952 if op > 1 { 4953 ab.Put1(byte(v >> 8)) 4954 if op > 2 { 4955 ab.PutInt16(int16(v >> 16)) 4956 if op > 4 { 4957 ab.PutInt32(int32(v >> 32)) 4958 } 4959 } 4960 } 4961 } 4962 4963 return 4964 } 4965 } 4966 f3t = Ynone * Ymax 4967 if p.GetFrom3() != nil { 4968 f3t = oclass(ctxt, p, p.GetFrom3()) * Ymax 4969 } 4970 for mo := ymovtab; mo[0].as != 0; mo = mo[1:] { 4971 var pp obj.Prog 4972 var t []byte 4973 if p.As == mo[0].as { 4974 if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 { 4975 t = mo[0].op[:] 4976 switch mo[0].code { 4977 default: 4978 ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p) 4979 4980 case movLit: 4981 for z = 0; t[z] != 0; z++ { 4982 
ab.Put1(t[z]) 4983 } 4984 4985 case movRegMem: 4986 ab.Put1(t[0]) 4987 ab.asmando(ctxt, cursym, p, &p.To, int(t[1])) 4988 4989 case movMemReg: 4990 ab.Put1(t[0]) 4991 ab.asmando(ctxt, cursym, p, &p.From, int(t[1])) 4992 4993 case movRegMem2op: // r,m - 2op 4994 ab.Put2(t[0], t[1]) 4995 ab.asmando(ctxt, cursym, p, &p.To, int(t[2])) 4996 ab.rexflag |= regrex[p.From.Reg] & (Rxr | 0x40) 4997 4998 case movMemReg2op: 4999 ab.Put2(t[0], t[1]) 5000 ab.asmando(ctxt, cursym, p, &p.From, int(t[2])) 5001 ab.rexflag |= regrex[p.To.Reg] & (Rxr | 0x40) 5002 5003 case movFullPtr: 5004 if t[0] != 0 { 5005 ab.Put1(t[0]) 5006 } 5007 switch p.To.Index { 5008 default: 5009 goto bad 5010 5011 case REG_DS: 5012 ab.Put1(0xc5) 5013 5014 case REG_SS: 5015 ab.Put2(0x0f, 0xb2) 5016 5017 case REG_ES: 5018 ab.Put1(0xc4) 5019 5020 case REG_FS: 5021 ab.Put2(0x0f, 0xb4) 5022 5023 case REG_GS: 5024 ab.Put2(0x0f, 0xb5) 5025 } 5026 5027 ab.asmand(ctxt, cursym, p, &p.From, &p.To) 5028 5029 case movDoubleShift: 5030 if t[0] == Pw { 5031 if ctxt.Arch.Family != sys.AMD64 { 5032 ctxt.Diag("asmins: illegal 64: %v", p) 5033 } 5034 ab.rexflag |= Pw 5035 t = t[1:] 5036 } else if t[0] == Pe { 5037 ab.Put1(Pe) 5038 t = t[1:] 5039 } 5040 5041 switch p.From.Type { 5042 default: 5043 goto bad 5044 5045 case obj.TYPE_CONST: 5046 ab.Put2(0x0f, t[0]) 5047 ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0) 5048 ab.Put1(byte(p.From.Offset)) 5049 5050 case obj.TYPE_REG: 5051 switch p.From.Reg { 5052 default: 5053 goto bad 5054 5055 case REG_CL, REG_CX: 5056 ab.Put2(0x0f, t[1]) 5057 ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0) 5058 } 5059 } 5060 5061 // NOTE: The systems listed here are the ones that use the "TLS initial exec" model, 5062 // where you load the TLS base register into a register and then index off that 5063 // register to access the actual TLS variables. 
Systems that allow direct TLS access 5064 // are handled in prefixof above and should not be listed here. 5065 case movTLSReg: 5066 if ctxt.Arch.Family == sys.AMD64 && p.As != AMOVQ || ctxt.Arch.Family == sys.I386 && p.As != AMOVL { 5067 ctxt.Diag("invalid load of TLS: %v", p) 5068 } 5069 5070 if ctxt.Arch.Family == sys.I386 { 5071 // NOTE: The systems listed here are the ones that use the "TLS initial exec" model, 5072 // where you load the TLS base register into a register and then index off that 5073 // register to access the actual TLS variables. Systems that allow direct TLS access 5074 // are handled in prefixof above and should not be listed here. 5075 switch ctxt.Headtype { 5076 default: 5077 log.Fatalf("unknown TLS base location for %v", ctxt.Headtype) 5078 5079 case objabi.Hlinux, objabi.Hfreebsd: 5080 if ctxt.Flag_shared { 5081 // Note that this is not generating the same insns as the other cases. 5082 // MOV TLS, dst 5083 // becomes 5084 // call __x86.get_pc_thunk.dst 5085 // movl (gotpc + g@gotntpoff)(dst), dst 5086 // which is encoded as 5087 // call __x86.get_pc_thunk.dst 5088 // movq 0(dst), dst 5089 // and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access 5090 // is g, which we can't check here, but will when we assemble the second 5091 // instruction. 5092 dst := p.To.Reg 5093 ab.Put1(0xe8) 5094 r = obj.Addrel(cursym) 5095 r.Off = int32(p.Pc + int64(ab.Len())) 5096 r.Type = objabi.R_CALL 5097 r.Siz = 4 5098 r.Sym = ctxt.Lookup("__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst)))) 5099 ab.PutInt32(0) 5100 5101 ab.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3))) 5102 r = obj.Addrel(cursym) 5103 r.Off = int32(p.Pc + int64(ab.Len())) 5104 r.Type = objabi.R_TLS_IE 5105 r.Siz = 4 5106 r.Add = 2 5107 ab.PutInt32(0) 5108 } else { 5109 // ELF TLS base is 0(GS). 
5110 pp.From = p.From 5111 5112 pp.From.Type = obj.TYPE_MEM 5113 pp.From.Reg = REG_GS 5114 pp.From.Offset = 0 5115 pp.From.Index = REG_NONE 5116 pp.From.Scale = 0 5117 ab.Put2(0x65, // GS 5118 0x8B) 5119 ab.asmand(ctxt, cursym, p, &pp.From, &p.To) 5120 } 5121 case objabi.Hplan9: 5122 pp.From = obj.Addr{} 5123 pp.From.Type = obj.TYPE_MEM 5124 pp.From.Name = obj.NAME_EXTERN 5125 pp.From.Sym = plan9privates 5126 pp.From.Offset = 0 5127 pp.From.Index = REG_NONE 5128 ab.Put1(0x8B) 5129 ab.asmand(ctxt, cursym, p, &pp.From, &p.To) 5130 5131 case objabi.Hwindows: 5132 // Windows TLS base is always 0x14(FS). 5133 pp.From = p.From 5134 5135 pp.From.Type = obj.TYPE_MEM 5136 pp.From.Reg = REG_FS 5137 pp.From.Offset = 0x14 5138 pp.From.Index = REG_NONE 5139 pp.From.Scale = 0 5140 ab.Put2(0x64, // FS 5141 0x8B) 5142 ab.asmand(ctxt, cursym, p, &pp.From, &p.To) 5143 } 5144 break 5145 } 5146 5147 switch ctxt.Headtype { 5148 default: 5149 log.Fatalf("unknown TLS base location for %v", ctxt.Headtype) 5150 5151 case objabi.Hlinux, objabi.Hfreebsd: 5152 if !ctxt.Flag_shared { 5153 log.Fatalf("unknown TLS base location for linux/freebsd without -shared") 5154 } 5155 // Note that this is not generating the same insn as the other cases. 5156 // MOV TLS, R_to 5157 // becomes 5158 // movq g@gottpoff(%rip), R_to 5159 // which is encoded as 5160 // movq 0(%rip), R_to 5161 // and a R_TLS_IE reloc. This all assumes the only tls variable we access 5162 // is g, which we can't check here, but will when we assemble the second 5163 // instruction. 
5164 ab.rexflag = Pw | (regrex[p.To.Reg] & Rxr) 5165 5166 ab.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3))) 5167 r = obj.Addrel(cursym) 5168 r.Off = int32(p.Pc + int64(ab.Len())) 5169 r.Type = objabi.R_TLS_IE 5170 r.Siz = 4 5171 r.Add = -4 5172 ab.PutInt32(0) 5173 5174 case objabi.Hplan9: 5175 pp.From = obj.Addr{} 5176 pp.From.Type = obj.TYPE_MEM 5177 pp.From.Name = obj.NAME_EXTERN 5178 pp.From.Sym = plan9privates 5179 pp.From.Offset = 0 5180 pp.From.Index = REG_NONE 5181 ab.rexflag |= Pw 5182 ab.Put1(0x8B) 5183 ab.asmand(ctxt, cursym, p, &pp.From, &p.To) 5184 5185 case objabi.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c. 5186 // TLS base is 0(FS). 5187 pp.From = p.From 5188 5189 pp.From.Type = obj.TYPE_MEM 5190 pp.From.Name = obj.NAME_NONE 5191 pp.From.Reg = REG_NONE 5192 pp.From.Offset = 0 5193 pp.From.Index = REG_NONE 5194 pp.From.Scale = 0 5195 ab.rexflag |= Pw 5196 ab.Put2(0x64, // FS 5197 0x8B) 5198 ab.asmand(ctxt, cursym, p, &pp.From, &p.To) 5199 5200 case objabi.Hwindows: 5201 // Windows TLS base is always 0x28(GS). 5202 pp.From = p.From 5203 5204 pp.From.Type = obj.TYPE_MEM 5205 pp.From.Name = obj.NAME_NONE 5206 pp.From.Reg = REG_GS 5207 pp.From.Offset = 0x28 5208 pp.From.Index = REG_NONE 5209 pp.From.Scale = 0 5210 ab.rexflag |= Pw 5211 ab.Put2(0x65, // GS 5212 0x8B) 5213 ab.asmand(ctxt, cursym, p, &pp.From, &p.To) 5214 } 5215 } 5216 return 5217 } 5218 } 5219 } 5220 goto bad 5221 5222 bad: 5223 if ctxt.Arch.Family != sys.AMD64 { 5224 // here, the assembly has failed. 5225 // if it's a byte instruction that has 5226 // unaddressable registers, try to 5227 // exchange registers and reissue the 5228 // instruction with the operands renamed. 5229 pp := *p 5230 5231 unbytereg(&pp.From, &pp.Ft) 5232 unbytereg(&pp.To, &pp.Tt) 5233 5234 z := int(p.From.Reg) 5235 if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI { 5236 // TODO(rsc): Use this code for x86-64 too. 
It has bug fixes not present in the amd64 code base. 5237 // For now, different to keep bit-for-bit compatibility. 5238 if ctxt.Arch.Family == sys.I386 { 5239 breg := byteswapreg(ctxt, &p.To) 5240 if breg != REG_AX { 5241 ab.Put1(0x87) // xchg lhs,bx 5242 ab.asmando(ctxt, cursym, p, &p.From, reg[breg]) 5243 subreg(&pp, z, breg) 5244 ab.doasm(ctxt, cursym, &pp) 5245 ab.Put1(0x87) // xchg lhs,bx 5246 ab.asmando(ctxt, cursym, p, &p.From, reg[breg]) 5247 } else { 5248 ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax 5249 subreg(&pp, z, REG_AX) 5250 ab.doasm(ctxt, cursym, &pp) 5251 ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax 5252 } 5253 return 5254 } 5255 5256 if isax(&p.To) || p.To.Type == obj.TYPE_NONE { 5257 // We certainly don't want to exchange 5258 // with AX if the op is MUL or DIV. 5259 ab.Put1(0x87) // xchg lhs,bx 5260 ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX]) 5261 subreg(&pp, z, REG_BX) 5262 ab.doasm(ctxt, cursym, &pp) 5263 ab.Put1(0x87) // xchg lhs,bx 5264 ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX]) 5265 } else { 5266 ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax 5267 subreg(&pp, z, REG_AX) 5268 ab.doasm(ctxt, cursym, &pp) 5269 ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax 5270 } 5271 return 5272 } 5273 5274 z = int(p.To.Reg) 5275 if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI { 5276 // TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base. 5277 // For now, different to keep bit-for-bit compatibility. 
5278 if ctxt.Arch.Family == sys.I386 { 5279 breg := byteswapreg(ctxt, &p.From) 5280 if breg != REG_AX { 5281 ab.Put1(0x87) //xchg rhs,bx 5282 ab.asmando(ctxt, cursym, p, &p.To, reg[breg]) 5283 subreg(&pp, z, breg) 5284 ab.doasm(ctxt, cursym, &pp) 5285 ab.Put1(0x87) // xchg rhs,bx 5286 ab.asmando(ctxt, cursym, p, &p.To, reg[breg]) 5287 } else { 5288 ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax 5289 subreg(&pp, z, REG_AX) 5290 ab.doasm(ctxt, cursym, &pp) 5291 ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax 5292 } 5293 return 5294 } 5295 5296 if isax(&p.From) { 5297 ab.Put1(0x87) // xchg rhs,bx 5298 ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX]) 5299 subreg(&pp, z, REG_BX) 5300 ab.doasm(ctxt, cursym, &pp) 5301 ab.Put1(0x87) // xchg rhs,bx 5302 ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX]) 5303 } else { 5304 ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax 5305 subreg(&pp, z, REG_AX) 5306 ab.doasm(ctxt, cursym, &pp) 5307 ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax 5308 } 5309 return 5310 } 5311 } 5312 5313 ctxt.Diag("invalid instruction: %v", p) 5314 } 5315 5316 // byteswapreg returns a byte-addressable register (AX, BX, CX, DX) 5317 // which is not referenced in a. 5318 // If a is empty, it returns BX to account for MULB-like instructions 5319 // that might use DX and AX. 
5320 func byteswapreg(ctxt *obj.Link, a *obj.Addr) int { 5321 cana, canb, canc, cand := true, true, true, true 5322 if a.Type == obj.TYPE_NONE { 5323 cana, cand = false, false 5324 } 5325 5326 if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) { 5327 switch a.Reg { 5328 case REG_NONE: 5329 cana, cand = false, false 5330 case REG_AX, REG_AL, REG_AH: 5331 cana = false 5332 case REG_BX, REG_BL, REG_BH: 5333 canb = false 5334 case REG_CX, REG_CL, REG_CH: 5335 canc = false 5336 case REG_DX, REG_DL, REG_DH: 5337 cand = false 5338 } 5339 } 5340 5341 if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR { 5342 switch a.Index { 5343 case REG_AX: 5344 cana = false 5345 case REG_BX: 5346 canb = false 5347 case REG_CX: 5348 canc = false 5349 case REG_DX: 5350 cand = false 5351 } 5352 } 5353 5354 switch { 5355 case cana: 5356 return REG_AX 5357 case canb: 5358 return REG_BX 5359 case canc: 5360 return REG_CX 5361 case cand: 5362 return REG_DX 5363 default: 5364 ctxt.Diag("impossible byte register") 5365 ctxt.DiagFlush() 5366 log.Fatalf("bad code") 5367 return 0 5368 } 5369 } 5370 5371 func isbadbyte(a *obj.Addr) bool { 5372 return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB) 5373 } 5374 5375 func (ab *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) { 5376 ab.Reset() 5377 5378 ab.rexflag = 0 5379 ab.vexflag = false 5380 ab.evexflag = false 5381 mark := ab.Len() 5382 ab.doasm(ctxt, cursym, p) 5383 if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag { 5384 // as befits the whole approach of the architecture, 5385 // the rex prefix must appear before the first opcode byte 5386 // (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but 5387 // before the 0f opcode escape!), or it might be ignored. 5388 // note that the handbook often misleadingly shows 66/f2/f3 in `opcode'. 
5389 if ctxt.Arch.Family != sys.AMD64 { 5390 ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", ctxt.Arch.RegSize*8, p, p.Ft, p.Tt) 5391 } 5392 n := ab.Len() 5393 var np int 5394 for np = mark; np < n; np++ { 5395 c := ab.At(np) 5396 if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 { 5397 break 5398 } 5399 } 5400 ab.Insert(np, byte(0x40|ab.rexflag)) 5401 } 5402 5403 n := ab.Len() 5404 for i := len(cursym.R) - 1; i >= 0; i-- { 5405 r := &cursym.R[i] 5406 if int64(r.Off) < p.Pc { 5407 break 5408 } 5409 if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag { 5410 r.Off++ 5411 } 5412 if r.Type == objabi.R_PCREL { 5413 if ctxt.Arch.Family == sys.AMD64 || p.As == obj.AJMP || p.As == obj.ACALL { 5414 // PC-relative addressing is relative to the end of the instruction, 5415 // but the relocations applied by the linker are relative to the end 5416 // of the relocation. Because immediate instruction 5417 // arguments can follow the PC-relative memory reference in the 5418 // instruction encoding, the two may not coincide. In this case, 5419 // adjust addend so that linker can keep relocating relative to the 5420 // end of the relocation. 5421 r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz)) 5422 } else if ctxt.Arch.Family == sys.I386 { 5423 // On 386 PC-relative addressing (for non-call/jmp instructions) 5424 // assumes that the previous instruction loaded the PC of the end 5425 // of that instruction into CX, so the adjustment is relative to 5426 // that. 5427 r.Add += int64(r.Off) - p.Pc + int64(r.Siz) 5428 } 5429 } 5430 if r.Type == objabi.R_GOTPCREL && ctxt.Arch.Family == sys.I386 { 5431 // On 386, R_GOTPCREL makes the same assumptions as R_PCREL. 5432 r.Add += int64(r.Off) - p.Pc + int64(r.Siz) 5433 } 5434 5435 } 5436 } 5437 5438 // unpackOps4 extracts 4 operands from p. 
5439 func unpackOps4(p *obj.Prog) (arg0, arg1, arg2, dst *obj.Addr) { 5440 return &p.From, &p.RestArgs[0], &p.RestArgs[1], &p.To 5441 } 5442 5443 // unpackOps5 extracts 5 operands from p. 5444 func unpackOps5(p *obj.Prog) (arg0, arg1, arg2, arg3, dst *obj.Addr) { 5445 return &p.From, &p.RestArgs[0], &p.RestArgs[1], &p.RestArgs[2], &p.To 5446 }