gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

asm6.go (149461B)


      1 // Inferno utils/6l/span.c
      2 // https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/span.c
      3 //
      4 //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
      5 //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
      6 //	Portions Copyright © 1997-1999 Vita Nuova Limited
      7 //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
      8 //	Portions Copyright © 2004,2006 Bruce Ellis
      9 //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
     10 //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
     11 //	Portions Copyright © 2009 The Go Authors. All rights reserved.
     12 //
     13 // Permission is hereby granted, free of charge, to any person obtaining a copy
     14 // of this software and associated documentation files (the "Software"), to deal
     15 // in the Software without restriction, including without limitation the rights
     16 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     17 // copies of the Software, and to permit persons to whom the Software is
     18 // furnished to do so, subject to the following conditions:
     19 //
     20 // The above copyright notice and this permission notice shall be included in
     21 // all copies or substantial portions of the Software.
     22 //
     23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
     26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     29 // THE SOFTWARE.
     30 
     31 package x86
     32 
     33 import (
     34 	"github.com/twitchyliquid64/golang-asm/obj"
     35 	"github.com/twitchyliquid64/golang-asm/objabi"
     36 	"github.com/twitchyliquid64/golang-asm/sys"
     37 	"encoding/binary"
     38 	"fmt"
     39 	"log"
     40 	"strings"
     41 )
     42 
     43 var (
        	// Package-level *obj.LSym handles. Where they are assigned and read is
        	// not visible in this part of the file.
        	// NOTE(review): judging by the names, presumably the Plan 9 privates
        	// symbol and the runtime deferreturn function — confirm at the use sites.
     44 	plan9privates *obj.LSym
     45 	deferreturn   *obj.LSym
     46 )
     47 
     48 // Instruction layout.
     49 
     50 // Loop alignment constants:
     51 // want to align loop entry to loopAlign-byte boundary,
     52 // and willing to insert at most maxLoopPad bytes of NOP to do so.
     53 // We define a loop entry as the target of a backward jump.
     54 //
     55 // gcc uses maxLoopPad = 10 for its 'generic x86-64' config,
     56 // and it aligns all jump targets, not just backward jump targets.
     57 //
     58 // As of 6/1/2012, the effect of setting maxLoopPad = 10 here
     59 // is very slight but negative, so the alignment is disabled by
     60 // setting maxLoopPad = 0. The code is here for reference and
     61 // for future experiments.
     62 //
     63 const (
     64 	loopAlign  = 16 // boundary, in bytes, that loop entries would be aligned to
     65 	maxLoopPad = 0 // most NOP bytes inserted per loop entry; 0 disables the alignment
     66 )
     67 
     68 // Bit flags that are used to express jump target properties.
     69 // The three flags take the values 1, 2 and 4 (1 << iota), so they can be combined.
     70 	// branchBackwards marks targets that are located behind the jump.
     71 	// Used to express jumps to loop headers.
     72 	branchBackwards = (1 << iota)
     73 	// branchShort marks branches whose target is close,
     74 	// with an offset in the -128..127 range.
     75 	branchShort
     76 	// branchLoopHead marks a loop entry.
     77 	// Used to insert padding for misaligned loops.
     78 	branchLoopHead
     79 )
     80 
     81 // opBytes holds the encoding bytes of one optab entry.
     82 // Each ytab line reserves a fixed number of bytes in this array.
     83 //
     84 // The size should be the minimal number of bytes that
     85 // is enough to hold the biggest optab op line.
     86 type opBytes [31]uint8
     87 
     88 type Optab struct {
        	// Optab is one row of the optab instruction table: an instruction, the
        	// operand patterns it accepts, and the bytes used to encode each pattern.
     89 	as     obj.As // instruction this row describes; rows are looked up by p.As
     90 	ytab   []ytab // operand-pattern lines, scanned in order for a match
     91 	prefix uint8 // one of the P* constants; controls the prefix bytes to emit
     92 	op     opBytes // encoding bytes; the z pointer walks them as ytab lines are skipped
     93 }
     94 
     95 type movtab struct {
        	// movtab is a table row type keyed by an instruction (as).
        	// NOTE(review): the table built from it and the code that reads it are
        	// outside this part of the file; the field notes below are inferred from
        	// the field names only — confirm against the users.
     96 	as   obj.As
     97 	ft   uint8 // presumably the operand class of the "from" operand
     98 	f3t  uint8 // presumably the operand class of the third operand
     99 	tt   uint8 // presumably the operand class of the "to" operand
    100 	code uint8 // presumably selects how op is interpreted
    101 	op   [4]uint8 // four bytes; presumably opcode bytes
    102 }
    103 
    104 const (
        	// Y* values are operand classes ("Ytypes"). oclass computes the specific
        	// class of an operand, ytab lines list the classes an instruction accepts,
        	// and ycover records which specific classes also count as a more general
        	// one. The values come from iota, so the order of this list is significant.
    105 	Yxxx = iota
    106 	Ynone
    107 	Yi0 // $0
    108 	Yi1 // $1
    109 	Yu2 // $x, x fits in uint2
    110 	Yi8 // $x, x fits in int8
    111 	Yu8 // $x, x fits in uint8
    112 	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
    113 	Ys32
    114 	Yi32
    115 	Yi64
    116 	Yiauto
    117 	Yal
    118 	Ycl
    119 	Yax
    120 	Ycx
    121 	Yrb
    122 	Yrl
    123 	Yrl32 // Yrl on 32-bit system
    124 	Yrf
    125 	Yf0
    126 	Yrx
    127 	Ymb
    128 	Yml
    129 	Ym
    130 	Ybr
    131 	Ycs
    132 	Yss
    133 	Yds
    134 	Yes
    135 	Yfs
    136 	Ygs
    137 	Ygdtr
    138 	Yidtr
    139 	Yldtr
    140 	Ymsw
    141 	Ytask
    142 	Ycr0
    143 	Ycr1
    144 	Ycr2
    145 	Ycr3
    146 	Ycr4
    147 	Ycr5
    148 	Ycr6
    149 	Ycr7
    150 	Ycr8
    151 	Ydr0
    152 	Ydr1
    153 	Ydr2
    154 	Ydr3
    155 	Ydr4
    156 	Ydr5
    157 	Ydr6
    158 	Ydr7
    159 	Ytr0
    160 	Ytr1
    161 	Ytr2
    162 	Ytr3
    163 	Ytr4
    164 	Ytr5
    165 	Ytr6
    166 	Ytr7
    167 	Ymr
    168 	Ymm
    169 	Yxr0          // X0 only. "<XMM0>" notation in Intel manual.
    170 	YxrEvexMulti4 // [ X<n> - X<n+3> ]; multisource YxrEvex
    171 	Yxr           // X0..X15
    172 	YxrEvex       // X0..X31
    173 	Yxm
    174 	YxmEvex       // YxrEvex+Ym
    175 	Yxvm          // VSIB vector array; vm32x/vm64x
    176 	YxvmEvex      // Yxvm which permits High-16 X register as index.
    177 	YyrEvexMulti4 // [ Y<n> - Y<n+3> ]; multisource YyrEvex
    178 	Yyr           // Y0..Y15
    179 	YyrEvex       // Y0..Y31
    180 	Yym
    181 	YymEvex   // YyrEvex+Ym
    182 	Yyvm      // VSIB vector array; vm32y/vm64y
    183 	YyvmEvex  // Yyvm which permits High-16 Y register as index.
    184 	YzrMulti4 // [ Z<n> - Z<n+3> ]; multisource YzrEvex
    185 	Yzr       // Z0..Z31
    186 	Yzm       // Yzr+Ym
    187 	Yzvm      // VSIB vector array; vm32z/vm64z
    188 	Yk0       // K0
    189 	Yknot0    // K1..K7; write mask
    190 	Yk        // K0..K7; used for KOP
    191 	Ykm       // Yk+Ym; used for KOP
    192 	Ytls
    193 	Ytextsize
    194 	Yindir
    195 	Ymax // number of Y classes; ycover is a Ymax*Ymax table
    196 )
    197 
    198 const (
        	// Z* values are encoding cases ("Ztypes"), the first entry of a ytab line.
        	// The switch on yt.zcase in doasm implements them: the Ztype decides how
        	// the z bytes and the operands are laid down. The values come from iota,
        	// so the order of this list is significant.
    199 	Zxxx = iota
    200 	Zlit
    201 	Zlitm_r
    202 	Zlitr_m
    203 	Zlit_m_r
    204 	Z_rp
    205 	Zbr
    206 	Zcall
    207 	Zcallcon
    208 	Zcallduff
    209 	Zcallind
    210 	Zcallindreg
    211 	Zib_
    212 	Zib_rp
    213 	Zibo_m
    214 	Zibo_m_xm
    215 	Zil_
    216 	Zil_rp
    217 	Ziq_rp
    218 	Zilo_m
    219 	Zjmp
    220 	Zjmpcon
    221 	Zloop
    222 	Zo_iw
    223 	Zm_o
    224 	Zm_r
    225 	Z_m_r
    226 	Zm2_r
    227 	Zm_r_xm
    228 	Zm_r_i_xm
    229 	Zm_r_xm_nr
    230 	Zr_m_xm_nr
    231 	Zibm_r // mmx1,mmx2/mem64,imm8
    232 	Zibr_m
    233 	Zmb_r
    234 	Zaut_r
    235 	Zo_m
    236 	Zo_m64
    237 	Zpseudo
    238 	Zr_m
    239 	Zr_m_xm
    240 	Zrp_
    241 	Z_ib
    242 	Z_il
    243 	Zm_ibo
    244 	Zm_ilo
    245 	Zib_rr
    246 	Zil_rr
    247 	Zbyte
    248 
    249 	Zvex_rm_v_r
    250 	Zvex_rm_v_ro
    251 	Zvex_r_v_rm
    252 	Zvex_i_rm_vo
    253 	Zvex_v_rm_r
    254 	Zvex_i_rm_r
    255 	Zvex_i_r_v
    256 	Zvex_i_rm_v_r
    257 	Zvex
    258 	Zvex_rm_r_vo
    259 	Zvex_i_r_rm
    260 	Zvex_hr_rm_v_r
    261 
    262 	Zevex_first // marker: every Zevex_* case lies between Zevex_first and Zevex_last
    263 	Zevex_i_r_k_rm
    264 	Zevex_i_r_rm
    265 	Zevex_i_rm_k_r
    266 	Zevex_i_rm_k_vo
    267 	Zevex_i_rm_r
    268 	Zevex_i_rm_v_k_r
    269 	Zevex_i_rm_v_r
    270 	Zevex_i_rm_vo
    271 	Zevex_k_rmo
    272 	Zevex_r_k_rm
    273 	Zevex_r_v_k_rm
    274 	Zevex_r_v_rm
    275 	Zevex_rm_k_r
    276 	Zevex_rm_v_k_r
    277 	Zevex_rm_v_r
    278 	Zevex_last // marker: end of the Zevex_* range (presumably used for range checks — confirm)
    279 
    280 	Zmax // number of Z cases
    281 )
    282 
    283 const (
        	// P* values are the prefix selectors stored in Optab.prefix; they control
        	// which prefix bytes are emitted for an instruction. Per the comments
        	// below, some are real prefix/escape byte values and others are purely
        	// symbolic tags.
    284 	Px   = 0
    285 	Px1  = 1    // symbolic; exact value doesn't matter
    286 	P32  = 0x32 // 32-bit only
    287 	Pe   = 0x66 // operand escape
    288 	Pm   = 0x0f // 2byte opcode escape
    289 	Pq   = 0xff // both escapes: 66 0f
    290 	Pb   = 0xfe // byte operands
    291 	Pf2  = 0xf2 // xmm escape 1: f2 0f
    292 	Pf3  = 0xf3 // xmm escape 2: f3 0f
    293 	Pef3 = 0xf5 // xmm escape 2 with 16-bit prefix: 66 f3 0f
    294 	Pq3  = 0x67 // xmm escape 3: 66 48 0f
    295 	Pq4  = 0x68 // xmm escape 4: 66 0F 38
    296 	Pq4w = 0x69 // Pq4 with Rex.w: 66 0F 38
    297 	Pq5  = 0x6a // xmm escape 5: F3 0F 38
    298 	Pq5w = 0x6b // Pq5 with Rex.w: F3 0F 38
    299 	Pfw  = 0xf4 // Pf3 with Rex.w: f3 48 0f
    300 	Pw   = 0x48 // Rex.w
    301 	Pw8  = 0x90 // symbolic; exact value doesn't matter
    302 	Py   = 0x80 // defaults to 64-bit mode
    303 	Py1  = 0x81 // symbolic; exact value doesn't matter
    304 	Py3  = 0x83 // symbolic; exact value doesn't matter
    305 	Pavx = 0x84 // symbolic; exact value doesn't matter
    306 
        	// Rx* are single-bit flags (bits 0-4) describing register-extension and
        	// operand-size bits, as explained per flag below.
    307 	RxrEvex = 1 << 4 // AVX512 extension to REX.R/VEX.R
    308 	Rxw     = 1 << 3 // when set (=1), 64-bit operand size
    309 	Rxr     = 1 << 2 // extend modrm reg
    310 	Rxx     = 1 << 1 // extend sib index
    311 	Rxb     = 1 << 0 // extend modrm r/m, sib base, or opcode reg
    312 )
    313 
    314 const (
    315 	// Encoding for VEX prefix in tables.
    316 	// The P, L, and W fields are chosen to match
    317 	// their eventual locations in the VEX prefix bytes.
    318 
    319 	// Bit layout of the constants below: P occupies bits 0-1, L bit 2,
    320 	// M bits 3-4, avxEscape bit 6 and W bit 7, so one value from each
    321 	// field can be combined into a single table byte without overlap.
    322 
    323 	// Using spare bit to make leading [E]VEX encoding byte different from
    324 	// 0x0f even if all other VEX fields are 0.
    325 	avxEscape = 1 << 6
    326 
    327 	// P field - 2 bits
    328 	vex66 = 1 << 0
    329 	vexF3 = 2 << 0
    330 	vexF2 = 3 << 0
    331 	// L field - 1 bit (vexLZ, vexLIG and vex128 all have the value 0)
    332 	vexLZ  = 0 << 2
    333 	vexLIG = 0 << 2
    334 	vex128 = 0 << 2
    335 	vex256 = 1 << 2
    336 	// W field - 1 bit (vexWIG and vexW0 both have the value 0)
    337 	vexWIG = 0 << 7
    338 	vexW0  = 0 << 7
    339 	vexW1  = 1 << 7
    340 	// M field - 5 bits, but mostly reserved; we can store up to 3
    341 	vex0F   = 1 << 3
    342 	vex0F38 = 2 << 3
    343 	vex0F3A = 3 << 3
    344 )
    345 
    346 var ycover [Ymax * Ymax]uint8 // ycover[a*Ymax+b] set means class a also counts as class b; set up in instinit
    347 
    348 var reg [MAXREG]int // NOTE(review): presumably indexed by register number; filled in outside this part of the file
    349 
    350 var regrex [MAXREG + 1]int // one entry longer than reg; NOTE(review): presumably per-register REX bits — confirm
    351 
    352 var ynone = []ytab{
        	// A ytab line is {Ztype, z advance, argList of operand Ytypes}. doasm scans
        	// a table in order, advancing z past each non-matching line.
        	// ynone matches an instruction written with no operands; optab uses it
        	// for rows such as AAAA, ACBW and ACLC.
    353 	{Zlit, 1, argList{}},
    354 }
    355 
        // ytext has two Zpseudo lines that differ only in an optional Yi32 middle operand.
    356 var ytext = []ytab{
    357 	{Zpseudo, 0, argList{Ymb, Ytextsize}},
    358 	{Zpseudo, 1, argList{Ymb, Yi32, Ytextsize}},
    359 }
    360 
        // ynop lists the operand shapes accepted by a Zpseudo instruction with zero
        // or one operand.
        // NOTE(review): the Yiauto, Yml and Yrf lines each appear twice, and Yxr
        // appears twice with different advance values (0 and 1). If the scan stops
        // at the first matching line, the later copies are never selected — confirm
        // whether they can be dropped.
    361 var ynop = []ytab{
    362 	{Zpseudo, 0, argList{}},
    363 	{Zpseudo, 0, argList{Yiauto}},
    364 	{Zpseudo, 0, argList{Yml}},
    365 	{Zpseudo, 0, argList{Yrf}},
    366 	{Zpseudo, 0, argList{Yxr}},
    367 	{Zpseudo, 0, argList{Yiauto}},
    368 	{Zpseudo, 0, argList{Yml}},
    369 	{Zpseudo, 0, argList{Yrf}},
    370 	{Zpseudo, 1, argList{Yxr}},
    371 }
    372 
        // yfuncdata: a Zpseudo line taking a Yi32 and a Ym operand.
    373 var yfuncdata = []ytab{
    374 	{Zpseudo, 0, argList{Yi32, Ym}},
    375 }
    376 
        // ypcdata: a Zpseudo line taking two Yi32 operands.
    377 var ypcdata = []ytab{
    378 	{Zpseudo, 0, argList{Yi32, Yi32}},
    379 }
    380 
    381 var yxorb = []ytab{
        	// yxorb is shared by the AADCB, AADDB and AANDB rows of optab; all of
        	// them use the Pb ("byte operands") prefix selector.
    382 	{Zib_, 1, argList{Yi32, Yal}},
    383 	{Zibo_m, 2, argList{Yi32, Ymb}},
    384 	{Zr_m, 1, argList{Yrb, Ymb}},
    385 	{Zm_r, 1, argList{Ymb, Yrb}},
    386 }
    387 
        // yaddl is shared by the L, Q and W forms of ADC, ADD and AND in optab.
        // The comment ahead of optab walks through how AADDL's opBytes map onto
        // these five lines.
    388 var yaddl = []ytab{
    389 	{Zibo_m, 2, argList{Yi8, Yml}},
    390 	{Zil_, 1, argList{Yi32, Yax}},
    391 	{Zilo_m, 2, argList{Yi32, Yml}},
    392 	{Zr_m, 1, argList{Yrl, Yml}},
    393 	{Zm_r, 1, argList{Yml, Yrl}},
    394 }
    395 
    396 var yincl = []ytab{
    397 	{Z_rp, 1, argList{Yrl}},
    398 	{Zo_m, 2, argList{Yml}},
    399 }
    400 
        // yincq keeps only the Zo_m line of yincl.
    401 var yincq = []ytab{
    402 	{Zo_m, 2, argList{Yml}},
    403 }
    404 
        // ycmpb has the same shapes as yxorb with the operand order reversed
        // (the immediate comes last); optab uses it for ACMPB.
    405 var ycmpb = []ytab{
    406 	{Z_ib, 1, argList{Yal, Yi32}},
    407 	{Zm_ibo, 2, argList{Ymb, Yi32}},
    408 	{Zm_r, 1, argList{Ymb, Yrb}},
    409 	{Zr_m, 1, argList{Yrb, Ymb}},
    410 }
    411 
        // ycmpl has the same shapes as yaddl with the operand order reversed;
        // optab uses it for ACMPL and ACMPQ.
    412 var ycmpl = []ytab{
    413 	{Zm_ibo, 2, argList{Yml, Yi8}},
    414 	{Z_il, 1, argList{Yax, Yi32}},
    415 	{Zm_ilo, 2, argList{Yml, Yi32}},
    416 	{Zm_r, 1, argList{Yml, Yrl}},
    417 	{Zr_m, 1, argList{Yrl, Yml}},
    418 }
    419 
    420 var yshb = []ytab{
    421 	{Zo_m, 2, argList{Yi1, Ymb}},
    422 	{Zibo_m, 2, argList{Yu8, Ymb}},
    423 	{Zo_m, 2, argList{Ycx, Ymb}},
    424 }
    425 
    426 var yshl = []ytab{
    427 	{Zo_m, 2, argList{Yi1, Yml}},
    428 	{Zibo_m, 2, argList{Yu8, Yml}},
    429 	{Zo_m, 2, argList{Ycl, Yml}},
    430 	{Zo_m, 2, argList{Ycx, Yml}},
    431 }
    432 
        // ytestl is yaddl without the leading Yi8 line.
    433 var ytestl = []ytab{
    434 	{Zil_, 1, argList{Yi32, Yax}},
    435 	{Zilo_m, 2, argList{Yi32, Yml}},
    436 	{Zr_m, 1, argList{Yrl, Yml}},
    437 	{Zm_r, 1, argList{Yml, Yrl}},
    438 }
    439 
    440 var ymovb = []ytab{
        	// Register/memory moves in both directions, then immediate to register
        	// and immediate to memory.
    441 	{Zr_m, 1, argList{Yrb, Ymb}},
    442 	{Zm_r, 1, argList{Ymb, Yrb}},
    443 	{Zib_rp, 1, argList{Yi32, Yrb}},
    444 	{Zibo_m, 2, argList{Yi32, Ymb}},
    445 }
    446 
        // ybtl is shared by the ABT*, ABTC*, ABTR* and ABTS* rows of optab.
    447 var ybtl = []ytab{
    448 	{Zibo_m, 2, argList{Yi8, Yml}},
    449 	{Zr_m, 1, argList{Yrl, Yml}},
    450 }
    451 
    452 var ymovw = []ytab{
    453 	{Zr_m, 1, argList{Yrl, Yml}},
    454 	{Zm_r, 1, argList{Yml, Yrl}},
    455 	{Zil_rp, 1, argList{Yi32, Yrl}},
    456 	{Zilo_m, 2, argList{Yi32, Yml}},
    457 	{Zaut_r, 2, argList{Yiauto, Yrl}},
    458 }
    459 
        // ymovl extends the ymovw shapes with MMX and XMM MOVD lines.
    460 var ymovl = []ytab{
    461 	{Zr_m, 1, argList{Yrl, Yml}},
    462 	{Zm_r, 1, argList{Yml, Yrl}},
    463 	{Zil_rp, 1, argList{Yi32, Yrl}},
    464 	{Zilo_m, 2, argList{Yi32, Yml}},
    465 	{Zm_r_xm, 1, argList{Yml, Ymr}}, // MMX MOVD
    466 	{Zr_m_xm, 1, argList{Ymr, Yml}}, // MMX MOVD
    467 	{Zm_r_xm, 2, argList{Yml, Yxr}}, // XMM MOVD (32 bit)
    468 	{Zr_m_xm, 2, argList{Yxr, Yml}}, // XMM MOVD (32 bit)
    469 	{Zaut_r, 2, argList{Yiauto, Yrl}},
    470 }
    471 
        // yret accepts either no operand or a single Yi32; both lines use Zo_iw.
    472 var yret = []ytab{
    473 	{Zo_iw, 1, argList{}},
    474 	{Zo_iw, 1, argList{Yi32}},
    475 }
    476 
        // ymovq: the trailing comments name the opcode byte (and, for lines with
        // an advance of 2, the prefix selector stored ahead of it) used per line.
    477 var ymovq = []ytab{
    478 	// valid in 32-bit mode
    479 	{Zm_r_xm_nr, 1, argList{Ym, Ymr}},  // 0x6f MMX MOVQ (shorter encoding)
    480 	{Zr_m_xm_nr, 1, argList{Ymr, Ym}},  // 0x7f MMX MOVQ
    481 	{Zm_r_xm_nr, 2, argList{Yxr, Ymr}}, // Pf2, 0xd6 MOVDQ2Q
    482 	{Zm_r_xm_nr, 2, argList{Yxm, Yxr}}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
    483 	{Zr_m_xm_nr, 2, argList{Yxr, Yxm}}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64
    484 
    485 	// valid only in 64-bit mode, usually with 64-bit prefix
    486 	{Zr_m, 1, argList{Yrl, Yml}},      // 0x89
    487 	{Zm_r, 1, argList{Yml, Yrl}},      // 0x8b
    488 	{Zilo_m, 2, argList{Ys32, Yrl}},   // 32 bit signed 0xc7,(0)
    489 	{Ziq_rp, 1, argList{Yi64, Yrl}},   // 0xb8 -- 32/64 bit immediate
    490 	{Zilo_m, 2, argList{Yi32, Yml}},   // 0xc7,(0)
    491 	{Zm_r_xm, 1, argList{Ymm, Ymr}},   // 0x6e MMX MOVD
    492 	{Zr_m_xm, 1, argList{Ymr, Ymm}},   // 0x7e MMX MOVD
    493 	{Zm_r_xm, 2, argList{Yml, Yxr}},   // Pe, 0x6e MOVD xmm load
    494 	{Zr_m_xm, 2, argList{Yxr, Yml}},   // Pe, 0x7e MOVD xmm store
    495 	{Zaut_r, 1, argList{Yiauto, Yrl}}, // 0 built-in LEAQ
    496 }
    497 
    498 var ymovbe = []ytab{
        	// Both directions between Ym and Yrl; each line advances z by 3.
    499 	{Zlitm_r, 3, argList{Ym, Yrl}},
    500 	{Zlitr_m, 3, argList{Yrl, Ym}},
    501 }
    502 
        // The single-line tables that follow are named <from>_<to> after the two
        // operand classes of their only line (ym_rl is Ym -> Yrl, and so on).
    503 var ym_rl = []ytab{
    504 	{Zm_r, 1, argList{Ym, Yrl}},
    505 }
    506 
        // yrl_m is used by the ABOUNDL and ABOUNDW rows of optab.
    507 var yrl_m = []ytab{
    508 	{Zr_m, 1, argList{Yrl, Ym}},
    509 }
    510 
    511 var ymb_rl = []ytab{
    512 	{Zmb_r, 1, argList{Ymb, Yrl}},
    513 }
    514 
        // yml_rl is used by many optab rows, e.g. AADCXL, ABSFL and the ACMOV* family.
    515 var yml_rl = []ytab{
    516 	{Zm_r, 1, argList{Yml, Yrl}},
    517 }
    518 
        // yrl_ml is used by the AARPL row of optab.
    519 var yrl_ml = []ytab{
    520 	{Zr_m, 1, argList{Yrl, Yml}},
    521 }
    522 
        // yml_mb does not follow the naming pattern: both of its lines use the
        // Yrb/Ymb classes, one line per direction.
    523 var yml_mb = []ytab{
    524 	{Zr_m, 1, argList{Yrb, Ymb}},
    525 	{Zm_r, 1, argList{Ymb, Yrb}},
    526 }
    527 
    528 var yrb_mb = []ytab{
    529 	{Zr_m, 1, argList{Yrb, Ymb}},
    530 }
    531 
        // yxchg: the two Yax lines come first, followed by the general
        // register/memory lines in both directions.
    532 var yxchg = []ytab{
    533 	{Z_rp, 1, argList{Yax, Yrl}},
    534 	{Zrp_, 1, argList{Yrl, Yax}},
    535 	{Zr_m, 1, argList{Yrl, Yml}},
    536 	{Zm_r, 1, argList{Yml, Yrl}},
    537 }
    538 
    539 var ydivl = []ytab{
        	// Single Yml operand; ydivb below is the same shape with Ymb.
    540 	{Zm_o, 2, argList{Yml}},
    541 }
    542 
    543 var ydivb = []ytab{
    544 	{Zm_o, 2, argList{Ymb}},
    545 }
    546 
        // yimul covers the one-operand form, two immediate forms and the
        // two-operand Yml -> Yrl form.
    547 var yimul = []ytab{
    548 	{Zm_o, 2, argList{Yml}},
    549 	{Zib_rr, 1, argList{Yi8, Yrl}},
    550 	{Zil_rr, 1, argList{Yi32, Yrl}},
    551 	{Zm_r, 2, argList{Yml, Yrl}},
    552 }
    553 
        // yimul3 holds the three-operand forms; the lines differ only in the
        // immediate class (Yi8 or Yi32).
    554 var yimul3 = []ytab{
    555 	{Zibm_r, 2, argList{Yi8, Yml, Yrl}},
    556 	{Zibm_r, 2, argList{Yi32, Yml, Yrl}},
    557 }
    558 
        // ybyte is used by the ABYTE row of optab.
    559 var ybyte = []ytab{
    560 	{Zbyte, 1, argList{Yi64}},
    561 }
    562 
    563 var yin = []ytab{
    564 	{Zib_, 1, argList{Yi32}},
    565 	{Zlit, 1, argList{}},
    566 }
    567 
    568 var yint = []ytab{
    569 	{Zib_, 1, argList{Yi32}},
    570 }
    571 
    572 var ypushl = []ytab{
    573 	{Zrp_, 1, argList{Yrl}},
    574 	{Zm_o, 2, argList{Ym}},
    575 	{Zib_, 1, argList{Yi8}},
    576 	{Zil_, 1, argList{Yi32}},
    577 }
    578 
    579 var ypopl = []ytab{
    580 	{Z_rp, 1, argList{Yrl}},
    581 	{Zo_m, 2, argList{Ym}},
    582 }
    583 
    584 var ywrfsbase = []ytab{
    585 	{Zm_o, 2, argList{Yrl}},
    586 }
    587 
    588 var yrdrand = []ytab{
    589 	{Zo_m, 2, argList{Yrl}},
    590 }
    591 
        // yclflush is used by the ACLDEMOTE, ACLFLUSH, ACLFLUSHOPT and ACLWB
        // rows of optab, each of which supplies two op bytes.
    592 var yclflush = []ytab{
    593 	{Zo_m, 2, argList{Ym}},
    594 }
    595 
        // ybswap is used by ABSWAPL and ABSWAPQ, whose rows carry the two bytes
        // 0x0f, 0xc8.
    596 var ybswap = []ytab{
    597 	{Z_rp, 2, argList{Yrl}},
    598 }
    599 
    600 var yscond = []ytab{
    601 	{Zo_m, 2, argList{Ymb}},
    602 }
    603 
    604 var yjcond = []ytab{
        	// The first two lines advance z by 0, so all three lines read their
        	// opcode bytes from the same position.
    605 	{Zbr, 0, argList{Ybr}},
    606 	{Zbr, 0, argList{Yi0, Ybr}},
    607 	{Zbr, 1, argList{Yi1, Ybr}},
    608 }
    609 
    610 var yloop = []ytab{
    611 	{Zloop, 1, argList{Ybr}},
    612 }
    613 
        // ycall is used by the obj.ACALL row, opBytes{0xff, 02, 0xff, 0x15, 0xe8}.
        // Following the advance values: both Zcallindreg lines read 0xff, 02,
        // Zcallind reads 0xff, 0x15, and Zcall and Zcallcon both read 0xe8.
    614 var ycall = []ytab{
    615 	{Zcallindreg, 0, argList{Yml}},
    616 	{Zcallindreg, 2, argList{Yrx, Yrx}},
    617 	{Zcallind, 2, argList{Yindir}},
    618 	{Zcall, 0, argList{Ybr}},
    619 	{Zcallcon, 1, argList{Yi32}},
    620 }
    621 
    622 var yduff = []ytab{
    623 	{Zcallduff, 1, argList{Yi32}},
    624 }
    625 
        // yjmp: as in ycall, the Zjmp line advances z by 0, so Zjmp and Zjmpcon
        // read the same opcode position.
    626 var yjmp = []ytab{
    627 	{Zo_m64, 2, argList{Yml}},
    628 	{Zjmp, 0, argList{Ybr}},
    629 	{Zjmpcon, 1, argList{Yi32}},
    630 }
    631 
    632 var yfmvd = []ytab{
        	// In the tables from yfmvd through ycompp every line pairs Yf0 with Ym
        	// or Yrf: Zm_o is used when Yf0 is the second operand and Zo_m when it
        	// is the first.
    633 	{Zm_o, 2, argList{Ym, Yf0}},
    634 	{Zo_m, 2, argList{Yf0, Ym}},
    635 	{Zm_o, 2, argList{Yrf, Yf0}},
    636 	{Zo_m, 2, argList{Yf0, Yrf}},
    637 }
    638 
        // yfmvdp keeps only the lines of yfmvd in which Yf0 is the first operand.
    639 var yfmvdp = []ytab{
    640 	{Zo_m, 2, argList{Yf0, Ym}},
    641 	{Zo_m, 2, argList{Yf0, Yrf}},
    642 }
    643 
        // yfmvf keeps only the Ym lines of yfmvd.
    644 var yfmvf = []ytab{
    645 	{Zm_o, 2, argList{Ym, Yf0}},
    646 	{Zo_m, 2, argList{Yf0, Ym}},
    647 }
    648 
    649 var yfmvx = []ytab{
    650 	{Zm_o, 2, argList{Ym, Yf0}},
    651 }
    652 
    653 var yfmvp = []ytab{
    654 	{Zo_m, 2, argList{Yf0, Ym}},
    655 }
    656 
    657 var yfcmv = []ytab{
    658 	{Zm_o, 2, argList{Yrf, Yf0}},
    659 }
    660 
    661 var yfadd = []ytab{
    662 	{Zm_o, 2, argList{Ym, Yf0}},
    663 	{Zm_o, 2, argList{Yrf, Yf0}},
    664 	{Zo_m, 2, argList{Yf0, Yrf}},
    665 }
    666 
    667 var yfxch = []ytab{
    668 	{Zo_m, 2, argList{Yf0, Yrf}},
    669 	{Zm_o, 2, argList{Yrf, Yf0}},
    670 }
    671 
    672 var ycompp = []ytab{
    673 	{Zo_m, 2, argList{Yf0, Yrf}}, // botch is really f0,f1
    674 }
    675 
        // ystsw accepts either a Ym operand or Yax.
    676 var ystsw = []ytab{
    677 	{Zo_m, 2, argList{Ym}},
    678 	{Zlit, 1, argList{Yax}},
    679 }
    680 
    681 var ysvrs_mo = []ytab{
    682 	{Zm_o, 2, argList{Ym}},
    683 }
    684 
    685 // ysvrs_om is the unaryDst version of "ysvrs_mo": same Ym operand, Zo_m instead of Zm_o.
    686 var ysvrs_om = []ytab{
    687 	{Zo_m, 2, argList{Ym}},
    688 }
    689 
    690 var ymm = []ytab{
        	// One line for the Ymm -> Ymr shape and one for the Yxm -> Yxr shape;
        	// the second line advances z by 2 instead of 1.
    691 	{Zm_r_xm, 1, argList{Ymm, Ymr}},
    692 	{Zm_r_xm, 2, argList{Yxm, Yxr}},
    693 }
    694 
        // yxm is used by many optab rows with a single opcode byte, e.g. AADDPD,
        // AADDPS, AADDSD, AADDSS and the AAND*P* rows.
    695 var yxm = []ytab{
    696 	{Zm_r_xm, 1, argList{Yxm, Yxr}},
    697 }
    698 
        // yxm_q4 takes the same operands as yxm but uses Zm_r instead of Zm_r_xm.
    699 var yxm_q4 = []ytab{
    700 	{Zm_r, 1, argList{Yxm, Yxr}},
    701 }
    702 
    703 var yxcvm1 = []ytab{
    704 	{Zm_r_xm, 2, argList{Yxm, Yxr}},
    705 	{Zm_r_xm, 2, argList{Yxm, Ymr}},
    706 }
    707 
    708 var yxcvm2 = []ytab{
    709 	{Zm_r_xm, 2, argList{Yxm, Yxr}},
    710 	{Zm_r_xm, 2, argList{Ymm, Yxr}},
    711 }
    712 
    713 var yxr = []ytab{
    714 	{Zm_r_xm, 1, argList{Yxr, Yxr}},
    715 }
    716 
    717 var yxr_ml = []ytab{
    718 	{Zr_m_xm, 1, argList{Yxr, Yml}},
    719 }
    720 
    721 var ymr = []ytab{
    722 	{Zm_r, 1, argList{Ymr, Ymr}},
    723 }
    724 
    725 var ymr_ml = []ytab{
    726 	{Zr_m_xm, 1, argList{Ymr, Yml}},
    727 }
    728 
        // yxcmpi is used by the ACMPPD and ACMPPS rows of optab; each row
        // supplies two op bytes, matching the advance of 2.
    729 var yxcmpi = []ytab{
    730 	{Zm_r_i_xm, 2, argList{Yxm, Yxr, Yi8}},
    731 }
    732 
        // yxmov allows both directions between Yxm and Yxr.
    733 var yxmov = []ytab{
    734 	{Zm_r_xm, 1, argList{Yxm, Yxr}},
    735 	{Zr_m_xm, 1, argList{Yxr, Yxm}},
    736 }
    737 
        // yxcvfl/yxcvfq and yxcvlf/yxcvqf are pairs with identical operands that
        // differ only in the z advance (1 versus 2).
    738 var yxcvfl = []ytab{
    739 	{Zm_r_xm, 1, argList{Yxm, Yrl}},
    740 }
    741 
    742 var yxcvlf = []ytab{
    743 	{Zm_r_xm, 1, argList{Yml, Yxr}},
    744 }
    745 
    746 var yxcvfq = []ytab{
    747 	{Zm_r_xm, 2, argList{Yxm, Yrl}},
    748 }
    749 
    750 var yxcvqf = []ytab{
    751 	{Zm_r_xm, 2, argList{Yml, Yxr}},
    752 }
    753 
    754 var yps = []ytab{
    755 	{Zm_r_xm, 1, argList{Ymm, Ymr}},
    756 	{Zibo_m_xm, 2, argList{Yi8, Ymr}},
    757 	{Zm_r_xm, 2, argList{Yxm, Yxr}},
    758 	{Zibo_m_xm, 3, argList{Yi8, Yxr}},
    759 }
    760 
    761 var yxrrl = []ytab{
    762 	{Zm_r, 1, argList{Yxr, Yrl}},
    763 }
    764 
    765 var ymrxr = []ytab{
    766 	{Zm_r, 1, argList{Ymr, Yxr}},
    767 	{Zm_r_xm, 1, argList{Yxm, Yxr}},
    768 }
    769 
    770 var ymshuf = []ytab{
        	// Three-operand shape: an immediate first, then Ymm and Ymr.
    771 	{Zibm_r, 2, argList{Yi8, Ymm, Ymr}},
    772 }
    773 
    774 var ymshufb = []ytab{
    775 	{Zm2_r, 2, argList{Yxm, Yxr}},
    776 }
    777 
    778 // yxshuf should never have more than 1 entry,
    779 // because some optab entries use opcode sequences that
    780 // are longer than 2 bytes (zoffset=2 here),
    781 // ROUNDPD and ROUNDPS and recently added BLENDPD,
    782 // to name a few.
    783 var yxshuf = []ytab{
    784 	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
    785 }
    786 
        // yextrw: the destination is either Yrl (Zibm_r) or Yml (Zibr_m).
    787 var yextrw = []ytab{
    788 	{Zibm_r, 2, argList{Yu8, Yxr, Yrl}},
    789 	{Zibr_m, 2, argList{Yu8, Yxr, Yml}},
    790 }
    791 
    792 var yextr = []ytab{
    793 	{Zibr_m, 3, argList{Yu8, Yxr, Ymm}},
    794 }
    795 
    796 var yinsrw = []ytab{
    797 	{Zibm_r, 2, argList{Yu8, Yml, Yxr}},
    798 }
    799 
    800 var yinsr = []ytab{
    801 	{Zibm_r, 3, argList{Yu8, Ymm, Yxr}},
    802 }
    803 
    804 var ypsdq = []ytab{
    805 	{Zibo_m, 2, argList{Yi8, Yxr}},
    806 }
    807 
        // ymskb: the Yxr source line comes first and advances z by 2; the Ymr
        // source line follows.
    808 var ymskb = []ytab{
    809 	{Zm_r_xm, 2, argList{Yxr, Yrl}},
    810 	{Zm_r_xm, 1, argList{Ymr, Yrl}},
    811 }
    812 
    813 var ycrc32l = []ytab{
        	// ycrc32l and ycrc32b differ only in the source class (Yml versus Ymb).
    814 	{Zlitm_r, 0, argList{Yml, Yrl}},
    815 }
    816 
    817 var ycrc32b = []ytab{
    818 	{Zlitm_r, 0, argList{Ymb, Yrl}},
    819 }
    820 
    821 var yprefetch = []ytab{
    822 	{Zm_o, 2, argList{Ym}},
    823 }
    824 
    825 var yaes = []ytab{
    826 	{Zlitm_r, 2, argList{Yxm, Yxr}},
    827 }
    828 
    829 var yxbegin = []ytab{
    830 	{Zjmp, 1, argList{Ybr}},
    831 }
    832 
    833 var yxabort = []ytab{
    834 	{Zib_, 1, argList{Yu8}},
    835 }
    836 
    837 var ylddqu = []ytab{
    838 	{Zm_r, 1, argList{Ym, Yxr}},
    839 }
    840 
        // ypalignr has the same single line as yxshuf.
    841 var ypalignr = []ytab{
    842 	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
    843 }
    844 
        // ysha256rnds2 and yblendvpd take Yxr0 (X0 only) as their first operand.
    845 var ysha256rnds2 = []ytab{
    846 	{Zlit_m_r, 0, argList{Yxr0, Yxm, Yxr}},
    847 }
    848 
    849 var yblendvpd = []ytab{
    850 	{Z_m_r, 1, argList{Yxr0, Yxm, Yxr}},
    851 }
    852 
        // ymmxmm0f38: the Ymm/Ymr line advances z by 3 and the Yxm/Yxr line by 5.
    853 var ymmxmm0f38 = []ytab{
    854 	{Zlitm_r, 3, argList{Ymm, Ymr}},
    855 	{Zlitm_r, 5, argList{Yxm, Yxr}},
    856 }
    857 
        // yextractps and ysha1rnds4 take a Yu2 immediate (a value that fits in uint2).
    858 var yextractps = []ytab{
    859 	{Zibr_m, 2, argList{Yu2, Yxr, Yml}},
    860 }
    861 
    862 var ysha1rnds4 = []ytab{
    863 	{Zibm_r, 2, argList{Yu2, Yxm, Yxr}},
    864 }
    865 
    866 // You are doasm, holding in your hand a *obj.Prog with p.As set to, say,
    867 // ACRC32, and p.From and p.To as operands (obj.Addr).  The linker scans optab
    868 // to find the entry with the given p.As and then looks through the ytable for
    869 // that instruction (the second field in the optab struct) for a line whose
    870 // argList matches the Ytypes of the p.From and p.To operands.  The
    871 // function oclass computes the specific Ytype of an operand and then the set
    872 // of more general Ytypes that it satisfies is implied by the ycover table, set
    873 // up in instinit.  For example, oclass distinguishes the constants 0 and 1
    874 // from the more general 8-bit constants, but instinit says
    875 //
    876 //        ycover[Yi0*Ymax+Ys32] = 1
    877 //        ycover[Yi1*Ymax+Ys32] = 1
    878 //        ycover[Yi8*Ymax+Ys32] = 1
    879 //
    880 // which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
    881 // if that's what an instruction can handle.
    882 //
    883 // In parallel with the scan through the ytable for the appropriate line, there
    884 // is a z pointer that starts out pointing at the strange magic byte list in
    885 // the Optab struct.  With each step past a non-matching ytable line, z
    886 // advances by the 2nd entry in the line.  When a matching line is found, that
    887 // z pointer has the extra data to use in laying down the instruction bytes.
    888 // The actual bytes laid down are a function of the 1st entry in the line (that
    889 // is, the Ztype) and the z bytes.
    890 //
    891 // For example, let's look at AADDL.  The optab line says:
    892 //        {AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
    893 //
    894 // and yaddl says
    895 //        var yaddl = []ytab{
    896 //                {Zibo_m, 2, argList{Yi8, Yml}},
    897 //                {Zil_, 1, argList{Yi32, Yax}},
    898 //                {Zilo_m, 2, argList{Yi32, Yml}},
    899 //                {Zr_m, 1, argList{Yrl, Yml}},
    900 //                {Zm_r, 1, argList{Yml, Yrl}},
    901 //        }
    902 //
    903 // so there are 5 possible types of ADDL instruction that can be laid down, and
    904 // possible states used to lay them down (Ztype and z pointer, assuming z
    905 // points at opBytes{0x83, 00, 0x05,0x81, 00, 0x01, 0x03}) are:
    906 //
    907 //        Yi8, Yml -> Zibo_m, z (0x83, 00)
    908 //        Yi32, Yax -> Zil_, z+2 (0x05)
    909 //        Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
    910 //        Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
    911 //        Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
    912 //
    913 // The Pconstant in the optab line controls the prefix bytes to emit.  That's
    914 // relatively straightforward as this program goes.
    915 //
    916 // The switch on yt.zcase in doasm implements the various Z cases.  Zibo_m, for
    917 // example, is an opcode byte (z[0]) then an asmando (which is some kind of
    918 // encoded addressing mode for the Yml arg), and then a single immediate byte.
    919 // Zilo_m is the same but a long (32-bit) immediate.
    920 var optab =
    921 //	as, ytab, andproto, opcode
    922 [...]Optab{
    923 	{obj.AXXX, nil, 0, opBytes{}},
    924 	{AAAA, ynone, P32, opBytes{0x37}},
    925 	{AAAD, ynone, P32, opBytes{0xd5, 0x0a}},
    926 	{AAAM, ynone, P32, opBytes{0xd4, 0x0a}},
    927 	{AAAS, ynone, P32, opBytes{0x3f}},
    928 	{AADCB, yxorb, Pb, opBytes{0x14, 0x80, 02, 0x10, 0x12}},
    929 	{AADCL, yaddl, Px, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
    930 	{AADCQ, yaddl, Pw, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
    931 	{AADCW, yaddl, Pe, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
    932 	{AADCXL, yml_rl, Pq4, opBytes{0xf6}},
    933 	{AADCXQ, yml_rl, Pq4w, opBytes{0xf6}},
    934 	{AADDB, yxorb, Pb, opBytes{0x04, 0x80, 00, 0x00, 0x02}},
    935 	{AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
    936 	{AADDPD, yxm, Pq, opBytes{0x58}},
    937 	{AADDPS, yxm, Pm, opBytes{0x58}},
    938 	{AADDQ, yaddl, Pw, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
    939 	{AADDSD, yxm, Pf2, opBytes{0x58}},
    940 	{AADDSS, yxm, Pf3, opBytes{0x58}},
    941 	{AADDSUBPD, yxm, Pq, opBytes{0xd0}},
    942 	{AADDSUBPS, yxm, Pf2, opBytes{0xd0}},
    943 	{AADDW, yaddl, Pe, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
    944 	{AADOXL, yml_rl, Pq5, opBytes{0xf6}},
    945 	{AADOXQ, yml_rl, Pq5w, opBytes{0xf6}},
    946 	{AADJSP, nil, 0, opBytes{}},
    947 	{AANDB, yxorb, Pb, opBytes{0x24, 0x80, 04, 0x20, 0x22}},
    948 	{AANDL, yaddl, Px, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
    949 	{AANDNPD, yxm, Pq, opBytes{0x55}},
    950 	{AANDNPS, yxm, Pm, opBytes{0x55}},
    951 	{AANDPD, yxm, Pq, opBytes{0x54}},
    952 	{AANDPS, yxm, Pm, opBytes{0x54}},
    953 	{AANDQ, yaddl, Pw, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
    954 	{AANDW, yaddl, Pe, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
    955 	{AARPL, yrl_ml, P32, opBytes{0x63}},
    956 	{ABOUNDL, yrl_m, P32, opBytes{0x62}},
    957 	{ABOUNDW, yrl_m, Pe, opBytes{0x62}},
    958 	{ABSFL, yml_rl, Pm, opBytes{0xbc}},
    959 	{ABSFQ, yml_rl, Pw, opBytes{0x0f, 0xbc}},
    960 	{ABSFW, yml_rl, Pq, opBytes{0xbc}},
    961 	{ABSRL, yml_rl, Pm, opBytes{0xbd}},
    962 	{ABSRQ, yml_rl, Pw, opBytes{0x0f, 0xbd}},
    963 	{ABSRW, yml_rl, Pq, opBytes{0xbd}},
    964 	{ABSWAPL, ybswap, Px, opBytes{0x0f, 0xc8}},
    965 	{ABSWAPQ, ybswap, Pw, opBytes{0x0f, 0xc8}},
    966 	{ABTCL, ybtl, Pm, opBytes{0xba, 07, 0xbb}},
    967 	{ABTCQ, ybtl, Pw, opBytes{0x0f, 0xba, 07, 0x0f, 0xbb}},
    968 	{ABTCW, ybtl, Pq, opBytes{0xba, 07, 0xbb}},
    969 	{ABTL, ybtl, Pm, opBytes{0xba, 04, 0xa3}},
    970 	{ABTQ, ybtl, Pw, opBytes{0x0f, 0xba, 04, 0x0f, 0xa3}},
    971 	{ABTRL, ybtl, Pm, opBytes{0xba, 06, 0xb3}},
    972 	{ABTRQ, ybtl, Pw, opBytes{0x0f, 0xba, 06, 0x0f, 0xb3}},
    973 	{ABTRW, ybtl, Pq, opBytes{0xba, 06, 0xb3}},
    974 	{ABTSL, ybtl, Pm, opBytes{0xba, 05, 0xab}},
    975 	{ABTSQ, ybtl, Pw, opBytes{0x0f, 0xba, 05, 0x0f, 0xab}},
    976 	{ABTSW, ybtl, Pq, opBytes{0xba, 05, 0xab}},
    977 	{ABTW, ybtl, Pq, opBytes{0xba, 04, 0xa3}},
    978 	{ABYTE, ybyte, Px, opBytes{1}},
    979 	{obj.ACALL, ycall, Px, opBytes{0xff, 02, 0xff, 0x15, 0xe8}},
    980 	{ACBW, ynone, Pe, opBytes{0x98}},
    981 	{ACDQ, ynone, Px, opBytes{0x99}},
    982 	{ACDQE, ynone, Pw, opBytes{0x98}},
    983 	{ACLAC, ynone, Pm, opBytes{01, 0xca}},
    984 	{ACLC, ynone, Px, opBytes{0xf8}},
    985 	{ACLD, ynone, Px, opBytes{0xfc}},
    986 	{ACLDEMOTE, yclflush, Pm, opBytes{0x1c, 00}},
    987 	{ACLFLUSH, yclflush, Pm, opBytes{0xae, 07}},
    988 	{ACLFLUSHOPT, yclflush, Pq, opBytes{0xae, 07}},
    989 	{ACLI, ynone, Px, opBytes{0xfa}},
    990 	{ACLTS, ynone, Pm, opBytes{0x06}},
    991 	{ACLWB, yclflush, Pq, opBytes{0xae, 06}},
    992 	{ACMC, ynone, Px, opBytes{0xf5}},
    993 	{ACMOVLCC, yml_rl, Pm, opBytes{0x43}},
    994 	{ACMOVLCS, yml_rl, Pm, opBytes{0x42}},
    995 	{ACMOVLEQ, yml_rl, Pm, opBytes{0x44}},
    996 	{ACMOVLGE, yml_rl, Pm, opBytes{0x4d}},
    997 	{ACMOVLGT, yml_rl, Pm, opBytes{0x4f}},
    998 	{ACMOVLHI, yml_rl, Pm, opBytes{0x47}},
    999 	{ACMOVLLE, yml_rl, Pm, opBytes{0x4e}},
   1000 	{ACMOVLLS, yml_rl, Pm, opBytes{0x46}},
   1001 	{ACMOVLLT, yml_rl, Pm, opBytes{0x4c}},
   1002 	{ACMOVLMI, yml_rl, Pm, opBytes{0x48}},
   1003 	{ACMOVLNE, yml_rl, Pm, opBytes{0x45}},
   1004 	{ACMOVLOC, yml_rl, Pm, opBytes{0x41}},
   1005 	{ACMOVLOS, yml_rl, Pm, opBytes{0x40}},
   1006 	{ACMOVLPC, yml_rl, Pm, opBytes{0x4b}},
   1007 	{ACMOVLPL, yml_rl, Pm, opBytes{0x49}},
   1008 	{ACMOVLPS, yml_rl, Pm, opBytes{0x4a}},
   1009 	{ACMOVQCC, yml_rl, Pw, opBytes{0x0f, 0x43}},
   1010 	{ACMOVQCS, yml_rl, Pw, opBytes{0x0f, 0x42}},
   1011 	{ACMOVQEQ, yml_rl, Pw, opBytes{0x0f, 0x44}},
   1012 	{ACMOVQGE, yml_rl, Pw, opBytes{0x0f, 0x4d}},
   1013 	{ACMOVQGT, yml_rl, Pw, opBytes{0x0f, 0x4f}},
   1014 	{ACMOVQHI, yml_rl, Pw, opBytes{0x0f, 0x47}},
   1015 	{ACMOVQLE, yml_rl, Pw, opBytes{0x0f, 0x4e}},
   1016 	{ACMOVQLS, yml_rl, Pw, opBytes{0x0f, 0x46}},
   1017 	{ACMOVQLT, yml_rl, Pw, opBytes{0x0f, 0x4c}},
   1018 	{ACMOVQMI, yml_rl, Pw, opBytes{0x0f, 0x48}},
   1019 	{ACMOVQNE, yml_rl, Pw, opBytes{0x0f, 0x45}},
   1020 	{ACMOVQOC, yml_rl, Pw, opBytes{0x0f, 0x41}},
   1021 	{ACMOVQOS, yml_rl, Pw, opBytes{0x0f, 0x40}},
   1022 	{ACMOVQPC, yml_rl, Pw, opBytes{0x0f, 0x4b}},
   1023 	{ACMOVQPL, yml_rl, Pw, opBytes{0x0f, 0x49}},
   1024 	{ACMOVQPS, yml_rl, Pw, opBytes{0x0f, 0x4a}},
   1025 	{ACMOVWCC, yml_rl, Pq, opBytes{0x43}},
   1026 	{ACMOVWCS, yml_rl, Pq, opBytes{0x42}},
   1027 	{ACMOVWEQ, yml_rl, Pq, opBytes{0x44}},
   1028 	{ACMOVWGE, yml_rl, Pq, opBytes{0x4d}},
   1029 	{ACMOVWGT, yml_rl, Pq, opBytes{0x4f}},
   1030 	{ACMOVWHI, yml_rl, Pq, opBytes{0x47}},
   1031 	{ACMOVWLE, yml_rl, Pq, opBytes{0x4e}},
   1032 	{ACMOVWLS, yml_rl, Pq, opBytes{0x46}},
   1033 	{ACMOVWLT, yml_rl, Pq, opBytes{0x4c}},
   1034 	{ACMOVWMI, yml_rl, Pq, opBytes{0x48}},
   1035 	{ACMOVWNE, yml_rl, Pq, opBytes{0x45}},
   1036 	{ACMOVWOC, yml_rl, Pq, opBytes{0x41}},
   1037 	{ACMOVWOS, yml_rl, Pq, opBytes{0x40}},
   1038 	{ACMOVWPC, yml_rl, Pq, opBytes{0x4b}},
   1039 	{ACMOVWPL, yml_rl, Pq, opBytes{0x49}},
   1040 	{ACMOVWPS, yml_rl, Pq, opBytes{0x4a}},
   1041 	{ACMPB, ycmpb, Pb, opBytes{0x3c, 0x80, 07, 0x38, 0x3a}},
   1042 	{ACMPL, ycmpl, Px, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
   1043 	{ACMPPD, yxcmpi, Px, opBytes{Pe, 0xc2}},
   1044 	{ACMPPS, yxcmpi, Pm, opBytes{0xc2, 0}},
   1045 	{ACMPQ, ycmpl, Pw, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
   1046 	{ACMPSB, ynone, Pb, opBytes{0xa6}},
   1047 	{ACMPSD, yxcmpi, Px, opBytes{Pf2, 0xc2}},
   1048 	{ACMPSL, ynone, Px, opBytes{0xa7}},
   1049 	{ACMPSQ, ynone, Pw, opBytes{0xa7}},
   1050 	{ACMPSS, yxcmpi, Px, opBytes{Pf3, 0xc2}},
   1051 	{ACMPSW, ynone, Pe, opBytes{0xa7}},
   1052 	{ACMPW, ycmpl, Pe, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
   1053 	{ACOMISD, yxm, Pe, opBytes{0x2f}},
   1054 	{ACOMISS, yxm, Pm, opBytes{0x2f}},
   1055 	{ACPUID, ynone, Pm, opBytes{0xa2}},
   1056 	{ACVTPL2PD, yxcvm2, Px, opBytes{Pf3, 0xe6, Pe, 0x2a}},
   1057 	{ACVTPL2PS, yxcvm2, Pm, opBytes{0x5b, 0, 0x2a, 0}},
   1058 	{ACVTPD2PL, yxcvm1, Px, opBytes{Pf2, 0xe6, Pe, 0x2d}},
   1059 	{ACVTPD2PS, yxm, Pe, opBytes{0x5a}},
   1060 	{ACVTPS2PL, yxcvm1, Px, opBytes{Pe, 0x5b, Pm, 0x2d}},
   1061 	{ACVTPS2PD, yxm, Pm, opBytes{0x5a}},
   1062 	{ACVTSD2SL, yxcvfl, Pf2, opBytes{0x2d}},
   1063 	{ACVTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2d}},
   1064 	{ACVTSD2SS, yxm, Pf2, opBytes{0x5a}},
   1065 	{ACVTSL2SD, yxcvlf, Pf2, opBytes{0x2a}},
   1066 	{ACVTSQ2SD, yxcvqf, Pw, opBytes{Pf2, 0x2a}},
   1067 	{ACVTSL2SS, yxcvlf, Pf3, opBytes{0x2a}},
   1068 	{ACVTSQ2SS, yxcvqf, Pw, opBytes{Pf3, 0x2a}},
   1069 	{ACVTSS2SD, yxm, Pf3, opBytes{0x5a}},
   1070 	{ACVTSS2SL, yxcvfl, Pf3, opBytes{0x2d}},
   1071 	{ACVTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2d}},
   1072 	{ACVTTPD2PL, yxcvm1, Px, opBytes{Pe, 0xe6, Pe, 0x2c}},
   1073 	{ACVTTPS2PL, yxcvm1, Px, opBytes{Pf3, 0x5b, Pm, 0x2c}},
   1074 	{ACVTTSD2SL, yxcvfl, Pf2, opBytes{0x2c}},
   1075 	{ACVTTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2c}},
   1076 	{ACVTTSS2SL, yxcvfl, Pf3, opBytes{0x2c}},
   1077 	{ACVTTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2c}},
   1078 	{ACWD, ynone, Pe, opBytes{0x99}},
   1079 	{ACWDE, ynone, Px, opBytes{0x98}},
   1080 	{ACQO, ynone, Pw, opBytes{0x99}},
   1081 	{ADAA, ynone, P32, opBytes{0x27}},
   1082 	{ADAS, ynone, P32, opBytes{0x2f}},
   1083 	{ADECB, yscond, Pb, opBytes{0xfe, 01}},
   1084 	{ADECL, yincl, Px1, opBytes{0x48, 0xff, 01}},
   1085 	{ADECQ, yincq, Pw, opBytes{0xff, 01}},
   1086 	{ADECW, yincq, Pe, opBytes{0xff, 01}},
   1087 	{ADIVB, ydivb, Pb, opBytes{0xf6, 06}},
   1088 	{ADIVL, ydivl, Px, opBytes{0xf7, 06}},
   1089 	{ADIVPD, yxm, Pe, opBytes{0x5e}},
   1090 	{ADIVPS, yxm, Pm, opBytes{0x5e}},
   1091 	{ADIVQ, ydivl, Pw, opBytes{0xf7, 06}},
   1092 	{ADIVSD, yxm, Pf2, opBytes{0x5e}},
   1093 	{ADIVSS, yxm, Pf3, opBytes{0x5e}},
   1094 	{ADIVW, ydivl, Pe, opBytes{0xf7, 06}},
   1095 	{ADPPD, yxshuf, Pq, opBytes{0x3a, 0x41, 0}},
   1096 	{ADPPS, yxshuf, Pq, opBytes{0x3a, 0x40, 0}},
   1097 	{AEMMS, ynone, Pm, opBytes{0x77}},
   1098 	{AEXTRACTPS, yextractps, Pq, opBytes{0x3a, 0x17, 0}},
   1099 	{AENTER, nil, 0, opBytes{}}, // botch
   1100 	{AFXRSTOR, ysvrs_mo, Pm, opBytes{0xae, 01, 0xae, 01}},
   1101 	{AFXSAVE, ysvrs_om, Pm, opBytes{0xae, 00, 0xae, 00}},
   1102 	{AFXRSTOR64, ysvrs_mo, Pw, opBytes{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
   1103 	{AFXSAVE64, ysvrs_om, Pw, opBytes{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
   1104 	{AHLT, ynone, Px, opBytes{0xf4}},
   1105 	{AIDIVB, ydivb, Pb, opBytes{0xf6, 07}},
   1106 	{AIDIVL, ydivl, Px, opBytes{0xf7, 07}},
   1107 	{AIDIVQ, ydivl, Pw, opBytes{0xf7, 07}},
   1108 	{AIDIVW, ydivl, Pe, opBytes{0xf7, 07}},
   1109 	{AIMULB, ydivb, Pb, opBytes{0xf6, 05}},
   1110 	{AIMULL, yimul, Px, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
   1111 	{AIMULQ, yimul, Pw, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
   1112 	{AIMULW, yimul, Pe, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
   1113 	{AIMUL3W, yimul3, Pe, opBytes{0x6b, 00, 0x69, 00}},
   1114 	{AIMUL3L, yimul3, Px, opBytes{0x6b, 00, 0x69, 00}},
   1115 	{AIMUL3Q, yimul3, Pw, opBytes{0x6b, 00, 0x69, 00}},
   1116 	{AINB, yin, Pb, opBytes{0xe4, 0xec}},
   1117 	{AINW, yin, Pe, opBytes{0xe5, 0xed}},
   1118 	{AINL, yin, Px, opBytes{0xe5, 0xed}},
   1119 	{AINCB, yscond, Pb, opBytes{0xfe, 00}},
   1120 	{AINCL, yincl, Px1, opBytes{0x40, 0xff, 00}},
   1121 	{AINCQ, yincq, Pw, opBytes{0xff, 00}},
   1122 	{AINCW, yincq, Pe, opBytes{0xff, 00}},
   1123 	{AINSB, ynone, Pb, opBytes{0x6c}},
   1124 	{AINSL, ynone, Px, opBytes{0x6d}},
   1125 	{AINSERTPS, yxshuf, Pq, opBytes{0x3a, 0x21, 0}},
   1126 	{AINSW, ynone, Pe, opBytes{0x6d}},
   1127 	{AICEBP, ynone, Px, opBytes{0xf1}},
   1128 	{AINT, yint, Px, opBytes{0xcd}},
   1129 	{AINTO, ynone, P32, opBytes{0xce}},
   1130 	{AIRETL, ynone, Px, opBytes{0xcf}},
   1131 	{AIRETQ, ynone, Pw, opBytes{0xcf}},
   1132 	{AIRETW, ynone, Pe, opBytes{0xcf}},
   1133 	{AJCC, yjcond, Px, opBytes{0x73, 0x83, 00}},
   1134 	{AJCS, yjcond, Px, opBytes{0x72, 0x82}},
   1135 	{AJCXZL, yloop, Px, opBytes{0xe3}},
   1136 	{AJCXZW, yloop, Px, opBytes{0xe3}},
   1137 	{AJCXZQ, yloop, Px, opBytes{0xe3}},
   1138 	{AJEQ, yjcond, Px, opBytes{0x74, 0x84}},
   1139 	{AJGE, yjcond, Px, opBytes{0x7d, 0x8d}},
   1140 	{AJGT, yjcond, Px, opBytes{0x7f, 0x8f}},
   1141 	{AJHI, yjcond, Px, opBytes{0x77, 0x87}},
   1142 	{AJLE, yjcond, Px, opBytes{0x7e, 0x8e}},
   1143 	{AJLS, yjcond, Px, opBytes{0x76, 0x86}},
   1144 	{AJLT, yjcond, Px, opBytes{0x7c, 0x8c}},
   1145 	{AJMI, yjcond, Px, opBytes{0x78, 0x88}},
   1146 	{obj.AJMP, yjmp, Px, opBytes{0xff, 04, 0xeb, 0xe9}},
   1147 	{AJNE, yjcond, Px, opBytes{0x75, 0x85}},
   1148 	{AJOC, yjcond, Px, opBytes{0x71, 0x81, 00}},
   1149 	{AJOS, yjcond, Px, opBytes{0x70, 0x80, 00}},
   1150 	{AJPC, yjcond, Px, opBytes{0x7b, 0x8b}},
   1151 	{AJPL, yjcond, Px, opBytes{0x79, 0x89}},
   1152 	{AJPS, yjcond, Px, opBytes{0x7a, 0x8a}},
   1153 	{AHADDPD, yxm, Pq, opBytes{0x7c}},
   1154 	{AHADDPS, yxm, Pf2, opBytes{0x7c}},
   1155 	{AHSUBPD, yxm, Pq, opBytes{0x7d}},
   1156 	{AHSUBPS, yxm, Pf2, opBytes{0x7d}},
   1157 	{ALAHF, ynone, Px, opBytes{0x9f}},
   1158 	{ALARL, yml_rl, Pm, opBytes{0x02}},
   1159 	{ALARQ, yml_rl, Pw, opBytes{0x0f, 0x02}},
   1160 	{ALARW, yml_rl, Pq, opBytes{0x02}},
   1161 	{ALDDQU, ylddqu, Pf2, opBytes{0xf0}},
   1162 	{ALDMXCSR, ysvrs_mo, Pm, opBytes{0xae, 02, 0xae, 02}},
   1163 	{ALEAL, ym_rl, Px, opBytes{0x8d}},
   1164 	{ALEAQ, ym_rl, Pw, opBytes{0x8d}},
   1165 	{ALEAVEL, ynone, P32, opBytes{0xc9}},
   1166 	{ALEAVEQ, ynone, Py, opBytes{0xc9}},
   1167 	{ALEAVEW, ynone, Pe, opBytes{0xc9}},
   1168 	{ALEAW, ym_rl, Pe, opBytes{0x8d}},
   1169 	{ALOCK, ynone, Px, opBytes{0xf0}},
   1170 	{ALODSB, ynone, Pb, opBytes{0xac}},
   1171 	{ALODSL, ynone, Px, opBytes{0xad}},
   1172 	{ALODSQ, ynone, Pw, opBytes{0xad}},
   1173 	{ALODSW, ynone, Pe, opBytes{0xad}},
   1174 	{ALONG, ybyte, Px, opBytes{4}},
   1175 	{ALOOP, yloop, Px, opBytes{0xe2}},
   1176 	{ALOOPEQ, yloop, Px, opBytes{0xe1}},
   1177 	{ALOOPNE, yloop, Px, opBytes{0xe0}},
   1178 	{ALTR, ydivl, Pm, opBytes{0x00, 03}},
   1179 	{ALZCNTL, yml_rl, Pf3, opBytes{0xbd}},
   1180 	{ALZCNTQ, yml_rl, Pfw, opBytes{0xbd}},
   1181 	{ALZCNTW, yml_rl, Pef3, opBytes{0xbd}},
   1182 	{ALSLL, yml_rl, Pm, opBytes{0x03}},
   1183 	{ALSLW, yml_rl, Pq, opBytes{0x03}},
   1184 	{ALSLQ, yml_rl, Pw, opBytes{0x0f, 0x03}},
   1185 	{AMASKMOVOU, yxr, Pe, opBytes{0xf7}},
   1186 	{AMASKMOVQ, ymr, Pm, opBytes{0xf7}},
   1187 	{AMAXPD, yxm, Pe, opBytes{0x5f}},
   1188 	{AMAXPS, yxm, Pm, opBytes{0x5f}},
   1189 	{AMAXSD, yxm, Pf2, opBytes{0x5f}},
   1190 	{AMAXSS, yxm, Pf3, opBytes{0x5f}},
   1191 	{AMINPD, yxm, Pe, opBytes{0x5d}},
   1192 	{AMINPS, yxm, Pm, opBytes{0x5d}},
   1193 	{AMINSD, yxm, Pf2, opBytes{0x5d}},
   1194 	{AMINSS, yxm, Pf3, opBytes{0x5d}},
   1195 	{AMONITOR, ynone, Px, opBytes{0x0f, 0x01, 0xc8, 0}},
   1196 	{AMWAIT, ynone, Px, opBytes{0x0f, 0x01, 0xc9, 0}},
   1197 	{AMOVAPD, yxmov, Pe, opBytes{0x28, 0x29}},
   1198 	{AMOVAPS, yxmov, Pm, opBytes{0x28, 0x29}},
   1199 	{AMOVB, ymovb, Pb, opBytes{0x88, 0x8a, 0xb0, 0xc6, 00}},
   1200 	{AMOVBLSX, ymb_rl, Pm, opBytes{0xbe}},
   1201 	{AMOVBLZX, ymb_rl, Pm, opBytes{0xb6}},
   1202 	{AMOVBQSX, ymb_rl, Pw, opBytes{0x0f, 0xbe}},
   1203 	{AMOVBQZX, ymb_rl, Pw, opBytes{0x0f, 0xb6}},
   1204 	{AMOVBWSX, ymb_rl, Pq, opBytes{0xbe}},
   1205 	{AMOVSWW, ymb_rl, Pe, opBytes{0x0f, 0xbf}},
   1206 	{AMOVBWZX, ymb_rl, Pq, opBytes{0xb6}},
   1207 	{AMOVZWW, ymb_rl, Pe, opBytes{0x0f, 0xb7}},
   1208 	{AMOVO, yxmov, Pe, opBytes{0x6f, 0x7f}},
   1209 	{AMOVOU, yxmov, Pf3, opBytes{0x6f, 0x7f}},
   1210 	{AMOVHLPS, yxr, Pm, opBytes{0x12}},
   1211 	{AMOVHPD, yxmov, Pe, opBytes{0x16, 0x17}},
   1212 	{AMOVHPS, yxmov, Pm, opBytes{0x16, 0x17}},
   1213 	{AMOVL, ymovl, Px, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
   1214 	{AMOVLHPS, yxr, Pm, opBytes{0x16}},
   1215 	{AMOVLPD, yxmov, Pe, opBytes{0x12, 0x13}},
   1216 	{AMOVLPS, yxmov, Pm, opBytes{0x12, 0x13}},
   1217 	{AMOVLQSX, yml_rl, Pw, opBytes{0x63}},
   1218 	{AMOVLQZX, yml_rl, Px, opBytes{0x8b}},
   1219 	{AMOVMSKPD, yxrrl, Pq, opBytes{0x50}},
   1220 	{AMOVMSKPS, yxrrl, Pm, opBytes{0x50}},
   1221 	{AMOVNTO, yxr_ml, Pe, opBytes{0xe7}},
   1222 	{AMOVNTDQA, ylddqu, Pq4, opBytes{0x2a}},
   1223 	{AMOVNTPD, yxr_ml, Pe, opBytes{0x2b}},
   1224 	{AMOVNTPS, yxr_ml, Pm, opBytes{0x2b}},
   1225 	{AMOVNTQ, ymr_ml, Pm, opBytes{0xe7}},
   1226 	{AMOVQ, ymovq, Pw8, opBytes{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
   1227 	{AMOVQOZX, ymrxr, Pf3, opBytes{0xd6, 0x7e}},
   1228 	{AMOVSB, ynone, Pb, opBytes{0xa4}},
   1229 	{AMOVSD, yxmov, Pf2, opBytes{0x10, 0x11}},
   1230 	{AMOVSL, ynone, Px, opBytes{0xa5}},
   1231 	{AMOVSQ, ynone, Pw, opBytes{0xa5}},
   1232 	{AMOVSS, yxmov, Pf3, opBytes{0x10, 0x11}},
   1233 	{AMOVSW, ynone, Pe, opBytes{0xa5}},
   1234 	{AMOVUPD, yxmov, Pe, opBytes{0x10, 0x11}},
   1235 	{AMOVUPS, yxmov, Pm, opBytes{0x10, 0x11}},
   1236 	{AMOVW, ymovw, Pe, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0}},
   1237 	{AMOVWLSX, yml_rl, Pm, opBytes{0xbf}},
   1238 	{AMOVWLZX, yml_rl, Pm, opBytes{0xb7}},
   1239 	{AMOVWQSX, yml_rl, Pw, opBytes{0x0f, 0xbf}},
   1240 	{AMOVWQZX, yml_rl, Pw, opBytes{0x0f, 0xb7}},
   1241 	{AMPSADBW, yxshuf, Pq, opBytes{0x3a, 0x42, 0}},
   1242 	{AMULB, ydivb, Pb, opBytes{0xf6, 04}},
   1243 	{AMULL, ydivl, Px, opBytes{0xf7, 04}},
   1244 	{AMULPD, yxm, Pe, opBytes{0x59}},
   1245 	{AMULPS, yxm, Ym, opBytes{0x59}},
   1246 	{AMULQ, ydivl, Pw, opBytes{0xf7, 04}},
   1247 	{AMULSD, yxm, Pf2, opBytes{0x59}},
   1248 	{AMULSS, yxm, Pf3, opBytes{0x59}},
   1249 	{AMULW, ydivl, Pe, opBytes{0xf7, 04}},
   1250 	{ANEGB, yscond, Pb, opBytes{0xf6, 03}},
   1251 	{ANEGL, yscond, Px, opBytes{0xf7, 03}},
   1252 	{ANEGQ, yscond, Pw, opBytes{0xf7, 03}},
   1253 	{ANEGW, yscond, Pe, opBytes{0xf7, 03}},
   1254 	{obj.ANOP, ynop, Px, opBytes{0, 0}},
   1255 	{ANOTB, yscond, Pb, opBytes{0xf6, 02}},
   1256 	{ANOTL, yscond, Px, opBytes{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
   1257 	{ANOTQ, yscond, Pw, opBytes{0xf7, 02}},
   1258 	{ANOTW, yscond, Pe, opBytes{0xf7, 02}},
   1259 	{AORB, yxorb, Pb, opBytes{0x0c, 0x80, 01, 0x08, 0x0a}},
   1260 	{AORL, yaddl, Px, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
   1261 	{AORPD, yxm, Pq, opBytes{0x56}},
   1262 	{AORPS, yxm, Pm, opBytes{0x56}},
   1263 	{AORQ, yaddl, Pw, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
   1264 	{AORW, yaddl, Pe, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
   1265 	{AOUTB, yin, Pb, opBytes{0xe6, 0xee}},
   1266 	{AOUTL, yin, Px, opBytes{0xe7, 0xef}},
   1267 	{AOUTW, yin, Pe, opBytes{0xe7, 0xef}},
   1268 	{AOUTSB, ynone, Pb, opBytes{0x6e}},
   1269 	{AOUTSL, ynone, Px, opBytes{0x6f}},
   1270 	{AOUTSW, ynone, Pe, opBytes{0x6f}},
   1271 	{APABSB, yxm_q4, Pq4, opBytes{0x1c}},
   1272 	{APABSD, yxm_q4, Pq4, opBytes{0x1e}},
   1273 	{APABSW, yxm_q4, Pq4, opBytes{0x1d}},
   1274 	{APACKSSLW, ymm, Py1, opBytes{0x6b, Pe, 0x6b}},
   1275 	{APACKSSWB, ymm, Py1, opBytes{0x63, Pe, 0x63}},
   1276 	{APACKUSDW, yxm_q4, Pq4, opBytes{0x2b}},
   1277 	{APACKUSWB, ymm, Py1, opBytes{0x67, Pe, 0x67}},
   1278 	{APADDB, ymm, Py1, opBytes{0xfc, Pe, 0xfc}},
   1279 	{APADDL, ymm, Py1, opBytes{0xfe, Pe, 0xfe}},
   1280 	{APADDQ, yxm, Pe, opBytes{0xd4}},
   1281 	{APADDSB, ymm, Py1, opBytes{0xec, Pe, 0xec}},
   1282 	{APADDSW, ymm, Py1, opBytes{0xed, Pe, 0xed}},
   1283 	{APADDUSB, ymm, Py1, opBytes{0xdc, Pe, 0xdc}},
   1284 	{APADDUSW, ymm, Py1, opBytes{0xdd, Pe, 0xdd}},
   1285 	{APADDW, ymm, Py1, opBytes{0xfd, Pe, 0xfd}},
   1286 	{APALIGNR, ypalignr, Pq, opBytes{0x3a, 0x0f}},
   1287 	{APAND, ymm, Py1, opBytes{0xdb, Pe, 0xdb}},
   1288 	{APANDN, ymm, Py1, opBytes{0xdf, Pe, 0xdf}},
   1289 	{APAUSE, ynone, Px, opBytes{0xf3, 0x90}},
   1290 	{APAVGB, ymm, Py1, opBytes{0xe0, Pe, 0xe0}},
   1291 	{APAVGW, ymm, Py1, opBytes{0xe3, Pe, 0xe3}},
   1292 	{APBLENDW, yxshuf, Pq, opBytes{0x3a, 0x0e, 0}},
   1293 	{APCMPEQB, ymm, Py1, opBytes{0x74, Pe, 0x74}},
   1294 	{APCMPEQL, ymm, Py1, opBytes{0x76, Pe, 0x76}},
   1295 	{APCMPEQQ, yxm_q4, Pq4, opBytes{0x29}},
   1296 	{APCMPEQW, ymm, Py1, opBytes{0x75, Pe, 0x75}},
   1297 	{APCMPGTB, ymm, Py1, opBytes{0x64, Pe, 0x64}},
   1298 	{APCMPGTL, ymm, Py1, opBytes{0x66, Pe, 0x66}},
   1299 	{APCMPGTQ, yxm_q4, Pq4, opBytes{0x37}},
   1300 	{APCMPGTW, ymm, Py1, opBytes{0x65, Pe, 0x65}},
   1301 	{APCMPISTRI, yxshuf, Pq, opBytes{0x3a, 0x63, 0}},
   1302 	{APCMPISTRM, yxshuf, Pq, opBytes{0x3a, 0x62, 0}},
   1303 	{APEXTRW, yextrw, Pq, opBytes{0xc5, 0, 0x3a, 0x15, 0}},
   1304 	{APEXTRB, yextr, Pq, opBytes{0x3a, 0x14, 00}},
   1305 	{APEXTRD, yextr, Pq, opBytes{0x3a, 0x16, 00}},
   1306 	{APEXTRQ, yextr, Pq3, opBytes{0x3a, 0x16, 00}},
   1307 	{APHADDD, ymmxmm0f38, Px, opBytes{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}},
   1308 	{APHADDSW, yxm_q4, Pq4, opBytes{0x03}},
   1309 	{APHADDW, yxm_q4, Pq4, opBytes{0x01}},
   1310 	{APHMINPOSUW, yxm_q4, Pq4, opBytes{0x41}},
   1311 	{APHSUBD, yxm_q4, Pq4, opBytes{0x06}},
   1312 	{APHSUBSW, yxm_q4, Pq4, opBytes{0x07}},
   1313 	{APHSUBW, yxm_q4, Pq4, opBytes{0x05}},
   1314 	{APINSRW, yinsrw, Pq, opBytes{0xc4, 00}},
   1315 	{APINSRB, yinsr, Pq, opBytes{0x3a, 0x20, 00}},
   1316 	{APINSRD, yinsr, Pq, opBytes{0x3a, 0x22, 00}},
   1317 	{APINSRQ, yinsr, Pq3, opBytes{0x3a, 0x22, 00}},
   1318 	{APMADDUBSW, yxm_q4, Pq4, opBytes{0x04}},
   1319 	{APMADDWL, ymm, Py1, opBytes{0xf5, Pe, 0xf5}},
   1320 	{APMAXSB, yxm_q4, Pq4, opBytes{0x3c}},
   1321 	{APMAXSD, yxm_q4, Pq4, opBytes{0x3d}},
   1322 	{APMAXSW, yxm, Pe, opBytes{0xee}},
   1323 	{APMAXUB, yxm, Pe, opBytes{0xde}},
   1324 	{APMAXUD, yxm_q4, Pq4, opBytes{0x3f}},
   1325 	{APMAXUW, yxm_q4, Pq4, opBytes{0x3e}},
   1326 	{APMINSB, yxm_q4, Pq4, opBytes{0x38}},
   1327 	{APMINSD, yxm_q4, Pq4, opBytes{0x39}},
   1328 	{APMINSW, yxm, Pe, opBytes{0xea}},
   1329 	{APMINUB, yxm, Pe, opBytes{0xda}},
   1330 	{APMINUD, yxm_q4, Pq4, opBytes{0x3b}},
   1331 	{APMINUW, yxm_q4, Pq4, opBytes{0x3a}},
   1332 	{APMOVMSKB, ymskb, Px, opBytes{Pe, 0xd7, 0xd7}},
   1333 	{APMOVSXBD, yxm_q4, Pq4, opBytes{0x21}},
   1334 	{APMOVSXBQ, yxm_q4, Pq4, opBytes{0x22}},
   1335 	{APMOVSXBW, yxm_q4, Pq4, opBytes{0x20}},
   1336 	{APMOVSXDQ, yxm_q4, Pq4, opBytes{0x25}},
   1337 	{APMOVSXWD, yxm_q4, Pq4, opBytes{0x23}},
   1338 	{APMOVSXWQ, yxm_q4, Pq4, opBytes{0x24}},
   1339 	{APMOVZXBD, yxm_q4, Pq4, opBytes{0x31}},
   1340 	{APMOVZXBQ, yxm_q4, Pq4, opBytes{0x32}},
   1341 	{APMOVZXBW, yxm_q4, Pq4, opBytes{0x30}},
   1342 	{APMOVZXDQ, yxm_q4, Pq4, opBytes{0x35}},
   1343 	{APMOVZXWD, yxm_q4, Pq4, opBytes{0x33}},
   1344 	{APMOVZXWQ, yxm_q4, Pq4, opBytes{0x34}},
   1345 	{APMULDQ, yxm_q4, Pq4, opBytes{0x28}},
   1346 	{APMULHRSW, yxm_q4, Pq4, opBytes{0x0b}},
   1347 	{APMULHUW, ymm, Py1, opBytes{0xe4, Pe, 0xe4}},
   1348 	{APMULHW, ymm, Py1, opBytes{0xe5, Pe, 0xe5}},
   1349 	{APMULLD, yxm_q4, Pq4, opBytes{0x40}},
   1350 	{APMULLW, ymm, Py1, opBytes{0xd5, Pe, 0xd5}},
   1351 	{APMULULQ, ymm, Py1, opBytes{0xf4, Pe, 0xf4}},
   1352 	{APOPAL, ynone, P32, opBytes{0x61}},
   1353 	{APOPAW, ynone, Pe, opBytes{0x61}},
   1354 	{APOPCNTW, yml_rl, Pef3, opBytes{0xb8}},
   1355 	{APOPCNTL, yml_rl, Pf3, opBytes{0xb8}},
   1356 	{APOPCNTQ, yml_rl, Pfw, opBytes{0xb8}},
   1357 	{APOPFL, ynone, P32, opBytes{0x9d}},
   1358 	{APOPFQ, ynone, Py, opBytes{0x9d}},
   1359 	{APOPFW, ynone, Pe, opBytes{0x9d}},
   1360 	{APOPL, ypopl, P32, opBytes{0x58, 0x8f, 00}},
   1361 	{APOPQ, ypopl, Py, opBytes{0x58, 0x8f, 00}},
   1362 	{APOPW, ypopl, Pe, opBytes{0x58, 0x8f, 00}},
   1363 	{APOR, ymm, Py1, opBytes{0xeb, Pe, 0xeb}},
   1364 	{APSADBW, yxm, Pq, opBytes{0xf6}},
   1365 	{APSHUFHW, yxshuf, Pf3, opBytes{0x70, 00}},
   1366 	{APSHUFL, yxshuf, Pq, opBytes{0x70, 00}},
   1367 	{APSHUFLW, yxshuf, Pf2, opBytes{0x70, 00}},
   1368 	{APSHUFW, ymshuf, Pm, opBytes{0x70, 00}},
   1369 	{APSHUFB, ymshufb, Pq, opBytes{0x38, 0x00}},
   1370 	{APSIGNB, yxm_q4, Pq4, opBytes{0x08}},
   1371 	{APSIGND, yxm_q4, Pq4, opBytes{0x0a}},
   1372 	{APSIGNW, yxm_q4, Pq4, opBytes{0x09}},
   1373 	{APSLLO, ypsdq, Pq, opBytes{0x73, 07}},
   1374 	{APSLLL, yps, Py3, opBytes{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
   1375 	{APSLLQ, yps, Py3, opBytes{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
   1376 	{APSLLW, yps, Py3, opBytes{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
   1377 	{APSRAL, yps, Py3, opBytes{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
   1378 	{APSRAW, yps, Py3, opBytes{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
   1379 	{APSRLO, ypsdq, Pq, opBytes{0x73, 03}},
   1380 	{APSRLL, yps, Py3, opBytes{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
   1381 	{APSRLQ, yps, Py3, opBytes{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
   1382 	{APSRLW, yps, Py3, opBytes{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}},
   1383 	{APSUBB, yxm, Pe, opBytes{0xf8}},
   1384 	{APSUBL, yxm, Pe, opBytes{0xfa}},
   1385 	{APSUBQ, yxm, Pe, opBytes{0xfb}},
   1386 	{APSUBSB, yxm, Pe, opBytes{0xe8}},
   1387 	{APSUBSW, yxm, Pe, opBytes{0xe9}},
   1388 	{APSUBUSB, yxm, Pe, opBytes{0xd8}},
   1389 	{APSUBUSW, yxm, Pe, opBytes{0xd9}},
   1390 	{APSUBW, yxm, Pe, opBytes{0xf9}},
   1391 	{APTEST, yxm_q4, Pq4, opBytes{0x17}},
   1392 	{APUNPCKHBW, ymm, Py1, opBytes{0x68, Pe, 0x68}},
   1393 	{APUNPCKHLQ, ymm, Py1, opBytes{0x6a, Pe, 0x6a}},
   1394 	{APUNPCKHQDQ, yxm, Pe, opBytes{0x6d}},
   1395 	{APUNPCKHWL, ymm, Py1, opBytes{0x69, Pe, 0x69}},
   1396 	{APUNPCKLBW, ymm, Py1, opBytes{0x60, Pe, 0x60}},
   1397 	{APUNPCKLLQ, ymm, Py1, opBytes{0x62, Pe, 0x62}},
   1398 	{APUNPCKLQDQ, yxm, Pe, opBytes{0x6c}},
   1399 	{APUNPCKLWL, ymm, Py1, opBytes{0x61, Pe, 0x61}},
   1400 	{APUSHAL, ynone, P32, opBytes{0x60}},
   1401 	{APUSHAW, ynone, Pe, opBytes{0x60}},
   1402 	{APUSHFL, ynone, P32, opBytes{0x9c}},
   1403 	{APUSHFQ, ynone, Py, opBytes{0x9c}},
   1404 	{APUSHFW, ynone, Pe, opBytes{0x9c}},
   1405 	{APUSHL, ypushl, P32, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
   1406 	{APUSHQ, ypushl, Py, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
   1407 	{APUSHW, ypushl, Pe, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
   1408 	{APXOR, ymm, Py1, opBytes{0xef, Pe, 0xef}},
   1409 	{AQUAD, ybyte, Px, opBytes{8}},
   1410 	{ARCLB, yshb, Pb, opBytes{0xd0, 02, 0xc0, 02, 0xd2, 02}},
   1411 	{ARCLL, yshl, Px, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
   1412 	{ARCLQ, yshl, Pw, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
   1413 	{ARCLW, yshl, Pe, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
   1414 	{ARCPPS, yxm, Pm, opBytes{0x53}},
   1415 	{ARCPSS, yxm, Pf3, opBytes{0x53}},
   1416 	{ARCRB, yshb, Pb, opBytes{0xd0, 03, 0xc0, 03, 0xd2, 03}},
   1417 	{ARCRL, yshl, Px, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
   1418 	{ARCRQ, yshl, Pw, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
   1419 	{ARCRW, yshl, Pe, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
   1420 	{AREP, ynone, Px, opBytes{0xf3}},
   1421 	{AREPN, ynone, Px, opBytes{0xf2}},
   1422 	{obj.ARET, ynone, Px, opBytes{0xc3}},
   1423 	{ARETFW, yret, Pe, opBytes{0xcb, 0xca}},
   1424 	{ARETFL, yret, Px, opBytes{0xcb, 0xca}},
   1425 	{ARETFQ, yret, Pw, opBytes{0xcb, 0xca}},
   1426 	{AROLB, yshb, Pb, opBytes{0xd0, 00, 0xc0, 00, 0xd2, 00}},
   1427 	{AROLL, yshl, Px, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
   1428 	{AROLQ, yshl, Pw, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
   1429 	{AROLW, yshl, Pe, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
   1430 	{ARORB, yshb, Pb, opBytes{0xd0, 01, 0xc0, 01, 0xd2, 01}},
   1431 	{ARORL, yshl, Px, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
   1432 	{ARORQ, yshl, Pw, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
   1433 	{ARORW, yshl, Pe, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
   1434 	{ARSQRTPS, yxm, Pm, opBytes{0x52}},
   1435 	{ARSQRTSS, yxm, Pf3, opBytes{0x52}},
   1436 	{ASAHF, ynone, Px, opBytes{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, // XCHGB AH,AL; PUSH AX; POPFL
   1437 	{ASALB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}},
   1438 	{ASALL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
   1439 	{ASALQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
   1440 	{ASALW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
   1441 	{ASARB, yshb, Pb, opBytes{0xd0, 07, 0xc0, 07, 0xd2, 07}},
   1442 	{ASARL, yshl, Px, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
   1443 	{ASARQ, yshl, Pw, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
   1444 	{ASARW, yshl, Pe, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
   1445 	{ASBBB, yxorb, Pb, opBytes{0x1c, 0x80, 03, 0x18, 0x1a}},
   1446 	{ASBBL, yaddl, Px, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
   1447 	{ASBBQ, yaddl, Pw, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
   1448 	{ASBBW, yaddl, Pe, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
   1449 	{ASCASB, ynone, Pb, opBytes{0xae}},
   1450 	{ASCASL, ynone, Px, opBytes{0xaf}},
   1451 	{ASCASQ, ynone, Pw, opBytes{0xaf}},
   1452 	{ASCASW, ynone, Pe, opBytes{0xaf}},
   1453 	{ASETCC, yscond, Pb, opBytes{0x0f, 0x93, 00}},
   1454 	{ASETCS, yscond, Pb, opBytes{0x0f, 0x92, 00}},
   1455 	{ASETEQ, yscond, Pb, opBytes{0x0f, 0x94, 00}},
   1456 	{ASETGE, yscond, Pb, opBytes{0x0f, 0x9d, 00}},
   1457 	{ASETGT, yscond, Pb, opBytes{0x0f, 0x9f, 00}},
   1458 	{ASETHI, yscond, Pb, opBytes{0x0f, 0x97, 00}},
   1459 	{ASETLE, yscond, Pb, opBytes{0x0f, 0x9e, 00}},
   1460 	{ASETLS, yscond, Pb, opBytes{0x0f, 0x96, 00}},
   1461 	{ASETLT, yscond, Pb, opBytes{0x0f, 0x9c, 00}},
   1462 	{ASETMI, yscond, Pb, opBytes{0x0f, 0x98, 00}},
   1463 	{ASETNE, yscond, Pb, opBytes{0x0f, 0x95, 00}},
   1464 	{ASETOC, yscond, Pb, opBytes{0x0f, 0x91, 00}},
   1465 	{ASETOS, yscond, Pb, opBytes{0x0f, 0x90, 00}},
   1466 	{ASETPC, yscond, Pb, opBytes{0x0f, 0x9b, 00}},
   1467 	{ASETPL, yscond, Pb, opBytes{0x0f, 0x99, 00}},
   1468 	{ASETPS, yscond, Pb, opBytes{0x0f, 0x9a, 00}},
   1469 	{ASHLB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}},
   1470 	{ASHLL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
   1471 	{ASHLQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
   1472 	{ASHLW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
   1473 	{ASHRB, yshb, Pb, opBytes{0xd0, 05, 0xc0, 05, 0xd2, 05}},
   1474 	{ASHRL, yshl, Px, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
   1475 	{ASHRQ, yshl, Pw, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
   1476 	{ASHRW, yshl, Pe, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
   1477 	{ASHUFPD, yxshuf, Pq, opBytes{0xc6, 00}},
   1478 	{ASHUFPS, yxshuf, Pm, opBytes{0xc6, 00}},
   1479 	{ASQRTPD, yxm, Pe, opBytes{0x51}},
   1480 	{ASQRTPS, yxm, Pm, opBytes{0x51}},
   1481 	{ASQRTSD, yxm, Pf2, opBytes{0x51}},
   1482 	{ASQRTSS, yxm, Pf3, opBytes{0x51}},
   1483 	{ASTC, ynone, Px, opBytes{0xf9}},
   1484 	{ASTD, ynone, Px, opBytes{0xfd}},
   1485 	{ASTI, ynone, Px, opBytes{0xfb}},
   1486 	{ASTMXCSR, ysvrs_om, Pm, opBytes{0xae, 03, 0xae, 03}},
   1487 	{ASTOSB, ynone, Pb, opBytes{0xaa}},
   1488 	{ASTOSL, ynone, Px, opBytes{0xab}},
   1489 	{ASTOSQ, ynone, Pw, opBytes{0xab}},
   1490 	{ASTOSW, ynone, Pe, opBytes{0xab}},
   1491 	{ASUBB, yxorb, Pb, opBytes{0x2c, 0x80, 05, 0x28, 0x2a}},
   1492 	{ASUBL, yaddl, Px, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
   1493 	{ASUBPD, yxm, Pe, opBytes{0x5c}},
   1494 	{ASUBPS, yxm, Pm, opBytes{0x5c}},
   1495 	{ASUBQ, yaddl, Pw, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
   1496 	{ASUBSD, yxm, Pf2, opBytes{0x5c}},
   1497 	{ASUBSS, yxm, Pf3, opBytes{0x5c}},
   1498 	{ASUBW, yaddl, Pe, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
   1499 	{ASWAPGS, ynone, Pm, opBytes{0x01, 0xf8}},
   1500 	{ASYSCALL, ynone, Px, opBytes{0x0f, 0x05}}, // fast syscall
   1501 	{ATESTB, yxorb, Pb, opBytes{0xa8, 0xf6, 00, 0x84, 0x84}},
   1502 	{ATESTL, ytestl, Px, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
   1503 	{ATESTQ, ytestl, Pw, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
   1504 	{ATESTW, ytestl, Pe, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
   1505 	{ATPAUSE, ywrfsbase, Pq, opBytes{0xae, 06}},
   1506 	{obj.ATEXT, ytext, Px, opBytes{}},
   1507 	{AUCOMISD, yxm, Pe, opBytes{0x2e}},
   1508 	{AUCOMISS, yxm, Pm, opBytes{0x2e}},
   1509 	{AUNPCKHPD, yxm, Pe, opBytes{0x15}},
   1510 	{AUNPCKHPS, yxm, Pm, opBytes{0x15}},
   1511 	{AUNPCKLPD, yxm, Pe, opBytes{0x14}},
   1512 	{AUNPCKLPS, yxm, Pm, opBytes{0x14}},
   1513 	{AUMONITOR, ywrfsbase, Pf3, opBytes{0xae, 06}},
   1514 	{AVERR, ydivl, Pm, opBytes{0x00, 04}},
   1515 	{AVERW, ydivl, Pm, opBytes{0x00, 05}},
   1516 	{AWAIT, ynone, Px, opBytes{0x9b}},
   1517 	{AWORD, ybyte, Px, opBytes{2}},
   1518 	{AXCHGB, yml_mb, Pb, opBytes{0x86, 0x86}},
   1519 	{AXCHGL, yxchg, Px, opBytes{0x90, 0x90, 0x87, 0x87}},
   1520 	{AXCHGQ, yxchg, Pw, opBytes{0x90, 0x90, 0x87, 0x87}},
   1521 	{AXCHGW, yxchg, Pe, opBytes{0x90, 0x90, 0x87, 0x87}},
   1522 	{AXLAT, ynone, Px, opBytes{0xd7}},
   1523 	{AXORB, yxorb, Pb, opBytes{0x34, 0x80, 06, 0x30, 0x32}},
   1524 	{AXORL, yaddl, Px, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
   1525 	{AXORPD, yxm, Pe, opBytes{0x57}},
   1526 	{AXORPS, yxm, Pm, opBytes{0x57}},
   1527 	{AXORQ, yaddl, Pw, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
   1528 	{AXORW, yaddl, Pe, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
   1529 	{AFMOVB, yfmvx, Px, opBytes{0xdf, 04}},
   1530 	{AFMOVBP, yfmvp, Px, opBytes{0xdf, 06}},
   1531 	{AFMOVD, yfmvd, Px, opBytes{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
   1532 	{AFMOVDP, yfmvdp, Px, opBytes{0xdd, 03, 0xdd, 03}},
   1533 	{AFMOVF, yfmvf, Px, opBytes{0xd9, 00, 0xd9, 02}},
   1534 	{AFMOVFP, yfmvp, Px, opBytes{0xd9, 03}},
   1535 	{AFMOVL, yfmvf, Px, opBytes{0xdb, 00, 0xdb, 02}},
   1536 	{AFMOVLP, yfmvp, Px, opBytes{0xdb, 03}},
   1537 	{AFMOVV, yfmvx, Px, opBytes{0xdf, 05}},
   1538 	{AFMOVVP, yfmvp, Px, opBytes{0xdf, 07}},
   1539 	{AFMOVW, yfmvf, Px, opBytes{0xdf, 00, 0xdf, 02}},
   1540 	{AFMOVWP, yfmvp, Px, opBytes{0xdf, 03}},
   1541 	{AFMOVX, yfmvx, Px, opBytes{0xdb, 05}},
   1542 	{AFMOVXP, yfmvp, Px, opBytes{0xdb, 07}},
   1543 	{AFCMOVCC, yfcmv, Px, opBytes{0xdb, 00}},
   1544 	{AFCMOVCS, yfcmv, Px, opBytes{0xda, 00}},
   1545 	{AFCMOVEQ, yfcmv, Px, opBytes{0xda, 01}},
   1546 	{AFCMOVHI, yfcmv, Px, opBytes{0xdb, 02}},
   1547 	{AFCMOVLS, yfcmv, Px, opBytes{0xda, 02}},
   1548 	{AFCMOVB, yfcmv, Px, opBytes{0xda, 00}},
   1549 	{AFCMOVBE, yfcmv, Px, opBytes{0xda, 02}},
   1550 	{AFCMOVNB, yfcmv, Px, opBytes{0xdb, 00}},
   1551 	{AFCMOVNBE, yfcmv, Px, opBytes{0xdb, 02}},
   1552 	{AFCMOVE, yfcmv, Px, opBytes{0xda, 01}},
   1553 	{AFCMOVNE, yfcmv, Px, opBytes{0xdb, 01}},
   1554 	{AFCMOVNU, yfcmv, Px, opBytes{0xdb, 03}},
   1555 	{AFCMOVU, yfcmv, Px, opBytes{0xda, 03}},
   1556 	{AFCMOVUN, yfcmv, Px, opBytes{0xda, 03}},
   1557 	{AFCOMD, yfadd, Px, opBytes{0xdc, 02, 0xd8, 02, 0xdc, 02}},  // botch
   1558 	{AFCOMDP, yfadd, Px, opBytes{0xdc, 03, 0xd8, 03, 0xdc, 03}}, // botch
   1559 	{AFCOMDPP, ycompp, Px, opBytes{0xde, 03}},
   1560 	{AFCOMF, yfmvx, Px, opBytes{0xd8, 02}},
   1561 	{AFCOMFP, yfmvx, Px, opBytes{0xd8, 03}},
   1562 	{AFCOMI, yfcmv, Px, opBytes{0xdb, 06}},
   1563 	{AFCOMIP, yfcmv, Px, opBytes{0xdf, 06}},
   1564 	{AFCOML, yfmvx, Px, opBytes{0xda, 02}},
   1565 	{AFCOMLP, yfmvx, Px, opBytes{0xda, 03}},
   1566 	{AFCOMW, yfmvx, Px, opBytes{0xde, 02}},
   1567 	{AFCOMWP, yfmvx, Px, opBytes{0xde, 03}},
   1568 	{AFUCOM, ycompp, Px, opBytes{0xdd, 04}},
   1569 	{AFUCOMI, ycompp, Px, opBytes{0xdb, 05}},
   1570 	{AFUCOMIP, ycompp, Px, opBytes{0xdf, 05}},
   1571 	{AFUCOMP, ycompp, Px, opBytes{0xdd, 05}},
   1572 	{AFUCOMPP, ycompp, Px, opBytes{0xda, 13}},
   1573 	{AFADDDP, ycompp, Px, opBytes{0xde, 00}},
   1574 	{AFADDW, yfmvx, Px, opBytes{0xde, 00}},
   1575 	{AFADDL, yfmvx, Px, opBytes{0xda, 00}},
   1576 	{AFADDF, yfmvx, Px, opBytes{0xd8, 00}},
   1577 	{AFADDD, yfadd, Px, opBytes{0xdc, 00, 0xd8, 00, 0xdc, 00}},
   1578 	{AFMULDP, ycompp, Px, opBytes{0xde, 01}},
   1579 	{AFMULW, yfmvx, Px, opBytes{0xde, 01}},
   1580 	{AFMULL, yfmvx, Px, opBytes{0xda, 01}},
   1581 	{AFMULF, yfmvx, Px, opBytes{0xd8, 01}},
   1582 	{AFMULD, yfadd, Px, opBytes{0xdc, 01, 0xd8, 01, 0xdc, 01}},
   1583 	{AFSUBDP, ycompp, Px, opBytes{0xde, 05}},
   1584 	{AFSUBW, yfmvx, Px, opBytes{0xde, 04}},
   1585 	{AFSUBL, yfmvx, Px, opBytes{0xda, 04}},
   1586 	{AFSUBF, yfmvx, Px, opBytes{0xd8, 04}},
   1587 	{AFSUBD, yfadd, Px, opBytes{0xdc, 04, 0xd8, 04, 0xdc, 05}},
   1588 	{AFSUBRDP, ycompp, Px, opBytes{0xde, 04}},
   1589 	{AFSUBRW, yfmvx, Px, opBytes{0xde, 05}},
   1590 	{AFSUBRL, yfmvx, Px, opBytes{0xda, 05}},
   1591 	{AFSUBRF, yfmvx, Px, opBytes{0xd8, 05}},
   1592 	{AFSUBRD, yfadd, Px, opBytes{0xdc, 05, 0xd8, 05, 0xdc, 04}},
   1593 	{AFDIVDP, ycompp, Px, opBytes{0xde, 07}},
   1594 	{AFDIVW, yfmvx, Px, opBytes{0xde, 06}},
   1595 	{AFDIVL, yfmvx, Px, opBytes{0xda, 06}},
   1596 	{AFDIVF, yfmvx, Px, opBytes{0xd8, 06}},
   1597 	{AFDIVD, yfadd, Px, opBytes{0xdc, 06, 0xd8, 06, 0xdc, 07}},
   1598 	{AFDIVRDP, ycompp, Px, opBytes{0xde, 06}},
   1599 	{AFDIVRW, yfmvx, Px, opBytes{0xde, 07}},
   1600 	{AFDIVRL, yfmvx, Px, opBytes{0xda, 07}},
   1601 	{AFDIVRF, yfmvx, Px, opBytes{0xd8, 07}},
   1602 	{AFDIVRD, yfadd, Px, opBytes{0xdc, 07, 0xd8, 07, 0xdc, 06}},
   1603 	{AFXCHD, yfxch, Px, opBytes{0xd9, 01, 0xd9, 01}},
   1604 	{AFFREE, nil, 0, opBytes{}},
   1605 	{AFLDCW, ysvrs_mo, Px, opBytes{0xd9, 05, 0xd9, 05}},
   1606 	{AFLDENV, ysvrs_mo, Px, opBytes{0xd9, 04, 0xd9, 04}},
   1607 	{AFRSTOR, ysvrs_mo, Px, opBytes{0xdd, 04, 0xdd, 04}},
   1608 	{AFSAVE, ysvrs_om, Px, opBytes{0xdd, 06, 0xdd, 06}},
   1609 	{AFSTCW, ysvrs_om, Px, opBytes{0xd9, 07, 0xd9, 07}},
   1610 	{AFSTENV, ysvrs_om, Px, opBytes{0xd9, 06, 0xd9, 06}},
   1611 	{AFSTSW, ystsw, Px, opBytes{0xdd, 07, 0xdf, 0xe0}},
   1612 	{AF2XM1, ynone, Px, opBytes{0xd9, 0xf0}},
   1613 	{AFABS, ynone, Px, opBytes{0xd9, 0xe1}},
   1614 	{AFBLD, ysvrs_mo, Px, opBytes{0xdf, 04}},
   1615 	{AFBSTP, yclflush, Px, opBytes{0xdf, 06}},
   1616 	{AFCHS, ynone, Px, opBytes{0xd9, 0xe0}},
   1617 	{AFCLEX, ynone, Px, opBytes{0xdb, 0xe2}},
   1618 	{AFCOS, ynone, Px, opBytes{0xd9, 0xff}},
   1619 	{AFDECSTP, ynone, Px, opBytes{0xd9, 0xf6}},
   1620 	{AFINCSTP, ynone, Px, opBytes{0xd9, 0xf7}},
   1621 	{AFINIT, ynone, Px, opBytes{0xdb, 0xe3}},
   1622 	{AFLD1, ynone, Px, opBytes{0xd9, 0xe8}},
   1623 	{AFLDL2E, ynone, Px, opBytes{0xd9, 0xea}},
   1624 	{AFLDL2T, ynone, Px, opBytes{0xd9, 0xe9}},
   1625 	{AFLDLG2, ynone, Px, opBytes{0xd9, 0xec}},
   1626 	{AFLDLN2, ynone, Px, opBytes{0xd9, 0xed}},
   1627 	{AFLDPI, ynone, Px, opBytes{0xd9, 0xeb}},
   1628 	{AFLDZ, ynone, Px, opBytes{0xd9, 0xee}},
   1629 	{AFNOP, ynone, Px, opBytes{0xd9, 0xd0}},
   1630 	{AFPATAN, ynone, Px, opBytes{0xd9, 0xf3}},
   1631 	{AFPREM, ynone, Px, opBytes{0xd9, 0xf8}},
   1632 	{AFPREM1, ynone, Px, opBytes{0xd9, 0xf5}},
   1633 	{AFPTAN, ynone, Px, opBytes{0xd9, 0xf2}},
   1634 	{AFRNDINT, ynone, Px, opBytes{0xd9, 0xfc}},
   1635 	{AFSCALE, ynone, Px, opBytes{0xd9, 0xfd}},
   1636 	{AFSIN, ynone, Px, opBytes{0xd9, 0xfe}},
   1637 	{AFSINCOS, ynone, Px, opBytes{0xd9, 0xfb}},
   1638 	{AFSQRT, ynone, Px, opBytes{0xd9, 0xfa}},
   1639 	{AFTST, ynone, Px, opBytes{0xd9, 0xe4}},
   1640 	{AFXAM, ynone, Px, opBytes{0xd9, 0xe5}},
   1641 	{AFXTRACT, ynone, Px, opBytes{0xd9, 0xf4}},
   1642 	{AFYL2X, ynone, Px, opBytes{0xd9, 0xf1}},
   1643 	{AFYL2XP1, ynone, Px, opBytes{0xd9, 0xf9}},
   1644 	{ACMPXCHGB, yrb_mb, Pb, opBytes{0x0f, 0xb0}},
   1645 	{ACMPXCHGL, yrl_ml, Px, opBytes{0x0f, 0xb1}},
   1646 	{ACMPXCHGW, yrl_ml, Pe, opBytes{0x0f, 0xb1}},
   1647 	{ACMPXCHGQ, yrl_ml, Pw, opBytes{0x0f, 0xb1}},
   1648 	{ACMPXCHG8B, yscond, Pm, opBytes{0xc7, 01}},
   1649 	{ACMPXCHG16B, yscond, Pw, opBytes{0x0f, 0xc7, 01}},
   1650 	{AINVD, ynone, Pm, opBytes{0x08}},
   1651 	{AINVLPG, ydivb, Pm, opBytes{0x01, 07}},
   1652 	{AINVPCID, ycrc32l, Pe, opBytes{0x0f, 0x38, 0x82, 0}},
   1653 	{ALFENCE, ynone, Pm, opBytes{0xae, 0xe8}},
   1654 	{AMFENCE, ynone, Pm, opBytes{0xae, 0xf0}},
   1655 	{AMOVNTIL, yrl_ml, Pm, opBytes{0xc3}},
   1656 	{AMOVNTIQ, yrl_ml, Pw, opBytes{0x0f, 0xc3}},
   1657 	{ARDPKRU, ynone, Pm, opBytes{0x01, 0xee, 0}},
   1658 	{ARDMSR, ynone, Pm, opBytes{0x32}},
   1659 	{ARDPMC, ynone, Pm, opBytes{0x33}},
   1660 	{ARDTSC, ynone, Pm, opBytes{0x31}},
   1661 	{ARSM, ynone, Pm, opBytes{0xaa}},
   1662 	{ASFENCE, ynone, Pm, opBytes{0xae, 0xf8}},
   1663 	{ASYSRET, ynone, Pm, opBytes{0x07}},
   1664 	{AWBINVD, ynone, Pm, opBytes{0x09}},
   1665 	{AWRMSR, ynone, Pm, opBytes{0x30}},
   1666 	{AWRPKRU, ynone, Pm, opBytes{0x01, 0xef, 0}},
   1667 	{AXADDB, yrb_mb, Pb, opBytes{0x0f, 0xc0}},
   1668 	{AXADDL, yrl_ml, Px, opBytes{0x0f, 0xc1}},
   1669 	{AXADDQ, yrl_ml, Pw, opBytes{0x0f, 0xc1}},
   1670 	{AXADDW, yrl_ml, Pe, opBytes{0x0f, 0xc1}},
   1671 	{ACRC32B, ycrc32b, Px, opBytes{0xf2, 0x0f, 0x38, 0xf0, 0}},
   1672 	{ACRC32L, ycrc32l, Px, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
   1673 	{ACRC32Q, ycrc32l, Pw, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
   1674 	{ACRC32W, ycrc32l, Pe, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
   1675 	{APREFETCHT0, yprefetch, Pm, opBytes{0x18, 01}},
   1676 	{APREFETCHT1, yprefetch, Pm, opBytes{0x18, 02}},
   1677 	{APREFETCHT2, yprefetch, Pm, opBytes{0x18, 03}},
   1678 	{APREFETCHNTA, yprefetch, Pm, opBytes{0x18, 00}},
   1679 	{AMOVQL, yrl_ml, Px, opBytes{0x89}},
   1680 	{obj.AUNDEF, ynone, Px, opBytes{0x0f, 0x0b}},
   1681 	{AAESENC, yaes, Pq, opBytes{0x38, 0xdc, 0}},
   1682 	{AAESENCLAST, yaes, Pq, opBytes{0x38, 0xdd, 0}},
   1683 	{AAESDEC, yaes, Pq, opBytes{0x38, 0xde, 0}},
   1684 	{AAESDECLAST, yaes, Pq, opBytes{0x38, 0xdf, 0}},
   1685 	{AAESIMC, yaes, Pq, opBytes{0x38, 0xdb, 0}},
   1686 	{AAESKEYGENASSIST, yxshuf, Pq, opBytes{0x3a, 0xdf, 0}},
   1687 	{AROUNDPD, yxshuf, Pq, opBytes{0x3a, 0x09, 0}},
   1688 	{AROUNDPS, yxshuf, Pq, opBytes{0x3a, 0x08, 0}},
   1689 	{AROUNDSD, yxshuf, Pq, opBytes{0x3a, 0x0b, 0}},
   1690 	{AROUNDSS, yxshuf, Pq, opBytes{0x3a, 0x0a, 0}},
   1691 	{APSHUFD, yxshuf, Pq, opBytes{0x70, 0}},
   1692 	{APCLMULQDQ, yxshuf, Pq, opBytes{0x3a, 0x44, 0}},
   1693 	{APCMPESTRI, yxshuf, Pq, opBytes{0x3a, 0x61, 0}},
   1694 	{APCMPESTRM, yxshuf, Pq, opBytes{0x3a, 0x60, 0}},
   1695 	{AMOVDDUP, yxm, Pf2, opBytes{0x12}},
   1696 	{AMOVSHDUP, yxm, Pf3, opBytes{0x16}},
   1697 	{AMOVSLDUP, yxm, Pf3, opBytes{0x12}},
   1698 	{ARDTSCP, ynone, Pm, opBytes{0x01, 0xf9, 0}},
   1699 	{ASTAC, ynone, Pm, opBytes{0x01, 0xcb, 0}},
   1700 	{AUD1, ynone, Pm, opBytes{0xb9, 0}},
   1701 	{AUD2, ynone, Pm, opBytes{0x0b, 0}},
   1702 	{AUMWAIT, ywrfsbase, Pf2, opBytes{0xae, 06}},
   1703 	{ASYSENTER, ynone, Px, opBytes{0x0f, 0x34, 0}},
   1704 	{ASYSENTER64, ynone, Pw, opBytes{0x0f, 0x34, 0}},
   1705 	{ASYSEXIT, ynone, Px, opBytes{0x0f, 0x35, 0}},
   1706 	{ASYSEXIT64, ynone, Pw, opBytes{0x0f, 0x35, 0}},
   1707 	{ALMSW, ydivl, Pm, opBytes{0x01, 06}},
   1708 	{ALLDT, ydivl, Pm, opBytes{0x00, 02}},
   1709 	{ALIDT, ysvrs_mo, Pm, opBytes{0x01, 03}},
   1710 	{ALGDT, ysvrs_mo, Pm, opBytes{0x01, 02}},
   1711 	{ATZCNTW, ycrc32l, Pe, opBytes{0xf3, 0x0f, 0xbc, 0}},
   1712 	{ATZCNTL, ycrc32l, Px, opBytes{0xf3, 0x0f, 0xbc, 0}},
   1713 	{ATZCNTQ, ycrc32l, Pw, opBytes{0xf3, 0x0f, 0xbc, 0}},
   1714 	{AXRSTOR, ydivl, Px, opBytes{0x0f, 0xae, 05}},
   1715 	{AXRSTOR64, ydivl, Pw, opBytes{0x0f, 0xae, 05}},
   1716 	{AXRSTORS, ydivl, Px, opBytes{0x0f, 0xc7, 03}},
   1717 	{AXRSTORS64, ydivl, Pw, opBytes{0x0f, 0xc7, 03}},
   1718 	{AXSAVE, yclflush, Px, opBytes{0x0f, 0xae, 04}},
   1719 	{AXSAVE64, yclflush, Pw, opBytes{0x0f, 0xae, 04}},
   1720 	{AXSAVEOPT, yclflush, Px, opBytes{0x0f, 0xae, 06}},
   1721 	{AXSAVEOPT64, yclflush, Pw, opBytes{0x0f, 0xae, 06}},
   1722 	{AXSAVEC, yclflush, Px, opBytes{0x0f, 0xc7, 04}},
   1723 	{AXSAVEC64, yclflush, Pw, opBytes{0x0f, 0xc7, 04}},
   1724 	{AXSAVES, yclflush, Px, opBytes{0x0f, 0xc7, 05}},
   1725 	{AXSAVES64, yclflush, Pw, opBytes{0x0f, 0xc7, 05}},
   1726 	{ASGDT, yclflush, Pm, opBytes{0x01, 00}},
   1727 	{ASIDT, yclflush, Pm, opBytes{0x01, 01}},
   1728 	{ARDRANDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 06}},
   1729 	{ARDRANDL, yrdrand, Px, opBytes{0x0f, 0xc7, 06}},
   1730 	{ARDRANDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 06}},
   1731 	{ARDSEEDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 07}},
   1732 	{ARDSEEDL, yrdrand, Px, opBytes{0x0f, 0xc7, 07}},
   1733 	{ARDSEEDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 07}},
   1734 	{ASTRW, yincq, Pe, opBytes{0x0f, 0x00, 01}},
   1735 	{ASTRL, yincq, Px, opBytes{0x0f, 0x00, 01}},
   1736 	{ASTRQ, yincq, Pw, opBytes{0x0f, 0x00, 01}},
   1737 	{AXSETBV, ynone, Pm, opBytes{0x01, 0xd1, 0}},
   1738 	{AMOVBEWW, ymovbe, Pq, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}},
   1739 	{AMOVBELL, ymovbe, Pm, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}},
   1740 	{AMOVBEQQ, ymovbe, Pw, opBytes{0x0f, 0x38, 0xf0, 0, 0x0f, 0x38, 0xf1, 0}},
   1741 	{ANOPW, ydivl, Pe, opBytes{0x0f, 0x1f, 00}},
   1742 	{ANOPL, ydivl, Px, opBytes{0x0f, 0x1f, 00}},
   1743 	{ASLDTW, yincq, Pe, opBytes{0x0f, 0x00, 00}},
   1744 	{ASLDTL, yincq, Px, opBytes{0x0f, 0x00, 00}},
   1745 	{ASLDTQ, yincq, Pw, opBytes{0x0f, 0x00, 00}},
   1746 	{ASMSWW, yincq, Pe, opBytes{0x0f, 0x01, 04}},
   1747 	{ASMSWL, yincq, Px, opBytes{0x0f, 0x01, 04}},
   1748 	{ASMSWQ, yincq, Pw, opBytes{0x0f, 0x01, 04}},
   1749 	{ABLENDVPS, yblendvpd, Pq4, opBytes{0x14}},
   1750 	{ABLENDVPD, yblendvpd, Pq4, opBytes{0x15}},
   1751 	{APBLENDVB, yblendvpd, Pq4, opBytes{0x10}},
   1752 	{ASHA1MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xc9, 0}},
   1753 	{ASHA1MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xca, 0}},
   1754 	{ASHA1NEXTE, yaes, Px, opBytes{0x0f, 0x38, 0xc8, 0}},
   1755 	{ASHA256MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xcc, 0}},
   1756 	{ASHA256MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xcd, 0}},
   1757 	{ASHA1RNDS4, ysha1rnds4, Pm, opBytes{0x3a, 0xcc, 0}},
   1758 	{ASHA256RNDS2, ysha256rnds2, Px, opBytes{0x0f, 0x38, 0xcb, 0}},
   1759 	{ARDFSBASEL, yrdrand, Pf3, opBytes{0xae, 00}},
   1760 	{ARDFSBASEQ, yrdrand, Pfw, opBytes{0xae, 00}},
   1761 	{ARDGSBASEL, yrdrand, Pf3, opBytes{0xae, 01}},
   1762 	{ARDGSBASEQ, yrdrand, Pfw, opBytes{0xae, 01}},
   1763 	{AWRFSBASEL, ywrfsbase, Pf3, opBytes{0xae, 02}},
   1764 	{AWRFSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 02}},
   1765 	{AWRGSBASEL, ywrfsbase, Pf3, opBytes{0xae, 03}},
   1766 	{AWRGSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 03}},
   1767 	{ALFSW, ym_rl, Pe, opBytes{0x0f, 0xb4}},
   1768 	{ALFSL, ym_rl, Px, opBytes{0x0f, 0xb4}},
   1769 	{ALFSQ, ym_rl, Pw, opBytes{0x0f, 0xb4}},
   1770 	{ALGSW, ym_rl, Pe, opBytes{0x0f, 0xb5}},
   1771 	{ALGSL, ym_rl, Px, opBytes{0x0f, 0xb5}},
   1772 	{ALGSQ, ym_rl, Pw, opBytes{0x0f, 0xb5}},
   1773 	{ALSSW, ym_rl, Pe, opBytes{0x0f, 0xb2}},
   1774 	{ALSSL, ym_rl, Px, opBytes{0x0f, 0xb2}},
   1775 	{ALSSQ, ym_rl, Pw, opBytes{0x0f, 0xb2}},
   1776 
   1777 	{ABLENDPD, yxshuf, Pq, opBytes{0x3a, 0x0d, 0}},
   1778 	{ABLENDPS, yxshuf, Pq, opBytes{0x3a, 0x0c, 0}},
   1779 	{AXACQUIRE, ynone, Px, opBytes{0xf2}},
   1780 	{AXRELEASE, ynone, Px, opBytes{0xf3}},
   1781 	{AXBEGIN, yxbegin, Px, opBytes{0xc7, 0xf8}},
   1782 	{AXABORT, yxabort, Px, opBytes{0xc6, 0xf8}},
   1783 	{AXEND, ynone, Px, opBytes{0x0f, 01, 0xd5}},
   1784 	{AXTEST, ynone, Px, opBytes{0x0f, 01, 0xd6}},
   1785 	{AXGETBV, ynone, Pm, opBytes{01, 0xd0}},
   1786 	{obj.AFUNCDATA, yfuncdata, Px, opBytes{0, 0}},
   1787 	{obj.APCDATA, ypcdata, Px, opBytes{0, 0}},
   1788 	{obj.ADUFFCOPY, yduff, Px, opBytes{0xe8}},
   1789 	{obj.ADUFFZERO, yduff, Px, opBytes{0xe8}},
   1790 
   1791 	{obj.AEND, nil, 0, opBytes{}},
   1792 	{0, nil, 0, opBytes{}},
   1793 }
   1794 
// opindex maps an opcode, masked with obj.AMask, to its encoding table entry.
// It is filled in by instinit from avxOptab and optab.
var opindex [(ALAST + 1) & obj.AMask]*Optab
   1796 
   1797 // useAbs reports whether s describes a symbol that must avoid pc-relative addressing.
   1798 // This happens on systems like Solaris that call .so functions instead of system calls.
   1799 // It does not seem to be necessary for any other systems. This is probably working
   1800 // around a Solaris-specific bug that should be fixed differently, but we don't know
   1801 // what that bug is. And this does fix it.
   1802 func useAbs(ctxt *obj.Link, s *obj.LSym) bool {
   1803 	if ctxt.Headtype == objabi.Hsolaris {
   1804 		// All the Solaris dynamic imports from libc.so begin with "libc_".
   1805 		return strings.HasPrefix(s.Name, "libc_")
   1806 	}
   1807 	return ctxt.Arch.Family == sys.I386 && !ctxt.Flag_shared
   1808 }
   1809 
   1810 // single-instruction no-ops of various lengths.
   1811 // constructed by hand and disassembled with gdb to verify.
   1812 // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
   1813 var nop = [][16]uint8{
   1814 	{0x90},
   1815 	{0x66, 0x90},
   1816 	{0x0F, 0x1F, 0x00},
   1817 	{0x0F, 0x1F, 0x40, 0x00},
   1818 	{0x0F, 0x1F, 0x44, 0x00, 0x00},
   1819 	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
   1820 	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
   1821 	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
   1822 	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
   1823 }
   1824 
   1825 // Native Client rejects the repeated 0x66 prefix.
   1826 // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
   1827 func fillnop(p []byte, n int) {
   1828 	var m int
   1829 
   1830 	for n > 0 {
   1831 		m = n
   1832 		if m > len(nop) {
   1833 			m = len(nop)
   1834 		}
   1835 		copy(p[:m], nop[m-1][:m])
   1836 		p = p[m:]
   1837 		n -= m
   1838 	}
   1839 }
   1840 
   1841 func noppad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
   1842 	s.Grow(int64(c) + int64(pad))
   1843 	fillnop(s.P[c:], int(pad))
   1844 	return c + pad
   1845 }
   1846 
   1847 func spadjop(ctxt *obj.Link, l, q obj.As) obj.As {
   1848 	if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 {
   1849 		return l
   1850 	}
   1851 	return q
   1852 }
   1853 
   1854 // If the environment variable GOAMD64=alignedjumps the assembler will ensure that
   1855 // no standalone or macro-fused jump will straddle or end on a 32 byte boundary
   1856 // by inserting NOPs before the jumps
   1857 func isJump(p *obj.Prog) bool {
   1858 	return p.To.Target() != nil || p.As == obj.AJMP || p.As == obj.ACALL ||
   1859 		p.As == obj.ARET || p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO
   1860 }
   1861 
   1862 // lookForJCC returns the first real instruction starting from p, if that instruction is a conditional
   1863 // jump. Otherwise, nil is returned.
   1864 func lookForJCC(p *obj.Prog) *obj.Prog {
   1865 	// Skip any PCDATA, FUNCDATA or NOP instructions
   1866 	var q *obj.Prog
   1867 	for q = p.Link; q != nil && (q.As == obj.APCDATA || q.As == obj.AFUNCDATA || q.As == obj.ANOP); q = q.Link {
   1868 	}
   1869 
   1870 	if q == nil || q.To.Target() == nil || p.As == obj.AJMP || p.As == obj.ACALL {
   1871 		return nil
   1872 	}
   1873 
   1874 	switch q.As {
   1875 	case AJOS, AJOC, AJCS, AJCC, AJEQ, AJNE, AJLS, AJHI,
   1876 		AJMI, AJPL, AJPS, AJPC, AJLT, AJGE, AJLE, AJGT:
   1877 	default:
   1878 		return nil
   1879 	}
   1880 
   1881 	return q
   1882 }
   1883 
   1884 // fusedJump determines whether p can be fused with a subsequent conditional jump instruction.
   1885 // If it can, we return true followed by the total size of the fused jump. If it can't, we return false.
   1886 // Macro fusion rules are derived from the Intel Optimization Manual (April 2019) section 3.4.2.2.
   1887 func fusedJump(p *obj.Prog) (bool, uint8) {
   1888 	var fusedSize uint8
   1889 
   1890 	// The first instruction in a macro fused pair may be preceeded by the LOCK prefix,
   1891 	// or possibly an XACQUIRE/XRELEASE prefix followed by a LOCK prefix. If it is, we
   1892 	// need to be careful to insert any padding before the locks rather than directly after them.
   1893 
   1894 	if p.As == AXRELEASE || p.As == AXACQUIRE {
   1895 		fusedSize += p.Isize
   1896 		for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link {
   1897 		}
   1898 		if p == nil {
   1899 			return false, 0
   1900 		}
   1901 	}
   1902 	if p.As == ALOCK {
   1903 		fusedSize += p.Isize
   1904 		for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link {
   1905 		}
   1906 		if p == nil {
   1907 			return false, 0
   1908 		}
   1909 	}
   1910 	cmp := p.As == ACMPB || p.As == ACMPL || p.As == ACMPQ || p.As == ACMPW
   1911 
   1912 	cmpAddSub := p.As == AADDB || p.As == AADDL || p.As == AADDW || p.As == AADDQ ||
   1913 		p.As == ASUBB || p.As == ASUBL || p.As == ASUBW || p.As == ASUBQ || cmp
   1914 
   1915 	testAnd := p.As == ATESTB || p.As == ATESTL || p.As == ATESTQ || p.As == ATESTW ||
   1916 		p.As == AANDB || p.As == AANDL || p.As == AANDQ || p.As == AANDW
   1917 
   1918 	incDec := p.As == AINCB || p.As == AINCL || p.As == AINCQ || p.As == AINCW ||
   1919 		p.As == ADECB || p.As == ADECL || p.As == ADECQ || p.As == ADECW
   1920 
   1921 	if !cmpAddSub && !testAnd && !incDec {
   1922 		return false, 0
   1923 	}
   1924 
   1925 	if !incDec {
   1926 		var argOne obj.AddrType
   1927 		var argTwo obj.AddrType
   1928 		if cmp {
   1929 			argOne = p.From.Type
   1930 			argTwo = p.To.Type
   1931 		} else {
   1932 			argOne = p.To.Type
   1933 			argTwo = p.From.Type
   1934 		}
   1935 		if argOne == obj.TYPE_REG {
   1936 			if argTwo != obj.TYPE_REG && argTwo != obj.TYPE_CONST && argTwo != obj.TYPE_MEM {
   1937 				return false, 0
   1938 			}
   1939 		} else if argOne == obj.TYPE_MEM {
   1940 			if argTwo != obj.TYPE_REG {
   1941 				return false, 0
   1942 			}
   1943 		} else {
   1944 			return false, 0
   1945 		}
   1946 	}
   1947 
   1948 	fusedSize += p.Isize
   1949 	jmp := lookForJCC(p)
   1950 	if jmp == nil {
   1951 		return false, 0
   1952 	}
   1953 
   1954 	fusedSize += jmp.Isize
   1955 
   1956 	if testAnd {
   1957 		return true, fusedSize
   1958 	}
   1959 
   1960 	if jmp.As == AJOC || jmp.As == AJOS || jmp.As == AJMI ||
   1961 		jmp.As == AJPL || jmp.As == AJPS || jmp.As == AJPC {
   1962 		return false, 0
   1963 	}
   1964 
   1965 	if cmpAddSub {
   1966 		return true, fusedSize
   1967 	}
   1968 
   1969 	if jmp.As == AJCS || jmp.As == AJCC || jmp.As == AJHI || jmp.As == AJLS {
   1970 		return false, 0
   1971 	}
   1972 
   1973 	return true, fusedSize
   1974 }
   1975 
// padJumpsCtx is the alignment boundary, in bytes, that padJump keeps jumps
// from crossing or ending on. A value of 0 disables jump padding.
type padJumpsCtx int32
   1977 
   1978 func makePjcCtx(ctxt *obj.Link) padJumpsCtx {
   1979 	// Disable jump padding on 32 bit builds by settting
   1980 	// padJumps to 0.
   1981 	if ctxt.Arch.Family == sys.I386 {
   1982 		return padJumpsCtx(0)
   1983 	}
   1984 
   1985 	// Disable jump padding for hand written assembly code.
   1986 	if ctxt.IsAsm {
   1987 		return padJumpsCtx(0)
   1988 	}
   1989 
   1990 	if objabi.GOAMD64 != "alignedjumps" {
   1991 		return padJumpsCtx(0)
   1992 
   1993 	}
   1994 
   1995 	return padJumpsCtx(32)
   1996 }
   1997 
   1998 // padJump detects whether the instruction being assembled is a standalone or a macro-fused
   1999 // jump that needs to be padded. If it is, NOPs are inserted to ensure that the jump does
   2000 // not cross or end on a 32 byte boundary.
   2001 func (pjc padJumpsCtx) padJump(ctxt *obj.Link, s *obj.LSym, p *obj.Prog, c int32) int32 {
   2002 	if pjc == 0 {
   2003 		return c
   2004 	}
   2005 
   2006 	var toPad int32
   2007 	fj, fjSize := fusedJump(p)
   2008 	mask := int32(pjc - 1)
   2009 	if fj {
   2010 		if (c&mask)+int32(fjSize) >= int32(pjc) {
   2011 			toPad = int32(pjc) - (c & mask)
   2012 		}
   2013 	} else if isJump(p) {
   2014 		if (c&mask)+int32(p.Isize) >= int32(pjc) {
   2015 			toPad = int32(pjc) - (c & mask)
   2016 		}
   2017 	}
   2018 	if toPad <= 0 {
   2019 		return c
   2020 	}
   2021 
   2022 	return noppad(ctxt, s, c, toPad)
   2023 }
   2024 
   2025 // reAssemble is called if an instruction's size changes during assembly. If
   2026 // it does and the instruction is a standalone or a macro-fused jump we need to
   2027 // reassemble.
   2028 func (pjc padJumpsCtx) reAssemble(p *obj.Prog) bool {
   2029 	if pjc == 0 {
   2030 		return false
   2031 	}
   2032 
   2033 	fj, _ := fusedJump(p)
   2034 	return fj || isJump(p)
   2035 }
   2036 
// nopPad records padding inserted by span6 during an assembly pass, so that
// it can be spliced into the Prog list as a NOP instruction afterwards.
type nopPad struct {
	p *obj.Prog // Instruction before the pad
	n int32     // Size of the pad
}
   2041 
// span6 assembles the function in s into machine code.
//
// It first normalizes the Prog list (branches without a target are pointed at
// themselves, ADJSP is lowered to an ADD/SUB on SP or a NOP, and with
// ctxt.Retpoline indirect CALL/JMP through a register are redirected to a
// runtime.retpoline* symbol). It then encodes every instruction with
// ab.asmins, repeating the whole pass for as long as a pass requests
// re-assembly (a short forward branch turned out to be too far, or a jump
// subject to padding changed size). On return s.P holds the encoded bytes and
// s.Size their length. Padding inserted in the final pass is spliced into the
// Prog list as NOP Progs (not recorded when ctxt.IsAsm is set), and, when
// CanUse1InsnTLS reports false, obj.MarkUnsafePoints is invoked for
// instructions whose From.Index is REG_TLS.
//
// span6 does nothing if s.P is already non-nil.
func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
	pjc := makePjcCtx(ctxt)

	// s already has code bytes; nothing to do.
	if s.P != nil {
		return
	}

	// instinit sets ycover[0]; a zero here means it has not run.
	if ycover[0] == 0 {
		ctxt.Diag("x86 tables not initialized, call x86.instinit first")
	}

	// Pre-pass: normalize instructions before any encoding is attempted.
	for p := s.Func.Text; p != nil; p = p.Link {
		// A branch with no resolved target is made to target itself.
		if p.To.Type == obj.TYPE_BRANCH && p.To.Target() == nil {
			p.To.SetTarget(p)
		}
		if p.As == AADJSP {
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REG_SP
			// Generate 'ADDQ $x, SP' or 'SUBQ $x, SP', with x positive.
			// One exception: It is smaller to encode $-0x80 than $0x80.
			// For that case, flip the sign and the op:
			// Instead of 'ADDQ $0x80, SP', generate 'SUBQ $-0x80, SP'.
			switch v := p.From.Offset; {
			case v == 0:
				p.As = obj.ANOP
			case v == 0x80 || (v < 0 && v != -0x80):
				p.As = spadjop(ctxt, AADDL, AADDQ)
				p.From.Offset *= -1
			default:
				p.As = spadjop(ctxt, ASUBL, ASUBQ)
			}
		}
		if ctxt.Retpoline && (p.As == obj.ACALL || p.As == obj.AJMP) && (p.To.Type == obj.TYPE_REG || p.To.Type == obj.TYPE_MEM) {
			// Only register-indirect targets can be rewritten; memory
			// operands are reported as an error.
			if p.To.Type != obj.TYPE_REG {
				ctxt.Diag("non-retpoline-compatible: %v", p)
				continue
			}
			// Redirect to the per-register retpoline symbol.
			p.To.Type = obj.TYPE_BRANCH
			p.To.Name = obj.NAME_EXTERN
			p.To.Sym = ctxt.Lookup("runtime.retpoline" + obj.Rconv(int(p.To.Reg)))
			p.To.Reg = 0
			p.To.Offset = 0
		}
	}

	var count int64 // rough count of number of instructions
	for p := s.Func.Text; p != nil; p = p.Link {
		count++
		p.Back = branchShort // use short branches first time through
		// If the target already carries branchShort it was reached earlier in
		// this loop (or is p itself), so this is a backward branch and the
		// target is marked as a loop head.
		if q := p.To.Target(); q != nil && (q.Back&branchShort != 0) {
			p.Back |= branchBackwards
			q.Back |= branchLoopHead
		}
	}
	s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction

	var ab AsmBuf
	var n int   // number of assembly passes so far
	var c int32 // current offset into s.P
	errors := ctxt.Errors
	var nops []nopPad // Padding for a particular assembly (reuse slice storage if multiple assemblies)
	for {
		// This loop continues while there are reasons to re-assemble
		// whole block, like the presence of long forward jumps.
		reAssemble := false
		// Discard the output of the previous pass, keeping the storage.
		for i := range s.R {
			s.R[i] = obj.Reloc{}
		}
		s.R = s.R[:0]
		s.P = s.P[:0]
		c = 0
		var pPrev *obj.Prog
		nops = nops[:0]
		for p := s.Func.Text; p != nil; p = p.Link {
			c0 := c // offset before any padding is inserted for p
			c = pjc.padJump(ctxt, s, p, c)

			if maxLoopPad > 0 && p.Back&branchLoopHead != 0 && c&(loopAlign-1) != 0 {
				// pad with NOPs
				v := -c & (loopAlign - 1)

				if v <= maxLoopPad {
					s.Grow(int64(c) + int64(v))
					fillnop(s.P[c:], int(v))
					c += v
				}
			}

			p.Pc = int64(c)

			// process forward jumps to p
			for q := p.Rel; q != nil; q = q.Forwd {
				// Displacement from the end of the jump q to p.
				v := int32(p.Pc - (q.Pc + int64(q.Isize)))
				if q.Back&branchShort != 0 {
					// Too far for a one-byte displacement: clear the
					// short flag on q and schedule another pass.
					if v > 127 {
						reAssemble = true
						q.Back ^= branchShort
					}

					// The displacement byte sits one byte further in for
					// JCXZL and XBEGIN than for the other short jumps.
					if q.As == AJCXZL || q.As == AXBEGIN {
						s.P[q.Pc+2] = byte(v)
					} else {
						s.P[q.Pc+1] = byte(v)
					}
				} else {
					// Long form: the 32-bit displacement is the last
					// four bytes of the instruction.
					binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v))
				}
			}

			p.Rel = nil

			p.Pc = int64(c)
			ab.asmins(ctxt, s, p)
			m := ab.Len()
			if int(p.Isize) != m {
				p.Isize = uint8(m)
				if pjc.reAssemble(p) {
					// We need to re-assemble here to check for jumps and fused jumps
					// that span or end on 32 byte boundaries.
					reAssemble = true
				}
			}

			s.Grow(p.Pc + int64(m))
			copy(s.P[p.Pc:], ab.Bytes())
			// If there was padding, remember it.
			if pPrev != nil && !ctxt.IsAsm && c > c0 {
				nops = append(nops, nopPad{p: pPrev, n: c - c0})
			}
			c += int32(m)
			pPrev = p
		}

		// Give up if the layout has not settled after 20 passes.
		n++
		if n > 20 {
			ctxt.Diag("span must be looping")
			log.Fatalf("loop")
		}
		if !reAssemble {
			break
		}
		// Stop early if this call has reported new errors.
		if ctxt.Errors > errors {
			return
		}
	}
	// splice padding nops into Progs
	for _, n := range nops {
		pp := n.p
		np := &obj.Prog{Link: pp.Link, Ctxt: pp.Ctxt, As: obj.ANOP, Pos: pp.Pos.WithNotStmt(), Pc: pp.Pc + int64(pp.Isize), Isize: uint8(n.n)}
		pp.Link = np
	}

	s.Size = int64(c)

	// Disabled debugging dump of the encoded bytes and relocations.
	if false { /* debug['a'] > 1 */
		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
		var i int
		for i = 0; i < len(s.P); i++ {
			fmt.Printf(" %.2x", s.P[i])
			if i%16 == 15 {
				fmt.Printf("\n  %.6x", uint(i+1))
			}
		}

		if i%16 != 0 {
			fmt.Printf("\n")
		}

		for i := 0; i < len(s.R); i++ {
			r := &s.R[i]
			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
		}
	}

	// Mark nonpreemptible instruction sequences.
	// The 2-instruction TLS access sequence
	//	MOVQ TLS, BX
	//	MOVQ 0(BX)(TLS*1), BX
	// is not async preemptible, as if it is preempted and resumed on
	// a different thread, the TLS address may become invalid.
	if !CanUse1InsnTLS(ctxt) {
		useTLS := func(p *obj.Prog) bool {
			// Only need to mark the second instruction, which has
			// REG_TLS as Index. (It is okay to interrupt and restart
			// the first instruction.)
			return p.From.Index == REG_TLS
		}
		obj.MarkUnsafePoints(ctxt, s.Func.Text, newprog, useTLS, nil)
	}
}
   2232 
// instinit initializes the package-level assembler tables: opindex (opcode to
// table entry), ycover (operand class relation), and reg/regrex (per-register
// encoding data). On Plan 9 it also looks up the "_privates" symbol.
// It is safe to call more than once; calls after the first return immediately.
func instinit(ctxt *obj.Link) {
	if ycover[0] != 0 {
		// Already initialized; stop now.
		// This happens in the cmd/asm tests,
		// each of which re-initializes the arch.
		return
	}

	switch ctxt.Headtype {
	case objabi.Hplan9:
		plan9privates = ctxt.Lookup("_privates")
	}

	// Index both opcode tables by opcode; a slot that is already occupied
	// means the same opcode appears twice and is reported as an error.
	for i := range avxOptab {
		c := avxOptab[i].as
		if opindex[c&obj.AMask] != nil {
			ctxt.Diag("phase error in avxOptab: %d (%v)", i, c)
		}
		opindex[c&obj.AMask] = &avxOptab[i]
	}
	// optab is scanned from index 1 up to the entry whose opcode is 0.
	for i := 1; optab[i].as != 0; i++ {
		c := optab[i].as
		if opindex[c&obj.AMask] != nil {
			ctxt.Diag("phase error in optab: %d (%v)", i, c)
		}
		opindex[c&obj.AMask] = &optab[i]
	}

	// ycover is a flattened Ymax x Ymax matrix. Presumably
	// ycover[a*Ymax+b] == 1 means an operand of class a is accepted where
	// class b is required (TODO confirm against the matcher that reads it).
	// Every class is related to itself.
	for i := 0; i < Ymax; i++ {
		ycover[i*Ymax+i] = 1
	}

	// Immediate classes.
	ycover[Yi0*Ymax+Yu2] = 1
	ycover[Yi1*Ymax+Yu2] = 1

	ycover[Yi0*Ymax+Yi8] = 1
	ycover[Yi1*Ymax+Yi8] = 1
	ycover[Yu2*Ymax+Yi8] = 1
	ycover[Yu7*Ymax+Yi8] = 1

	ycover[Yi0*Ymax+Yu7] = 1
	ycover[Yi1*Ymax+Yu7] = 1
	ycover[Yu2*Ymax+Yu7] = 1

	ycover[Yi0*Ymax+Yu8] = 1
	ycover[Yi1*Ymax+Yu8] = 1
	ycover[Yu2*Ymax+Yu8] = 1
	ycover[Yu7*Ymax+Yu8] = 1

	ycover[Yi0*Ymax+Ys32] = 1
	ycover[Yi1*Ymax+Ys32] = 1
	ycover[Yu2*Ymax+Ys32] = 1
	ycover[Yu7*Ymax+Ys32] = 1
	ycover[Yu8*Ymax+Ys32] = 1
	ycover[Yi8*Ymax+Ys32] = 1

	ycover[Yi0*Ymax+Yi32] = 1
	ycover[Yi1*Ymax+Yi32] = 1
	ycover[Yu2*Ymax+Yi32] = 1
	ycover[Yu7*Ymax+Yi32] = 1
	ycover[Yu8*Ymax+Yi32] = 1
	ycover[Yi8*Ymax+Yi32] = 1
	ycover[Ys32*Ymax+Yi32] = 1

	ycover[Yi0*Ymax+Yi64] = 1
	ycover[Yi1*Ymax+Yi64] = 1
	ycover[Yu7*Ymax+Yi64] = 1
	ycover[Yu2*Ymax+Yi64] = 1
	ycover[Yu8*Ymax+Yi64] = 1
	ycover[Yi8*Ymax+Yi64] = 1
	ycover[Ys32*Ymax+Yi64] = 1
	ycover[Yi32*Ymax+Yi64] = 1

	// Register classes.
	ycover[Yal*Ymax+Yrb] = 1
	ycover[Ycl*Ymax+Yrb] = 1
	ycover[Yax*Ymax+Yrb] = 1
	ycover[Ycx*Ymax+Yrb] = 1
	ycover[Yrx*Ymax+Yrb] = 1
	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32

	ycover[Ycl*Ymax+Ycx] = 1

	ycover[Yax*Ymax+Yrx] = 1
	ycover[Ycx*Ymax+Yrx] = 1

	ycover[Yax*Ymax+Yrl] = 1
	ycover[Ycx*Ymax+Yrl] = 1
	ycover[Yrx*Ymax+Yrl] = 1
	ycover[Yrl32*Ymax+Yrl] = 1

	ycover[Yf0*Ymax+Yrf] = 1

	// Register-or-memory classes.
	ycover[Yal*Ymax+Ymb] = 1
	ycover[Ycl*Ymax+Ymb] = 1
	ycover[Yax*Ymax+Ymb] = 1
	ycover[Ycx*Ymax+Ymb] = 1
	ycover[Yrx*Ymax+Ymb] = 1
	ycover[Yrb*Ymax+Ymb] = 1
	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
	ycover[Ym*Ymax+Ymb] = 1

	ycover[Yax*Ymax+Yml] = 1
	ycover[Ycx*Ymax+Yml] = 1
	ycover[Yrx*Ymax+Yml] = 1
	ycover[Yrl*Ymax+Yml] = 1
	ycover[Yrl32*Ymax+Yml] = 1
	ycover[Ym*Ymax+Yml] = 1

	ycover[Yax*Ymax+Ymm] = 1
	ycover[Ycx*Ymax+Ymm] = 1
	ycover[Yrx*Ymax+Ymm] = 1
	ycover[Yrl*Ymax+Ymm] = 1
	ycover[Yrl32*Ymax+Ymm] = 1
	ycover[Ym*Ymax+Ymm] = 1
	ycover[Ymr*Ymax+Ymm] = 1

	// X, Y and Z vector register classes and their EVEX variants.
	ycover[Yxr0*Ymax+Yxr] = 1

	ycover[Ym*Ymax+Yxm] = 1
	ycover[Yxr0*Ymax+Yxm] = 1
	ycover[Yxr*Ymax+Yxm] = 1

	ycover[Ym*Ymax+Yym] = 1
	ycover[Yyr*Ymax+Yym] = 1

	ycover[Yxr0*Ymax+YxrEvex] = 1
	ycover[Yxr*Ymax+YxrEvex] = 1

	ycover[Ym*Ymax+YxmEvex] = 1
	ycover[Yxr0*Ymax+YxmEvex] = 1
	ycover[Yxr*Ymax+YxmEvex] = 1
	ycover[YxrEvex*Ymax+YxmEvex] = 1

	ycover[Yyr*Ymax+YyrEvex] = 1

	ycover[Ym*Ymax+YymEvex] = 1
	ycover[Yyr*Ymax+YymEvex] = 1
	ycover[YyrEvex*Ymax+YymEvex] = 1

	ycover[Ym*Ymax+Yzm] = 1
	ycover[Yzr*Ymax+Yzm] = 1

	// K register classes.
	ycover[Yk0*Ymax+Yk] = 1
	ycover[Yknot0*Ymax+Yk] = 1

	ycover[Yk0*Ymax+Ykm] = 1
	ycover[Yknot0*Ymax+Ykm] = 1
	ycover[Yk*Ymax+Ykm] = 1
	ycover[Ym*Ymax+Ykm] = 1

	ycover[Yxvm*Ymax+YxvmEvex] = 1

	ycover[Yyvm*Ymax+YyvmEvex] = 1

	// Fill reg and regrex for every register number. reg[i] is the
	// register's index within its group reduced to 3 bits, or -1 when i
	// falls in none of the ranges below. regrex[i] receives extra flag bits
	// (0x40, Rxr|Rxx|Rxb, RxrEvex) for registers in the upper parts of
	// their ranges, presumably the REX/EVEX prefix bits needed to encode
	// them (TODO confirm against the encoder).
	for i := 0; i < MAXREG; i++ {
		reg[i] = -1
		if i >= REG_AL && i <= REG_R15B {
			reg[i] = (i - REG_AL) & 7
			if i >= REG_SPB && i <= REG_DIB {
				regrex[i] = 0x40
			}
			if i >= REG_R8B && i <= REG_R15B {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}

		// AH..BH are numbered from 4.
		if i >= REG_AH && i <= REG_BH {
			reg[i] = 4 + ((i - REG_AH) & 7)
		}
		if i >= REG_AX && i <= REG_R15 {
			reg[i] = (i - REG_AX) & 7
			if i >= REG_R8 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}

		if i >= REG_F0 && i <= REG_F0+7 {
			reg[i] = (i - REG_F0) & 7
		}
		if i >= REG_M0 && i <= REG_M0+7 {
			reg[i] = (i - REG_M0) & 7
		}
		if i >= REG_K0 && i <= REG_K0+7 {
			reg[i] = (i - REG_K0) & 7
		}
		if i >= REG_X0 && i <= REG_X0+15 {
			reg[i] = (i - REG_X0) & 7
			if i >= REG_X0+8 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}
		// Registers 16-31 of each vector group always get RxrEvex.
		if i >= REG_X16 && i <= REG_X16+15 {
			reg[i] = (i - REG_X16) & 7
			if i >= REG_X16+8 {
				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
			} else {
				regrex[i] = RxrEvex
			}
		}
		if i >= REG_Y0 && i <= REG_Y0+15 {
			reg[i] = (i - REG_Y0) & 7
			if i >= REG_Y0+8 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}
		if i >= REG_Y16 && i <= REG_Y16+15 {
			reg[i] = (i - REG_Y16) & 7
			if i >= REG_Y16+8 {
				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
			} else {
				regrex[i] = RxrEvex
			}
		}
		if i >= REG_Z0 && i <= REG_Z0+15 {
			reg[i] = (i - REG_Z0) & 7
			if i > REG_Z0+7 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}
		if i >= REG_Z16 && i <= REG_Z16+15 {
			reg[i] = (i - REG_Z16) & 7
			if i >= REG_Z16+8 {
				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
			} else {
				regrex[i] = RxrEvex
			}
		}

		// Only regrex is set here for REG_CR+8..REG_CR+15; reg[i] is left
		// as assigned above.
		if i >= REG_CR+8 && i <= REG_CR+15 {
			regrex[i] = Rxr
		}
	}
}
   2466 
// isAndroid is true when objabi.GOOS is "android". The TLS handling in this
// file (for example prefixof and vaddr) consults it when choosing segment
// prefixes and relocations.
var isAndroid = objabi.GOOS == "android"
   2468 
   2469 func prefixof(ctxt *obj.Link, a *obj.Addr) int {
   2470 	if a.Reg < REG_CS && a.Index < REG_CS { // fast path
   2471 		return 0
   2472 	}
   2473 	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
   2474 		switch a.Reg {
   2475 		case REG_CS:
   2476 			return 0x2e
   2477 
   2478 		case REG_DS:
   2479 			return 0x3e
   2480 
   2481 		case REG_ES:
   2482 			return 0x26
   2483 
   2484 		case REG_FS:
   2485 			return 0x64
   2486 
   2487 		case REG_GS:
   2488 			return 0x65
   2489 
   2490 		case REG_TLS:
   2491 			// NOTE: Systems listed here should be only systems that
   2492 			// support direct TLS references like 8(TLS) implemented as
   2493 			// direct references from FS or GS. Systems that require
   2494 			// the initial-exec model, where you load the TLS base into
   2495 			// a register and then index from that register, do not reach
   2496 			// this code and should not be listed.
   2497 			if ctxt.Arch.Family == sys.I386 {
   2498 				switch ctxt.Headtype {
   2499 				default:
   2500 					if isAndroid {
   2501 						return 0x65 // GS
   2502 					}
   2503 					log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
   2504 
   2505 				case objabi.Hdarwin,
   2506 					objabi.Hdragonfly,
   2507 					objabi.Hfreebsd,
   2508 					objabi.Hnetbsd,
   2509 					objabi.Hopenbsd:
   2510 					return 0x65 // GS
   2511 				}
   2512 			}
   2513 
   2514 			switch ctxt.Headtype {
   2515 			default:
   2516 				log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
   2517 
   2518 			case objabi.Hlinux:
   2519 				if isAndroid {
   2520 					return 0x64 // FS
   2521 				}
   2522 
   2523 				if ctxt.Flag_shared {
   2524 					log.Fatalf("unknown TLS base register for linux with -shared")
   2525 				} else {
   2526 					return 0x64 // FS
   2527 				}
   2528 
   2529 			case objabi.Hdragonfly,
   2530 				objabi.Hfreebsd,
   2531 				objabi.Hnetbsd,
   2532 				objabi.Hopenbsd,
   2533 				objabi.Hsolaris:
   2534 				return 0x64 // FS
   2535 
   2536 			case objabi.Hdarwin:
   2537 				return 0x65 // GS
   2538 			}
   2539 		}
   2540 	}
   2541 
   2542 	if ctxt.Arch.Family == sys.I386 {
   2543 		if a.Index == REG_TLS && ctxt.Flag_shared {
   2544 			// When building for inclusion into a shared library, an instruction of the form
   2545 			//     MOVL off(CX)(TLS*1), AX
   2546 			// becomes
   2547 			//     mov %gs:off(%ecx), %eax
   2548 			// which assumes that the correct TLS offset has been loaded into %ecx (today
   2549 			// there is only one TLS variable -- g -- so this is OK). When not building for
   2550 			// a shared library the instruction it becomes
   2551 			//     mov 0x0(%ecx), %eax
   2552 			// and a R_TLS_LE relocation, and so does not require a prefix.
   2553 			return 0x65 // GS
   2554 		}
   2555 		return 0
   2556 	}
   2557 
   2558 	switch a.Index {
   2559 	case REG_CS:
   2560 		return 0x2e
   2561 
   2562 	case REG_DS:
   2563 		return 0x3e
   2564 
   2565 	case REG_ES:
   2566 		return 0x26
   2567 
   2568 	case REG_TLS:
   2569 		if ctxt.Flag_shared && ctxt.Headtype != objabi.Hwindows {
   2570 			// When building for inclusion into a shared library, an instruction of the form
   2571 			//     MOV off(CX)(TLS*1), AX
   2572 			// becomes
   2573 			//     mov %fs:off(%rcx), %rax
   2574 			// which assumes that the correct TLS offset has been loaded into %rcx (today
   2575 			// there is only one TLS variable -- g -- so this is OK). When not building for
   2576 			// a shared library the instruction does not require a prefix.
   2577 			return 0x64
   2578 		}
   2579 
   2580 	case REG_FS:
   2581 		return 0x64
   2582 
   2583 	case REG_GS:
   2584 		return 0x65
   2585 	}
   2586 
   2587 	return 0
   2588 }
   2589 
   2590 // oclassRegList returns multisource operand class for addr.
   2591 func oclassRegList(ctxt *obj.Link, addr *obj.Addr) int {
   2592 	// TODO(quasilyte): when oclass register case is refactored into
   2593 	// lookup table, use it here to get register kind more easily.
   2594 	// Helper functions like regIsXmm should go away too (they will become redundant).
   2595 
   2596 	regIsXmm := func(r int) bool { return r >= REG_X0 && r <= REG_X31 }
   2597 	regIsYmm := func(r int) bool { return r >= REG_Y0 && r <= REG_Y31 }
   2598 	regIsZmm := func(r int) bool { return r >= REG_Z0 && r <= REG_Z31 }
   2599 
   2600 	reg0, reg1 := decodeRegisterRange(addr.Offset)
   2601 	low := regIndex(int16(reg0))
   2602 	high := regIndex(int16(reg1))
   2603 
   2604 	if ctxt.Arch.Family == sys.I386 {
   2605 		if low >= 8 || high >= 8 {
   2606 			return Yxxx
   2607 		}
   2608 	}
   2609 
   2610 	switch high - low {
   2611 	case 3:
   2612 		switch {
   2613 		case regIsXmm(reg0) && regIsXmm(reg1):
   2614 			return YxrEvexMulti4
   2615 		case regIsYmm(reg0) && regIsYmm(reg1):
   2616 			return YyrEvexMulti4
   2617 		case regIsZmm(reg0) && regIsZmm(reg1):
   2618 			return YzrMulti4
   2619 		default:
   2620 			return Yxxx
   2621 		}
   2622 	default:
   2623 		return Yxxx
   2624 	}
   2625 }
   2626 
   2627 // oclassVMem returns V-mem (vector memory with VSIB) operand class.
   2628 // For addr that is not V-mem returns (Yxxx, false).
   2629 func oclassVMem(ctxt *obj.Link, addr *obj.Addr) (int, bool) {
   2630 	switch addr.Index {
   2631 	case REG_X0 + 0,
   2632 		REG_X0 + 1,
   2633 		REG_X0 + 2,
   2634 		REG_X0 + 3,
   2635 		REG_X0 + 4,
   2636 		REG_X0 + 5,
   2637 		REG_X0 + 6,
   2638 		REG_X0 + 7:
   2639 		return Yxvm, true
   2640 	case REG_X8 + 0,
   2641 		REG_X8 + 1,
   2642 		REG_X8 + 2,
   2643 		REG_X8 + 3,
   2644 		REG_X8 + 4,
   2645 		REG_X8 + 5,
   2646 		REG_X8 + 6,
   2647 		REG_X8 + 7:
   2648 		if ctxt.Arch.Family == sys.I386 {
   2649 			return Yxxx, true
   2650 		}
   2651 		return Yxvm, true
   2652 	case REG_X16 + 0,
   2653 		REG_X16 + 1,
   2654 		REG_X16 + 2,
   2655 		REG_X16 + 3,
   2656 		REG_X16 + 4,
   2657 		REG_X16 + 5,
   2658 		REG_X16 + 6,
   2659 		REG_X16 + 7,
   2660 		REG_X16 + 8,
   2661 		REG_X16 + 9,
   2662 		REG_X16 + 10,
   2663 		REG_X16 + 11,
   2664 		REG_X16 + 12,
   2665 		REG_X16 + 13,
   2666 		REG_X16 + 14,
   2667 		REG_X16 + 15:
   2668 		if ctxt.Arch.Family == sys.I386 {
   2669 			return Yxxx, true
   2670 		}
   2671 		return YxvmEvex, true
   2672 
   2673 	case REG_Y0 + 0,
   2674 		REG_Y0 + 1,
   2675 		REG_Y0 + 2,
   2676 		REG_Y0 + 3,
   2677 		REG_Y0 + 4,
   2678 		REG_Y0 + 5,
   2679 		REG_Y0 + 6,
   2680 		REG_Y0 + 7:
   2681 		return Yyvm, true
   2682 	case REG_Y8 + 0,
   2683 		REG_Y8 + 1,
   2684 		REG_Y8 + 2,
   2685 		REG_Y8 + 3,
   2686 		REG_Y8 + 4,
   2687 		REG_Y8 + 5,
   2688 		REG_Y8 + 6,
   2689 		REG_Y8 + 7:
   2690 		if ctxt.Arch.Family == sys.I386 {
   2691 			return Yxxx, true
   2692 		}
   2693 		return Yyvm, true
   2694 	case REG_Y16 + 0,
   2695 		REG_Y16 + 1,
   2696 		REG_Y16 + 2,
   2697 		REG_Y16 + 3,
   2698 		REG_Y16 + 4,
   2699 		REG_Y16 + 5,
   2700 		REG_Y16 + 6,
   2701 		REG_Y16 + 7,
   2702 		REG_Y16 + 8,
   2703 		REG_Y16 + 9,
   2704 		REG_Y16 + 10,
   2705 		REG_Y16 + 11,
   2706 		REG_Y16 + 12,
   2707 		REG_Y16 + 13,
   2708 		REG_Y16 + 14,
   2709 		REG_Y16 + 15:
   2710 		if ctxt.Arch.Family == sys.I386 {
   2711 			return Yxxx, true
   2712 		}
   2713 		return YyvmEvex, true
   2714 
   2715 	case REG_Z0 + 0,
   2716 		REG_Z0 + 1,
   2717 		REG_Z0 + 2,
   2718 		REG_Z0 + 3,
   2719 		REG_Z0 + 4,
   2720 		REG_Z0 + 5,
   2721 		REG_Z0 + 6,
   2722 		REG_Z0 + 7:
   2723 		return Yzvm, true
   2724 	case REG_Z8 + 0,
   2725 		REG_Z8 + 1,
   2726 		REG_Z8 + 2,
   2727 		REG_Z8 + 3,
   2728 		REG_Z8 + 4,
   2729 		REG_Z8 + 5,
   2730 		REG_Z8 + 6,
   2731 		REG_Z8 + 7,
   2732 		REG_Z8 + 8,
   2733 		REG_Z8 + 9,
   2734 		REG_Z8 + 10,
   2735 		REG_Z8 + 11,
   2736 		REG_Z8 + 12,
   2737 		REG_Z8 + 13,
   2738 		REG_Z8 + 14,
   2739 		REG_Z8 + 15,
   2740 		REG_Z8 + 16,
   2741 		REG_Z8 + 17,
   2742 		REG_Z8 + 18,
   2743 		REG_Z8 + 19,
   2744 		REG_Z8 + 20,
   2745 		REG_Z8 + 21,
   2746 		REG_Z8 + 22,
   2747 		REG_Z8 + 23:
   2748 		if ctxt.Arch.Family == sys.I386 {
   2749 			return Yxxx, true
   2750 		}
   2751 		return Yzvm, true
   2752 	}
   2753 
   2754 	return Yxxx, false
   2755 }
   2756 
   2757 func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
   2758 	switch a.Type {
   2759 	case obj.TYPE_REGLIST:
   2760 		return oclassRegList(ctxt, a)
   2761 
   2762 	case obj.TYPE_NONE:
   2763 		return Ynone
   2764 
   2765 	case obj.TYPE_BRANCH:
   2766 		return Ybr
   2767 
   2768 	case obj.TYPE_INDIR:
   2769 		if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
   2770 			return Yindir
   2771 		}
   2772 		return Yxxx
   2773 
   2774 	case obj.TYPE_MEM:
   2775 		// Pseudo registers have negative index, but SP is
   2776 		// not pseudo on x86, hence REG_SP check is not redundant.
   2777 		if a.Index == REG_SP || a.Index < 0 {
   2778 			// Can't use FP/SB/PC/SP as the index register.
   2779 			return Yxxx
   2780 		}
   2781 
   2782 		if vmem, ok := oclassVMem(ctxt, a); ok {
   2783 			return vmem
   2784 		}
   2785 
   2786 		if ctxt.Arch.Family == sys.AMD64 {
   2787 			switch a.Name {
   2788 			case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF:
   2789 				// Global variables can't use index registers and their
   2790 				// base register is %rip (%rip is encoded as REG_NONE).
   2791 				if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 {
   2792 					return Yxxx
   2793 				}
   2794 			case obj.NAME_AUTO, obj.NAME_PARAM:
   2795 				// These names must have a base of SP.  The old compiler
   2796 				// uses 0 for the base register. SSA uses REG_SP.
   2797 				if a.Reg != REG_SP && a.Reg != 0 {
   2798 					return Yxxx
   2799 				}
   2800 			case obj.NAME_NONE:
   2801 				// everything is ok
   2802 			default:
   2803 				// unknown name
   2804 				return Yxxx
   2805 			}
   2806 		}
   2807 		return Ym
   2808 
   2809 	case obj.TYPE_ADDR:
   2810 		switch a.Name {
   2811 		case obj.NAME_GOTREF:
   2812 			ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF")
   2813 			return Yxxx
   2814 
   2815 		case obj.NAME_EXTERN,
   2816 			obj.NAME_STATIC:
   2817 			if a.Sym != nil && useAbs(ctxt, a.Sym) {
   2818 				return Yi32
   2819 			}
   2820 			return Yiauto // use pc-relative addressing
   2821 
   2822 		case obj.NAME_AUTO,
   2823 			obj.NAME_PARAM:
   2824 			return Yiauto
   2825 		}
   2826 
   2827 		// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
   2828 		// and got Yi32 in an earlier version of this code.
   2829 		// Keep doing that until we fix yduff etc.
   2830 		if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
   2831 			return Yi32
   2832 		}
   2833 
   2834 		if a.Sym != nil || a.Name != obj.NAME_NONE {
   2835 			ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
   2836 		}
   2837 		fallthrough
   2838 
   2839 	case obj.TYPE_CONST:
   2840 		if a.Sym != nil {
   2841 			ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
   2842 		}
   2843 
   2844 		v := a.Offset
   2845 		if ctxt.Arch.Family == sys.I386 {
   2846 			v = int64(int32(v))
   2847 		}
   2848 		switch {
   2849 		case v == 0:
   2850 			return Yi0
   2851 		case v == 1:
   2852 			return Yi1
   2853 		case v >= 0 && v <= 3:
   2854 			return Yu2
   2855 		case v >= 0 && v <= 127:
   2856 			return Yu7
   2857 		case v >= 0 && v <= 255:
   2858 			return Yu8
   2859 		case v >= -128 && v <= 127:
   2860 			return Yi8
   2861 		}
   2862 		if ctxt.Arch.Family == sys.I386 {
   2863 			return Yi32
   2864 		}
   2865 		l := int32(v)
   2866 		if int64(l) == v {
   2867 			return Ys32 // can sign extend
   2868 		}
   2869 		if v>>32 == 0 {
   2870 			return Yi32 // unsigned
   2871 		}
   2872 		return Yi64
   2873 
   2874 	case obj.TYPE_TEXTSIZE:
   2875 		return Ytextsize
   2876 	}
   2877 
   2878 	if a.Type != obj.TYPE_REG {
   2879 		ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
   2880 		return Yxxx
   2881 	}
   2882 
   2883 	switch a.Reg {
   2884 	case REG_AL:
   2885 		return Yal
   2886 
   2887 	case REG_AX:
   2888 		return Yax
   2889 
   2890 		/*
   2891 			case REG_SPB:
   2892 		*/
   2893 	case REG_BPB,
   2894 		REG_SIB,
   2895 		REG_DIB,
   2896 		REG_R8B,
   2897 		REG_R9B,
   2898 		REG_R10B,
   2899 		REG_R11B,
   2900 		REG_R12B,
   2901 		REG_R13B,
   2902 		REG_R14B,
   2903 		REG_R15B:
   2904 		if ctxt.Arch.Family == sys.I386 {
   2905 			return Yxxx
   2906 		}
   2907 		fallthrough
   2908 
   2909 	case REG_DL,
   2910 		REG_BL,
   2911 		REG_AH,
   2912 		REG_CH,
   2913 		REG_DH,
   2914 		REG_BH:
   2915 		return Yrb
   2916 
   2917 	case REG_CL:
   2918 		return Ycl
   2919 
   2920 	case REG_CX:
   2921 		return Ycx
   2922 
   2923 	case REG_DX, REG_BX:
   2924 		return Yrx
   2925 
   2926 	case REG_R8, // not really Yrl
   2927 		REG_R9,
   2928 		REG_R10,
   2929 		REG_R11,
   2930 		REG_R12,
   2931 		REG_R13,
   2932 		REG_R14,
   2933 		REG_R15:
   2934 		if ctxt.Arch.Family == sys.I386 {
   2935 			return Yxxx
   2936 		}
   2937 		fallthrough
   2938 
   2939 	case REG_SP, REG_BP, REG_SI, REG_DI:
   2940 		if ctxt.Arch.Family == sys.I386 {
   2941 			return Yrl32
   2942 		}
   2943 		return Yrl
   2944 
   2945 	case REG_F0 + 0:
   2946 		return Yf0
   2947 
   2948 	case REG_F0 + 1,
   2949 		REG_F0 + 2,
   2950 		REG_F0 + 3,
   2951 		REG_F0 + 4,
   2952 		REG_F0 + 5,
   2953 		REG_F0 + 6,
   2954 		REG_F0 + 7:
   2955 		return Yrf
   2956 
   2957 	case REG_M0 + 0,
   2958 		REG_M0 + 1,
   2959 		REG_M0 + 2,
   2960 		REG_M0 + 3,
   2961 		REG_M0 + 4,
   2962 		REG_M0 + 5,
   2963 		REG_M0 + 6,
   2964 		REG_M0 + 7:
   2965 		return Ymr
   2966 
   2967 	case REG_X0:
   2968 		return Yxr0
   2969 
   2970 	case REG_X0 + 1,
   2971 		REG_X0 + 2,
   2972 		REG_X0 + 3,
   2973 		REG_X0 + 4,
   2974 		REG_X0 + 5,
   2975 		REG_X0 + 6,
   2976 		REG_X0 + 7,
   2977 		REG_X0 + 8,
   2978 		REG_X0 + 9,
   2979 		REG_X0 + 10,
   2980 		REG_X0 + 11,
   2981 		REG_X0 + 12,
   2982 		REG_X0 + 13,
   2983 		REG_X0 + 14,
   2984 		REG_X0 + 15:
   2985 		return Yxr
   2986 
   2987 	case REG_X0 + 16,
   2988 		REG_X0 + 17,
   2989 		REG_X0 + 18,
   2990 		REG_X0 + 19,
   2991 		REG_X0 + 20,
   2992 		REG_X0 + 21,
   2993 		REG_X0 + 22,
   2994 		REG_X0 + 23,
   2995 		REG_X0 + 24,
   2996 		REG_X0 + 25,
   2997 		REG_X0 + 26,
   2998 		REG_X0 + 27,
   2999 		REG_X0 + 28,
   3000 		REG_X0 + 29,
   3001 		REG_X0 + 30,
   3002 		REG_X0 + 31:
   3003 		return YxrEvex
   3004 
   3005 	case REG_Y0 + 0,
   3006 		REG_Y0 + 1,
   3007 		REG_Y0 + 2,
   3008 		REG_Y0 + 3,
   3009 		REG_Y0 + 4,
   3010 		REG_Y0 + 5,
   3011 		REG_Y0 + 6,
   3012 		REG_Y0 + 7,
   3013 		REG_Y0 + 8,
   3014 		REG_Y0 + 9,
   3015 		REG_Y0 + 10,
   3016 		REG_Y0 + 11,
   3017 		REG_Y0 + 12,
   3018 		REG_Y0 + 13,
   3019 		REG_Y0 + 14,
   3020 		REG_Y0 + 15:
   3021 		return Yyr
   3022 
   3023 	case REG_Y0 + 16,
   3024 		REG_Y0 + 17,
   3025 		REG_Y0 + 18,
   3026 		REG_Y0 + 19,
   3027 		REG_Y0 + 20,
   3028 		REG_Y0 + 21,
   3029 		REG_Y0 + 22,
   3030 		REG_Y0 + 23,
   3031 		REG_Y0 + 24,
   3032 		REG_Y0 + 25,
   3033 		REG_Y0 + 26,
   3034 		REG_Y0 + 27,
   3035 		REG_Y0 + 28,
   3036 		REG_Y0 + 29,
   3037 		REG_Y0 + 30,
   3038 		REG_Y0 + 31:
   3039 		return YyrEvex
   3040 
   3041 	case REG_Z0 + 0,
   3042 		REG_Z0 + 1,
   3043 		REG_Z0 + 2,
   3044 		REG_Z0 + 3,
   3045 		REG_Z0 + 4,
   3046 		REG_Z0 + 5,
   3047 		REG_Z0 + 6,
   3048 		REG_Z0 + 7:
   3049 		return Yzr
   3050 
   3051 	case REG_Z0 + 8,
   3052 		REG_Z0 + 9,
   3053 		REG_Z0 + 10,
   3054 		REG_Z0 + 11,
   3055 		REG_Z0 + 12,
   3056 		REG_Z0 + 13,
   3057 		REG_Z0 + 14,
   3058 		REG_Z0 + 15,
   3059 		REG_Z0 + 16,
   3060 		REG_Z0 + 17,
   3061 		REG_Z0 + 18,
   3062 		REG_Z0 + 19,
   3063 		REG_Z0 + 20,
   3064 		REG_Z0 + 21,
   3065 		REG_Z0 + 22,
   3066 		REG_Z0 + 23,
   3067 		REG_Z0 + 24,
   3068 		REG_Z0 + 25,
   3069 		REG_Z0 + 26,
   3070 		REG_Z0 + 27,
   3071 		REG_Z0 + 28,
   3072 		REG_Z0 + 29,
   3073 		REG_Z0 + 30,
   3074 		REG_Z0 + 31:
   3075 		if ctxt.Arch.Family == sys.I386 {
   3076 			return Yxxx
   3077 		}
   3078 		return Yzr
   3079 
   3080 	case REG_K0:
   3081 		return Yk0
   3082 
   3083 	case REG_K0 + 1,
   3084 		REG_K0 + 2,
   3085 		REG_K0 + 3,
   3086 		REG_K0 + 4,
   3087 		REG_K0 + 5,
   3088 		REG_K0 + 6,
   3089 		REG_K0 + 7:
   3090 		return Yknot0
   3091 
   3092 	case REG_CS:
   3093 		return Ycs
   3094 	case REG_SS:
   3095 		return Yss
   3096 	case REG_DS:
   3097 		return Yds
   3098 	case REG_ES:
   3099 		return Yes
   3100 	case REG_FS:
   3101 		return Yfs
   3102 	case REG_GS:
   3103 		return Ygs
   3104 	case REG_TLS:
   3105 		return Ytls
   3106 
   3107 	case REG_GDTR:
   3108 		return Ygdtr
   3109 	case REG_IDTR:
   3110 		return Yidtr
   3111 	case REG_LDTR:
   3112 		return Yldtr
   3113 	case REG_MSW:
   3114 		return Ymsw
   3115 	case REG_TASK:
   3116 		return Ytask
   3117 
   3118 	case REG_CR + 0:
   3119 		return Ycr0
   3120 	case REG_CR + 1:
   3121 		return Ycr1
   3122 	case REG_CR + 2:
   3123 		return Ycr2
   3124 	case REG_CR + 3:
   3125 		return Ycr3
   3126 	case REG_CR + 4:
   3127 		return Ycr4
   3128 	case REG_CR + 5:
   3129 		return Ycr5
   3130 	case REG_CR + 6:
   3131 		return Ycr6
   3132 	case REG_CR + 7:
   3133 		return Ycr7
   3134 	case REG_CR + 8:
   3135 		return Ycr8
   3136 
   3137 	case REG_DR + 0:
   3138 		return Ydr0
   3139 	case REG_DR + 1:
   3140 		return Ydr1
   3141 	case REG_DR + 2:
   3142 		return Ydr2
   3143 	case REG_DR + 3:
   3144 		return Ydr3
   3145 	case REG_DR + 4:
   3146 		return Ydr4
   3147 	case REG_DR + 5:
   3148 		return Ydr5
   3149 	case REG_DR + 6:
   3150 		return Ydr6
   3151 	case REG_DR + 7:
   3152 		return Ydr7
   3153 
   3154 	case REG_TR + 0:
   3155 		return Ytr0
   3156 	case REG_TR + 1:
   3157 		return Ytr1
   3158 	case REG_TR + 2:
   3159 		return Ytr2
   3160 	case REG_TR + 3:
   3161 		return Ytr3
   3162 	case REG_TR + 4:
   3163 		return Ytr4
   3164 	case REG_TR + 5:
   3165 		return Ytr5
   3166 	case REG_TR + 6:
   3167 		return Ytr6
   3168 	case REG_TR + 7:
   3169 		return Ytr7
   3170 	}
   3171 
   3172 	return Yxxx
   3173 }
   3174 
// AsmBuf is a simple buffer to assemble variable-length x86 instructions into
// and hold assembly state.
//
// Bytes are appended with the Put* methods and read back with Bytes, Last and
// At; Reset discards the contents by rewinding off to zero.
type AsmBuf struct {
	// buf is the fixed-size backing storage for the bytes written so far.
	buf      [100]byte
	// off is the number of bytes written, i.e. the index of the next byte.
	off      int
	// rexflag accumulates REX-related bits that the operand encoders OR in
	// (asmandsz does so from the regrex table).
	rexflag  int
	vexflag  bool // Per inst: true for VEX-encoded
	evexflag bool // Per inst: true for EVEX-encoded
	// NOTE(review): rep, repn and lock presumably record REP/REPN/LOCK
	// prefixes; they are neither set nor read in this part of the file, so
	// confirm against their users.
	rep      bool
	repn     bool
	lock     bool

	evex evexBits // Initialized when evexflag is true
}
   3189 
   3190 // Put1 appends one byte to the end of the buffer.
   3191 func (ab *AsmBuf) Put1(x byte) {
   3192 	ab.buf[ab.off] = x
   3193 	ab.off++
   3194 }
   3195 
   3196 // Put2 appends two bytes to the end of the buffer.
   3197 func (ab *AsmBuf) Put2(x, y byte) {
   3198 	ab.buf[ab.off+0] = x
   3199 	ab.buf[ab.off+1] = y
   3200 	ab.off += 2
   3201 }
   3202 
   3203 // Put3 appends three bytes to the end of the buffer.
   3204 func (ab *AsmBuf) Put3(x, y, z byte) {
   3205 	ab.buf[ab.off+0] = x
   3206 	ab.buf[ab.off+1] = y
   3207 	ab.buf[ab.off+2] = z
   3208 	ab.off += 3
   3209 }
   3210 
   3211 // Put4 appends four bytes to the end of the buffer.
   3212 func (ab *AsmBuf) Put4(x, y, z, w byte) {
   3213 	ab.buf[ab.off+0] = x
   3214 	ab.buf[ab.off+1] = y
   3215 	ab.buf[ab.off+2] = z
   3216 	ab.buf[ab.off+3] = w
   3217 	ab.off += 4
   3218 }
   3219 
   3220 // PutInt16 writes v into the buffer using little-endian encoding.
   3221 func (ab *AsmBuf) PutInt16(v int16) {
   3222 	ab.buf[ab.off+0] = byte(v)
   3223 	ab.buf[ab.off+1] = byte(v >> 8)
   3224 	ab.off += 2
   3225 }
   3226 
   3227 // PutInt32 writes v into the buffer using little-endian encoding.
   3228 func (ab *AsmBuf) PutInt32(v int32) {
   3229 	ab.buf[ab.off+0] = byte(v)
   3230 	ab.buf[ab.off+1] = byte(v >> 8)
   3231 	ab.buf[ab.off+2] = byte(v >> 16)
   3232 	ab.buf[ab.off+3] = byte(v >> 24)
   3233 	ab.off += 4
   3234 }
   3235 
   3236 // PutInt64 writes v into the buffer using little-endian encoding.
   3237 func (ab *AsmBuf) PutInt64(v int64) {
   3238 	ab.buf[ab.off+0] = byte(v)
   3239 	ab.buf[ab.off+1] = byte(v >> 8)
   3240 	ab.buf[ab.off+2] = byte(v >> 16)
   3241 	ab.buf[ab.off+3] = byte(v >> 24)
   3242 	ab.buf[ab.off+4] = byte(v >> 32)
   3243 	ab.buf[ab.off+5] = byte(v >> 40)
   3244 	ab.buf[ab.off+6] = byte(v >> 48)
   3245 	ab.buf[ab.off+7] = byte(v >> 56)
   3246 	ab.off += 8
   3247 }
   3248 
   3249 // Put copies b into the buffer.
   3250 func (ab *AsmBuf) Put(b []byte) {
   3251 	copy(ab.buf[ab.off:], b)
   3252 	ab.off += len(b)
   3253 }
   3254 
   3255 // PutOpBytesLit writes zero terminated sequence of bytes from op,
   3256 // starting at specified offset (e.g. z counter value).
   3257 // Trailing 0 is not written.
   3258 //
   3259 // Intended to be used for literal Z cases.
   3260 // Literal Z cases usually have "Zlit" in their name (Zlit, Zlitr_m, Zlitm_r).
   3261 func (ab *AsmBuf) PutOpBytesLit(offset int, op *opBytes) {
   3262 	for int(op[offset]) != 0 {
   3263 		ab.Put1(byte(op[offset]))
   3264 		offset++
   3265 	}
   3266 }
   3267 
   3268 // Insert inserts b at offset i.
   3269 func (ab *AsmBuf) Insert(i int, b byte) {
   3270 	ab.off++
   3271 	copy(ab.buf[i+1:ab.off], ab.buf[i:ab.off-1])
   3272 	ab.buf[i] = b
   3273 }
   3274 
   3275 // Last returns the byte at the end of the buffer.
   3276 func (ab *AsmBuf) Last() byte { return ab.buf[ab.off-1] }
   3277 
   3278 // Len returns the length of the buffer.
   3279 func (ab *AsmBuf) Len() int { return ab.off }
   3280 
   3281 // Bytes returns the contents of the buffer.
   3282 func (ab *AsmBuf) Bytes() []byte { return ab.buf[:ab.off] }
   3283 
   3284 // Reset empties the buffer.
   3285 func (ab *AsmBuf) Reset() { ab.off = 0 }
   3286 
   3287 // At returns the byte at offset i.
   3288 func (ab *AsmBuf) At(i int) byte { return ab.buf[i] }
   3289 
   3290 // asmidx emits SIB byte.
   3291 func (ab *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) {
   3292 	var i int
   3293 
   3294 	// X/Y index register is used in VSIB.
   3295 	switch index {
   3296 	default:
   3297 		goto bad
   3298 
   3299 	case REG_NONE:
   3300 		i = 4 << 3
   3301 		goto bas
   3302 
   3303 	case REG_R8,
   3304 		REG_R9,
   3305 		REG_R10,
   3306 		REG_R11,
   3307 		REG_R12,
   3308 		REG_R13,
   3309 		REG_R14,
   3310 		REG_R15,
   3311 		REG_X8,
   3312 		REG_X9,
   3313 		REG_X10,
   3314 		REG_X11,
   3315 		REG_X12,
   3316 		REG_X13,
   3317 		REG_X14,
   3318 		REG_X15,
   3319 		REG_X16,
   3320 		REG_X17,
   3321 		REG_X18,
   3322 		REG_X19,
   3323 		REG_X20,
   3324 		REG_X21,
   3325 		REG_X22,
   3326 		REG_X23,
   3327 		REG_X24,
   3328 		REG_X25,
   3329 		REG_X26,
   3330 		REG_X27,
   3331 		REG_X28,
   3332 		REG_X29,
   3333 		REG_X30,
   3334 		REG_X31,
   3335 		REG_Y8,
   3336 		REG_Y9,
   3337 		REG_Y10,
   3338 		REG_Y11,
   3339 		REG_Y12,
   3340 		REG_Y13,
   3341 		REG_Y14,
   3342 		REG_Y15,
   3343 		REG_Y16,
   3344 		REG_Y17,
   3345 		REG_Y18,
   3346 		REG_Y19,
   3347 		REG_Y20,
   3348 		REG_Y21,
   3349 		REG_Y22,
   3350 		REG_Y23,
   3351 		REG_Y24,
   3352 		REG_Y25,
   3353 		REG_Y26,
   3354 		REG_Y27,
   3355 		REG_Y28,
   3356 		REG_Y29,
   3357 		REG_Y30,
   3358 		REG_Y31,
   3359 		REG_Z8,
   3360 		REG_Z9,
   3361 		REG_Z10,
   3362 		REG_Z11,
   3363 		REG_Z12,
   3364 		REG_Z13,
   3365 		REG_Z14,
   3366 		REG_Z15,
   3367 		REG_Z16,
   3368 		REG_Z17,
   3369 		REG_Z18,
   3370 		REG_Z19,
   3371 		REG_Z20,
   3372 		REG_Z21,
   3373 		REG_Z22,
   3374 		REG_Z23,
   3375 		REG_Z24,
   3376 		REG_Z25,
   3377 		REG_Z26,
   3378 		REG_Z27,
   3379 		REG_Z28,
   3380 		REG_Z29,
   3381 		REG_Z30,
   3382 		REG_Z31:
   3383 		if ctxt.Arch.Family == sys.I386 {
   3384 			goto bad
   3385 		}
   3386 		fallthrough
   3387 
   3388 	case REG_AX,
   3389 		REG_CX,
   3390 		REG_DX,
   3391 		REG_BX,
   3392 		REG_BP,
   3393 		REG_SI,
   3394 		REG_DI,
   3395 		REG_X0,
   3396 		REG_X1,
   3397 		REG_X2,
   3398 		REG_X3,
   3399 		REG_X4,
   3400 		REG_X5,
   3401 		REG_X6,
   3402 		REG_X7,
   3403 		REG_Y0,
   3404 		REG_Y1,
   3405 		REG_Y2,
   3406 		REG_Y3,
   3407 		REG_Y4,
   3408 		REG_Y5,
   3409 		REG_Y6,
   3410 		REG_Y7,
   3411 		REG_Z0,
   3412 		REG_Z1,
   3413 		REG_Z2,
   3414 		REG_Z3,
   3415 		REG_Z4,
   3416 		REG_Z5,
   3417 		REG_Z6,
   3418 		REG_Z7:
   3419 		i = reg[index] << 3
   3420 	}
   3421 
   3422 	switch scale {
   3423 	default:
   3424 		goto bad
   3425 
   3426 	case 1:
   3427 		break
   3428 
   3429 	case 2:
   3430 		i |= 1 << 6
   3431 
   3432 	case 4:
   3433 		i |= 2 << 6
   3434 
   3435 	case 8:
   3436 		i |= 3 << 6
   3437 	}
   3438 
   3439 bas:
   3440 	switch base {
   3441 	default:
   3442 		goto bad
   3443 
   3444 	case REG_NONE: // must be mod=00
   3445 		i |= 5
   3446 
   3447 	case REG_R8,
   3448 		REG_R9,
   3449 		REG_R10,
   3450 		REG_R11,
   3451 		REG_R12,
   3452 		REG_R13,
   3453 		REG_R14,
   3454 		REG_R15:
   3455 		if ctxt.Arch.Family == sys.I386 {
   3456 			goto bad
   3457 		}
   3458 		fallthrough
   3459 
   3460 	case REG_AX,
   3461 		REG_CX,
   3462 		REG_DX,
   3463 		REG_BX,
   3464 		REG_SP,
   3465 		REG_BP,
   3466 		REG_SI,
   3467 		REG_DI:
   3468 		i |= reg[base]
   3469 	}
   3470 
   3471 	ab.Put1(byte(i))
   3472 	return
   3473 
   3474 bad:
   3475 	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
   3476 	ab.Put1(0)
   3477 }
   3478 
   3479 func (ab *AsmBuf) relput4(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr) {
   3480 	var rel obj.Reloc
   3481 
   3482 	v := vaddr(ctxt, p, a, &rel)
   3483 	if rel.Siz != 0 {
   3484 		if rel.Siz != 4 {
   3485 			ctxt.Diag("bad reloc")
   3486 		}
   3487 		r := obj.Addrel(cursym)
   3488 		*r = rel
   3489 		r.Off = int32(p.Pc + int64(ab.Len()))
   3490 	}
   3491 
   3492 	ab.PutInt32(int32(v))
   3493 }
   3494 
   3495 func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
   3496 	if r != nil {
   3497 		*r = obj.Reloc{}
   3498 	}
   3499 
   3500 	switch a.Name {
   3501 	case obj.NAME_STATIC,
   3502 		obj.NAME_GOTREF,
   3503 		obj.NAME_EXTERN:
   3504 		s := a.Sym
   3505 		if r == nil {
   3506 			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
   3507 			log.Fatalf("reloc")
   3508 		}
   3509 
   3510 		if a.Name == obj.NAME_GOTREF {
   3511 			r.Siz = 4
   3512 			r.Type = objabi.R_GOTPCREL
   3513 		} else if useAbs(ctxt, s) {
   3514 			r.Siz = 4
   3515 			r.Type = objabi.R_ADDR
   3516 		} else {
   3517 			r.Siz = 4
   3518 			r.Type = objabi.R_PCREL
   3519 		}
   3520 
   3521 		r.Off = -1 // caller must fill in
   3522 		r.Sym = s
   3523 		r.Add = a.Offset
   3524 
   3525 		return 0
   3526 	}
   3527 
   3528 	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
   3529 		if r == nil {
   3530 			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
   3531 			log.Fatalf("reloc")
   3532 		}
   3533 
   3534 		if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == objabi.Hdarwin {
   3535 			r.Type = objabi.R_TLS_LE
   3536 			r.Siz = 4
   3537 			r.Off = -1 // caller must fill in
   3538 			r.Add = a.Offset
   3539 		}
   3540 		return 0
   3541 	}
   3542 
   3543 	return a.Offset
   3544 }
   3545 
   3546 func (ab *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
   3547 	var base int
   3548 	var rel obj.Reloc
   3549 
   3550 	rex &= 0x40 | Rxr
   3551 	if a.Offset != int64(int32(a.Offset)) {
   3552 		// The rules are slightly different for 386 and AMD64,
   3553 		// mostly for historical reasons. We may unify them later,
   3554 		// but it must be discussed beforehand.
   3555 		//
   3556 		// For 64bit mode only LEAL is allowed to overflow.
   3557 		// It's how https://golang.org/cl/59630 made it.
   3558 		// crypto/sha1/sha1block_amd64.s depends on this feature.
   3559 		//
   3560 		// For 32bit mode rules are more permissive.
   3561 		// If offset fits uint32, it's permitted.
   3562 		// This is allowed for assembly that wants to use 32-bit hex
   3563 		// constants, e.g. LEAL 0x99999999(AX), AX.
   3564 		overflowOK := (ctxt.Arch.Family == sys.AMD64 && p.As == ALEAL) ||
   3565 			(ctxt.Arch.Family != sys.AMD64 &&
   3566 				int64(uint32(a.Offset)) == a.Offset &&
   3567 				ab.rexflag&Rxw == 0)
   3568 		if !overflowOK {
   3569 			ctxt.Diag("offset too large in %s", p)
   3570 		}
   3571 	}
   3572 	v := int32(a.Offset)
   3573 	rel.Siz = 0
   3574 
   3575 	switch a.Type {
   3576 	case obj.TYPE_ADDR:
   3577 		if a.Name == obj.NAME_NONE {
   3578 			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
   3579 		}
   3580 		if a.Index == REG_TLS {
   3581 			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
   3582 		}
   3583 		goto bad
   3584 
   3585 	case obj.TYPE_REG:
   3586 		const regFirst = REG_AL
   3587 		const regLast = REG_Z31
   3588 		if a.Reg < regFirst || regLast < a.Reg {
   3589 			goto bad
   3590 		}
   3591 		if v != 0 {
   3592 			goto bad
   3593 		}
   3594 		ab.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
   3595 		ab.rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
   3596 		return
   3597 	}
   3598 
   3599 	if a.Type != obj.TYPE_MEM {
   3600 		goto bad
   3601 	}
   3602 
   3603 	if a.Index != REG_NONE && a.Index != REG_TLS {
   3604 		base := int(a.Reg)
   3605 		switch a.Name {
   3606 		case obj.NAME_EXTERN,
   3607 			obj.NAME_GOTREF,
   3608 			obj.NAME_STATIC:
   3609 			if !useAbs(ctxt, a.Sym) && ctxt.Arch.Family == sys.AMD64 {
   3610 				goto bad
   3611 			}
   3612 			if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
   3613 				// The base register has already been set. It holds the PC
   3614 				// of this instruction returned by a PC-reading thunk.
   3615 				// See obj6.go:rewriteToPcrel.
   3616 			} else {
   3617 				base = REG_NONE
   3618 			}
   3619 			v = int32(vaddr(ctxt, p, a, &rel))
   3620 
   3621 		case obj.NAME_AUTO,
   3622 			obj.NAME_PARAM:
   3623 			base = REG_SP
   3624 		}
   3625 
   3626 		ab.rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
   3627 		if base == REG_NONE {
   3628 			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
   3629 			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
   3630 			goto putrelv
   3631 		}
   3632 
   3633 		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
   3634 			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
   3635 			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
   3636 			return
   3637 		}
   3638 
   3639 		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
   3640 			ab.Put1(byte(1<<6 | 4<<0 | r<<3))
   3641 			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
   3642 			ab.Put1(disp8)
   3643 			return
   3644 		}
   3645 
   3646 		ab.Put1(byte(2<<6 | 4<<0 | r<<3))
   3647 		ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
   3648 		goto putrelv
   3649 	}
   3650 
   3651 	base = int(a.Reg)
   3652 	switch a.Name {
   3653 	case obj.NAME_STATIC,
   3654 		obj.NAME_GOTREF,
   3655 		obj.NAME_EXTERN:
   3656 		if a.Sym == nil {
   3657 			ctxt.Diag("bad addr: %v", p)
   3658 		}
   3659 		if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
   3660 			// The base register has already been set. It holds the PC
   3661 			// of this instruction returned by a PC-reading thunk.
   3662 			// See obj6.go:rewriteToPcrel.
   3663 		} else {
   3664 			base = REG_NONE
   3665 		}
   3666 		v = int32(vaddr(ctxt, p, a, &rel))
   3667 
   3668 	case obj.NAME_AUTO,
   3669 		obj.NAME_PARAM:
   3670 		base = REG_SP
   3671 	}
   3672 
   3673 	if base == REG_TLS {
   3674 		v = int32(vaddr(ctxt, p, a, &rel))
   3675 	}
   3676 
   3677 	ab.rexflag |= regrex[base]&Rxb | rex
   3678 	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
   3679 		if (a.Sym == nil || !useAbs(ctxt, a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || ctxt.Arch.Family != sys.AMD64 {
   3680 			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
   3681 				ctxt.Diag("%v has offset against gotref", p)
   3682 			}
   3683 			ab.Put1(byte(0<<6 | 5<<0 | r<<3))
   3684 			goto putrelv
   3685 		}
   3686 
   3687 		// temporary
   3688 		ab.Put2(
   3689 			byte(0<<6|4<<0|r<<3), // sib present
   3690 			0<<6|4<<3|5<<0,       // DS:d32
   3691 		)
   3692 		goto putrelv
   3693 	}
   3694 
   3695 	if base == REG_SP || base == REG_R12 {
   3696 		if v == 0 {
   3697 			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
   3698 			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
   3699 			return
   3700 		}
   3701 
   3702 		if disp8, ok := toDisp8(v, p, ab); ok {
   3703 			ab.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
   3704 			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
   3705 			ab.Put1(disp8)
   3706 			return
   3707 		}
   3708 
   3709 		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
   3710 		ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
   3711 		goto putrelv
   3712 	}
   3713 
   3714 	if REG_AX <= base && base <= REG_R15 {
   3715 		if a.Index == REG_TLS && !ctxt.Flag_shared && !isAndroid {
   3716 			rel = obj.Reloc{}
   3717 			rel.Type = objabi.R_TLS_LE
   3718 			rel.Siz = 4
   3719 			rel.Sym = nil
   3720 			rel.Add = int64(v)
   3721 			v = 0
   3722 		}
   3723 
   3724 		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
   3725 			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
   3726 			return
   3727 		}
   3728 
   3729 		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
   3730 			ab.Put2(byte(1<<6|reg[base]<<0|r<<3), disp8)
   3731 			return
   3732 		}
   3733 
   3734 		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
   3735 		goto putrelv
   3736 	}
   3737 
   3738 	goto bad
   3739 
   3740 putrelv:
   3741 	if rel.Siz != 0 {
   3742 		if rel.Siz != 4 {
   3743 			ctxt.Diag("bad rel")
   3744 			goto bad
   3745 		}
   3746 
   3747 		r := obj.Addrel(cursym)
   3748 		*r = rel
   3749 		r.Off = int32(p.Pc + int64(ab.Len()))
   3750 	}
   3751 
   3752 	ab.PutInt32(v)
   3753 	return
   3754 
   3755 bad:
   3756 	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
   3757 }
   3758 
   3759 func (ab *AsmBuf) asmand(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
   3760 	ab.asmandsz(ctxt, cursym, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
   3761 }
   3762 
   3763 func (ab *AsmBuf) asmando(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, o int) {
   3764 	ab.asmandsz(ctxt, cursym, p, a, o, 0, 0)
   3765 }
   3766 
   3767 func bytereg(a *obj.Addr, t *uint8) {
   3768 	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
   3769 		a.Reg += REG_AL - REG_AX
   3770 		*t = 0
   3771 	}
   3772 }
   3773 
   3774 func unbytereg(a *obj.Addr, t *uint8) {
   3775 	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
   3776 		a.Reg += REG_AX - REG_AL
   3777 		*t = 0
   3778 	}
   3779 }
   3780 
// Encoding kinds stored in the fifth field of ymovtab entries.
//
// NOTE(review): the per-constant notes below are inferred from which
// ymovtab entries use each kind; confirm against the code that consumes
// them.
const (
	movLit uint8 = iota // Like Zlit
	// Used by the "mov seg" entries whose source is a segment register.
	movRegMem
	// Used by the "mov seg" entries whose destination is a segment register.
	movMemReg
	// Like movRegMem, but the ymovtab entries start with two opcode bytes
	// (control/debug/test registers, descriptor-table and task registers).
	movRegMem2op
	// Like movMemReg, but the ymovtab entries start with two opcode bytes.
	movMemReg2op
	movFullPtr // Load full pointer, trash heap (unsupported)
	// Used by the three-operand "double shift" entries.
	movDoubleShift
	// Used by the "load TLS base" entries.
	movTLSReg
)
   3791 
// ymovtab lists special-case encodings, grouped by the section comments
// below (push/pop of segment registers, moves to and from segment, control,
// debug and test registers, descriptor-table loads/stores, double shifts
// and TLS base loads).
//
// Each entry holds an instruction, three operand classes (Ynone where an
// operand is absent), one of the mov* encoding kinds, and up to four bytes
// of opcode data. The table ends with an all-zero entry.
//
// NOTE(review): the field meanings are read off the entries themselves; the
// movtab struct definition is outside this view — confirm against it.
var ymovtab = []movtab{
	// push
	{APUSHL, Ycs, Ynone, Ynone, movLit, [4]uint8{0x0e, 0}},
	{APUSHL, Yss, Ynone, Ynone, movLit, [4]uint8{0x16, 0}},
	{APUSHL, Yds, Ynone, Ynone, movLit, [4]uint8{0x1e, 0}},
	{APUSHL, Yes, Ynone, Ynone, movLit, [4]uint8{0x06, 0}},
	{APUSHL, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHL, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHQ, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHQ, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHW, Ycs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0e, 0}},
	{APUSHW, Yss, Ynone, Ynone, movLit, [4]uint8{Pe, 0x16, 0}},
	{APUSHW, Yds, Ynone, Ynone, movLit, [4]uint8{Pe, 0x1e, 0}},
	{APUSHW, Yes, Ynone, Ynone, movLit, [4]uint8{Pe, 0x06, 0}},
	{APUSHW, Yfs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa0, 0}},
	{APUSHW, Ygs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa8, 0}},

	// pop
	{APOPL, Ynone, Ynone, Yds, movLit, [4]uint8{0x1f, 0}},
	{APOPL, Ynone, Ynone, Yes, movLit, [4]uint8{0x07, 0}},
	{APOPL, Ynone, Ynone, Yss, movLit, [4]uint8{0x17, 0}},
	{APOPL, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPL, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPQ, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPQ, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPW, Ynone, Ynone, Yds, movLit, [4]uint8{Pe, 0x1f, 0}},
	{APOPW, Ynone, Ynone, Yes, movLit, [4]uint8{Pe, 0x07, 0}},
	{APOPW, Ynone, Ynone, Yss, movLit, [4]uint8{Pe, 0x17, 0}},
	{APOPW, Ynone, Ynone, Yfs, movLit, [4]uint8{Pe, 0x0f, 0xa1, 0}},
	{APOPW, Ynone, Ynone, Ygs, movLit, [4]uint8{Pe, 0x0f, 0xa9, 0}},

	// mov seg
	{AMOVW, Yes, Ynone, Yml, movRegMem, [4]uint8{0x8c, 0, 0, 0}},
	{AMOVW, Ycs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 1, 0, 0}},
	{AMOVW, Yss, Ynone, Yml, movRegMem, [4]uint8{0x8c, 2, 0, 0}},
	{AMOVW, Yds, Ynone, Yml, movRegMem, [4]uint8{0x8c, 3, 0, 0}},
	{AMOVW, Yfs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 4, 0, 0}},
	{AMOVW, Ygs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 5, 0, 0}},
	{AMOVW, Yml, Ynone, Yes, movMemReg, [4]uint8{0x8e, 0, 0, 0}},
	{AMOVW, Yml, Ynone, Ycs, movMemReg, [4]uint8{0x8e, 1, 0, 0}},
	{AMOVW, Yml, Ynone, Yss, movMemReg, [4]uint8{0x8e, 2, 0, 0}},
	{AMOVW, Yml, Ynone, Yds, movMemReg, [4]uint8{0x8e, 3, 0, 0}},
	{AMOVW, Yml, Ynone, Yfs, movMemReg, [4]uint8{0x8e, 4, 0, 0}},
	{AMOVW, Yml, Ynone, Ygs, movMemReg, [4]uint8{0x8e, 5, 0, 0}},

	// mov cr
	{AMOVL, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVL, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVL, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVL, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVL, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVQ, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVQ, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVQ, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVQ, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVQ, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVL, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVL, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVL, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVL, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVL, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},
	{AMOVQ, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVQ, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},

	// mov dr
	{AMOVL, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVL, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVL, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVQ, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVQ, Ydr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 2, 0}},
	{AMOVQ, Ydr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 3, 0}},
	{AMOVQ, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVQ, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVL, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVL, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVL, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},
	{AMOVQ, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ydr2, movMemReg2op, [4]uint8{0x0f, 0x23, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ydr3, movMemReg2op, [4]uint8{0x0f, 0x23, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVQ, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},

	// mov tr
	{AMOVL, Ytr6, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 6, 0}},
	{AMOVL, Ytr7, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 7, 0}},
	{AMOVL, Yml, Ynone, Ytr6, movMemReg2op, [4]uint8{0x0f, 0x26, 6, 0xff}},
	{AMOVL, Yml, Ynone, Ytr7, movMemReg2op, [4]uint8{0x0f, 0x26, 7, 0xff}},

	// lgdt, sgdt, lidt, sidt
	{AMOVL, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVL, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVL, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVL, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},
	{AMOVQ, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVQ, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVQ, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVQ, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},

	// lldt, sldt
	{AMOVW, Yml, Ynone, Yldtr, movMemReg2op, [4]uint8{0x0f, 0x00, 2, 0}},
	{AMOVW, Yldtr, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 0, 0}},

	// lmsw, smsw
	{AMOVW, Yml, Ynone, Ymsw, movMemReg2op, [4]uint8{0x0f, 0x01, 6, 0}},
	{AMOVW, Ymsw, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x01, 4, 0}},

	// ltr, str
	{AMOVW, Yml, Ynone, Ytask, movMemReg2op, [4]uint8{0x0f, 0x00, 3, 0}},
	{AMOVW, Ytask, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 1, 0}},

	/* load full pointer - unsupported
	{AMOVL, Yml, Ycol, movFullPtr, [4]uint8{0, 0, 0, 0}},
	{AMOVW, Yml, Ycol, movFullPtr, [4]uint8{Pe, 0, 0, 0}},
	*/

	// double shift
	{ASHLL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHRL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHLQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHRQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHLW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHRW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},

	// load TLS base
	{AMOVL, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
	{AMOVQ, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
	{0, 0, 0, 0, 0, [4]uint8{}},
}
   3935 
   3936 func isax(a *obj.Addr) bool {
   3937 	switch a.Reg {
   3938 	case REG_AX, REG_AL, REG_AH:
   3939 		return true
   3940 	}
   3941 
   3942 	if a.Index == REG_AX {
   3943 		return true
   3944 	}
   3945 	return false
   3946 }
   3947 
   3948 func subreg(p *obj.Prog, from int, to int) {
   3949 	if false { /* debug['Q'] */
   3950 		fmt.Printf("\n%v\ts/%v/%v/\n", p, rconv(from), rconv(to))
   3951 	}
   3952 
   3953 	if int(p.From.Reg) == from {
   3954 		p.From.Reg = int16(to)
   3955 		p.Ft = 0
   3956 	}
   3957 
   3958 	if int(p.To.Reg) == from {
   3959 		p.To.Reg = int16(to)
   3960 		p.Tt = 0
   3961 	}
   3962 
   3963 	if int(p.From.Index) == from {
   3964 		p.From.Index = int16(to)
   3965 		p.Ft = 0
   3966 	}
   3967 
   3968 	if int(p.To.Index) == from {
   3969 		p.To.Index = int16(to)
   3970 		p.Tt = 0
   3971 	}
   3972 
   3973 	if false { /* debug['Q'] */
   3974 		fmt.Printf("%v\n", p)
   3975 	}
   3976 }
   3977 
   3978 func (ab *AsmBuf) mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
   3979 	switch op {
   3980 	case Pm, Pe, Pf2, Pf3:
   3981 		if osize != 1 {
   3982 			if op != Pm {
   3983 				ab.Put1(byte(op))
   3984 			}
   3985 			ab.Put1(Pm)
   3986 			z++
   3987 			op = int(o.op[z])
   3988 			break
   3989 		}
   3990 		fallthrough
   3991 
   3992 	default:
   3993 		if ab.Len() == 0 || ab.Last() != Pm {
   3994 			ab.Put1(Pm)
   3995 		}
   3996 	}
   3997 
   3998 	ab.Put1(byte(op))
   3999 	return z
   4000 }
   4001 
// bpduff1 holds the pre-encoded machine code for the two instructions named
// in the per-line comments: store BP at -16(SP), then point BP at that slot.
// NOTE(review): presumably emitted before a duff-routine call to set up a
// frame pointer — confirm at the use site.
var bpduff1 = []byte{
	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
}
   4006 
// bpduff2 holds the pre-encoded machine code for the instruction named in
// the per-line comment: reload BP from the address it currently points to.
// NOTE(review): presumably the counterpart of bpduff1, emitted after the
// call to restore BP — confirm at the use site.
var bpduff2 = []byte{
	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
}
   4010 
// asmevex emits the EVEX prefix and the opcode byte.
// In addition to the r/m, vvvv and reg fields taken by asmvex, it also
// accepts an optional K-masking register.
//
// rm, v, r and k may each be nil when the corresponding field is unused.
// Suffix-dependent options (zeroing, rounding, broadcast, SAE) are looked up
// from p.Scond; unsupported or illegal combinations are reported via
// ctxt.Diag, but the prefix is still emitted.
//
// Expects asmbuf.evex to be properly initialized.
func (ab *AsmBuf) asmevex(ctxt *obj.Link, p *obj.Prog, rm, v, r, k *obj.Addr) {
	ab.evexflag = true
	evex := ab.evex

	// The register-extension bits default to 1 and are cleared to 0 when the
	// operand needs the extension.
	rexR := byte(1)
	evexR := byte(1)
	rexX := byte(1)
	rexB := byte(1)
	if r != nil {
		if regrex[r.Reg]&Rxr != 0 {
			rexR = 0 // "ModR/M.reg" selector 4th bit.
		}
		if regrex[r.Reg]&RxrEvex != 0 {
			evexR = 0 // "ModR/M.reg" selector 5th bit.
		}
	}
	if rm != nil {
		// Without an index register, the X bit carries the RxrEvex bit of
		// rm.Reg; otherwise it carries the index register's Rxx bit.
		if rm.Index == REG_NONE && regrex[rm.Reg]&RxrEvex != 0 {
			rexX = 0
		} else if regrex[rm.Index]&Rxx != 0 {
			rexX = 0
		}
		if regrex[rm.Reg]&Rxb != 0 {
			rexB = 0
		}
	}
	// P0 = [R][X][B][R'][00][mm]
	p0 := (rexR << 7) |
		(rexX << 6) |
		(rexB << 5) |
		(evexR << 4) |
		(0 << 2) |
		(evex.M() << 0)

	vexV := byte(0)
	if v != nil {
		// 4bit-wide reg index.
		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
	}
	// The vvvv field is stored with all four bits flipped.
	vexV ^= 0x0F
	// P1 = [W][vvvv][1][pp]
	p1 := (evex.W() << 7) |
		(vexV << 3) |
		(1 << 2) |
		(evex.P() << 0)

	suffix := evexSuffixMap[p.Scond]
	evexZ := byte(0)
	evexLL := evex.L()
	evexB := byte(0)
	evexV := byte(1)
	evexA := byte(0)
	if suffix.zeroing {
		if !evex.ZeroingEnabled() {
			ctxt.Diag("unsupported zeroing: %v", p)
		}
		evexZ = 1
	}
	// Rounding, broadcast and SAE all set the b bit; the first matching case
	// wins. Rounding additionally replaces the L'L field with the rounding
	// mode.
	switch {
	case suffix.rounding != rcUnset:
		if rm != nil && rm.Type == obj.TYPE_MEM {
			ctxt.Diag("illegal rounding with memory argument: %v", p)
		} else if !evex.RoundingEnabled() {
			ctxt.Diag("unsupported rounding: %v", p)
		}
		evexB = 1
		evexLL = suffix.rounding
	case suffix.broadcast:
		if rm == nil || rm.Type != obj.TYPE_MEM {
			ctxt.Diag("illegal broadcast without memory argument: %v", p)
		} else if !evex.BroadcastEnabled() {
			ctxt.Diag("unsupported broadcast: %v", p)
		}
		evexB = 1
	case suffix.sae:
		if rm != nil && rm.Type == obj.TYPE_MEM {
			ctxt.Diag("illegal SAE with memory argument: %v", p)
		} else if !evex.SaeEnabled() {
			ctxt.Diag("unsupported SAE: %v", p)
		}
		evexB = 1
	}
	// V' is cleared when either the index register of rm or the v register
	// has its RxrEvex bit set; the index register is checked first.
	if rm != nil && regrex[rm.Index]&RxrEvex != 0 {
		evexV = 0
	} else if v != nil && regrex[v.Reg]&RxrEvex != 0 {
		evexV = 0 // VSR selector 5th bit.
	}
	if k != nil {
		evexA = byte(reg[k.Reg])
	}
	// P2 = [z][L'L][b][V'][aaa]
	p2 := (evexZ << 7) |
		(evexLL << 5) |
		(evexB << 4) |
		(evexV << 3) |
		(evexA << 0)

	const evexEscapeByte = 0x62
	ab.Put4(evexEscapeByte, p0, p1, p2)
	ab.Put1(evex.opcode)
}
   4117 
   4118 // Emit VEX prefix and opcode byte.
   4119 // The three addresses are the r/m, vvvv, and reg fields.
   4120 // The reg and rm arguments appear in the same order as the
   4121 // arguments to asmand, which typically follows the call to asmvex.
   4122 // The final two arguments are the VEX prefix (see encoding above)
   4123 // and the opcode byte.
   4124 // For details about vex prefix see:
   4125 // https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
   4126 func (ab *AsmBuf) asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
   4127 	ab.vexflag = true
   4128 	rexR := 0
   4129 	if r != nil {
   4130 		rexR = regrex[r.Reg] & Rxr
   4131 	}
   4132 	rexB := 0
   4133 	rexX := 0
   4134 	if rm != nil {
   4135 		rexB = regrex[rm.Reg] & Rxb
   4136 		rexX = regrex[rm.Index] & Rxx
   4137 	}
   4138 	vexM := (vex >> 3) & 0x7
   4139 	vexWLP := vex & 0x87
   4140 	vexV := byte(0)
   4141 	if v != nil {
   4142 		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
   4143 	}
   4144 	vexV ^= 0xF
   4145 	if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
   4146 		// Can use 2-byte encoding.
   4147 		ab.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
   4148 	} else {
   4149 		// Must use 3-byte encoding.
   4150 		ab.Put3(0xc4,
   4151 			(byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
   4152 			vexV<<3|vexWLP,
   4153 		)
   4154 	}
   4155 	ab.Put1(opcode)
   4156 }
   4157 
   4158 // regIndex returns register index that fits in 5 bits.
   4159 //
   4160 //	R         : 3 bit | legacy instructions     | N/A
   4161 //	[R/V]EX.R : 1 bit | REX / VEX extension bit | Rxr
   4162 //	EVEX.R    : 1 bit | EVEX extension bit      | RxrEvex
   4163 //
   4164 // Examples:
   4165 //	REG_Z30 => 30
   4166 //	REG_X15 => 15
   4167 //	REG_R9  => 9
   4168 //	REG_AX  => 0
   4169 //
   4170 func regIndex(r int16) int {
   4171 	lower3bits := reg[r]
   4172 	high4bit := regrex[r] & Rxr << 1
   4173 	high5bit := regrex[r] & RxrEvex << 0
   4174 	return lower3bits | high4bit | high5bit
   4175 }
   4176 
   4177 // avx2gatherValid reports whether p satisfies AVX2 gather constraints.
   4178 // Reports errors via ctxt.
   4179 func avx2gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
   4180 	// If any pair of the index, mask, or destination registers
   4181 	// are the same, illegal instruction trap (#UD) is triggered.
   4182 	index := regIndex(p.GetFrom3().Index)
   4183 	mask := regIndex(p.From.Reg)
   4184 	dest := regIndex(p.To.Reg)
   4185 	if dest == mask || dest == index || mask == index {
   4186 		ctxt.Diag("mask, index, and destination registers should be distinct: %v", p)
   4187 		return false
   4188 	}
   4189 
   4190 	return true
   4191 }
   4192 
   4193 // avx512gatherValid reports whether p satisfies AVX512 gather constraints.
   4194 // Reports errors via ctxt.
   4195 func avx512gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
   4196 	// Illegal instruction trap (#UD) is triggered if the destination vector
   4197 	// register is the same as index vector in VSIB.
   4198 	index := regIndex(p.From.Index)
   4199 	dest := regIndex(p.To.Reg)
   4200 	if dest == index {
   4201 		ctxt.Diag("index and destination registers should be distinct: %v", p)
   4202 		return false
   4203 	}
   4204 
   4205 	return true
   4206 }
   4207 
   4208 func (ab *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
   4209 	o := opindex[p.As&obj.AMask]
   4210 
   4211 	if o == nil {
   4212 		ctxt.Diag("asmins: missing op %v", p)
   4213 		return
   4214 	}
   4215 
   4216 	if pre := prefixof(ctxt, &p.From); pre != 0 {
   4217 		ab.Put1(byte(pre))
   4218 	}
   4219 	if pre := prefixof(ctxt, &p.To); pre != 0 {
   4220 		ab.Put1(byte(pre))
   4221 	}
   4222 
   4223 	// Checks to warn about instruction/arguments combinations that
   4224 	// will unconditionally trigger illegal instruction trap (#UD).
   4225 	switch p.As {
   4226 	case AVGATHERDPD,
   4227 		AVGATHERQPD,
   4228 		AVGATHERDPS,
   4229 		AVGATHERQPS,
   4230 		AVPGATHERDD,
   4231 		AVPGATHERQD,
   4232 		AVPGATHERDQ,
   4233 		AVPGATHERQQ:
   4234 		// AVX512 gather requires explicit K mask.
   4235 		if p.GetFrom3().Reg >= REG_K0 && p.GetFrom3().Reg <= REG_K7 {
   4236 			if !avx512gatherValid(ctxt, p) {
   4237 				return
   4238 			}
   4239 		} else {
   4240 			if !avx2gatherValid(ctxt, p) {
   4241 				return
   4242 			}
   4243 		}
   4244 	}
   4245 
   4246 	if p.Ft == 0 {
   4247 		p.Ft = uint8(oclass(ctxt, p, &p.From))
   4248 	}
   4249 	if p.Tt == 0 {
   4250 		p.Tt = uint8(oclass(ctxt, p, &p.To))
   4251 	}
   4252 
   4253 	ft := int(p.Ft) * Ymax
   4254 	var f3t int
   4255 	tt := int(p.Tt) * Ymax
   4256 
   4257 	xo := obj.Bool2int(o.op[0] == 0x0f)
   4258 	z := 0
   4259 	var a *obj.Addr
   4260 	var l int
   4261 	var op int
   4262 	var q *obj.Prog
   4263 	var r *obj.Reloc
   4264 	var rel obj.Reloc
   4265 	var v int64
   4266 
   4267 	args := make([]int, 0, argListMax)
   4268 	if ft != Ynone*Ymax {
   4269 		args = append(args, ft)
   4270 	}
   4271 	for i := range p.RestArgs {
   4272 		args = append(args, oclass(ctxt, p, &p.RestArgs[i])*Ymax)
   4273 	}
   4274 	if tt != Ynone*Ymax {
   4275 		args = append(args, tt)
   4276 	}
   4277 
   4278 	for _, yt := range o.ytab {
   4279 		// ytab matching is purely args-based,
   4280 		// but AVX512 suffixes like "Z" or "RU_SAE" will
   4281 		// add EVEX-only filter that will reject non-EVEX matches.
   4282 		//
   4283 		// Consider "VADDPD.BCST 2032(DX), X0, X0".
   4284 		// Without this rule, operands will lead to VEX-encoded form
   4285 		// and produce "c5b15813" encoding.
   4286 		if !yt.match(args) {
   4287 			// "xo" is always zero for VEX/EVEX encoded insts.
   4288 			z += int(yt.zoffset) + xo
   4289 		} else {
   4290 			if p.Scond != 0 && !evexZcase(yt.zcase) {
   4291 				// Do not signal error and continue to search
   4292 				// for matching EVEX-encoded form.
   4293 				z += int(yt.zoffset)
   4294 				continue
   4295 			}
   4296 
   4297 			switch o.prefix {
   4298 			case Px1: // first option valid only in 32-bit mode
   4299 				if ctxt.Arch.Family == sys.AMD64 && z == 0 {
   4300 					z += int(yt.zoffset) + xo
   4301 					continue
   4302 				}
   4303 			case Pq: // 16 bit escape and opcode escape
   4304 				ab.Put2(Pe, Pm)
   4305 
   4306 			case Pq3: // 16 bit escape and opcode escape + REX.W
   4307 				ab.rexflag |= Pw
   4308 				ab.Put2(Pe, Pm)
   4309 
   4310 			case Pq4: // 66 0F 38
   4311 				ab.Put3(0x66, 0x0F, 0x38)
   4312 
   4313 			case Pq4w: // 66 0F 38 + REX.W
   4314 				ab.rexflag |= Pw
   4315 				ab.Put3(0x66, 0x0F, 0x38)
   4316 
   4317 			case Pq5: // F3 0F 38
   4318 				ab.Put3(0xF3, 0x0F, 0x38)
   4319 
   4320 			case Pq5w: //  F3 0F 38 + REX.W
   4321 				ab.rexflag |= Pw
   4322 				ab.Put3(0xF3, 0x0F, 0x38)
   4323 
   4324 			case Pf2, // xmm opcode escape
   4325 				Pf3:
   4326 				ab.Put2(o.prefix, Pm)
   4327 
   4328 			case Pef3:
   4329 				ab.Put3(Pe, Pf3, Pm)
   4330 
   4331 			case Pfw: // xmm opcode escape + REX.W
   4332 				ab.rexflag |= Pw
   4333 				ab.Put2(Pf3, Pm)
   4334 
   4335 			case Pm: // opcode escape
   4336 				ab.Put1(Pm)
   4337 
   4338 			case Pe: // 16 bit escape
   4339 				ab.Put1(Pe)
   4340 
   4341 			case Pw: // 64-bit escape
   4342 				if ctxt.Arch.Family != sys.AMD64 {
   4343 					ctxt.Diag("asmins: illegal 64: %v", p)
   4344 				}
   4345 				ab.rexflag |= Pw
   4346 
   4347 			case Pw8: // 64-bit escape if z >= 8
   4348 				if z >= 8 {
   4349 					if ctxt.Arch.Family != sys.AMD64 {
   4350 						ctxt.Diag("asmins: illegal 64: %v", p)
   4351 					}
   4352 					ab.rexflag |= Pw
   4353 				}
   4354 
   4355 			case Pb: // botch
   4356 				if ctxt.Arch.Family != sys.AMD64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
   4357 					goto bad
   4358 				}
   4359 				// NOTE(rsc): This is probably safe to do always,
   4360 				// but when enabled it chooses different encodings
   4361 				// than the old cmd/internal/obj/i386 code did,
   4362 				// which breaks our "same bits out" checks.
   4363 				// In particular, CMPB AX, $0 encodes as 80 f8 00
   4364 				// in the original obj/i386, and it would encode
   4365 				// (using a valid, shorter form) as 3c 00 if we enabled
   4366 				// the call to bytereg here.
   4367 				if ctxt.Arch.Family == sys.AMD64 {
   4368 					bytereg(&p.From, &p.Ft)
   4369 					bytereg(&p.To, &p.Tt)
   4370 				}
   4371 
   4372 			case P32: // 32 bit but illegal if 64-bit mode
   4373 				if ctxt.Arch.Family == sys.AMD64 {
   4374 					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
   4375 				}
   4376 
   4377 			case Py: // 64-bit only, no prefix
   4378 				if ctxt.Arch.Family != sys.AMD64 {
   4379 					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
   4380 				}
   4381 
   4382 			case Py1: // 64-bit only if z < 1, no prefix
   4383 				if z < 1 && ctxt.Arch.Family != sys.AMD64 {
   4384 					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
   4385 				}
   4386 
   4387 			case Py3: // 64-bit only if z < 3, no prefix
   4388 				if z < 3 && ctxt.Arch.Family != sys.AMD64 {
   4389 					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
   4390 				}
   4391 			}
   4392 
   4393 			if z >= len(o.op) {
   4394 				log.Fatalf("asmins bad table %v", p)
   4395 			}
   4396 			op = int(o.op[z])
   4397 			if op == 0x0f {
   4398 				ab.Put1(byte(op))
   4399 				z++
   4400 				op = int(o.op[z])
   4401 			}
   4402 
   4403 			switch yt.zcase {
   4404 			default:
   4405 				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
   4406 				return
   4407 
   4408 			case Zpseudo:
   4409 				break
   4410 
   4411 			case Zlit:
   4412 				ab.PutOpBytesLit(z, &o.op)
   4413 
   4414 			case Zlitr_m:
   4415 				ab.PutOpBytesLit(z, &o.op)
   4416 				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
   4417 
   4418 			case Zlitm_r:
   4419 				ab.PutOpBytesLit(z, &o.op)
   4420 				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
   4421 
   4422 			case Zlit_m_r:
   4423 				ab.PutOpBytesLit(z, &o.op)
   4424 				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
   4425 
   4426 			case Zmb_r:
   4427 				bytereg(&p.From, &p.Ft)
   4428 				fallthrough
   4429 
   4430 			case Zm_r:
   4431 				ab.Put1(byte(op))
   4432 				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
   4433 
   4434 			case Z_m_r:
   4435 				ab.Put1(byte(op))
   4436 				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
   4437 
   4438 			case Zm2_r:
   4439 				ab.Put2(byte(op), o.op[z+1])
   4440 				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
   4441 
   4442 			case Zm_r_xm:
   4443 				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
   4444 				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
   4445 
   4446 			case Zm_r_xm_nr:
   4447 				ab.rexflag = 0
   4448 				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
   4449 				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
   4450 
   4451 			case Zm_r_i_xm:
   4452 				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
   4453 				ab.asmand(ctxt, cursym, p, &p.From, p.GetFrom3())
   4454 				ab.Put1(byte(p.To.Offset))
   4455 
   4456 			case Zibm_r, Zibr_m:
   4457 				ab.PutOpBytesLit(z, &o.op)
   4458 				if yt.zcase == Zibr_m {
   4459 					ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
   4460 				} else {
   4461 					ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
   4462 				}
   4463 				switch {
   4464 				default:
   4465 					ab.Put1(byte(p.From.Offset))
   4466 				case yt.args[0] == Yi32 && o.prefix == Pe:
   4467 					ab.PutInt16(int16(p.From.Offset))
   4468 				case yt.args[0] == Yi32:
   4469 					ab.PutInt32(int32(p.From.Offset))
   4470 				}
   4471 
   4472 			case Zaut_r:
   4473 				ab.Put1(0x8d) // leal
   4474 				if p.From.Type != obj.TYPE_ADDR {
   4475 					ctxt.Diag("asmins: Zaut sb type ADDR")
   4476 				}
   4477 				p.From.Type = obj.TYPE_MEM
   4478 				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
   4479 				p.From.Type = obj.TYPE_ADDR
   4480 
   4481 			case Zm_o:
   4482 				ab.Put1(byte(op))
   4483 				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
   4484 
   4485 			case Zr_m:
   4486 				ab.Put1(byte(op))
   4487 				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
   4488 
   4489 			case Zvex:
   4490 				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
   4491 
   4492 			case Zvex_rm_v_r:
   4493 				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
   4494 				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
   4495 
   4496 			case Zvex_rm_v_ro:
   4497 				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
   4498 				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
   4499 
   4500 			case Zvex_i_rm_vo:
   4501 				ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
   4502 				ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+2]))
   4503 				ab.Put1(byte(p.From.Offset))
   4504 
   4505 			case Zvex_i_r_v:
   4506 				ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
   4507 				regnum := byte(0x7)
   4508 				if p.GetFrom3().Reg >= REG_X0 && p.GetFrom3().Reg <= REG_X15 {
   4509 					regnum &= byte(p.GetFrom3().Reg - REG_X0)
   4510 				} else {
   4511 					regnum &= byte(p.GetFrom3().Reg - REG_Y0)
   4512 				}
   4513 				ab.Put1(o.op[z+2] | regnum)
   4514 				ab.Put1(byte(p.From.Offset))
   4515 
   4516 			case Zvex_i_rm_v_r:
   4517 				imm, from, from3, to := unpackOps4(p)
   4518 				ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
   4519 				ab.asmand(ctxt, cursym, p, from, to)
   4520 				ab.Put1(byte(imm.Offset))
   4521 
   4522 			case Zvex_i_rm_r:
   4523 				ab.asmvex(ctxt, p.GetFrom3(), nil, &p.To, o.op[z], o.op[z+1])
   4524 				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
   4525 				ab.Put1(byte(p.From.Offset))
   4526 
   4527 			case Zvex_v_rm_r:
   4528 				ab.asmvex(ctxt, p.GetFrom3(), &p.From, &p.To, o.op[z], o.op[z+1])
   4529 				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
   4530 
   4531 			case Zvex_r_v_rm:
   4532 				ab.asmvex(ctxt, &p.To, p.GetFrom3(), &p.From, o.op[z], o.op[z+1])
   4533 				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
   4534 
   4535 			case Zvex_rm_r_vo:
   4536 				ab.asmvex(ctxt, &p.From, &p.To, p.GetFrom3(), o.op[z], o.op[z+1])
   4537 				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
   4538 
   4539 			case Zvex_i_r_rm:
   4540 				ab.asmvex(ctxt, &p.To, nil, p.GetFrom3(), o.op[z], o.op[z+1])
   4541 				ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
   4542 				ab.Put1(byte(p.From.Offset))
   4543 
   4544 			case Zvex_hr_rm_v_r:
   4545 				hr, from, from3, to := unpackOps4(p)
   4546 				ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
   4547 				ab.asmand(ctxt, cursym, p, from, to)
   4548 				ab.Put1(byte(regIndex(hr.Reg) << 4))
   4549 
   4550 			case Zevex_k_rmo:
   4551 				ab.evex = newEVEXBits(z, &o.op)
   4552 				ab.asmevex(ctxt, p, &p.To, nil, nil, &p.From)
   4553 				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+3]))
   4554 
   4555 			case Zevex_i_rm_vo:
   4556 				ab.evex = newEVEXBits(z, &o.op)
   4557 				ab.asmevex(ctxt, p, p.GetFrom3(), &p.To, nil, nil)
   4558 				ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+3]))
   4559 				ab.Put1(byte(p.From.Offset))
   4560 
   4561 			case Zevex_i_rm_k_vo:
   4562 				imm, from, kmask, to := unpackOps4(p)
   4563 				ab.evex = newEVEXBits(z, &o.op)
   4564 				ab.asmevex(ctxt, p, from, to, nil, kmask)
   4565 				ab.asmando(ctxt, cursym, p, from, int(o.op[z+3]))
   4566 				ab.Put1(byte(imm.Offset))
   4567 
   4568 			case Zevex_i_r_rm:
   4569 				ab.evex = newEVEXBits(z, &o.op)
   4570 				ab.asmevex(ctxt, p, &p.To, nil, p.GetFrom3(), nil)
   4571 				ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
   4572 				ab.Put1(byte(p.From.Offset))
   4573 
   4574 			case Zevex_i_r_k_rm:
   4575 				imm, from, kmask, to := unpackOps4(p)
   4576 				ab.evex = newEVEXBits(z, &o.op)
   4577 				ab.asmevex(ctxt, p, to, nil, from, kmask)
   4578 				ab.asmand(ctxt, cursym, p, to, from)
   4579 				ab.Put1(byte(imm.Offset))
   4580 
   4581 			case Zevex_i_rm_r:
   4582 				ab.evex = newEVEXBits(z, &o.op)
   4583 				ab.asmevex(ctxt, p, p.GetFrom3(), nil, &p.To, nil)
   4584 				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
   4585 				ab.Put1(byte(p.From.Offset))
   4586 
   4587 			case Zevex_i_rm_k_r:
   4588 				imm, from, kmask, to := unpackOps4(p)
   4589 				ab.evex = newEVEXBits(z, &o.op)
   4590 				ab.asmevex(ctxt, p, from, nil, to, kmask)
   4591 				ab.asmand(ctxt, cursym, p, from, to)
   4592 				ab.Put1(byte(imm.Offset))
   4593 
   4594 			case Zevex_i_rm_v_r:
   4595 				imm, from, from3, to := unpackOps4(p)
   4596 				ab.evex = newEVEXBits(z, &o.op)
   4597 				ab.asmevex(ctxt, p, from, from3, to, nil)
   4598 				ab.asmand(ctxt, cursym, p, from, to)
   4599 				ab.Put1(byte(imm.Offset))
   4600 
   4601 			case Zevex_i_rm_v_k_r:
   4602 				imm, from, from3, kmask, to := unpackOps5(p)
   4603 				ab.evex = newEVEXBits(z, &o.op)
   4604 				ab.asmevex(ctxt, p, from, from3, to, kmask)
   4605 				ab.asmand(ctxt, cursym, p, from, to)
   4606 				ab.Put1(byte(imm.Offset))
   4607 
   4608 			case Zevex_r_v_rm:
   4609 				ab.evex = newEVEXBits(z, &o.op)
   4610 				ab.asmevex(ctxt, p, &p.To, p.GetFrom3(), &p.From, nil)
   4611 				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
   4612 
   4613 			case Zevex_rm_v_r:
   4614 				ab.evex = newEVEXBits(z, &o.op)
   4615 				ab.asmevex(ctxt, p, &p.From, p.GetFrom3(), &p.To, nil)
   4616 				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
   4617 
   4618 			case Zevex_rm_k_r:
   4619 				ab.evex = newEVEXBits(z, &o.op)
   4620 				ab.asmevex(ctxt, p, &p.From, nil, &p.To, p.GetFrom3())
   4621 				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
   4622 
   4623 			case Zevex_r_k_rm:
   4624 				ab.evex = newEVEXBits(z, &o.op)
   4625 				ab.asmevex(ctxt, p, &p.To, nil, &p.From, p.GetFrom3())
   4626 				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
   4627 
   4628 			case Zevex_rm_v_k_r:
   4629 				from, from3, kmask, to := unpackOps4(p)
   4630 				ab.evex = newEVEXBits(z, &o.op)
   4631 				ab.asmevex(ctxt, p, from, from3, to, kmask)
   4632 				ab.asmand(ctxt, cursym, p, from, to)
   4633 
   4634 			case Zevex_r_v_k_rm:
   4635 				from, from3, kmask, to := unpackOps4(p)
   4636 				ab.evex = newEVEXBits(z, &o.op)
   4637 				ab.asmevex(ctxt, p, to, from3, from, kmask)
   4638 				ab.asmand(ctxt, cursym, p, to, from)
   4639 
   4640 			case Zr_m_xm:
   4641 				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
   4642 				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
   4643 
   4644 			case Zr_m_xm_nr:
   4645 				ab.rexflag = 0
   4646 				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
   4647 				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
   4648 
   4649 			case Zo_m:
   4650 				ab.Put1(byte(op))
   4651 				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
   4652 
   4653 			case Zcallindreg:
   4654 				r = obj.Addrel(cursym)
   4655 				r.Off = int32(p.Pc)
   4656 				r.Type = objabi.R_CALLIND
   4657 				r.Siz = 0
   4658 				fallthrough
   4659 
   4660 			case Zo_m64:
   4661 				ab.Put1(byte(op))
   4662 				ab.asmandsz(ctxt, cursym, p, &p.To, int(o.op[z+1]), 0, 1)
   4663 
   4664 			case Zm_ibo:
   4665 				ab.Put1(byte(op))
   4666 				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
   4667 				ab.Put1(byte(vaddr(ctxt, p, &p.To, nil)))
   4668 
   4669 			case Zibo_m:
   4670 				ab.Put1(byte(op))
   4671 				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
   4672 				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
   4673 
   4674 			case Zibo_m_xm:
   4675 				z = ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
   4676 				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
   4677 				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
   4678 
   4679 			case Z_ib, Zib_:
   4680 				if yt.zcase == Zib_ {
   4681 					a = &p.From
   4682 				} else {
   4683 					a = &p.To
   4684 				}
   4685 				ab.Put1(byte(op))
   4686 				if p.As == AXABORT {
   4687 					ab.Put1(o.op[z+1])
   4688 				}
   4689 				ab.Put1(byte(vaddr(ctxt, p, a, nil)))
   4690 
   4691 			case Zib_rp:
   4692 				ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
   4693 				ab.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil)))
   4694 
   4695 			case Zil_rp:
   4696 				ab.rexflag |= regrex[p.To.Reg] & Rxb
   4697 				ab.Put1(byte(op + reg[p.To.Reg]))
   4698 				if o.prefix == Pe {
   4699 					v = vaddr(ctxt, p, &p.From, nil)
   4700 					ab.PutInt16(int16(v))
   4701 				} else {
   4702 					ab.relput4(ctxt, cursym, p, &p.From)
   4703 				}
   4704 
   4705 			case Zo_iw:
   4706 				ab.Put1(byte(op))
   4707 				if p.From.Type != obj.TYPE_NONE {
   4708 					v = vaddr(ctxt, p, &p.From, nil)
   4709 					ab.PutInt16(int16(v))
   4710 				}
   4711 
   4712 			case Ziq_rp:
   4713 				v = vaddr(ctxt, p, &p.From, &rel)
   4714 				l = int(v >> 32)
   4715 				if l == 0 && rel.Siz != 8 {
   4716 					ab.rexflag &^= (0x40 | Rxw)
   4717 
   4718 					ab.rexflag |= regrex[p.To.Reg] & Rxb
   4719 					ab.Put1(byte(0xb8 + reg[p.To.Reg]))
   4720 					if rel.Type != 0 {
   4721 						r = obj.Addrel(cursym)
   4722 						*r = rel
   4723 						r.Off = int32(p.Pc + int64(ab.Len()))
   4724 					}
   4725 
   4726 					ab.PutInt32(int32(v))
   4727 				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { // sign extend
   4728 					ab.Put1(0xc7)
   4729 					ab.asmando(ctxt, cursym, p, &p.To, 0)
   4730 
   4731 					ab.PutInt32(int32(v)) // need all 8
   4732 				} else {
   4733 					ab.rexflag |= regrex[p.To.Reg] & Rxb
   4734 					ab.Put1(byte(op + reg[p.To.Reg]))
   4735 					if rel.Type != 0 {
   4736 						r = obj.Addrel(cursym)
   4737 						*r = rel
   4738 						r.Off = int32(p.Pc + int64(ab.Len()))
   4739 					}
   4740 
   4741 					ab.PutInt64(v)
   4742 				}
   4743 
   4744 			case Zib_rr:
   4745 				ab.Put1(byte(op))
   4746 				ab.asmand(ctxt, cursym, p, &p.To, &p.To)
   4747 				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
   4748 
   4749 			case Z_il, Zil_:
   4750 				if yt.zcase == Zil_ {
   4751 					a = &p.From
   4752 				} else {
   4753 					a = &p.To
   4754 				}
   4755 				ab.Put1(byte(op))
   4756 				if o.prefix == Pe {
   4757 					v = vaddr(ctxt, p, a, nil)
   4758 					ab.PutInt16(int16(v))
   4759 				} else {
   4760 					ab.relput4(ctxt, cursym, p, a)
   4761 				}
   4762 
   4763 			case Zm_ilo, Zilo_m:
   4764 				ab.Put1(byte(op))
   4765 				if yt.zcase == Zilo_m {
   4766 					a = &p.From
   4767 					ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
   4768 				} else {
   4769 					a = &p.To
   4770 					ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
   4771 				}
   4772 
   4773 				if o.prefix == Pe {
   4774 					v = vaddr(ctxt, p, a, nil)
   4775 					ab.PutInt16(int16(v))
   4776 				} else {
   4777 					ab.relput4(ctxt, cursym, p, a)
   4778 				}
   4779 
   4780 			case Zil_rr:
   4781 				ab.Put1(byte(op))
   4782 				ab.asmand(ctxt, cursym, p, &p.To, &p.To)
   4783 				if o.prefix == Pe {
   4784 					v = vaddr(ctxt, p, &p.From, nil)
   4785 					ab.PutInt16(int16(v))
   4786 				} else {
   4787 					ab.relput4(ctxt, cursym, p, &p.From)
   4788 				}
   4789 
   4790 			case Z_rp:
   4791 				ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
   4792 				ab.Put1(byte(op + reg[p.To.Reg]))
   4793 
   4794 			case Zrp_:
   4795 				ab.rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
   4796 				ab.Put1(byte(op + reg[p.From.Reg]))
   4797 
   4798 			case Zcallcon, Zjmpcon:
   4799 				if yt.zcase == Zcallcon {
   4800 					ab.Put1(byte(op))
   4801 				} else {
   4802 					ab.Put1(o.op[z+1])
   4803 				}
   4804 				r = obj.Addrel(cursym)
   4805 				r.Off = int32(p.Pc + int64(ab.Len()))
   4806 				r.Type = objabi.R_PCREL
   4807 				r.Siz = 4
   4808 				r.Add = p.To.Offset
   4809 				ab.PutInt32(0)
   4810 
   4811 			case Zcallind:
   4812 				ab.Put2(byte(op), o.op[z+1])
   4813 				r = obj.Addrel(cursym)
   4814 				r.Off = int32(p.Pc + int64(ab.Len()))
   4815 				if ctxt.Arch.Family == sys.AMD64 {
   4816 					r.Type = objabi.R_PCREL
   4817 				} else {
   4818 					r.Type = objabi.R_ADDR
   4819 				}
   4820 				r.Siz = 4
   4821 				r.Add = p.To.Offset
   4822 				r.Sym = p.To.Sym
   4823 				ab.PutInt32(0)
   4824 
   4825 			case Zcall, Zcallduff:
   4826 				if p.To.Sym == nil {
   4827 					ctxt.Diag("call without target")
   4828 					ctxt.DiagFlush()
   4829 					log.Fatalf("bad code")
   4830 				}
   4831 
   4832 				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
   4833 					ctxt.Diag("directly calling duff when dynamically linking Go")
   4834 				}
   4835 
   4836 				if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
   4837 					// Maintain BP around call, since duffcopy/duffzero can't do it
   4838 					// (the call jumps into the middle of the function).
   4839 					// This makes it possible to see call sites for duffcopy/duffzero in
   4840 					// BP-based profiling tools like Linux perf (which is the
   4841 					// whole point of maintaining frame pointers in Go).
   4842 					// MOVQ BP, -16(SP)
   4843 					// LEAQ -16(SP), BP
   4844 					ab.Put(bpduff1)
   4845 				}
   4846 				ab.Put1(byte(op))
   4847 				r = obj.Addrel(cursym)
   4848 				r.Off = int32(p.Pc + int64(ab.Len()))
   4849 				r.Sym = p.To.Sym
   4850 				r.Add = p.To.Offset
   4851 				r.Type = objabi.R_CALL
   4852 				r.Siz = 4
   4853 				ab.PutInt32(0)
   4854 
   4855 				if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
   4856 					// Pop BP pushed above.
   4857 					// MOVQ 0(BP), BP
   4858 					ab.Put(bpduff2)
   4859 				}
   4860 
   4861 			// TODO: jump across functions needs reloc
   4862 			case Zbr, Zjmp, Zloop:
   4863 				if p.As == AXBEGIN {
   4864 					ab.Put1(byte(op))
   4865 				}
   4866 				if p.To.Sym != nil {
   4867 					if yt.zcase != Zjmp {
   4868 						ctxt.Diag("branch to ATEXT")
   4869 						ctxt.DiagFlush()
   4870 						log.Fatalf("bad code")
   4871 					}
   4872 
   4873 					ab.Put1(o.op[z+1])
   4874 					r = obj.Addrel(cursym)
   4875 					r.Off = int32(p.Pc + int64(ab.Len()))
   4876 					r.Sym = p.To.Sym
   4877 					// Note: R_CALL instead of R_PCREL. R_CALL is more permissive in that
   4878 					// it can point to a trampoline instead of the destination itself.
   4879 					r.Type = objabi.R_CALL
   4880 					r.Siz = 4
   4881 					ab.PutInt32(0)
   4882 					break
   4883 				}
   4884 
   4885 				// Assumes q is in this function.
   4886 				// TODO: Check in input, preserve in brchain.
   4887 
   4888 				// Fill in backward jump now.
   4889 				q = p.To.Target()
   4890 
   4891 				if q == nil {
   4892 					ctxt.Diag("jmp/branch/loop without target")
   4893 					ctxt.DiagFlush()
   4894 					log.Fatalf("bad code")
   4895 				}
   4896 
   4897 				if p.Back&branchBackwards != 0 {
   4898 					v = q.Pc - (p.Pc + 2)
   4899 					if v >= -128 && p.As != AXBEGIN {
   4900 						if p.As == AJCXZL {
   4901 							ab.Put1(0x67)
   4902 						}
   4903 						ab.Put2(byte(op), byte(v))
   4904 					} else if yt.zcase == Zloop {
   4905 						ctxt.Diag("loop too far: %v", p)
   4906 					} else {
   4907 						v -= 5 - 2
   4908 						if p.As == AXBEGIN {
   4909 							v--
   4910 						}
   4911 						if yt.zcase == Zbr {
   4912 							ab.Put1(0x0f)
   4913 							v--
   4914 						}
   4915 
   4916 						ab.Put1(o.op[z+1])
   4917 						ab.PutInt32(int32(v))
   4918 					}
   4919 
   4920 					break
   4921 				}
   4922 
   4923 				// Annotate target; will fill in later.
   4924 				p.Forwd = q.Rel
   4925 
   4926 				q.Rel = p
   4927 				if p.Back&branchShort != 0 && p.As != AXBEGIN {
   4928 					if p.As == AJCXZL {
   4929 						ab.Put1(0x67)
   4930 					}
   4931 					ab.Put2(byte(op), 0)
   4932 				} else if yt.zcase == Zloop {
   4933 					ctxt.Diag("loop too far: %v", p)
   4934 				} else {
   4935 					if yt.zcase == Zbr {
   4936 						ab.Put1(0x0f)
   4937 					}
   4938 					ab.Put1(o.op[z+1])
   4939 					ab.PutInt32(0)
   4940 				}
   4941 
   4942 			case Zbyte:
   4943 				v = vaddr(ctxt, p, &p.From, &rel)
   4944 				if rel.Siz != 0 {
   4945 					rel.Siz = uint8(op)
   4946 					r = obj.Addrel(cursym)
   4947 					*r = rel
   4948 					r.Off = int32(p.Pc + int64(ab.Len()))
   4949 				}
   4950 
   4951 				ab.Put1(byte(v))
   4952 				if op > 1 {
   4953 					ab.Put1(byte(v >> 8))
   4954 					if op > 2 {
   4955 						ab.PutInt16(int16(v >> 16))
   4956 						if op > 4 {
   4957 							ab.PutInt32(int32(v >> 32))
   4958 						}
   4959 					}
   4960 				}
   4961 			}
   4962 
   4963 			return
   4964 		}
   4965 	}
   4966 	f3t = Ynone * Ymax
   4967 	if p.GetFrom3() != nil {
   4968 		f3t = oclass(ctxt, p, p.GetFrom3()) * Ymax
   4969 	}
   4970 	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
   4971 		var pp obj.Prog
   4972 		var t []byte
   4973 		if p.As == mo[0].as {
   4974 			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
   4975 				t = mo[0].op[:]
   4976 				switch mo[0].code {
   4977 				default:
   4978 					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
   4979 
   4980 				case movLit:
   4981 					for z = 0; t[z] != 0; z++ {
   4982 						ab.Put1(t[z])
   4983 					}
   4984 
   4985 				case movRegMem:
   4986 					ab.Put1(t[0])
   4987 					ab.asmando(ctxt, cursym, p, &p.To, int(t[1]))
   4988 
   4989 				case movMemReg:
   4990 					ab.Put1(t[0])
   4991 					ab.asmando(ctxt, cursym, p, &p.From, int(t[1]))
   4992 
   4993 				case movRegMem2op: // r,m - 2op
   4994 					ab.Put2(t[0], t[1])
   4995 					ab.asmando(ctxt, cursym, p, &p.To, int(t[2]))
   4996 					ab.rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
   4997 
   4998 				case movMemReg2op:
   4999 					ab.Put2(t[0], t[1])
   5000 					ab.asmando(ctxt, cursym, p, &p.From, int(t[2]))
   5001 					ab.rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
   5002 
   5003 				case movFullPtr:
   5004 					if t[0] != 0 {
   5005 						ab.Put1(t[0])
   5006 					}
   5007 					switch p.To.Index {
   5008 					default:
   5009 						goto bad
   5010 
   5011 					case REG_DS:
   5012 						ab.Put1(0xc5)
   5013 
   5014 					case REG_SS:
   5015 						ab.Put2(0x0f, 0xb2)
   5016 
   5017 					case REG_ES:
   5018 						ab.Put1(0xc4)
   5019 
   5020 					case REG_FS:
   5021 						ab.Put2(0x0f, 0xb4)
   5022 
   5023 					case REG_GS:
   5024 						ab.Put2(0x0f, 0xb5)
   5025 					}
   5026 
   5027 					ab.asmand(ctxt, cursym, p, &p.From, &p.To)
   5028 
   5029 				case movDoubleShift:
   5030 					if t[0] == Pw {
   5031 						if ctxt.Arch.Family != sys.AMD64 {
   5032 							ctxt.Diag("asmins: illegal 64: %v", p)
   5033 						}
   5034 						ab.rexflag |= Pw
   5035 						t = t[1:]
   5036 					} else if t[0] == Pe {
   5037 						ab.Put1(Pe)
   5038 						t = t[1:]
   5039 					}
   5040 
   5041 					switch p.From.Type {
   5042 					default:
   5043 						goto bad
   5044 
   5045 					case obj.TYPE_CONST:
   5046 						ab.Put2(0x0f, t[0])
   5047 						ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
   5048 						ab.Put1(byte(p.From.Offset))
   5049 
   5050 					case obj.TYPE_REG:
   5051 						switch p.From.Reg {
   5052 						default:
   5053 							goto bad
   5054 
   5055 						case REG_CL, REG_CX:
   5056 							ab.Put2(0x0f, t[1])
   5057 							ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
   5058 						}
   5059 					}
   5060 
   5061 				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
   5062 				// where you load the TLS base register into a register and then index off that
   5063 				// register to access the actual TLS variables. Systems that allow direct TLS access
   5064 				// are handled in prefixof above and should not be listed here.
   5065 				case movTLSReg:
   5066 					if ctxt.Arch.Family == sys.AMD64 && p.As != AMOVQ || ctxt.Arch.Family == sys.I386 && p.As != AMOVL {
   5067 						ctxt.Diag("invalid load of TLS: %v", p)
   5068 					}
   5069 
   5070 					if ctxt.Arch.Family == sys.I386 {
   5071 						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
   5072 						// where you load the TLS base register into a register and then index off that
   5073 						// register to access the actual TLS variables. Systems that allow direct TLS access
   5074 						// are handled in prefixof above and should not be listed here.
   5075 						switch ctxt.Headtype {
   5076 						default:
   5077 							log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
   5078 
   5079 						case objabi.Hlinux, objabi.Hfreebsd:
   5080 							if ctxt.Flag_shared {
   5081 								// Note that this is not generating the same insns as the other cases.
   5082 								//     MOV TLS, dst
   5083 								// becomes
   5084 								//     call __x86.get_pc_thunk.dst
   5085 								//     movl (gotpc + g@gotntpoff)(dst), dst
   5086 								// which is encoded as
   5087 								//     call __x86.get_pc_thunk.dst
   5088 								//     movq 0(dst), dst
   5089 								// and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
   5090 								// is g, which we can't check here, but will when we assemble the second
   5091 								// instruction.
   5092 								dst := p.To.Reg
   5093 								ab.Put1(0xe8)
   5094 								r = obj.Addrel(cursym)
   5095 								r.Off = int32(p.Pc + int64(ab.Len()))
   5096 								r.Type = objabi.R_CALL
   5097 								r.Siz = 4
   5098 								r.Sym = ctxt.Lookup("__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst))))
   5099 								ab.PutInt32(0)
   5100 
   5101 								ab.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3)))
   5102 								r = obj.Addrel(cursym)
   5103 								r.Off = int32(p.Pc + int64(ab.Len()))
   5104 								r.Type = objabi.R_TLS_IE
   5105 								r.Siz = 4
   5106 								r.Add = 2
   5107 								ab.PutInt32(0)
   5108 							} else {
   5109 								// ELF TLS base is 0(GS).
   5110 								pp.From = p.From
   5111 
   5112 								pp.From.Type = obj.TYPE_MEM
   5113 								pp.From.Reg = REG_GS
   5114 								pp.From.Offset = 0
   5115 								pp.From.Index = REG_NONE
   5116 								pp.From.Scale = 0
   5117 								ab.Put2(0x65, // GS
   5118 									0x8B)
   5119 								ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
   5120 							}
   5121 						case objabi.Hplan9:
   5122 							pp.From = obj.Addr{}
   5123 							pp.From.Type = obj.TYPE_MEM
   5124 							pp.From.Name = obj.NAME_EXTERN
   5125 							pp.From.Sym = plan9privates
   5126 							pp.From.Offset = 0
   5127 							pp.From.Index = REG_NONE
   5128 							ab.Put1(0x8B)
   5129 							ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
   5130 
   5131 						case objabi.Hwindows:
   5132 							// Windows TLS base is always 0x14(FS).
   5133 							pp.From = p.From
   5134 
   5135 							pp.From.Type = obj.TYPE_MEM
   5136 							pp.From.Reg = REG_FS
   5137 							pp.From.Offset = 0x14
   5138 							pp.From.Index = REG_NONE
   5139 							pp.From.Scale = 0
   5140 							ab.Put2(0x64, // FS
   5141 								0x8B)
   5142 							ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
   5143 						}
   5144 						break
   5145 					}
   5146 
   5147 					switch ctxt.Headtype {
   5148 					default:
   5149 						log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
   5150 
   5151 					case objabi.Hlinux, objabi.Hfreebsd:
   5152 						if !ctxt.Flag_shared {
   5153 							log.Fatalf("unknown TLS base location for linux/freebsd without -shared")
   5154 						}
   5155 						// Note that this is not generating the same insn as the other cases.
   5156 						//     MOV TLS, R_to
   5157 						// becomes
   5158 						//     movq g@gottpoff(%rip), R_to
   5159 						// which is encoded as
   5160 						//     movq 0(%rip), R_to
   5161 						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
   5162 						// is g, which we can't check here, but will when we assemble the second
   5163 						// instruction.
   5164 						ab.rexflag = Pw | (regrex[p.To.Reg] & Rxr)
   5165 
   5166 						ab.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3)))
   5167 						r = obj.Addrel(cursym)
   5168 						r.Off = int32(p.Pc + int64(ab.Len()))
   5169 						r.Type = objabi.R_TLS_IE
   5170 						r.Siz = 4
   5171 						r.Add = -4
   5172 						ab.PutInt32(0)
   5173 
   5174 					case objabi.Hplan9:
   5175 						pp.From = obj.Addr{}
   5176 						pp.From.Type = obj.TYPE_MEM
   5177 						pp.From.Name = obj.NAME_EXTERN
   5178 						pp.From.Sym = plan9privates
   5179 						pp.From.Offset = 0
   5180 						pp.From.Index = REG_NONE
   5181 						ab.rexflag |= Pw
   5182 						ab.Put1(0x8B)
   5183 						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
   5184 
   5185 					case objabi.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
   5186 						// TLS base is 0(FS).
   5187 						pp.From = p.From
   5188 
   5189 						pp.From.Type = obj.TYPE_MEM
   5190 						pp.From.Name = obj.NAME_NONE
   5191 						pp.From.Reg = REG_NONE
   5192 						pp.From.Offset = 0
   5193 						pp.From.Index = REG_NONE
   5194 						pp.From.Scale = 0
   5195 						ab.rexflag |= Pw
   5196 						ab.Put2(0x64, // FS
   5197 							0x8B)
   5198 						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
   5199 
   5200 					case objabi.Hwindows:
   5201 						// Windows TLS base is always 0x28(GS).
   5202 						pp.From = p.From
   5203 
   5204 						pp.From.Type = obj.TYPE_MEM
   5205 						pp.From.Name = obj.NAME_NONE
   5206 						pp.From.Reg = REG_GS
   5207 						pp.From.Offset = 0x28
   5208 						pp.From.Index = REG_NONE
   5209 						pp.From.Scale = 0
   5210 						ab.rexflag |= Pw
   5211 						ab.Put2(0x65, // GS
   5212 							0x8B)
   5213 						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
   5214 					}
   5215 				}
   5216 				return
   5217 			}
   5218 		}
   5219 	}
   5220 	goto bad
   5221 
   5222 bad:
   5223 	if ctxt.Arch.Family != sys.AMD64 {
   5224 		// here, the assembly has failed.
   5225 		// if it's a byte instruction that has
   5226 		// unaddressable registers, try to
   5227 		// exchange registers and reissue the
   5228 		// instruction with the operands renamed.
   5229 		pp := *p
   5230 
   5231 		unbytereg(&pp.From, &pp.Ft)
   5232 		unbytereg(&pp.To, &pp.Tt)
   5233 
   5234 		z := int(p.From.Reg)
   5235 		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
   5236 			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
   5237 			// For now, different to keep bit-for-bit compatibility.
   5238 			if ctxt.Arch.Family == sys.I386 {
   5239 				breg := byteswapreg(ctxt, &p.To)
   5240 				if breg != REG_AX {
   5241 					ab.Put1(0x87) // xchg lhs,bx
   5242 					ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
   5243 					subreg(&pp, z, breg)
   5244 					ab.doasm(ctxt, cursym, &pp)
   5245 					ab.Put1(0x87) // xchg lhs,bx
   5246 					ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
   5247 				} else {
   5248 					ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
   5249 					subreg(&pp, z, REG_AX)
   5250 					ab.doasm(ctxt, cursym, &pp)
   5251 					ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
   5252 				}
   5253 				return
   5254 			}
   5255 
   5256 			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
   5257 				// We certainly don't want to exchange
   5258 				// with AX if the op is MUL or DIV.
   5259 				ab.Put1(0x87) // xchg lhs,bx
   5260 				ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
   5261 				subreg(&pp, z, REG_BX)
   5262 				ab.doasm(ctxt, cursym, &pp)
   5263 				ab.Put1(0x87) // xchg lhs,bx
   5264 				ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
   5265 			} else {
   5266 				ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
   5267 				subreg(&pp, z, REG_AX)
   5268 				ab.doasm(ctxt, cursym, &pp)
   5269 				ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
   5270 			}
   5271 			return
   5272 		}
   5273 
   5274 		z = int(p.To.Reg)
   5275 		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
   5276 			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
   5277 			// For now, different to keep bit-for-bit compatibility.
   5278 			if ctxt.Arch.Family == sys.I386 {
   5279 				breg := byteswapreg(ctxt, &p.From)
   5280 				if breg != REG_AX {
   5281 					ab.Put1(0x87) //xchg rhs,bx
   5282 					ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
   5283 					subreg(&pp, z, breg)
   5284 					ab.doasm(ctxt, cursym, &pp)
   5285 					ab.Put1(0x87) // xchg rhs,bx
   5286 					ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
   5287 				} else {
   5288 					ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
   5289 					subreg(&pp, z, REG_AX)
   5290 					ab.doasm(ctxt, cursym, &pp)
   5291 					ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
   5292 				}
   5293 				return
   5294 			}
   5295 
   5296 			if isax(&p.From) {
   5297 				ab.Put1(0x87) // xchg rhs,bx
   5298 				ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
   5299 				subreg(&pp, z, REG_BX)
   5300 				ab.doasm(ctxt, cursym, &pp)
   5301 				ab.Put1(0x87) // xchg rhs,bx
   5302 				ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
   5303 			} else {
   5304 				ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
   5305 				subreg(&pp, z, REG_AX)
   5306 				ab.doasm(ctxt, cursym, &pp)
   5307 				ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
   5308 			}
   5309 			return
   5310 		}
   5311 	}
   5312 
   5313 	ctxt.Diag("invalid instruction: %v", p)
   5314 }
   5315 
   5316 // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
   5317 // which is not referenced in a.
   5318 // If a is empty, it returns BX to account for MULB-like instructions
   5319 // that might use DX and AX.
   5320 func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
   5321 	cana, canb, canc, cand := true, true, true, true
   5322 	if a.Type == obj.TYPE_NONE {
   5323 		cana, cand = false, false
   5324 	}
   5325 
   5326 	if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
   5327 		switch a.Reg {
   5328 		case REG_NONE:
   5329 			cana, cand = false, false
   5330 		case REG_AX, REG_AL, REG_AH:
   5331 			cana = false
   5332 		case REG_BX, REG_BL, REG_BH:
   5333 			canb = false
   5334 		case REG_CX, REG_CL, REG_CH:
   5335 			canc = false
   5336 		case REG_DX, REG_DL, REG_DH:
   5337 			cand = false
   5338 		}
   5339 	}
   5340 
   5341 	if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
   5342 		switch a.Index {
   5343 		case REG_AX:
   5344 			cana = false
   5345 		case REG_BX:
   5346 			canb = false
   5347 		case REG_CX:
   5348 			canc = false
   5349 		case REG_DX:
   5350 			cand = false
   5351 		}
   5352 	}
   5353 
   5354 	switch {
   5355 	case cana:
   5356 		return REG_AX
   5357 	case canb:
   5358 		return REG_BX
   5359 	case canc:
   5360 		return REG_CX
   5361 	case cand:
   5362 		return REG_DX
   5363 	default:
   5364 		ctxt.Diag("impossible byte register")
   5365 		ctxt.DiagFlush()
   5366 		log.Fatalf("bad code")
   5367 		return 0
   5368 	}
   5369 }
   5370 
   5371 func isbadbyte(a *obj.Addr) bool {
   5372 	return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
   5373 }
   5374 
   5375 func (ab *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
   5376 	ab.Reset()
   5377 
   5378 	ab.rexflag = 0
   5379 	ab.vexflag = false
   5380 	ab.evexflag = false
   5381 	mark := ab.Len()
   5382 	ab.doasm(ctxt, cursym, p)
   5383 	if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag {
   5384 		// as befits the whole approach of the architecture,
   5385 		// the rex prefix must appear before the first opcode byte
   5386 		// (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
   5387 		// before the 0f opcode escape!), or it might be ignored.
   5388 		// note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
   5389 		if ctxt.Arch.Family != sys.AMD64 {
   5390 			ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", ctxt.Arch.RegSize*8, p, p.Ft, p.Tt)
   5391 		}
   5392 		n := ab.Len()
   5393 		var np int
   5394 		for np = mark; np < n; np++ {
   5395 			c := ab.At(np)
   5396 			if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
   5397 				break
   5398 			}
   5399 		}
   5400 		ab.Insert(np, byte(0x40|ab.rexflag))
   5401 	}
   5402 
   5403 	n := ab.Len()
   5404 	for i := len(cursym.R) - 1; i >= 0; i-- {
   5405 		r := &cursym.R[i]
   5406 		if int64(r.Off) < p.Pc {
   5407 			break
   5408 		}
   5409 		if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag {
   5410 			r.Off++
   5411 		}
   5412 		if r.Type == objabi.R_PCREL {
   5413 			if ctxt.Arch.Family == sys.AMD64 || p.As == obj.AJMP || p.As == obj.ACALL {
   5414 				// PC-relative addressing is relative to the end of the instruction,
   5415 				// but the relocations applied by the linker are relative to the end
   5416 				// of the relocation. Because immediate instruction
   5417 				// arguments can follow the PC-relative memory reference in the
   5418 				// instruction encoding, the two may not coincide. In this case,
   5419 				// adjust addend so that linker can keep relocating relative to the
   5420 				// end of the relocation.
   5421 				r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
   5422 			} else if ctxt.Arch.Family == sys.I386 {
   5423 				// On 386 PC-relative addressing (for non-call/jmp instructions)
   5424 				// assumes that the previous instruction loaded the PC of the end
   5425 				// of that instruction into CX, so the adjustment is relative to
   5426 				// that.
   5427 				r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
   5428 			}
   5429 		}
   5430 		if r.Type == objabi.R_GOTPCREL && ctxt.Arch.Family == sys.I386 {
   5431 			// On 386, R_GOTPCREL makes the same assumptions as R_PCREL.
   5432 			r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
   5433 		}
   5434 
   5435 	}
   5436 }
   5437 
   5438 // unpackOps4 extracts 4 operands from p.
   5439 func unpackOps4(p *obj.Prog) (arg0, arg1, arg2, dst *obj.Addr) {
   5440 	return &p.From, &p.RestArgs[0], &p.RestArgs[1], &p.To
   5441 }
   5442 
   5443 // unpackOps5 extracts 5 operands from p.
   5444 func unpackOps5(p *obj.Prog) (arg0, arg1, arg2, arg3, dst *obj.Addr) {
   5445 	return &p.From, &p.RestArgs[0], &p.RestArgs[1], &p.RestArgs[2], &p.To
   5446 }