gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

encodeblock_amd64.s (512643B)


      1 // Code generated by command: go run gen.go -out ../encodeblock_amd64.s -stubs ../encodeblock_amd64.go -pkg=s2. DO NOT EDIT.
      2 
      3 //go:build !appengine && !noasm && gc && !noasm
      4 
      5 #include "textflag.h"
      6 
      7 // func _dummy_() -- no-op stub: its only instruction is RET. The preprocessor block placed inside it defines GOAMD64_v3 whenever GOAMD64_v4 is defined, so the `#ifdef GOAMD64_v3` sections later in this file are also selected for v4 builds.
      8 TEXT ·_dummy_(SB), $0 // declared with a zero-byte stack frame
      9 #ifdef GOAMD64_v4
     10 #ifndef GOAMD64_v3
     11 #define GOAMD64_v3
     12 #endif
     13 #endif
     14 	RET // nothing is loaded, stored or computed before returning
     15 
     16 // func encodeBlockAsm(dst []byte, src []byte) int
     17 // Requires: BMI, SSE2
     18 TEXT ·encodeBlockAsm(SB), $65560-56
     19 	MOVQ dst_base+0(FP), AX
     20 	MOVQ $0x00000200, CX
     21 	LEAQ 24(SP), DX
     22 	PXOR X0, X0
     23 
     24 zero_loop_encodeBlockAsm:
     25 	MOVOU X0, (DX)
     26 	MOVOU X0, 16(DX)
     27 	MOVOU X0, 32(DX)
     28 	MOVOU X0, 48(DX)
     29 	MOVOU X0, 64(DX)
     30 	MOVOU X0, 80(DX)
     31 	MOVOU X0, 96(DX)
     32 	MOVOU X0, 112(DX)
     33 	ADDQ  $0x80, DX
     34 	DECQ  CX
     35 	JNZ   zero_loop_encodeBlockAsm
     36 	MOVL  $0x00000000, 12(SP)
     37 	MOVQ  src_len+32(FP), CX
     38 	LEAQ  -9(CX), DX
     39 	LEAQ  -8(CX), BX
     40 	MOVL  BX, 8(SP)
     41 	SHRQ  $0x05, CX
     42 	SUBL  CX, DX
     43 	LEAQ  (AX)(DX*1), DX
     44 	MOVQ  DX, (SP)
     45 	MOVL  $0x00000001, CX
     46 	MOVL  CX, 16(SP)
     47 	MOVQ  src_base+24(FP), DX
     48 
     49 search_loop_encodeBlockAsm:
     50 	MOVL  CX, BX
     51 	SUBL  12(SP), BX
     52 	SHRL  $0x06, BX
     53 	LEAL  4(CX)(BX*1), BX
     54 	CMPL  BX, 8(SP)
     55 	JAE   emit_remainder_encodeBlockAsm
     56 	MOVQ  (DX)(CX*1), SI
     57 	MOVL  BX, 20(SP)
     58 	MOVQ  $0x0000cf1bbcdcbf9b, R8
     59 	MOVQ  SI, R9
     60 	MOVQ  SI, R10
     61 	SHRQ  $0x08, R10
     62 	SHLQ  $0x10, R9
     63 	IMULQ R8, R9
     64 	SHRQ  $0x32, R9
     65 	SHLQ  $0x10, R10
     66 	IMULQ R8, R10
     67 	SHRQ  $0x32, R10
     68 	MOVL  24(SP)(R9*4), BX
     69 	MOVL  24(SP)(R10*4), DI
     70 	MOVL  CX, 24(SP)(R9*4)
     71 	LEAL  1(CX), R9
     72 	MOVL  R9, 24(SP)(R10*4)
     73 	MOVQ  SI, R9
     74 	SHRQ  $0x10, R9
     75 	SHLQ  $0x10, R9
     76 	IMULQ R8, R9
     77 	SHRQ  $0x32, R9
     78 	MOVL  CX, R8
     79 	SUBL  16(SP), R8
     80 	MOVL  1(DX)(R8*1), R10
     81 	MOVQ  SI, R8
     82 	SHRQ  $0x08, R8
     83 	CMPL  R8, R10
     84 	JNE   no_repeat_found_encodeBlockAsm
     85 	LEAL  1(CX), SI
     86 	MOVL  12(SP), DI
     87 	MOVL  SI, BX
     88 	SUBL  16(SP), BX
     89 	JZ    repeat_extend_back_end_encodeBlockAsm
     90 
     91 repeat_extend_back_loop_encodeBlockAsm:
     92 	CMPL SI, DI
     93 	JBE  repeat_extend_back_end_encodeBlockAsm
     94 	MOVB -1(DX)(BX*1), R8
     95 	MOVB -1(DX)(SI*1), R9
     96 	CMPB R8, R9
     97 	JNE  repeat_extend_back_end_encodeBlockAsm
     98 	LEAL -1(SI), SI
     99 	DECL BX
    100 	JNZ  repeat_extend_back_loop_encodeBlockAsm
    101 
    102 repeat_extend_back_end_encodeBlockAsm:
    103 	MOVL 12(SP), BX
    104 	CMPL BX, SI
    105 	JEQ  emit_literal_done_repeat_emit_encodeBlockAsm
    106 	MOVL SI, R8
    107 	MOVL SI, 12(SP)
    108 	LEAQ (DX)(BX*1), R9
    109 	SUBL BX, R8
    110 	LEAL -1(R8), BX
    111 	CMPL BX, $0x3c
    112 	JB   one_byte_repeat_emit_encodeBlockAsm
    113 	CMPL BX, $0x00000100
    114 	JB   two_bytes_repeat_emit_encodeBlockAsm
    115 	CMPL BX, $0x00010000
    116 	JB   three_bytes_repeat_emit_encodeBlockAsm
    117 	CMPL BX, $0x01000000
    118 	JB   four_bytes_repeat_emit_encodeBlockAsm
    119 	MOVB $0xfc, (AX)
    120 	MOVL BX, 1(AX)
    121 	ADDQ $0x05, AX
    122 	JMP  memmove_long_repeat_emit_encodeBlockAsm
    123 
    124 four_bytes_repeat_emit_encodeBlockAsm:
    125 	MOVL BX, R10
    126 	SHRL $0x10, R10
    127 	MOVB $0xf8, (AX)
    128 	MOVW BX, 1(AX)
    129 	MOVB R10, 3(AX)
    130 	ADDQ $0x04, AX
    131 	JMP  memmove_long_repeat_emit_encodeBlockAsm
    132 
    133 three_bytes_repeat_emit_encodeBlockAsm:
    134 	MOVB $0xf4, (AX)
    135 	MOVW BX, 1(AX)
    136 	ADDQ $0x03, AX
    137 	JMP  memmove_long_repeat_emit_encodeBlockAsm
    138 
    139 two_bytes_repeat_emit_encodeBlockAsm:
    140 	MOVB $0xf0, (AX)
    141 	MOVB BL, 1(AX)
    142 	ADDQ $0x02, AX
    143 	CMPL BX, $0x40
    144 	JB   memmove_repeat_emit_encodeBlockAsm
    145 	JMP  memmove_long_repeat_emit_encodeBlockAsm
    146 
    147 one_byte_repeat_emit_encodeBlockAsm:
    148 	SHLB $0x02, BL
    149 	MOVB BL, (AX)
    150 	ADDQ $0x01, AX
    151 
    152 memmove_repeat_emit_encodeBlockAsm:
    153 	LEAQ (AX)(R8*1), BX
    154 
    155 	// genMemMoveShort
    156 	CMPQ R8, $0x08
    157 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8
    158 	CMPQ R8, $0x10
    159 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16
    160 	CMPQ R8, $0x20
    161 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32
    162 	JMP  emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64
    163 
    164 emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8:
    165 	MOVQ (R9), R10
    166 	MOVQ R10, (AX)
    167 	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm
    168 
    169 emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16:
    170 	MOVQ (R9), R10
    171 	MOVQ -8(R9)(R8*1), R9
    172 	MOVQ R10, (AX)
    173 	MOVQ R9, -8(AX)(R8*1)
    174 	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm
    175 
    176 emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32:
    177 	MOVOU (R9), X0
    178 	MOVOU -16(R9)(R8*1), X1
    179 	MOVOU X0, (AX)
    180 	MOVOU X1, -16(AX)(R8*1)
    181 	JMP   memmove_end_copy_repeat_emit_encodeBlockAsm
    182 
    183 emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64:
    184 	MOVOU (R9), X0
    185 	MOVOU 16(R9), X1
    186 	MOVOU -32(R9)(R8*1), X2
    187 	MOVOU -16(R9)(R8*1), X3
    188 	MOVOU X0, (AX)
    189 	MOVOU X1, 16(AX)
    190 	MOVOU X2, -32(AX)(R8*1)
    191 	MOVOU X3, -16(AX)(R8*1)
    192 
    193 memmove_end_copy_repeat_emit_encodeBlockAsm:
    194 	MOVQ BX, AX
    195 	JMP  emit_literal_done_repeat_emit_encodeBlockAsm
    196 
    197 memmove_long_repeat_emit_encodeBlockAsm:
    198 	LEAQ (AX)(R8*1), BX
    199 
    200 	// genMemMoveLong
    201 	MOVOU (R9), X0
    202 	MOVOU 16(R9), X1
    203 	MOVOU -32(R9)(R8*1), X2
    204 	MOVOU -16(R9)(R8*1), X3
    205 	MOVQ  R8, R11
    206 	SHRQ  $0x05, R11
    207 	MOVQ  AX, R10
    208 	ANDL  $0x0000001f, R10
    209 	MOVQ  $0x00000040, R12
    210 	SUBQ  R10, R12
    211 	DECQ  R11
    212 	JA    emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32
    213 	LEAQ  -32(R9)(R12*1), R10
    214 	LEAQ  -32(AX)(R12*1), R13
    215 
    216 emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back:
    217 	MOVOU (R10), X4
    218 	MOVOU 16(R10), X5
    219 	MOVOA X4, (R13)
    220 	MOVOA X5, 16(R13)
    221 	ADDQ  $0x20, R13
    222 	ADDQ  $0x20, R10
    223 	ADDQ  $0x20, R12
    224 	DECQ  R11
    225 	JNA   emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back
    226 
    227 emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32:
    228 	MOVOU -32(R9)(R12*1), X4
    229 	MOVOU -16(R9)(R12*1), X5
    230 	MOVOA X4, -32(AX)(R12*1)
    231 	MOVOA X5, -16(AX)(R12*1)
    232 	ADDQ  $0x20, R12
    233 	CMPQ  R8, R12
    234 	JAE   emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32
    235 	MOVOU X0, (AX)
    236 	MOVOU X1, 16(AX)
    237 	MOVOU X2, -32(AX)(R8*1)
    238 	MOVOU X3, -16(AX)(R8*1)
    239 	MOVQ  BX, AX
    240 
    241 emit_literal_done_repeat_emit_encodeBlockAsm:
    242 	ADDL $0x05, CX
    243 	MOVL CX, BX
    244 	SUBL 16(SP), BX
    245 	MOVQ src_len+32(FP), R8
    246 	SUBL CX, R8
    247 	LEAQ (DX)(CX*1), R9
    248 	LEAQ (DX)(BX*1), BX
    249 
    250 	// matchLen
    251 	XORL R11, R11
    252 	CMPL R8, $0x08
    253 	JB   matchlen_match4_repeat_extend_encodeBlockAsm
    254 
    255 matchlen_loopback_repeat_extend_encodeBlockAsm:
    256 	MOVQ  (R9)(R11*1), R10
    257 	XORQ  (BX)(R11*1), R10
    258 	TESTQ R10, R10
    259 	JZ    matchlen_loop_repeat_extend_encodeBlockAsm
    260 
    261 #ifdef GOAMD64_v3
    262 	TZCNTQ R10, R10
    263 
    264 #else
    265 	BSFQ R10, R10
    266 
    267 #endif
    268 	SARQ $0x03, R10
    269 	LEAL (R11)(R10*1), R11
    270 	JMP  repeat_extend_forward_end_encodeBlockAsm
    271 
    272 matchlen_loop_repeat_extend_encodeBlockAsm:
    273 	LEAL -8(R8), R8
    274 	LEAL 8(R11), R11
    275 	CMPL R8, $0x08
    276 	JAE  matchlen_loopback_repeat_extend_encodeBlockAsm
    277 	JZ   repeat_extend_forward_end_encodeBlockAsm
    278 
    279 matchlen_match4_repeat_extend_encodeBlockAsm:
    280 	CMPL R8, $0x04
    281 	JB   matchlen_match2_repeat_extend_encodeBlockAsm
    282 	MOVL (R9)(R11*1), R10
    283 	CMPL (BX)(R11*1), R10
    284 	JNE  matchlen_match2_repeat_extend_encodeBlockAsm
    285 	SUBL $0x04, R8
    286 	LEAL 4(R11), R11
    287 
    288 matchlen_match2_repeat_extend_encodeBlockAsm:
    289 	CMPL R8, $0x02
    290 	JB   matchlen_match1_repeat_extend_encodeBlockAsm
    291 	MOVW (R9)(R11*1), R10
    292 	CMPW (BX)(R11*1), R10
    293 	JNE  matchlen_match1_repeat_extend_encodeBlockAsm
    294 	SUBL $0x02, R8
    295 	LEAL 2(R11), R11
    296 
    297 matchlen_match1_repeat_extend_encodeBlockAsm:
    298 	CMPL R8, $0x01
    299 	JB   repeat_extend_forward_end_encodeBlockAsm
    300 	MOVB (R9)(R11*1), R10
    301 	CMPB (BX)(R11*1), R10
    302 	JNE  repeat_extend_forward_end_encodeBlockAsm
    303 	LEAL 1(R11), R11
    304 
    305 repeat_extend_forward_end_encodeBlockAsm:
    306 	ADDL  R11, CX
    307 	MOVL  CX, BX
    308 	SUBL  SI, BX
    309 	MOVL  16(SP), SI
    310 	TESTL DI, DI
    311 	JZ    repeat_as_copy_encodeBlockAsm
    312 
    313 	// emitRepeat
    314 emit_repeat_again_match_repeat_encodeBlockAsm:
    315 	MOVL BX, DI
    316 	LEAL -4(BX), BX
    317 	CMPL DI, $0x08
    318 	JBE  repeat_two_match_repeat_encodeBlockAsm
    319 	CMPL DI, $0x0c
    320 	JAE  cant_repeat_two_offset_match_repeat_encodeBlockAsm
    321 	CMPL SI, $0x00000800
    322 	JB   repeat_two_offset_match_repeat_encodeBlockAsm
    323 
    324 cant_repeat_two_offset_match_repeat_encodeBlockAsm:
    325 	CMPL BX, $0x00000104
    326 	JB   repeat_three_match_repeat_encodeBlockAsm
    327 	CMPL BX, $0x00010100
    328 	JB   repeat_four_match_repeat_encodeBlockAsm
    329 	CMPL BX, $0x0100ffff
    330 	JB   repeat_five_match_repeat_encodeBlockAsm
    331 	LEAL -16842747(BX), BX
    332 	MOVL $0xfffb001d, (AX)
    333 	MOVB $0xff, 4(AX)
    334 	ADDQ $0x05, AX
    335 	JMP  emit_repeat_again_match_repeat_encodeBlockAsm
    336 
    337 repeat_five_match_repeat_encodeBlockAsm:
    338 	LEAL -65536(BX), BX
    339 	MOVL BX, SI
    340 	MOVW $0x001d, (AX)
    341 	MOVW BX, 2(AX)
    342 	SARL $0x10, SI
    343 	MOVB SI, 4(AX)
    344 	ADDQ $0x05, AX
    345 	JMP  repeat_end_emit_encodeBlockAsm
    346 
    347 repeat_four_match_repeat_encodeBlockAsm:
    348 	LEAL -256(BX), BX
    349 	MOVW $0x0019, (AX)
    350 	MOVW BX, 2(AX)
    351 	ADDQ $0x04, AX
    352 	JMP  repeat_end_emit_encodeBlockAsm
    353 
    354 repeat_three_match_repeat_encodeBlockAsm:
    355 	LEAL -4(BX), BX
    356 	MOVW $0x0015, (AX)
    357 	MOVB BL, 2(AX)
    358 	ADDQ $0x03, AX
    359 	JMP  repeat_end_emit_encodeBlockAsm
    360 
    361 repeat_two_match_repeat_encodeBlockAsm:
    362 	SHLL $0x02, BX
    363 	ORL  $0x01, BX
    364 	MOVW BX, (AX)
    365 	ADDQ $0x02, AX
    366 	JMP  repeat_end_emit_encodeBlockAsm
    367 
    368 repeat_two_offset_match_repeat_encodeBlockAsm:
    369 	XORQ DI, DI
    370 	LEAL 1(DI)(BX*4), BX
    371 	MOVB SI, 1(AX)
    372 	SARL $0x08, SI
    373 	SHLL $0x05, SI
    374 	ORL  SI, BX
    375 	MOVB BL, (AX)
    376 	ADDQ $0x02, AX
    377 	JMP  repeat_end_emit_encodeBlockAsm
    378 
    379 repeat_as_copy_encodeBlockAsm:
    380 	// emitCopy
    381 	CMPL SI, $0x00010000
    382 	JB   two_byte_offset_repeat_as_copy_encodeBlockAsm
    383 	CMPL BX, $0x40
    384 	JBE  four_bytes_remain_repeat_as_copy_encodeBlockAsm
    385 	MOVB $0xff, (AX)
    386 	MOVL SI, 1(AX)
    387 	LEAL -64(BX), BX
    388 	ADDQ $0x05, AX
    389 	CMPL BX, $0x04
    390 	JB   four_bytes_remain_repeat_as_copy_encodeBlockAsm
    391 
    392 	// emitRepeat
    393 emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy:
    394 	MOVL BX, DI
    395 	LEAL -4(BX), BX
    396 	CMPL DI, $0x08
    397 	JBE  repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy
    398 	CMPL DI, $0x0c
    399 	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy
    400 	CMPL SI, $0x00000800
    401 	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy
    402 
    403 cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy:
    404 	CMPL BX, $0x00000104
    405 	JB   repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy
    406 	CMPL BX, $0x00010100
    407 	JB   repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy
    408 	CMPL BX, $0x0100ffff
    409 	JB   repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy
    410 	LEAL -16842747(BX), BX
    411 	MOVL $0xfffb001d, (AX)
    412 	MOVB $0xff, 4(AX)
    413 	ADDQ $0x05, AX
    414 	JMP  emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy
    415 
    416 repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy:
    417 	LEAL -65536(BX), BX
    418 	MOVL BX, SI
    419 	MOVW $0x001d, (AX)
    420 	MOVW BX, 2(AX)
    421 	SARL $0x10, SI
    422 	MOVB SI, 4(AX)
    423 	ADDQ $0x05, AX
    424 	JMP  repeat_end_emit_encodeBlockAsm
    425 
    426 repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy:
    427 	LEAL -256(BX), BX
    428 	MOVW $0x0019, (AX)
    429 	MOVW BX, 2(AX)
    430 	ADDQ $0x04, AX
    431 	JMP  repeat_end_emit_encodeBlockAsm
    432 
    433 repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy:
    434 	LEAL -4(BX), BX
    435 	MOVW $0x0015, (AX)
    436 	MOVB BL, 2(AX)
    437 	ADDQ $0x03, AX
    438 	JMP  repeat_end_emit_encodeBlockAsm
    439 
    440 repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy:
    441 	SHLL $0x02, BX
    442 	ORL  $0x01, BX
    443 	MOVW BX, (AX)
    444 	ADDQ $0x02, AX
    445 	JMP  repeat_end_emit_encodeBlockAsm
    446 
    447 repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy:
    448 	XORQ DI, DI
    449 	LEAL 1(DI)(BX*4), BX
    450 	MOVB SI, 1(AX)
    451 	SARL $0x08, SI
    452 	SHLL $0x05, SI
    453 	ORL  SI, BX
    454 	MOVB BL, (AX)
    455 	ADDQ $0x02, AX
    456 	JMP  repeat_end_emit_encodeBlockAsm
    457 
    458 four_bytes_remain_repeat_as_copy_encodeBlockAsm:
    459 	TESTL BX, BX
    460 	JZ    repeat_end_emit_encodeBlockAsm
    461 	XORL  DI, DI
    462 	LEAL  -1(DI)(BX*4), BX
    463 	MOVB  BL, (AX)
    464 	MOVL  SI, 1(AX)
    465 	ADDQ  $0x05, AX
    466 	JMP   repeat_end_emit_encodeBlockAsm
    467 
    468 two_byte_offset_repeat_as_copy_encodeBlockAsm:
    469 	CMPL BX, $0x40
    470 	JBE  two_byte_offset_short_repeat_as_copy_encodeBlockAsm
    471 	CMPL SI, $0x00000800
    472 	JAE  long_offset_short_repeat_as_copy_encodeBlockAsm
    473 	MOVL $0x00000001, DI
    474 	LEAL 16(DI), DI
    475 	MOVB SI, 1(AX)
    476 	MOVL SI, R8
    477 	SHRL $0x08, R8
    478 	SHLL $0x05, R8
    479 	ORL  R8, DI
    480 	MOVB DI, (AX)
    481 	ADDQ $0x02, AX
    482 	SUBL $0x08, BX
    483 
    484 	// emitRepeat
    485 	LEAL -4(BX), BX
    486 	JMP  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
    487 
    488 emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
    489 	MOVL BX, DI
    490 	LEAL -4(BX), BX
    491 	CMPL DI, $0x08
    492 	JBE  repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
    493 	CMPL DI, $0x0c
    494 	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
    495 	CMPL SI, $0x00000800
    496 	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
    497 
    498 cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
    499 	CMPL BX, $0x00000104
    500 	JB   repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
    501 	CMPL BX, $0x00010100
    502 	JB   repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
    503 	CMPL BX, $0x0100ffff
    504 	JB   repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
    505 	LEAL -16842747(BX), BX
    506 	MOVL $0xfffb001d, (AX)
    507 	MOVB $0xff, 4(AX)
    508 	ADDQ $0x05, AX
    509 	JMP  emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
    510 
    511 repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
    512 	LEAL -65536(BX), BX
    513 	MOVL BX, SI
    514 	MOVW $0x001d, (AX)
    515 	MOVW BX, 2(AX)
    516 	SARL $0x10, SI
    517 	MOVB SI, 4(AX)
    518 	ADDQ $0x05, AX
    519 	JMP  repeat_end_emit_encodeBlockAsm
    520 
    521 repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
    522 	LEAL -256(BX), BX
    523 	MOVW $0x0019, (AX)
    524 	MOVW BX, 2(AX)
    525 	ADDQ $0x04, AX
    526 	JMP  repeat_end_emit_encodeBlockAsm
    527 
    528 repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
    529 	LEAL -4(BX), BX
    530 	MOVW $0x0015, (AX)
    531 	MOVB BL, 2(AX)
    532 	ADDQ $0x03, AX
    533 	JMP  repeat_end_emit_encodeBlockAsm
    534 
    535 repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
    536 	SHLL $0x02, BX
    537 	ORL  $0x01, BX
    538 	MOVW BX, (AX)
    539 	ADDQ $0x02, AX
    540 	JMP  repeat_end_emit_encodeBlockAsm
    541 
    542 repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
    543 	XORQ DI, DI
    544 	LEAL 1(DI)(BX*4), BX
    545 	MOVB SI, 1(AX)
    546 	SARL $0x08, SI
    547 	SHLL $0x05, SI
    548 	ORL  SI, BX
    549 	MOVB BL, (AX)
    550 	ADDQ $0x02, AX
    551 	JMP  repeat_end_emit_encodeBlockAsm
    552 
    553 long_offset_short_repeat_as_copy_encodeBlockAsm:
    554 	MOVB $0xee, (AX)
    555 	MOVW SI, 1(AX)
    556 	LEAL -60(BX), BX
    557 	ADDQ $0x03, AX
    558 
    559 	// emitRepeat
    560 emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short:
    561 	MOVL BX, DI
    562 	LEAL -4(BX), BX
    563 	CMPL DI, $0x08
    564 	JBE  repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short
    565 	CMPL DI, $0x0c
    566 	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short
    567 	CMPL SI, $0x00000800
    568 	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short
    569 
    570 cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short:
    571 	CMPL BX, $0x00000104
    572 	JB   repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short
    573 	CMPL BX, $0x00010100
    574 	JB   repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short
    575 	CMPL BX, $0x0100ffff
    576 	JB   repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short
    577 	LEAL -16842747(BX), BX
    578 	MOVL $0xfffb001d, (AX)
    579 	MOVB $0xff, 4(AX)
    580 	ADDQ $0x05, AX
    581 	JMP  emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short
    582 
    583 repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short:
    584 	LEAL -65536(BX), BX
    585 	MOVL BX, SI
    586 	MOVW $0x001d, (AX)
    587 	MOVW BX, 2(AX)
    588 	SARL $0x10, SI
    589 	MOVB SI, 4(AX)
    590 	ADDQ $0x05, AX
    591 	JMP  repeat_end_emit_encodeBlockAsm
    592 
    593 repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short:
    594 	LEAL -256(BX), BX
    595 	MOVW $0x0019, (AX)
    596 	MOVW BX, 2(AX)
    597 	ADDQ $0x04, AX
    598 	JMP  repeat_end_emit_encodeBlockAsm
    599 
    600 repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short:
    601 	LEAL -4(BX), BX
    602 	MOVW $0x0015, (AX)
    603 	MOVB BL, 2(AX)
    604 	ADDQ $0x03, AX
    605 	JMP  repeat_end_emit_encodeBlockAsm
    606 
    607 repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short:
    608 	SHLL $0x02, BX
    609 	ORL  $0x01, BX
    610 	MOVW BX, (AX)
    611 	ADDQ $0x02, AX
    612 	JMP  repeat_end_emit_encodeBlockAsm
    613 
    614 repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short:
    615 	XORQ DI, DI
    616 	LEAL 1(DI)(BX*4), BX
    617 	MOVB SI, 1(AX)
    618 	SARL $0x08, SI
    619 	SHLL $0x05, SI
    620 	ORL  SI, BX
    621 	MOVB BL, (AX)
    622 	ADDQ $0x02, AX
    623 	JMP  repeat_end_emit_encodeBlockAsm
    624 
    625 two_byte_offset_short_repeat_as_copy_encodeBlockAsm:
    626 	MOVL BX, DI
    627 	SHLL $0x02, DI
    628 	CMPL BX, $0x0c
    629 	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm
    630 	CMPL SI, $0x00000800
    631 	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm
    632 	LEAL -15(DI), DI
    633 	MOVB SI, 1(AX)
    634 	SHRL $0x08, SI
    635 	SHLL $0x05, SI
    636 	ORL  SI, DI
    637 	MOVB DI, (AX)
    638 	ADDQ $0x02, AX
    639 	JMP  repeat_end_emit_encodeBlockAsm
    640 
    641 emit_copy_three_repeat_as_copy_encodeBlockAsm:
    642 	LEAL -2(DI), DI
    643 	MOVB DI, (AX)
    644 	MOVW SI, 1(AX)
    645 	ADDQ $0x03, AX
    646 
    647 repeat_end_emit_encodeBlockAsm:
    648 	MOVL CX, 12(SP)
    649 	JMP  search_loop_encodeBlockAsm
    650 
    651 no_repeat_found_encodeBlockAsm:
    652 	CMPL (DX)(BX*1), SI
    653 	JEQ  candidate_match_encodeBlockAsm
    654 	SHRQ $0x08, SI
    655 	MOVL 24(SP)(R9*4), BX
    656 	LEAL 2(CX), R8
    657 	CMPL (DX)(DI*1), SI
    658 	JEQ  candidate2_match_encodeBlockAsm
    659 	MOVL R8, 24(SP)(R9*4)
    660 	SHRQ $0x08, SI
    661 	CMPL (DX)(BX*1), SI
    662 	JEQ  candidate3_match_encodeBlockAsm
    663 	MOVL 20(SP), CX
    664 	JMP  search_loop_encodeBlockAsm
    665 
    666 candidate3_match_encodeBlockAsm:
    667 	ADDL $0x02, CX
    668 	JMP  candidate_match_encodeBlockAsm
    669 
    670 candidate2_match_encodeBlockAsm:
    671 	MOVL R8, 24(SP)(R9*4)
    672 	INCL CX
    673 	MOVL DI, BX
    674 
    675 candidate_match_encodeBlockAsm:
    676 	MOVL  12(SP), SI
    677 	TESTL BX, BX
    678 	JZ    match_extend_back_end_encodeBlockAsm
    679 
    680 match_extend_back_loop_encodeBlockAsm:
    681 	CMPL CX, SI
    682 	JBE  match_extend_back_end_encodeBlockAsm
    683 	MOVB -1(DX)(BX*1), DI
    684 	MOVB -1(DX)(CX*1), R8
    685 	CMPB DI, R8
    686 	JNE  match_extend_back_end_encodeBlockAsm
    687 	LEAL -1(CX), CX
    688 	DECL BX
    689 	JZ   match_extend_back_end_encodeBlockAsm
    690 	JMP  match_extend_back_loop_encodeBlockAsm
    691 
    692 match_extend_back_end_encodeBlockAsm:
    693 	MOVL CX, SI
    694 	SUBL 12(SP), SI
    695 	LEAQ 5(AX)(SI*1), SI
    696 	CMPQ SI, (SP)
    697 	JB   match_dst_size_check_encodeBlockAsm
    698 	MOVQ $0x00000000, ret+48(FP)
    699 	RET
    700 
    701 match_dst_size_check_encodeBlockAsm:
    702 	MOVL CX, SI
    703 	MOVL 12(SP), DI
    704 	CMPL DI, SI
    705 	JEQ  emit_literal_done_match_emit_encodeBlockAsm
    706 	MOVL SI, R8
    707 	MOVL SI, 12(SP)
    708 	LEAQ (DX)(DI*1), SI
    709 	SUBL DI, R8
    710 	LEAL -1(R8), DI
    711 	CMPL DI, $0x3c
    712 	JB   one_byte_match_emit_encodeBlockAsm
    713 	CMPL DI, $0x00000100
    714 	JB   two_bytes_match_emit_encodeBlockAsm
    715 	CMPL DI, $0x00010000
    716 	JB   three_bytes_match_emit_encodeBlockAsm
    717 	CMPL DI, $0x01000000
    718 	JB   four_bytes_match_emit_encodeBlockAsm
    719 	MOVB $0xfc, (AX)
    720 	MOVL DI, 1(AX)
    721 	ADDQ $0x05, AX
    722 	JMP  memmove_long_match_emit_encodeBlockAsm
    723 
    724 four_bytes_match_emit_encodeBlockAsm:
    725 	MOVL DI, R9
    726 	SHRL $0x10, R9
    727 	MOVB $0xf8, (AX)
    728 	MOVW DI, 1(AX)
    729 	MOVB R9, 3(AX)
    730 	ADDQ $0x04, AX
    731 	JMP  memmove_long_match_emit_encodeBlockAsm
    732 
    733 three_bytes_match_emit_encodeBlockAsm:
    734 	MOVB $0xf4, (AX)
    735 	MOVW DI, 1(AX)
    736 	ADDQ $0x03, AX
    737 	JMP  memmove_long_match_emit_encodeBlockAsm
    738 
    739 two_bytes_match_emit_encodeBlockAsm:
    740 	MOVB $0xf0, (AX)
    741 	MOVB DI, 1(AX)
    742 	ADDQ $0x02, AX
    743 	CMPL DI, $0x40
    744 	JB   memmove_match_emit_encodeBlockAsm
    745 	JMP  memmove_long_match_emit_encodeBlockAsm
    746 
    747 one_byte_match_emit_encodeBlockAsm:
    748 	SHLB $0x02, DI
    749 	MOVB DI, (AX)
    750 	ADDQ $0x01, AX
    751 
    752 memmove_match_emit_encodeBlockAsm:
    753 	LEAQ (AX)(R8*1), DI
    754 
    755 	// genMemMoveShort
    756 	CMPQ R8, $0x08
    757 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8
    758 	CMPQ R8, $0x10
    759 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16
    760 	CMPQ R8, $0x20
    761 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32
    762 	JMP  emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64
    763 
    764 emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8:
    765 	MOVQ (SI), R9
    766 	MOVQ R9, (AX)
    767 	JMP  memmove_end_copy_match_emit_encodeBlockAsm
    768 
    769 emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16:
    770 	MOVQ (SI), R9
    771 	MOVQ -8(SI)(R8*1), SI
    772 	MOVQ R9, (AX)
    773 	MOVQ SI, -8(AX)(R8*1)
    774 	JMP  memmove_end_copy_match_emit_encodeBlockAsm
    775 
    776 emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32:
    777 	MOVOU (SI), X0
    778 	MOVOU -16(SI)(R8*1), X1
    779 	MOVOU X0, (AX)
    780 	MOVOU X1, -16(AX)(R8*1)
    781 	JMP   memmove_end_copy_match_emit_encodeBlockAsm
    782 
    783 emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64:
    784 	MOVOU (SI), X0
    785 	MOVOU 16(SI), X1
    786 	MOVOU -32(SI)(R8*1), X2
    787 	MOVOU -16(SI)(R8*1), X3
    788 	MOVOU X0, (AX)
    789 	MOVOU X1, 16(AX)
    790 	MOVOU X2, -32(AX)(R8*1)
    791 	MOVOU X3, -16(AX)(R8*1)
    792 
    793 memmove_end_copy_match_emit_encodeBlockAsm:
    794 	MOVQ DI, AX
    795 	JMP  emit_literal_done_match_emit_encodeBlockAsm
    796 
    797 memmove_long_match_emit_encodeBlockAsm:
    798 	LEAQ (AX)(R8*1), DI
    799 
    800 	// genMemMoveLong
    801 	MOVOU (SI), X0
    802 	MOVOU 16(SI), X1
    803 	MOVOU -32(SI)(R8*1), X2
    804 	MOVOU -16(SI)(R8*1), X3
    805 	MOVQ  R8, R10
    806 	SHRQ  $0x05, R10
    807 	MOVQ  AX, R9
    808 	ANDL  $0x0000001f, R9
    809 	MOVQ  $0x00000040, R11
    810 	SUBQ  R9, R11
    811 	DECQ  R10
    812 	JA    emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32
    813 	LEAQ  -32(SI)(R11*1), R9
    814 	LEAQ  -32(AX)(R11*1), R12
    815 
    816 emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back:
    817 	MOVOU (R9), X4
    818 	MOVOU 16(R9), X5
    819 	MOVOA X4, (R12)
    820 	MOVOA X5, 16(R12)
    821 	ADDQ  $0x20, R12
    822 	ADDQ  $0x20, R9
    823 	ADDQ  $0x20, R11
    824 	DECQ  R10
    825 	JNA   emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back
    826 
    827 emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32:
    828 	MOVOU -32(SI)(R11*1), X4
    829 	MOVOU -16(SI)(R11*1), X5
    830 	MOVOA X4, -32(AX)(R11*1)
    831 	MOVOA X5, -16(AX)(R11*1)
    832 	ADDQ  $0x20, R11
    833 	CMPQ  R8, R11
    834 	JAE   emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32
    835 	MOVOU X0, (AX)
    836 	MOVOU X1, 16(AX)
    837 	MOVOU X2, -32(AX)(R8*1)
    838 	MOVOU X3, -16(AX)(R8*1)
    839 	MOVQ  DI, AX
    840 
    841 emit_literal_done_match_emit_encodeBlockAsm:
    842 match_nolit_loop_encodeBlockAsm:
    843 	MOVL CX, SI
    844 	SUBL BX, SI
    845 	MOVL SI, 16(SP)
    846 	ADDL $0x04, CX
    847 	ADDL $0x04, BX
    848 	MOVQ src_len+32(FP), SI
    849 	SUBL CX, SI
    850 	LEAQ (DX)(CX*1), DI
    851 	LEAQ (DX)(BX*1), BX
    852 
    853 	// matchLen
    854 	XORL R9, R9
    855 	CMPL SI, $0x08
    856 	JB   matchlen_match4_match_nolit_encodeBlockAsm
    857 
    858 matchlen_loopback_match_nolit_encodeBlockAsm:
    859 	MOVQ  (DI)(R9*1), R8
    860 	XORQ  (BX)(R9*1), R8
    861 	TESTQ R8, R8
    862 	JZ    matchlen_loop_match_nolit_encodeBlockAsm
    863 
    864 #ifdef GOAMD64_v3
    865 	TZCNTQ R8, R8
    866 
    867 #else
    868 	BSFQ R8, R8
    869 
    870 #endif
    871 	SARQ $0x03, R8
    872 	LEAL (R9)(R8*1), R9
    873 	JMP  match_nolit_end_encodeBlockAsm
    874 
    875 matchlen_loop_match_nolit_encodeBlockAsm:
    876 	LEAL -8(SI), SI
    877 	LEAL 8(R9), R9
    878 	CMPL SI, $0x08
    879 	JAE  matchlen_loopback_match_nolit_encodeBlockAsm
    880 	JZ   match_nolit_end_encodeBlockAsm
    881 
    882 matchlen_match4_match_nolit_encodeBlockAsm:
    883 	CMPL SI, $0x04
    884 	JB   matchlen_match2_match_nolit_encodeBlockAsm
    885 	MOVL (DI)(R9*1), R8
    886 	CMPL (BX)(R9*1), R8
    887 	JNE  matchlen_match2_match_nolit_encodeBlockAsm
    888 	SUBL $0x04, SI
    889 	LEAL 4(R9), R9
    890 
    891 matchlen_match2_match_nolit_encodeBlockAsm:
    892 	CMPL SI, $0x02
    893 	JB   matchlen_match1_match_nolit_encodeBlockAsm
    894 	MOVW (DI)(R9*1), R8
    895 	CMPW (BX)(R9*1), R8
    896 	JNE  matchlen_match1_match_nolit_encodeBlockAsm
    897 	SUBL $0x02, SI
    898 	LEAL 2(R9), R9
    899 
    900 matchlen_match1_match_nolit_encodeBlockAsm:
    901 	CMPL SI, $0x01
    902 	JB   match_nolit_end_encodeBlockAsm
    903 	MOVB (DI)(R9*1), R8
    904 	CMPB (BX)(R9*1), R8
    905 	JNE  match_nolit_end_encodeBlockAsm
    906 	LEAL 1(R9), R9
    907 
    908 match_nolit_end_encodeBlockAsm:
    909 	ADDL R9, CX
    910 	MOVL 16(SP), BX
    911 	ADDL $0x04, R9
    912 	MOVL CX, 12(SP)
    913 
    914 	// emitCopy
    915 	CMPL BX, $0x00010000
    916 	JB   two_byte_offset_match_nolit_encodeBlockAsm
    917 	CMPL R9, $0x40
    918 	JBE  four_bytes_remain_match_nolit_encodeBlockAsm
    919 	MOVB $0xff, (AX)
    920 	MOVL BX, 1(AX)
    921 	LEAL -64(R9), R9
    922 	ADDQ $0x05, AX
    923 	CMPL R9, $0x04
    924 	JB   four_bytes_remain_match_nolit_encodeBlockAsm
    925 
    926 	// emitRepeat
    927 emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy:
    928 	MOVL R9, SI
    929 	LEAL -4(R9), R9
    930 	CMPL SI, $0x08
    931 	JBE  repeat_two_match_nolit_encodeBlockAsm_emit_copy
    932 	CMPL SI, $0x0c
    933 	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy
    934 	CMPL BX, $0x00000800
    935 	JB   repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy
    936 
    937 cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy:
    938 	CMPL R9, $0x00000104
    939 	JB   repeat_three_match_nolit_encodeBlockAsm_emit_copy
    940 	CMPL R9, $0x00010100
    941 	JB   repeat_four_match_nolit_encodeBlockAsm_emit_copy
    942 	CMPL R9, $0x0100ffff
    943 	JB   repeat_five_match_nolit_encodeBlockAsm_emit_copy
    944 	LEAL -16842747(R9), R9
    945 	MOVL $0xfffb001d, (AX)
    946 	MOVB $0xff, 4(AX)
    947 	ADDQ $0x05, AX
    948 	JMP  emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy
    949 
    950 repeat_five_match_nolit_encodeBlockAsm_emit_copy:
    951 	LEAL -65536(R9), R9
    952 	MOVL R9, BX
    953 	MOVW $0x001d, (AX)
    954 	MOVW R9, 2(AX)
    955 	SARL $0x10, BX
    956 	MOVB BL, 4(AX)
    957 	ADDQ $0x05, AX
    958 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
    959 
    960 repeat_four_match_nolit_encodeBlockAsm_emit_copy:
    961 	LEAL -256(R9), R9
    962 	MOVW $0x0019, (AX)
    963 	MOVW R9, 2(AX)
    964 	ADDQ $0x04, AX
    965 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
    966 
    967 repeat_three_match_nolit_encodeBlockAsm_emit_copy:
    968 	LEAL -4(R9), R9
    969 	MOVW $0x0015, (AX)
    970 	MOVB R9, 2(AX)
    971 	ADDQ $0x03, AX
    972 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
    973 
    974 repeat_two_match_nolit_encodeBlockAsm_emit_copy:
    975 	SHLL $0x02, R9
    976 	ORL  $0x01, R9
    977 	MOVW R9, (AX)
    978 	ADDQ $0x02, AX
    979 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
    980 
    981 repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy:
    982 	XORQ SI, SI
    983 	LEAL 1(SI)(R9*4), R9
    984 	MOVB BL, 1(AX)
    985 	SARL $0x08, BX
    986 	SHLL $0x05, BX
    987 	ORL  BX, R9
    988 	MOVB R9, (AX)
    989 	ADDQ $0x02, AX
    990 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
    991 
    992 four_bytes_remain_match_nolit_encodeBlockAsm:
    993 	TESTL R9, R9
    994 	JZ    match_nolit_emitcopy_end_encodeBlockAsm
    995 	XORL  SI, SI
    996 	LEAL  -1(SI)(R9*4), R9
    997 	MOVB  R9, (AX)
    998 	MOVL  BX, 1(AX)
    999 	ADDQ  $0x05, AX
   1000 	JMP   match_nolit_emitcopy_end_encodeBlockAsm
   1001 
   1002 two_byte_offset_match_nolit_encodeBlockAsm:
   1003 	CMPL R9, $0x40
   1004 	JBE  two_byte_offset_short_match_nolit_encodeBlockAsm
   1005 	CMPL BX, $0x00000800
   1006 	JAE  long_offset_short_match_nolit_encodeBlockAsm
   1007 	MOVL $0x00000001, SI
   1008 	LEAL 16(SI), SI
   1009 	MOVB BL, 1(AX)
   1010 	MOVL BX, DI
   1011 	SHRL $0x08, DI
   1012 	SHLL $0x05, DI
   1013 	ORL  DI, SI
   1014 	MOVB SI, (AX)
   1015 	ADDQ $0x02, AX
   1016 	SUBL $0x08, R9
   1017 
   1018 	// emitRepeat
   1019 	LEAL -4(R9), R9
   1020 	JMP  cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b
   1021 
   1022 emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short_2b:
   1023 	MOVL R9, SI
   1024 	LEAL -4(R9), R9
   1025 	CMPL SI, $0x08
   1026 	JBE  repeat_two_match_nolit_encodeBlockAsm_emit_copy_short_2b
   1027 	CMPL SI, $0x0c
   1028 	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b
   1029 	CMPL BX, $0x00000800
   1030 	JB   repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b
   1031 
   1032 cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b:
   1033 	CMPL R9, $0x00000104
   1034 	JB   repeat_three_match_nolit_encodeBlockAsm_emit_copy_short_2b
   1035 	CMPL R9, $0x00010100
   1036 	JB   repeat_four_match_nolit_encodeBlockAsm_emit_copy_short_2b
   1037 	CMPL R9, $0x0100ffff
   1038 	JB   repeat_five_match_nolit_encodeBlockAsm_emit_copy_short_2b
   1039 	LEAL -16842747(R9), R9
   1040 	MOVL $0xfffb001d, (AX)
   1041 	MOVB $0xff, 4(AX)
   1042 	ADDQ $0x05, AX
   1043 	JMP  emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short_2b
   1044 
   1045 repeat_five_match_nolit_encodeBlockAsm_emit_copy_short_2b:
   1046 	LEAL -65536(R9), R9
   1047 	MOVL R9, BX
   1048 	MOVW $0x001d, (AX)
   1049 	MOVW R9, 2(AX)
   1050 	SARL $0x10, BX
   1051 	MOVB BL, 4(AX)
   1052 	ADDQ $0x05, AX
   1053 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
   1054 
   1055 repeat_four_match_nolit_encodeBlockAsm_emit_copy_short_2b:
   1056 	LEAL -256(R9), R9
   1057 	MOVW $0x0019, (AX)
   1058 	MOVW R9, 2(AX)
   1059 	ADDQ $0x04, AX
   1060 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
   1061 
   1062 repeat_three_match_nolit_encodeBlockAsm_emit_copy_short_2b:
   1063 	LEAL -4(R9), R9
   1064 	MOVW $0x0015, (AX)
   1065 	MOVB R9, 2(AX)
   1066 	ADDQ $0x03, AX
   1067 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
   1068 
   1069 repeat_two_match_nolit_encodeBlockAsm_emit_copy_short_2b:
   1070 	SHLL $0x02, R9
   1071 	ORL  $0x01, R9
   1072 	MOVW R9, (AX)
   1073 	ADDQ $0x02, AX
   1074 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
   1075 
   1076 repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b:
   1077 	XORQ SI, SI
   1078 	LEAL 1(SI)(R9*4), R9
   1079 	MOVB BL, 1(AX)
   1080 	SARL $0x08, BX
   1081 	SHLL $0x05, BX
   1082 	ORL  BX, R9
   1083 	MOVB R9, (AX)
   1084 	ADDQ $0x02, AX
   1085 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
   1086 
   1087 long_offset_short_match_nolit_encodeBlockAsm:
   1088 	MOVB $0xee, (AX)
   1089 	MOVW BX, 1(AX)
   1090 	LEAL -60(R9), R9
   1091 	ADDQ $0x03, AX
   1092 
   1093 	// emitRepeat
   1094 emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short:
   1095 	MOVL R9, SI
   1096 	LEAL -4(R9), R9
   1097 	CMPL SI, $0x08
   1098 	JBE  repeat_two_match_nolit_encodeBlockAsm_emit_copy_short
   1099 	CMPL SI, $0x0c
   1100 	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short
   1101 	CMPL BX, $0x00000800
   1102 	JB   repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short
   1103 
   1104 cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short:
   1105 	CMPL R9, $0x00000104
   1106 	JB   repeat_three_match_nolit_encodeBlockAsm_emit_copy_short
   1107 	CMPL R9, $0x00010100
   1108 	JB   repeat_four_match_nolit_encodeBlockAsm_emit_copy_short
   1109 	CMPL R9, $0x0100ffff
   1110 	JB   repeat_five_match_nolit_encodeBlockAsm_emit_copy_short
   1111 	LEAL -16842747(R9), R9
   1112 	MOVL $0xfffb001d, (AX)
   1113 	MOVB $0xff, 4(AX)
   1114 	ADDQ $0x05, AX
   1115 	JMP  emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short
   1116 
   1117 repeat_five_match_nolit_encodeBlockAsm_emit_copy_short:
   1118 	LEAL -65536(R9), R9
   1119 	MOVL R9, BX
   1120 	MOVW $0x001d, (AX)
   1121 	MOVW R9, 2(AX)
   1122 	SARL $0x10, BX
   1123 	MOVB BL, 4(AX)
   1124 	ADDQ $0x05, AX
   1125 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
   1126 
   1127 repeat_four_match_nolit_encodeBlockAsm_emit_copy_short:
   1128 	LEAL -256(R9), R9
   1129 	MOVW $0x0019, (AX)
   1130 	MOVW R9, 2(AX)
   1131 	ADDQ $0x04, AX
   1132 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
   1133 
   1134 repeat_three_match_nolit_encodeBlockAsm_emit_copy_short:
   1135 	LEAL -4(R9), R9
   1136 	MOVW $0x0015, (AX)
   1137 	MOVB R9, 2(AX)
   1138 	ADDQ $0x03, AX
   1139 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
   1140 
   1141 repeat_two_match_nolit_encodeBlockAsm_emit_copy_short:
   1142 	SHLL $0x02, R9
   1143 	ORL  $0x01, R9
   1144 	MOVW R9, (AX)
   1145 	ADDQ $0x02, AX
   1146 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
   1147 
   1148 repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short:
   1149 	XORQ SI, SI
   1150 	LEAL 1(SI)(R9*4), R9
   1151 	MOVB BL, 1(AX)
   1152 	SARL $0x08, BX
   1153 	SHLL $0x05, BX
   1154 	ORL  BX, R9
   1155 	MOVB R9, (AX)
   1156 	ADDQ $0x02, AX
   1157 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
   1158 
   1159 two_byte_offset_short_match_nolit_encodeBlockAsm: // Emit one copy op with a 2- or 3-byte encoding; reached when R9 <= 0x40. R9 appears to be the match length and BX the offset - confirm against the emitCopy generator.
   1160 	MOVL R9, SI
   1161 	SHLL $0x02, SI // SI = R9 * 4, shared starting point for both tag computations below
   1162 	CMPL R9, $0x0c
   1163 	JAE  emit_copy_three_match_nolit_encodeBlockAsm // R9 >= 12: needs the 3-byte form
   1164 	CMPL BX, $0x00000800
   1165 	JAE  emit_copy_three_match_nolit_encodeBlockAsm // BX >= 2048: needs the 3-byte form
   1166 	LEAL -15(SI), SI // 4*R9 - 15 == ((R9-4) << 2) | 1
   1167 	MOVB BL, 1(AX) // second byte = low 8 bits of BX
   1168 	SHRL $0x08, BX
   1169 	SHLL $0x05, BX // move bits 8..10 of BX into bits 5..7
   1170 	ORL  BX, SI
   1171 	MOVB SI, (AX) // first byte = tag combined with the high bits of BX
   1172 	ADDQ $0x02, AX // two bytes written
   1173 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
   1174 
   1175 emit_copy_three_match_nolit_encodeBlockAsm: // 3-byte form: tag byte followed by BX as a 16-bit value
   1176 	LEAL -2(SI), SI // 4*R9 - 2 == ((R9-1) << 2) | 2
   1177 	MOVB SI, (AX)
   1178 	MOVW BX, 1(AX)
   1179 	ADDQ $0x03, AX // three bytes written; execution falls through to match_nolit_emitcopy_end_encodeBlockAsm
   1180 
   1181 match_nolit_emitcopy_end_encodeBlockAsm:
   1182 	CMPL CX, 8(SP)
   1183 	JAE  emit_remainder_encodeBlockAsm
   1184 	MOVQ -2(DX)(CX*1), SI
   1185 	CMPQ AX, (SP)
   1186 	JB   match_nolit_dst_ok_encodeBlockAsm
   1187 	MOVQ $0x00000000, ret+48(FP)
   1188 	RET
   1189 
   1190 match_nolit_dst_ok_encodeBlockAsm:
   1191 	MOVQ  $0x0000cf1bbcdcbf9b, R8
   1192 	MOVQ  SI, DI
   1193 	SHRQ  $0x10, SI
   1194 	MOVQ  SI, BX
   1195 	SHLQ  $0x10, DI
   1196 	IMULQ R8, DI
   1197 	SHRQ  $0x32, DI
   1198 	SHLQ  $0x10, BX
   1199 	IMULQ R8, BX
   1200 	SHRQ  $0x32, BX
   1201 	LEAL  -2(CX), R8
   1202 	LEAQ  24(SP)(BX*4), R9
   1203 	MOVL  (R9), BX
   1204 	MOVL  R8, 24(SP)(DI*4)
   1205 	MOVL  CX, (R9)
   1206 	CMPL  (DX)(BX*1), SI
   1207 	JEQ   match_nolit_loop_encodeBlockAsm
   1208 	INCL  CX
   1209 	JMP   search_loop_encodeBlockAsm
   1210 
   1211 emit_remainder_encodeBlockAsm: // Tail of the encoder: write the literal header for src[12(SP):len(src)], or return 0 when dst could overflow.
   1212 	MOVQ src_len+32(FP), CX // CX = len(src)
   1213 	SUBL 12(SP), CX // CX = len(src) - 12(SP), the byte count this tail will copy
   1214 	LEAQ 5(AX)(CX*1), CX // CX = AX + count + 5; 5 bytes is the largest header written below
   1215 	CMPQ CX, (SP) // (SP) holds the dst limit pointer stored by the function prologue
   1216 	JB   emit_remainder_ok_encodeBlockAsm
   1217 	MOVQ $0x00000000, ret+48(FP) // at or past the limit: return 0
   1218 	RET
   1219 
   1220 emit_remainder_ok_encodeBlockAsm:
   1221 	MOVQ src_len+32(FP), CX
   1222 	MOVL 12(SP), BX // BX = src offset where the pending bytes start
   1223 	CMPL BX, CX
   1224 	JEQ  emit_literal_done_emit_remainder_encodeBlockAsm // nothing pending: go straight to the return
   1225 	MOVL CX, SI
   1226 	MOVL CX, 12(SP) // record len(src) as the new start-of-pending offset
   1227 	LEAQ (DX)(BX*1), CX // CX = DX + BX, address of the first pending byte (DX is used as the src base pointer by the surrounding code)
   1228 	SUBL BX, SI // SI = number of pending bytes
   1229 	LEAL -1(SI), DX // DX = count - 1, the value encoded in the header; DX no longer holds the src base
   1230 	CMPL DX, $0x3c
   1231 	JB   one_byte_emit_remainder_encodeBlockAsm // count-1 < 60: stored inside the tag byte
   1232 	CMPL DX, $0x00000100
   1233 	JB   two_bytes_emit_remainder_encodeBlockAsm // count-1 fits in 8 bits
   1234 	CMPL DX, $0x00010000
   1235 	JB   three_bytes_emit_remainder_encodeBlockAsm // count-1 fits in 16 bits
   1236 	CMPL DX, $0x01000000
   1237 	JB   four_bytes_emit_remainder_encodeBlockAsm // count-1 fits in 24 bits
   1238 	MOVB $0xfc, (AX) // otherwise: tag 0xfc followed by count-1 as 32 bits
   1239 	MOVL DX, 1(AX)
   1240 	ADDQ $0x05, AX
   1241 	JMP  memmove_long_emit_remainder_encodeBlockAsm
   1242 
   1243 four_bytes_emit_remainder_encodeBlockAsm: // tag 0xf8 followed by count-1 as 24 bits
   1244 	MOVL DX, BX
   1245 	SHRL $0x10, BX // BX = bits 16..23 of count-1
   1246 	MOVB $0xf8, (AX)
   1247 	MOVW DX, 1(AX) // low 16 bits
   1248 	MOVB BL, 3(AX) // third length byte
   1249 	ADDQ $0x04, AX
   1250 	JMP  memmove_long_emit_remainder_encodeBlockAsm
   1251 
   1252 three_bytes_emit_remainder_encodeBlockAsm: // tag 0xf4 followed by count-1 as 16 bits
   1253 	MOVB $0xf4, (AX)
   1254 	MOVW DX, 1(AX)
   1255 	ADDQ $0x03, AX
   1256 	JMP  memmove_long_emit_remainder_encodeBlockAsm
   1257 
   1258 two_bytes_emit_remainder_encodeBlockAsm: // tag 0xf0 followed by count-1 as 8 bits
   1259 	MOVB $0xf0, (AX)
   1260 	MOVB DL, 1(AX)
   1261 	ADDQ $0x02, AX
   1262 	CMPL DX, $0x40
   1263 	JB   memmove_emit_remainder_encodeBlockAsm // count <= 64: the short copy can handle it
   1264 	JMP  memmove_long_emit_remainder_encodeBlockAsm
   1265 
   1266 one_byte_emit_remainder_encodeBlockAsm: // single tag byte = (count-1) << 2
   1267 	SHLB $0x02, DL
   1268 	MOVB DL, (AX)
   1269 	ADDQ $0x01, AX // execution falls through to memmove_emit_remainder_encodeBlockAsm
   1270 
   1271 memmove_emit_remainder_encodeBlockAsm: // Copy SI bytes from (CX) to (AX); reached only with SI <= 64 (count-1 < 0x3c or < 0x40 on the two entry paths).
   1272 	LEAQ (AX)(SI*1), DX // DX = AX + SI, the dst cursor after the copy
   1273 	MOVL SI, BX // BX = byte count; SI is reused as a scratch register below
   1274 
   1275 	// genMemMoveShort
   1276 	CMPQ BX, $0x03 // dispatch on size; every case copies a head chunk and a tail chunk that may overlap
   1277 	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2
   1278 	JE   emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3
   1279 	CMPQ BX, $0x08
   1280 	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4through7
   1281 	CMPQ BX, $0x10
   1282 	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8through16
   1283 	CMPQ BX, $0x20
   1284 	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32
   1285 	JMP  emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64
   1286 
   1287 emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2:
   1288 	MOVB (CX), SI // first byte
   1289 	MOVB -1(CX)(BX*1), CL // last byte (the same byte when BX == 1); both loads happen before any store
   1290 	MOVB SI, (AX)
   1291 	MOVB CL, -1(AX)(BX*1)
   1292 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm
   1293 
   1294 emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3:
   1295 	MOVW (CX), SI // bytes 0..1
   1296 	MOVB 2(CX), CL // byte 2
   1297 	MOVW SI, (AX)
   1298 	MOVB CL, 2(AX)
   1299 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm
   1300 
   1301 emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4through7:
   1302 	MOVL (CX), SI // first 4 bytes
   1303 	MOVL -4(CX)(BX*1), CX // last 4 bytes; overwrites the source pointer, which is not needed afterwards
   1304 	MOVL SI, (AX)
   1305 	MOVL CX, -4(AX)(BX*1)
   1306 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm
   1307 
   1308 emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8through16:
   1309 	MOVQ (CX), SI // first 8 bytes
   1310 	MOVQ -8(CX)(BX*1), CX // last 8 bytes
   1311 	MOVQ SI, (AX)
   1312 	MOVQ CX, -8(AX)(BX*1)
   1313 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm
   1314 
   1315 emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32:
   1316 	MOVOU (CX), X0 // first 16 bytes
   1317 	MOVOU -16(CX)(BX*1), X1 // last 16 bytes
   1318 	MOVOU X0, (AX)
   1319 	MOVOU X1, -16(AX)(BX*1)
   1320 	JMP   memmove_end_copy_emit_remainder_encodeBlockAsm
   1321 
   1322 emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64:
   1323 	MOVOU (CX), X0 // first 32 bytes in X0:X1
   1324 	MOVOU 16(CX), X1
   1325 	MOVOU -32(CX)(BX*1), X2 // last 32 bytes in X2:X3
   1326 	MOVOU -16(CX)(BX*1), X3
   1327 	MOVOU X0, (AX)
   1328 	MOVOU X1, 16(AX)
   1329 	MOVOU X2, -32(AX)(BX*1)
   1330 	MOVOU X3, -16(AX)(BX*1)
   1331 
   1332 memmove_end_copy_emit_remainder_encodeBlockAsm:
   1333 	MOVQ DX, AX // advance the dst cursor past the copied bytes
   1334 	JMP  emit_literal_done_emit_remainder_encodeBlockAsm
   1335 
   1336 memmove_long_emit_remainder_encodeBlockAsm: // Copy SI bytes from (CX) to (AX) in 32-byte steps with aligned stores, then store the function result.
   1337 	LEAQ (AX)(SI*1), DX // DX = AX + SI, the dst cursor after the copy
   1338 	MOVL SI, BX // BX = byte count
   1339 
   1340 	// genMemMoveLong
   1341 	MOVOU (CX), X0 // keep the first 32 source bytes in X0:X1 ...
   1342 	MOVOU 16(CX), X1
   1343 	MOVOU -32(CX)(BX*1), X2 // ... and the last 32 in X2:X3; they are stored after the loop to cover both unaligned ends
   1344 	MOVOU -16(CX)(BX*1), X3
   1345 	MOVQ  BX, DI
   1346 	SHRQ  $0x05, DI // DI = count / 32
   1347 	MOVQ  AX, SI
   1348 	ANDL  $0x0000001f, SI // SI = AX mod 32
   1349 	MOVQ  $0x00000040, R8
   1350 	SUBQ  SI, R8 // R8 = 64 - (AX mod 32), so AX + R8 - 32 is a multiple of 32
   1351 	DECQ  DI
   1352 	JA    emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32 // NOTE(review): DECQ does not write CF, so JA sees CF from the SUBQ above plus ZF from DECQ
   1353 	LEAQ  -32(CX)(R8*1), SI // SI = source address matching the first aligned dst address
   1354 	LEAQ  -32(AX)(R8*1), R9 // R9 = first 32-byte aligned dst address after AX
   1355 
   1356 emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back:
   1357 	MOVOU (SI), X4 // unaligned load of 32 bytes
   1358 	MOVOU 16(SI), X5
   1359 	MOVOA X4, (R9) // aligned store of 32 bytes
   1360 	MOVOA X5, 16(R9)
   1361 	ADDQ  $0x20, R9
   1362 	ADDQ  $0x20, SI
   1363 	ADDQ  $0x20, R8
   1364 	DECQ  DI
   1365 	JNA   emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back // NOTE(review): JNA tests CF (left by the ADDQ above, untouched by DECQ) or ZF from DECQ
   1366 
   1367 emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32:
   1368 	MOVOU -32(CX)(R8*1), X4 // unaligned load from source offset R8-32
   1369 	MOVOU -16(CX)(R8*1), X5
   1370 	MOVOA X4, -32(AX)(R8*1) // aligned store at dst offset R8-32
   1371 	MOVOA X5, -16(AX)(R8*1)
   1372 	ADDQ  $0x20, R8
   1373 	CMPQ  BX, R8
   1374 	JAE   emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32 // repeat while count >= R8
   1375 	MOVOU X0, (AX) // write the saved first 32 bytes
   1376 	MOVOU X1, 16(AX)
   1377 	MOVOU X2, -32(AX)(BX*1) // write the saved last 32 bytes
   1378 	MOVOU X3, -16(AX)(BX*1)
   1379 	MOVQ  DX, AX // advance the dst cursor past the copied bytes
   1380 
   1381 emit_literal_done_emit_remainder_encodeBlockAsm:
   1382 	MOVQ dst_base+0(FP), CX
   1383 	SUBQ CX, AX // AX = dst cursor - dst base = number of bytes written to dst
   1384 	MOVQ AX, ret+48(FP) // return that count
   1385 	RET
   1386 
   1387 // func encodeBlockAsm4MB(dst []byte, src []byte) int
   1388 // Requires: BMI, SSE2
   1389 TEXT ·encodeBlockAsm4MB(SB), $65560-56 // frame: 24 bytes of scalar slots at 0..20(SP) plus a 65536-byte table at 24(SP); 56 bytes of arguments and result
   1390 	MOVQ dst_base+0(FP), AX // AX = dst write cursor, starting at the dst base
   1391 	MOVQ $0x00000200, CX // 512 iterations x 128 bytes = 65536 bytes to clear
   1392 	LEAQ 24(SP), DX // DX = start of the table
   1393 	PXOR X0, X0 // X0 = 16 zero bytes
   1394 
   1395 zero_loop_encodeBlockAsm4MB:
   1396 	MOVOU X0, (DX) // eight 16-byte stores clear 128 bytes per iteration
   1397 	MOVOU X0, 16(DX)
   1398 	MOVOU X0, 32(DX)
   1399 	MOVOU X0, 48(DX)
   1400 	MOVOU X0, 64(DX)
   1401 	MOVOU X0, 80(DX)
   1402 	MOVOU X0, 96(DX)
   1403 	MOVOU X0, 112(DX)
   1404 	ADDQ  $0x80, DX
   1405 	DECQ  CX
   1406 	JNZ   zero_loop_encodeBlockAsm4MB
   1407 	MOVL  $0x00000000, 12(SP) // 12(SP) = 0: src offset where not-yet-emitted bytes start
   1408 	MOVQ  src_len+32(FP), CX
   1409 	LEAQ  -9(CX), DX // DX = len(src) - 9
   1410 	LEAQ  -8(CX), BX // BX = len(src) - 8
   1411 	MOVL  BX, 8(SP) // 8(SP) = len(src) - 8, the src limit the search loop compares against
   1412 	SHRQ  $0x05, CX // CX = len(src) >> 5
   1413 	SUBL  CX, DX // DX = len(src) - 9 - (len(src) >> 5)
   1414 	LEAQ  (AX)(DX*1), DX // DX = dst base + that amount
   1415 	MOVQ  DX, (SP) // (SP) = dst limit pointer used by the output-size checks
   1416 	MOVL  $0x00000001, CX // CX = 1: the src position starts at 1
   1417 	MOVL  CX, 16(SP) // 16(SP) = 1: distance subtracted from the src position by the repeat probe
   1418 	MOVQ  src_base+24(FP), DX // DX = src base pointer from here on
   1419 
   1420 search_loop_encodeBlockAsm4MB: // One search step at src position CX: hash three positions, update the table, and probe for a repeat at distance 16(SP).
   1421 	MOVL  CX, BX
   1422 	SUBL  12(SP), BX // BX = CX - 12(SP), bytes scanned since the last emit
   1423 	SHRL  $0x06, BX
   1424 	LEAL  4(CX)(BX*1), BX // BX = CX + 4 + ((CX - 12(SP)) >> 6): position to resume from if nothing matches here
   1425 	CMPL  BX, 8(SP)
   1426 	JAE   emit_remainder_encodeBlockAsm4MB // resume position >= len(src) - 8: stop searching
   1427 	MOVQ  (DX)(CX*1), SI // SI = 8 src bytes starting at CX
   1428 	MOVL  BX, 20(SP) // save the resume position; reloaded into CX when no candidate matches
   1429 	MOVQ  $0x0000cf1bbcdcbf9b, R8 // R8 = hash multiplier
   1430 	MOVQ  SI, R9
   1431 	MOVQ  SI, R10
   1432 	SHRQ  $0x08, R10 // R10 = the bytes starting at CX+1
   1433 	SHLQ  $0x10, R9 // discard the top 2 bytes so the hash covers 6 bytes
   1434 	IMULQ R8, R9
   1435 	SHRQ  $0x32, R9 // keep the top 14 bits: R9 = table index for position CX
   1436 	SHLQ  $0x10, R10
   1437 	IMULQ R8, R10
   1438 	SHRQ  $0x32, R10 // R10 = table index for position CX+1
   1439 	MOVL  24(SP)(R9*4), BX // BX = previous table entry for the hash at CX
   1440 	MOVL  24(SP)(R10*4), DI // DI = previous table entry for the hash at CX+1
   1441 	MOVL  CX, 24(SP)(R9*4) // table entry now records CX
   1442 	LEAL  1(CX), R9
   1443 	MOVL  R9, 24(SP)(R10*4) // table entry now records CX+1
   1444 	MOVQ  SI, R9
   1445 	SHRQ  $0x10, R9 // the bytes starting at CX+2
   1446 	SHLQ  $0x10, R9
   1447 	IMULQ R8, R9
   1448 	SHRQ  $0x32, R9 // R9 = table index for position CX+2, consumed under no_repeat_found_encodeBlockAsm4MB
   1449 	MOVL  CX, R8
   1450 	SUBL  16(SP), R8 // R8 = CX - 16(SP)
   1451 	MOVL  1(DX)(R8*1), R10 // R10 = 4 src bytes at CX + 1 - 16(SP)
   1452 	MOVQ  SI, R8
   1453 	SHRQ  $0x08, R8 // R8 = the bytes starting at CX+1
   1454 	CMPL  R8, R10 // compare the 4 bytes at CX+1 with the 4 bytes 16(SP) positions earlier
   1455 	JNE   no_repeat_found_encodeBlockAsm4MB
   1456 	LEAL  1(CX), SI // repeat found: SI = CX + 1, start of the repeated bytes
   1457 	MOVL  12(SP), DI // DI = start of the not-yet-emitted bytes, lower bound for the backward extension
   1458 	MOVL  SI, BX
   1459 	SUBL  16(SP), BX // BX = SI - 16(SP), position of the earlier copy
   1460 	JZ    repeat_extend_back_end_encodeBlockAsm4MB // earlier copy starts at position 0: nothing before it to compare
   1461 
   1462 repeat_extend_back_loop_encodeBlockAsm4MB:
   1463 	CMPL SI, DI
   1464 	JBE  repeat_extend_back_end_encodeBlockAsm4MB
   1465 	MOVB -1(DX)(BX*1), R8
   1466 	MOVB -1(DX)(SI*1), R9
   1467 	CMPB R8, R9
   1468 	JNE  repeat_extend_back_end_encodeBlockAsm4MB
   1469 	LEAL -1(SI), SI
   1470 	DECL BX
   1471 	JNZ  repeat_extend_back_loop_encodeBlockAsm4MB
   1472 
   1473 repeat_extend_back_end_encodeBlockAsm4MB:
   1474 	MOVL 12(SP), BX
   1475 	CMPL BX, SI
   1476 	JEQ  emit_literal_done_repeat_emit_encodeBlockAsm4MB
   1477 	MOVL SI, R8
   1478 	MOVL SI, 12(SP)
   1479 	LEAQ (DX)(BX*1), R9
   1480 	SUBL BX, R8
   1481 	LEAL -1(R8), BX
   1482 	CMPL BX, $0x3c
   1483 	JB   one_byte_repeat_emit_encodeBlockAsm4MB
   1484 	CMPL BX, $0x00000100
   1485 	JB   two_bytes_repeat_emit_encodeBlockAsm4MB
   1486 	CMPL BX, $0x00010000
   1487 	JB   three_bytes_repeat_emit_encodeBlockAsm4MB
   1488 	MOVL BX, R10
   1489 	SHRL $0x10, R10
   1490 	MOVB $0xf8, (AX)
   1491 	MOVW BX, 1(AX)
   1492 	MOVB R10, 3(AX)
   1493 	ADDQ $0x04, AX
   1494 	JMP  memmove_long_repeat_emit_encodeBlockAsm4MB
   1495 
   1496 three_bytes_repeat_emit_encodeBlockAsm4MB:
   1497 	MOVB $0xf4, (AX)
   1498 	MOVW BX, 1(AX)
   1499 	ADDQ $0x03, AX
   1500 	JMP  memmove_long_repeat_emit_encodeBlockAsm4MB
   1501 
   1502 two_bytes_repeat_emit_encodeBlockAsm4MB:
   1503 	MOVB $0xf0, (AX)
   1504 	MOVB BL, 1(AX)
   1505 	ADDQ $0x02, AX
   1506 	CMPL BX, $0x40
   1507 	JB   memmove_repeat_emit_encodeBlockAsm4MB
   1508 	JMP  memmove_long_repeat_emit_encodeBlockAsm4MB
   1509 
   1510 one_byte_repeat_emit_encodeBlockAsm4MB:
   1511 	SHLB $0x02, BL
   1512 	MOVB BL, (AX)
   1513 	ADDQ $0x01, AX
   1514 
   1515 memmove_repeat_emit_encodeBlockAsm4MB:
   1516 	LEAQ (AX)(R8*1), BX
   1517 
   1518 	// genMemMoveShort
   1519 	CMPQ R8, $0x08
   1520 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8
   1521 	CMPQ R8, $0x10
   1522 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16
   1523 	CMPQ R8, $0x20
   1524 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32
   1525 	JMP  emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64
   1526 
   1527 emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8:
   1528 	MOVQ (R9), R10
   1529 	MOVQ R10, (AX)
   1530 	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm4MB
   1531 
   1532 emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16:
   1533 	MOVQ (R9), R10
   1534 	MOVQ -8(R9)(R8*1), R9
   1535 	MOVQ R10, (AX)
   1536 	MOVQ R9, -8(AX)(R8*1)
   1537 	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm4MB
   1538 
   1539 emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32:
   1540 	MOVOU (R9), X0
   1541 	MOVOU -16(R9)(R8*1), X1
   1542 	MOVOU X0, (AX)
   1543 	MOVOU X1, -16(AX)(R8*1)
   1544 	JMP   memmove_end_copy_repeat_emit_encodeBlockAsm4MB
   1545 
   1546 emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64:
   1547 	MOVOU (R9), X0
   1548 	MOVOU 16(R9), X1
   1549 	MOVOU -32(R9)(R8*1), X2
   1550 	MOVOU -16(R9)(R8*1), X3
   1551 	MOVOU X0, (AX)
   1552 	MOVOU X1, 16(AX)
   1553 	MOVOU X2, -32(AX)(R8*1)
   1554 	MOVOU X3, -16(AX)(R8*1)
   1555 
   1556 memmove_end_copy_repeat_emit_encodeBlockAsm4MB:
   1557 	MOVQ BX, AX
   1558 	JMP  emit_literal_done_repeat_emit_encodeBlockAsm4MB
   1559 
   1560 memmove_long_repeat_emit_encodeBlockAsm4MB:
   1561 	LEAQ (AX)(R8*1), BX
   1562 
   1563 	// genMemMoveLong
   1564 	MOVOU (R9), X0
   1565 	MOVOU 16(R9), X1
   1566 	MOVOU -32(R9)(R8*1), X2
   1567 	MOVOU -16(R9)(R8*1), X3
   1568 	MOVQ  R8, R11
   1569 	SHRQ  $0x05, R11
   1570 	MOVQ  AX, R10
   1571 	ANDL  $0x0000001f, R10
   1572 	MOVQ  $0x00000040, R12
   1573 	SUBQ  R10, R12
   1574 	DECQ  R11
   1575 	JA    emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
   1576 	LEAQ  -32(R9)(R12*1), R10
   1577 	LEAQ  -32(AX)(R12*1), R13
   1578 
   1579 emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back:
   1580 	MOVOU (R10), X4
   1581 	MOVOU 16(R10), X5
   1582 	MOVOA X4, (R13)
   1583 	MOVOA X5, 16(R13)
   1584 	ADDQ  $0x20, R13
   1585 	ADDQ  $0x20, R10
   1586 	ADDQ  $0x20, R12
   1587 	DECQ  R11
   1588 	JNA   emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back
   1589 
   1590 emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32:
   1591 	MOVOU -32(R9)(R12*1), X4
   1592 	MOVOU -16(R9)(R12*1), X5
   1593 	MOVOA X4, -32(AX)(R12*1)
   1594 	MOVOA X5, -16(AX)(R12*1)
   1595 	ADDQ  $0x20, R12
   1596 	CMPQ  R8, R12
   1597 	JAE   emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
   1598 	MOVOU X0, (AX)
   1599 	MOVOU X1, 16(AX)
   1600 	MOVOU X2, -32(AX)(R8*1)
   1601 	MOVOU X3, -16(AX)(R8*1)
   1602 	MOVQ  BX, AX
   1603 
   1604 emit_literal_done_repeat_emit_encodeBlockAsm4MB: // Extend the repeat forward: count in R11 how many bytes at src[CX+5:] equal the bytes 16(SP) positions earlier.
   1605 	ADDL $0x05, CX // CX + 5 is the first position after the 4 bytes at CX+1 that the repeat probe already compared
   1606 	MOVL CX, BX
   1607 	SUBL 16(SP), BX // BX = CX - 16(SP)
   1608 	MOVQ src_len+32(FP), R8
   1609 	SUBL CX, R8 // R8 = len(src) - CX, bytes left to compare
   1610 	LEAQ (DX)(CX*1), R9 // R9 = address of src[CX]
   1611 	LEAQ (DX)(BX*1), BX // BX = address of src[CX - 16(SP)]
   1612 
   1613 	// matchLen
   1614 	XORL R11, R11 // R11 = number of matching bytes found so far
   1615 	CMPL R8, $0x08
   1616 	JB   matchlen_match4_repeat_extend_encodeBlockAsm4MB // fewer than 8 bytes left: use the 4/2/1-byte tail checks
   1617 
   1618 matchlen_loopback_repeat_extend_encodeBlockAsm4MB:
   1619 	MOVQ  (R9)(R11*1), R10
   1620 	XORQ  (BX)(R11*1), R10 // set bits mark where the two 8-byte words differ
   1621 	TESTQ R10, R10
   1622 	JZ    matchlen_loop_repeat_extend_encodeBlockAsm4MB // all 8 bytes equal
   1623 
   1624 #ifdef GOAMD64_v3
   1625 	TZCNTQ R10, R10 // index of the lowest differing bit
   1626 
   1627 #else
   1628 	BSFQ R10, R10 // index of the lowest differing bit
   1629 
   1630 #endif
   1631 	SARQ $0x03, R10 // bit index / 8 = number of equal bytes before the first difference
   1632 	LEAL (R11)(R10*1), R11
   1633 	JMP  repeat_extend_forward_end_encodeBlockAsm4MB
   1634 
   1635 matchlen_loop_repeat_extend_encodeBlockAsm4MB:
   1636 	LEAL -8(R8), R8 // 8 fewer bytes left (LEAL leaves the flags alone)
   1637 	LEAL 8(R11), R11 // 8 more bytes matched
   1638 	CMPL R8, $0x08
   1639 	JAE  matchlen_loopback_repeat_extend_encodeBlockAsm4MB
   1640 	JZ   repeat_extend_forward_end_encodeBlockAsm4MB // NOTE(review): flags come from the CMPL with 8; R8 == 8 was already taken by JAE, so this branch looks unreachable
   1641 
   1642 matchlen_match4_repeat_extend_encodeBlockAsm4MB:
   1643 	CMPL R8, $0x04
   1644 	JB   matchlen_match2_repeat_extend_encodeBlockAsm4MB
   1645 	MOVL (R9)(R11*1), R10
   1646 	CMPL (BX)(R11*1), R10
   1647 	JNE  matchlen_match2_repeat_extend_encodeBlockAsm4MB
   1648 	SUBL $0x04, R8 // 4 more bytes matched
   1649 	LEAL 4(R11), R11
   1650 
   1651 matchlen_match2_repeat_extend_encodeBlockAsm4MB:
   1652 	CMPL R8, $0x02
   1653 	JB   matchlen_match1_repeat_extend_encodeBlockAsm4MB
   1654 	MOVW (R9)(R11*1), R10
   1655 	CMPW (BX)(R11*1), R10
   1656 	JNE  matchlen_match1_repeat_extend_encodeBlockAsm4MB
   1657 	SUBL $0x02, R8 // 2 more bytes matched
   1658 	LEAL 2(R11), R11
   1659 
   1660 matchlen_match1_repeat_extend_encodeBlockAsm4MB:
   1661 	CMPL R8, $0x01
   1662 	JB   repeat_extend_forward_end_encodeBlockAsm4MB // no bytes left
   1663 	MOVB (R9)(R11*1), R10
   1664 	CMPB (BX)(R11*1), R10
   1665 	JNE  repeat_extend_forward_end_encodeBlockAsm4MB
   1666 	LEAL 1(R11), R11 // final single byte matched; execution falls through to repeat_extend_forward_end_encodeBlockAsm4MB
   1667 
   1668 repeat_extend_forward_end_encodeBlockAsm4MB: // Encode the repeat just measured: BX becomes its length, SI its distance; the shortest of the 2..5-byte forms is written at AX.
   1669 	ADDL  R11, CX // advance the src position past the matched bytes
   1670 	MOVL  CX, BX
   1671 	SUBL  SI, BX // BX = CX - SI, length of the repeat (SI still holds its start position)
   1672 	MOVL  16(SP), SI // SI = distance back to the earlier copy
   1673 	TESTL DI, DI // DI still holds the 12(SP) value loaded in search_loop before the literals were emitted
   1674 	JZ    repeat_as_copy_encodeBlockAsm4MB // that value was 0: take the copy-encoding path at repeat_as_copy instead
   1675 
   1676 	// emitRepeat
   1677 	MOVL BX, DI // DI = length
   1678 	LEAL -4(BX), BX // BX = length - 4
   1679 	CMPL DI, $0x08
   1680 	JBE  repeat_two_match_repeat_encodeBlockAsm4MB // length <= 8: 2-byte form
   1681 	CMPL DI, $0x0c
   1682 	JAE  cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB // length >= 12: no 2-byte form applies
   1683 	CMPL SI, $0x00000800
   1684 	JB   repeat_two_offset_match_repeat_encodeBlockAsm4MB // length 9..11 and distance < 2048: 2-byte form carrying the distance
   1685 
   1686 cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB:
   1687 	CMPL BX, $0x00000104
   1688 	JB   repeat_three_match_repeat_encodeBlockAsm4MB // length-4 < 260: 3-byte form
   1689 	CMPL BX, $0x00010100
   1690 	JB   repeat_four_match_repeat_encodeBlockAsm4MB // length-4 < 65792: 4-byte form
   1691 	LEAL -65536(BX), BX // 5-byte form stores length - 4 - 65536 as 24 bits
   1692 	MOVL BX, SI
   1693 	MOVW $0x001d, (AX) // bytes 0x1d, 0x00
   1694 	MOVW BX, 2(AX) // low 16 bits
   1695 	SARL $0x10, SI
   1696 	MOVB SI, 4(AX) // bits 16..23
   1697 	ADDQ $0x05, AX
   1698 	JMP  repeat_end_emit_encodeBlockAsm4MB
   1699 
   1700 repeat_four_match_repeat_encodeBlockAsm4MB:
   1701 	LEAL -256(BX), BX // 4-byte form stores length - 4 - 256 as 16 bits
   1702 	MOVW $0x0019, (AX) // bytes 0x19, 0x00
   1703 	MOVW BX, 2(AX)
   1704 	ADDQ $0x04, AX
   1705 	JMP  repeat_end_emit_encodeBlockAsm4MB
   1706 
   1707 repeat_three_match_repeat_encodeBlockAsm4MB:
   1708 	LEAL -4(BX), BX // 3-byte form stores length - 8 as 8 bits
   1709 	MOVW $0x0015, (AX) // bytes 0x15, 0x00
   1710 	MOVB BL, 2(AX)
   1711 	ADDQ $0x03, AX
   1712 	JMP  repeat_end_emit_encodeBlockAsm4MB
   1713 
   1714 repeat_two_match_repeat_encodeBlockAsm4MB:
   1715 	SHLL $0x02, BX
   1716 	ORL  $0x01, BX // BX = ((length-4) << 2) | 1
   1717 	MOVW BX, (AX) // 16-bit store: tag byte followed by the upper byte of BX
   1718 	ADDQ $0x02, AX
   1719 	JMP  repeat_end_emit_encodeBlockAsm4MB
   1720 
   1721 repeat_two_offset_match_repeat_encodeBlockAsm4MB:
   1722 	XORQ DI, DI // DI = 0 so the LEAL below is a plain multiply-add
   1723 	LEAL 1(DI)(BX*4), BX // BX = ((length-4) << 2) | 1
   1724 	MOVB SI, 1(AX) // second byte = low 8 bits of the distance
   1725 	SARL $0x08, SI
   1726 	SHLL $0x05, SI // move distance bits 8..10 into bits 5..7
   1727 	ORL  SI, BX
   1728 	MOVB BL, (AX) // first byte = tag combined with the high distance bits
   1729 	ADDQ $0x02, AX
   1730 	JMP  repeat_end_emit_encodeBlockAsm4MB
   1731 
   1732 repeat_as_copy_encodeBlockAsm4MB:
   1733 	// emitCopy
   1734 	CMPL SI, $0x00010000
   1735 	JB   two_byte_offset_repeat_as_copy_encodeBlockAsm4MB
   1736 	CMPL BX, $0x40
   1737 	JBE  four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB
   1738 	MOVB $0xff, (AX)
   1739 	MOVL SI, 1(AX)
   1740 	LEAL -64(BX), BX
   1741 	ADDQ $0x05, AX
   1742 	CMPL BX, $0x04
   1743 	JB   four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB
   1744 
   1745 	// emitRepeat
   1746 	MOVL BX, DI
   1747 	LEAL -4(BX), BX
   1748 	CMPL DI, $0x08
   1749 	JBE  repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy
   1750 	CMPL DI, $0x0c
   1751 	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy
   1752 	CMPL SI, $0x00000800
   1753 	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy
   1754 
   1755 cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
   1756 	CMPL BX, $0x00000104
   1757 	JB   repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy
   1758 	CMPL BX, $0x00010100
   1759 	JB   repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy
   1760 	LEAL -65536(BX), BX
   1761 	MOVL BX, SI
   1762 	MOVW $0x001d, (AX)
   1763 	MOVW BX, 2(AX)
   1764 	SARL $0x10, SI
   1765 	MOVB SI, 4(AX)
   1766 	ADDQ $0x05, AX
   1767 	JMP  repeat_end_emit_encodeBlockAsm4MB
   1768 
   1769 repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
   1770 	LEAL -256(BX), BX
   1771 	MOVW $0x0019, (AX)
   1772 	MOVW BX, 2(AX)
   1773 	ADDQ $0x04, AX
   1774 	JMP  repeat_end_emit_encodeBlockAsm4MB
   1775 
   1776 repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
   1777 	LEAL -4(BX), BX
   1778 	MOVW $0x0015, (AX)
   1779 	MOVB BL, 2(AX)
   1780 	ADDQ $0x03, AX
   1781 	JMP  repeat_end_emit_encodeBlockAsm4MB
   1782 
   1783 repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
   1784 	SHLL $0x02, BX
   1785 	ORL  $0x01, BX
   1786 	MOVW BX, (AX)
   1787 	ADDQ $0x02, AX
   1788 	JMP  repeat_end_emit_encodeBlockAsm4MB
   1789 
   1790 repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
   1791 	XORQ DI, DI
   1792 	LEAL 1(DI)(BX*4), BX
   1793 	MOVB SI, 1(AX)
   1794 	SARL $0x08, SI
   1795 	SHLL $0x05, SI
   1796 	ORL  SI, BX
   1797 	MOVB BL, (AX)
   1798 	ADDQ $0x02, AX
   1799 	JMP  repeat_end_emit_encodeBlockAsm4MB
   1800 
   1801 four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB:
   1802 	TESTL BX, BX
   1803 	JZ    repeat_end_emit_encodeBlockAsm4MB
   1804 	XORL  DI, DI
   1805 	LEAL  -1(DI)(BX*4), BX
   1806 	MOVB  BL, (AX)
   1807 	MOVL  SI, 1(AX)
   1808 	ADDQ  $0x05, AX
   1809 	JMP   repeat_end_emit_encodeBlockAsm4MB
   1810 
   1811 two_byte_offset_repeat_as_copy_encodeBlockAsm4MB:
   1812 	CMPL BX, $0x40
   1813 	JBE  two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB
   1814 	CMPL SI, $0x00000800
   1815 	JAE  long_offset_short_repeat_as_copy_encodeBlockAsm4MB
   1816 	MOVL $0x00000001, DI
   1817 	LEAL 16(DI), DI
   1818 	MOVB SI, 1(AX)
   1819 	SHRL $0x08, SI
   1820 	SHLL $0x05, SI
   1821 	ORL  SI, DI
   1822 	MOVB DI, (AX)
   1823 	ADDQ $0x02, AX
   1824 	SUBL $0x08, BX
   1825 
   1826 	// emitRepeat
   1827 	LEAL -4(BX), BX
   1828 	JMP  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
   1829 	MOVL BX, DI
   1830 	LEAL -4(BX), BX
   1831 	CMPL DI, $0x08
   1832 	JBE  repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
   1833 	CMPL DI, $0x0c
   1834 	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
   1835 	CMPL SI, $0x00000800
   1836 	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
   1837 
   1838 cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b:
   1839 	CMPL BX, $0x00000104
   1840 	JB   repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
   1841 	CMPL BX, $0x00010100
   1842 	JB   repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
   1843 	LEAL -65536(BX), BX
   1844 	MOVL BX, SI
   1845 	MOVW $0x001d, (AX)
   1846 	MOVW BX, 2(AX)
   1847 	SARL $0x10, SI
   1848 	MOVB SI, 4(AX)
   1849 	ADDQ $0x05, AX
   1850 	JMP  repeat_end_emit_encodeBlockAsm4MB
   1851 
   1852 repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b:
   1853 	LEAL -256(BX), BX
   1854 	MOVW $0x0019, (AX)
   1855 	MOVW BX, 2(AX)
   1856 	ADDQ $0x04, AX
   1857 	JMP  repeat_end_emit_encodeBlockAsm4MB
   1858 
   1859 repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b:
   1860 	LEAL -4(BX), BX
   1861 	MOVW $0x0015, (AX)
   1862 	MOVB BL, 2(AX)
   1863 	ADDQ $0x03, AX
   1864 	JMP  repeat_end_emit_encodeBlockAsm4MB
   1865 
   1866 repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b:
   1867 	SHLL $0x02, BX
   1868 	ORL  $0x01, BX
   1869 	MOVW BX, (AX)
   1870 	ADDQ $0x02, AX
   1871 	JMP  repeat_end_emit_encodeBlockAsm4MB
   1872 
   1873 repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b:
   1874 	XORQ DI, DI
   1875 	LEAL 1(DI)(BX*4), BX
   1876 	MOVB SI, 1(AX)
   1877 	SARL $0x08, SI
   1878 	SHLL $0x05, SI
   1879 	ORL  SI, BX
   1880 	MOVB BL, (AX)
   1881 	ADDQ $0x02, AX
   1882 	JMP  repeat_end_emit_encodeBlockAsm4MB
   1883 
   1884 long_offset_short_repeat_as_copy_encodeBlockAsm4MB:
   1885 	MOVB $0xee, (AX)
   1886 	MOVW SI, 1(AX)
   1887 	LEAL -60(BX), BX
   1888 	ADDQ $0x03, AX
   1889 
   1890 	// emitRepeat
   1891 	MOVL BX, DI
   1892 	LEAL -4(BX), BX
   1893 	CMPL DI, $0x08
   1894 	JBE  repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
   1895 	CMPL DI, $0x0c
   1896 	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
   1897 	CMPL SI, $0x00000800
   1898 	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
   1899 
   1900 cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
   1901 	CMPL BX, $0x00000104
   1902 	JB   repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
   1903 	CMPL BX, $0x00010100
   1904 	JB   repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
   1905 	LEAL -65536(BX), BX
   1906 	MOVL BX, SI
   1907 	MOVW $0x001d, (AX)
   1908 	MOVW BX, 2(AX)
   1909 	SARL $0x10, SI
   1910 	MOVB SI, 4(AX)
   1911 	ADDQ $0x05, AX
   1912 	JMP  repeat_end_emit_encodeBlockAsm4MB
   1913 
   1914 repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
   1915 	LEAL -256(BX), BX
   1916 	MOVW $0x0019, (AX)
   1917 	MOVW BX, 2(AX)
   1918 	ADDQ $0x04, AX
   1919 	JMP  repeat_end_emit_encodeBlockAsm4MB
   1920 
   1921 repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
   1922 	LEAL -4(BX), BX
   1923 	MOVW $0x0015, (AX)
   1924 	MOVB BL, 2(AX)
   1925 	ADDQ $0x03, AX
   1926 	JMP  repeat_end_emit_encodeBlockAsm4MB
   1927 
   1928 repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
   1929 	SHLL $0x02, BX
   1930 	ORL  $0x01, BX
   1931 	MOVW BX, (AX)
   1932 	ADDQ $0x02, AX
   1933 	JMP  repeat_end_emit_encodeBlockAsm4MB
   1934 
   1935 repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
   1936 	XORQ DI, DI
   1937 	LEAL 1(DI)(BX*4), BX
   1938 	MOVB SI, 1(AX)
   1939 	SARL $0x08, SI
   1940 	SHLL $0x05, SI
   1941 	ORL  SI, BX
   1942 	MOVB BL, (AX)
   1943 	ADDQ $0x02, AX
   1944 	JMP  repeat_end_emit_encodeBlockAsm4MB
   1945 
   1946 two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB:
   1947 	MOVL BX, DI
   1948 	SHLL $0x02, DI
   1949 	CMPL BX, $0x0c
   1950 	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm4MB
   1951 	CMPL SI, $0x00000800
   1952 	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm4MB
   1953 	LEAL -15(DI), DI
   1954 	MOVB SI, 1(AX)
   1955 	SHRL $0x08, SI
   1956 	SHLL $0x05, SI
   1957 	ORL  SI, DI
   1958 	MOVB DI, (AX)
   1959 	ADDQ $0x02, AX
   1960 	JMP  repeat_end_emit_encodeBlockAsm4MB
   1961 
   1962 emit_copy_three_repeat_as_copy_encodeBlockAsm4MB:
   1963 	LEAL -2(DI), DI
   1964 	MOVB DI, (AX)
   1965 	MOVW SI, 1(AX)
   1966 	ADDQ $0x03, AX
   1967 
   1968 repeat_end_emit_encodeBlockAsm4MB:
   1969 	MOVL CX, 12(SP)
   1970 	JMP  search_loop_encodeBlockAsm4MB
   1971 
   1972 no_repeat_found_encodeBlockAsm4MB:
   1973 	CMPL (DX)(BX*1), SI
   1974 	JEQ  candidate_match_encodeBlockAsm4MB
   1975 	SHRQ $0x08, SI
   1976 	MOVL 24(SP)(R9*4), BX
   1977 	LEAL 2(CX), R8
   1978 	CMPL (DX)(DI*1), SI
   1979 	JEQ  candidate2_match_encodeBlockAsm4MB
   1980 	MOVL R8, 24(SP)(R9*4)
   1981 	SHRQ $0x08, SI
   1982 	CMPL (DX)(BX*1), SI
   1983 	JEQ  candidate3_match_encodeBlockAsm4MB
   1984 	MOVL 20(SP), CX
   1985 	JMP  search_loop_encodeBlockAsm4MB
   1986 
   1987 candidate3_match_encodeBlockAsm4MB:
   1988 	ADDL $0x02, CX
   1989 	JMP  candidate_match_encodeBlockAsm4MB
   1990 
   1991 candidate2_match_encodeBlockAsm4MB:
   1992 	MOVL R8, 24(SP)(R9*4)
   1993 	INCL CX
   1994 	MOVL DI, BX
   1995 
   1996 candidate_match_encodeBlockAsm4MB:
   1997 	MOVL  12(SP), SI
   1998 	TESTL BX, BX
   1999 	JZ    match_extend_back_end_encodeBlockAsm4MB
   2000 
   2001 match_extend_back_loop_encodeBlockAsm4MB:
   2002 	CMPL CX, SI
   2003 	JBE  match_extend_back_end_encodeBlockAsm4MB
   2004 	MOVB -1(DX)(BX*1), DI
   2005 	MOVB -1(DX)(CX*1), R8
   2006 	CMPB DI, R8
   2007 	JNE  match_extend_back_end_encodeBlockAsm4MB
   2008 	LEAL -1(CX), CX
   2009 	DECL BX
   2010 	JZ   match_extend_back_end_encodeBlockAsm4MB
   2011 	JMP  match_extend_back_loop_encodeBlockAsm4MB
   2012 
   2013 match_extend_back_end_encodeBlockAsm4MB:
   2014 	MOVL CX, SI
   2015 	SUBL 12(SP), SI
   2016 	LEAQ 4(AX)(SI*1), SI
   2017 	CMPQ SI, (SP)
   2018 	JB   match_dst_size_check_encodeBlockAsm4MB
   2019 	MOVQ $0x00000000, ret+48(FP)
   2020 	RET
   2021 
   2022 match_dst_size_check_encodeBlockAsm4MB:
   2023 	MOVL CX, SI
   2024 	MOVL 12(SP), DI
   2025 	CMPL DI, SI
   2026 	JEQ  emit_literal_done_match_emit_encodeBlockAsm4MB
   2027 	MOVL SI, R8
   2028 	MOVL SI, 12(SP)
   2029 	LEAQ (DX)(DI*1), SI
   2030 	SUBL DI, R8
   2031 	LEAL -1(R8), DI
   2032 	CMPL DI, $0x3c
   2033 	JB   one_byte_match_emit_encodeBlockAsm4MB
   2034 	CMPL DI, $0x00000100
   2035 	JB   two_bytes_match_emit_encodeBlockAsm4MB
   2036 	CMPL DI, $0x00010000
   2037 	JB   three_bytes_match_emit_encodeBlockAsm4MB
   2038 	MOVL DI, R9
   2039 	SHRL $0x10, R9
   2040 	MOVB $0xf8, (AX)
   2041 	MOVW DI, 1(AX)
   2042 	MOVB R9, 3(AX)
   2043 	ADDQ $0x04, AX
   2044 	JMP  memmove_long_match_emit_encodeBlockAsm4MB
   2045 
   2046 three_bytes_match_emit_encodeBlockAsm4MB:
   2047 	MOVB $0xf4, (AX)
   2048 	MOVW DI, 1(AX)
   2049 	ADDQ $0x03, AX
   2050 	JMP  memmove_long_match_emit_encodeBlockAsm4MB
   2051 
   2052 two_bytes_match_emit_encodeBlockAsm4MB:
   2053 	MOVB $0xf0, (AX)
   2054 	MOVB DI, 1(AX)
   2055 	ADDQ $0x02, AX
   2056 	CMPL DI, $0x40
   2057 	JB   memmove_match_emit_encodeBlockAsm4MB
   2058 	JMP  memmove_long_match_emit_encodeBlockAsm4MB
   2059 
   2060 one_byte_match_emit_encodeBlockAsm4MB:
   2061 	SHLB $0x02, DI
   2062 	MOVB DI, (AX)
   2063 	ADDQ $0x01, AX
   2064 
   2065 memmove_match_emit_encodeBlockAsm4MB:
   2066 	LEAQ (AX)(R8*1), DI
   2067 
   2068 	// genMemMoveShort
   2069 	CMPQ R8, $0x08
   2070 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8
   2071 	CMPQ R8, $0x10
   2072 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16
   2073 	CMPQ R8, $0x20
   2074 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32
   2075 	JMP  emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64
   2076 
   2077 emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8:
   2078 	MOVQ (SI), R9
   2079 	MOVQ R9, (AX)
   2080 	JMP  memmove_end_copy_match_emit_encodeBlockAsm4MB
   2081 
   2082 emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16:
   2083 	MOVQ (SI), R9
   2084 	MOVQ -8(SI)(R8*1), SI
   2085 	MOVQ R9, (AX)
   2086 	MOVQ SI, -8(AX)(R8*1)
   2087 	JMP  memmove_end_copy_match_emit_encodeBlockAsm4MB
   2088 
   2089 emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32:
   2090 	MOVOU (SI), X0
   2091 	MOVOU -16(SI)(R8*1), X1
   2092 	MOVOU X0, (AX)
   2093 	MOVOU X1, -16(AX)(R8*1)
   2094 	JMP   memmove_end_copy_match_emit_encodeBlockAsm4MB
   2095 
   2096 emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64:
   2097 	MOVOU (SI), X0
   2098 	MOVOU 16(SI), X1
   2099 	MOVOU -32(SI)(R8*1), X2
   2100 	MOVOU -16(SI)(R8*1), X3
   2101 	MOVOU X0, (AX)
   2102 	MOVOU X1, 16(AX)
   2103 	MOVOU X2, -32(AX)(R8*1)
   2104 	MOVOU X3, -16(AX)(R8*1)
   2105 
   2106 memmove_end_copy_match_emit_encodeBlockAsm4MB:
   2107 	MOVQ DI, AX
   2108 	JMP  emit_literal_done_match_emit_encodeBlockAsm4MB
   2109 
   2110 memmove_long_match_emit_encodeBlockAsm4MB:
   2111 	LEAQ (AX)(R8*1), DI
   2112 
   2113 	// genMemMoveLong
   2114 	MOVOU (SI), X0
   2115 	MOVOU 16(SI), X1
   2116 	MOVOU -32(SI)(R8*1), X2
   2117 	MOVOU -16(SI)(R8*1), X3
   2118 	MOVQ  R8, R10
   2119 	SHRQ  $0x05, R10
   2120 	MOVQ  AX, R9
   2121 	ANDL  $0x0000001f, R9
   2122 	MOVQ  $0x00000040, R11
   2123 	SUBQ  R9, R11
   2124 	DECQ  R10
   2125 	JA    emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
   2126 	LEAQ  -32(SI)(R11*1), R9
   2127 	LEAQ  -32(AX)(R11*1), R12
   2128 
   2129 emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back:
   2130 	MOVOU (R9), X4
   2131 	MOVOU 16(R9), X5
   2132 	MOVOA X4, (R12)
   2133 	MOVOA X5, 16(R12)
   2134 	ADDQ  $0x20, R12
   2135 	ADDQ  $0x20, R9
   2136 	ADDQ  $0x20, R11
   2137 	DECQ  R10
   2138 	JNA   emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back
   2139 
   2140 emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32:
   2141 	MOVOU -32(SI)(R11*1), X4
   2142 	MOVOU -16(SI)(R11*1), X5
   2143 	MOVOA X4, -32(AX)(R11*1)
   2144 	MOVOA X5, -16(AX)(R11*1)
   2145 	ADDQ  $0x20, R11
   2146 	CMPQ  R8, R11
   2147 	JAE   emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
   2148 	MOVOU X0, (AX)
   2149 	MOVOU X1, 16(AX)
   2150 	MOVOU X2, -32(AX)(R8*1)
   2151 	MOVOU X3, -16(AX)(R8*1)
   2152 	MOVQ  DI, AX
   2153 
   2154 emit_literal_done_match_emit_encodeBlockAsm4MB:
   2155 match_nolit_loop_encodeBlockAsm4MB:
   2156 	MOVL CX, SI
   2157 	SUBL BX, SI
   2158 	MOVL SI, 16(SP)
   2159 	ADDL $0x04, CX
   2160 	ADDL $0x04, BX
   2161 	MOVQ src_len+32(FP), SI
   2162 	SUBL CX, SI
   2163 	LEAQ (DX)(CX*1), DI
   2164 	LEAQ (DX)(BX*1), BX
   2165 
   2166 	// matchLen
   2167 	XORL R9, R9
   2168 	CMPL SI, $0x08
   2169 	JB   matchlen_match4_match_nolit_encodeBlockAsm4MB
   2170 
   2171 matchlen_loopback_match_nolit_encodeBlockAsm4MB:
   2172 	MOVQ  (DI)(R9*1), R8
   2173 	XORQ  (BX)(R9*1), R8
   2174 	TESTQ R8, R8
   2175 	JZ    matchlen_loop_match_nolit_encodeBlockAsm4MB
   2176 
   2177 #ifdef GOAMD64_v3
   2178 	TZCNTQ R8, R8
   2179 
   2180 #else
   2181 	BSFQ R8, R8
   2182 
   2183 #endif
   2184 	SARQ $0x03, R8
   2185 	LEAL (R9)(R8*1), R9
   2186 	JMP  match_nolit_end_encodeBlockAsm4MB
   2187 
   2188 matchlen_loop_match_nolit_encodeBlockAsm4MB:
   2189 	LEAL -8(SI), SI
   2190 	LEAL 8(R9), R9
   2191 	CMPL SI, $0x08
   2192 	JAE  matchlen_loopback_match_nolit_encodeBlockAsm4MB
   2193 	JZ   match_nolit_end_encodeBlockAsm4MB
   2194 
   2195 matchlen_match4_match_nolit_encodeBlockAsm4MB:
   2196 	CMPL SI, $0x04
   2197 	JB   matchlen_match2_match_nolit_encodeBlockAsm4MB
   2198 	MOVL (DI)(R9*1), R8
   2199 	CMPL (BX)(R9*1), R8
   2200 	JNE  matchlen_match2_match_nolit_encodeBlockAsm4MB
   2201 	SUBL $0x04, SI
   2202 	LEAL 4(R9), R9
   2203 
   2204 matchlen_match2_match_nolit_encodeBlockAsm4MB:
   2205 	CMPL SI, $0x02
   2206 	JB   matchlen_match1_match_nolit_encodeBlockAsm4MB
   2207 	MOVW (DI)(R9*1), R8
   2208 	CMPW (BX)(R9*1), R8
   2209 	JNE  matchlen_match1_match_nolit_encodeBlockAsm4MB
   2210 	SUBL $0x02, SI
   2211 	LEAL 2(R9), R9
   2212 
   2213 matchlen_match1_match_nolit_encodeBlockAsm4MB:
   2214 	CMPL SI, $0x01
   2215 	JB   match_nolit_end_encodeBlockAsm4MB
   2216 	MOVB (DI)(R9*1), R8
   2217 	CMPB (BX)(R9*1), R8
   2218 	JNE  match_nolit_end_encodeBlockAsm4MB
   2219 	LEAL 1(R9), R9
   2220 
   2221 match_nolit_end_encodeBlockAsm4MB:
   2222 	ADDL R9, CX
   2223 	MOVL 16(SP), BX
   2224 	ADDL $0x04, R9
   2225 	MOVL CX, 12(SP)
   2226 
   2227 	// emitCopy
   2228 	CMPL BX, $0x00010000
   2229 	JB   two_byte_offset_match_nolit_encodeBlockAsm4MB
   2230 	CMPL R9, $0x40
   2231 	JBE  four_bytes_remain_match_nolit_encodeBlockAsm4MB
   2232 	MOVB $0xff, (AX)
   2233 	MOVL BX, 1(AX)
   2234 	LEAL -64(R9), R9
   2235 	ADDQ $0x05, AX
   2236 	CMPL R9, $0x04
   2237 	JB   four_bytes_remain_match_nolit_encodeBlockAsm4MB
   2238 
   2239 	// emitRepeat
   2240 	MOVL R9, SI
   2241 	LEAL -4(R9), R9
   2242 	CMPL SI, $0x08
   2243 	JBE  repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy
   2244 	CMPL SI, $0x0c
   2245 	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy
   2246 	CMPL BX, $0x00000800
   2247 	JB   repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy
   2248 
   2249 cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy:
   2250 	CMPL R9, $0x00000104
   2251 	JB   repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy
   2252 	CMPL R9, $0x00010100
   2253 	JB   repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy
   2254 	LEAL -65536(R9), R9
   2255 	MOVL R9, BX
   2256 	MOVW $0x001d, (AX)
   2257 	MOVW R9, 2(AX)
   2258 	SARL $0x10, BX
   2259 	MOVB BL, 4(AX)
   2260 	ADDQ $0x05, AX
   2261 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
   2262 
   2263 repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy:
   2264 	LEAL -256(R9), R9
   2265 	MOVW $0x0019, (AX)
   2266 	MOVW R9, 2(AX)
   2267 	ADDQ $0x04, AX
   2268 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
   2269 
   2270 repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy:
   2271 	LEAL -4(R9), R9
   2272 	MOVW $0x0015, (AX)
   2273 	MOVB R9, 2(AX)
   2274 	ADDQ $0x03, AX
   2275 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
   2276 
   2277 repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy:
   2278 	SHLL $0x02, R9
   2279 	ORL  $0x01, R9
   2280 	MOVW R9, (AX)
   2281 	ADDQ $0x02, AX
   2282 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
   2283 
   2284 repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy:
   2285 	XORQ SI, SI
   2286 	LEAL 1(SI)(R9*4), R9
   2287 	MOVB BL, 1(AX)
   2288 	SARL $0x08, BX
   2289 	SHLL $0x05, BX
   2290 	ORL  BX, R9
   2291 	MOVB R9, (AX)
   2292 	ADDQ $0x02, AX
   2293 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
   2294 
   2295 four_bytes_remain_match_nolit_encodeBlockAsm4MB:
   2296 	TESTL R9, R9
   2297 	JZ    match_nolit_emitcopy_end_encodeBlockAsm4MB
   2298 	XORL  SI, SI
   2299 	LEAL  -1(SI)(R9*4), R9
   2300 	MOVB  R9, (AX)
   2301 	MOVL  BX, 1(AX)
   2302 	ADDQ  $0x05, AX
   2303 	JMP   match_nolit_emitcopy_end_encodeBlockAsm4MB
   2304 
   2305 two_byte_offset_match_nolit_encodeBlockAsm4MB:
   2306 	CMPL R9, $0x40
   2307 	JBE  two_byte_offset_short_match_nolit_encodeBlockAsm4MB
   2308 	CMPL BX, $0x00000800
   2309 	JAE  long_offset_short_match_nolit_encodeBlockAsm4MB
   2310 	MOVL $0x00000001, SI
   2311 	LEAL 16(SI), SI
   2312 	MOVB BL, 1(AX)
   2313 	SHRL $0x08, BX
   2314 	SHLL $0x05, BX
   2315 	ORL  BX, SI
   2316 	MOVB SI, (AX)
   2317 	ADDQ $0x02, AX
   2318 	SUBL $0x08, R9
   2319 
   2320 	// emitRepeat
   2321 	LEAL -4(R9), R9
   2322 	JMP  cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b
   2323 	MOVL R9, SI
   2324 	LEAL -4(R9), R9
   2325 	CMPL SI, $0x08
   2326 	JBE  repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b
   2327 	CMPL SI, $0x0c
   2328 	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b
   2329 	CMPL BX, $0x00000800
   2330 	JB   repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b
   2331 
   2332 cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b:
   2333 	CMPL R9, $0x00000104
   2334 	JB   repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b
   2335 	CMPL R9, $0x00010100
   2336 	JB   repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b
   2337 	LEAL -65536(R9), R9
   2338 	MOVL R9, BX
   2339 	MOVW $0x001d, (AX)
   2340 	MOVW R9, 2(AX)
   2341 	SARL $0x10, BX
   2342 	MOVB BL, 4(AX)
   2343 	ADDQ $0x05, AX
   2344 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
   2345 
   2346 repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b:
   2347 	LEAL -256(R9), R9
   2348 	MOVW $0x0019, (AX)
   2349 	MOVW R9, 2(AX)
   2350 	ADDQ $0x04, AX
   2351 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
   2352 
   2353 repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b:
   2354 	LEAL -4(R9), R9
   2355 	MOVW $0x0015, (AX)
   2356 	MOVB R9, 2(AX)
   2357 	ADDQ $0x03, AX
   2358 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
   2359 
   2360 repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b:
   2361 	SHLL $0x02, R9
   2362 	ORL  $0x01, R9
   2363 	MOVW R9, (AX)
   2364 	ADDQ $0x02, AX
   2365 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
   2366 
   2367 repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b:
   2368 	XORQ SI, SI
   2369 	LEAL 1(SI)(R9*4), R9
   2370 	MOVB BL, 1(AX)
   2371 	SARL $0x08, BX
   2372 	SHLL $0x05, BX
   2373 	ORL  BX, R9
   2374 	MOVB R9, (AX)
   2375 	ADDQ $0x02, AX
   2376 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
   2377 
   2378 long_offset_short_match_nolit_encodeBlockAsm4MB:
   2379 	MOVB $0xee, (AX)
   2380 	MOVW BX, 1(AX)
   2381 	LEAL -60(R9), R9
   2382 	ADDQ $0x03, AX
   2383 
   2384 	// emitRepeat
   2385 	MOVL R9, SI
   2386 	LEAL -4(R9), R9
   2387 	CMPL SI, $0x08
   2388 	JBE  repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short
   2389 	CMPL SI, $0x0c
   2390 	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short
   2391 	CMPL BX, $0x00000800
   2392 	JB   repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short
   2393 
   2394 cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short:
   2395 	CMPL R9, $0x00000104
   2396 	JB   repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short
   2397 	CMPL R9, $0x00010100
   2398 	JB   repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short
   2399 	LEAL -65536(R9), R9
   2400 	MOVL R9, BX
   2401 	MOVW $0x001d, (AX)
   2402 	MOVW R9, 2(AX)
   2403 	SARL $0x10, BX
   2404 	MOVB BL, 4(AX)
   2405 	ADDQ $0x05, AX
   2406 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
   2407 
   2408 repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short:
   2409 	LEAL -256(R9), R9
   2410 	MOVW $0x0019, (AX)
   2411 	MOVW R9, 2(AX)
   2412 	ADDQ $0x04, AX
   2413 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
   2414 
   2415 repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short:
   2416 	LEAL -4(R9), R9
   2417 	MOVW $0x0015, (AX)
   2418 	MOVB R9, 2(AX)
   2419 	ADDQ $0x03, AX
   2420 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
   2421 
   2422 repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short:
   2423 	SHLL $0x02, R9
   2424 	ORL  $0x01, R9
   2425 	MOVW R9, (AX)
   2426 	ADDQ $0x02, AX
   2427 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
   2428 
   2429 repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short:
   2430 	XORQ SI, SI
   2431 	LEAL 1(SI)(R9*4), R9
   2432 	MOVB BL, 1(AX)
   2433 	SARL $0x08, BX
   2434 	SHLL $0x05, BX
   2435 	ORL  BX, R9
   2436 	MOVB R9, (AX)
   2437 	ADDQ $0x02, AX
   2438 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
   2439 
   2440 two_byte_offset_short_match_nolit_encodeBlockAsm4MB:
   2441 	MOVL R9, SI
   2442 	SHLL $0x02, SI
   2443 	CMPL R9, $0x0c
   2444 	JAE  emit_copy_three_match_nolit_encodeBlockAsm4MB
   2445 	CMPL BX, $0x00000800
   2446 	JAE  emit_copy_three_match_nolit_encodeBlockAsm4MB
   2447 	LEAL -15(SI), SI
   2448 	MOVB BL, 1(AX)
   2449 	SHRL $0x08, BX
   2450 	SHLL $0x05, BX
   2451 	ORL  BX, SI
   2452 	MOVB SI, (AX)
   2453 	ADDQ $0x02, AX
   2454 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
   2455 
   2456 emit_copy_three_match_nolit_encodeBlockAsm4MB:
   2457 	LEAL -2(SI), SI
   2458 	MOVB SI, (AX)
   2459 	MOVW BX, 1(AX)
   2460 	ADDQ $0x03, AX
   2461 
   2462 match_nolit_emitcopy_end_encodeBlockAsm4MB:
   2463 	CMPL CX, 8(SP)
   2464 	JAE  emit_remainder_encodeBlockAsm4MB
   2465 	MOVQ -2(DX)(CX*1), SI
   2466 	CMPQ AX, (SP)
   2467 	JB   match_nolit_dst_ok_encodeBlockAsm4MB
   2468 	MOVQ $0x00000000, ret+48(FP)
   2469 	RET
   2470 
   2471 match_nolit_dst_ok_encodeBlockAsm4MB:
   2472 	MOVQ  $0x0000cf1bbcdcbf9b, R8
   2473 	MOVQ  SI, DI
   2474 	SHRQ  $0x10, SI
   2475 	MOVQ  SI, BX
   2476 	SHLQ  $0x10, DI
   2477 	IMULQ R8, DI
   2478 	SHRQ  $0x32, DI
   2479 	SHLQ  $0x10, BX
   2480 	IMULQ R8, BX
   2481 	SHRQ  $0x32, BX
   2482 	LEAL  -2(CX), R8
   2483 	LEAQ  24(SP)(BX*4), R9
   2484 	MOVL  (R9), BX
   2485 	MOVL  R8, 24(SP)(DI*4)
   2486 	MOVL  CX, (R9)
   2487 	CMPL  (DX)(BX*1), SI
   2488 	JEQ   match_nolit_loop_encodeBlockAsm4MB
   2489 	INCL  CX
   2490 	JMP   search_loop_encodeBlockAsm4MB
   2491 
   2492 emit_remainder_encodeBlockAsm4MB:
   2493 	MOVQ src_len+32(FP), CX
   2494 	SUBL 12(SP), CX
   2495 	LEAQ 4(AX)(CX*1), CX
   2496 	CMPQ CX, (SP)
   2497 	JB   emit_remainder_ok_encodeBlockAsm4MB
   2498 	MOVQ $0x00000000, ret+48(FP)
   2499 	RET
   2500 
   2501 emit_remainder_ok_encodeBlockAsm4MB:
   2502 	MOVQ src_len+32(FP), CX
   2503 	MOVL 12(SP), BX
   2504 	CMPL BX, CX
   2505 	JEQ  emit_literal_done_emit_remainder_encodeBlockAsm4MB
   2506 	MOVL CX, SI
   2507 	MOVL CX, 12(SP)
   2508 	LEAQ (DX)(BX*1), CX
   2509 	SUBL BX, SI
   2510 	LEAL -1(SI), DX
   2511 	CMPL DX, $0x3c
   2512 	JB   one_byte_emit_remainder_encodeBlockAsm4MB
   2513 	CMPL DX, $0x00000100
   2514 	JB   two_bytes_emit_remainder_encodeBlockAsm4MB
   2515 	CMPL DX, $0x00010000
   2516 	JB   three_bytes_emit_remainder_encodeBlockAsm4MB
   2517 	MOVL DX, BX
   2518 	SHRL $0x10, BX
   2519 	MOVB $0xf8, (AX)
   2520 	MOVW DX, 1(AX)
   2521 	MOVB BL, 3(AX)
   2522 	ADDQ $0x04, AX
   2523 	JMP  memmove_long_emit_remainder_encodeBlockAsm4MB
   2524 
   2525 three_bytes_emit_remainder_encodeBlockAsm4MB:
   2526 	MOVB $0xf4, (AX)
   2527 	MOVW DX, 1(AX)
   2528 	ADDQ $0x03, AX
   2529 	JMP  memmove_long_emit_remainder_encodeBlockAsm4MB
   2530 
   2531 two_bytes_emit_remainder_encodeBlockAsm4MB:
   2532 	MOVB $0xf0, (AX)
   2533 	MOVB DL, 1(AX)
   2534 	ADDQ $0x02, AX
   2535 	CMPL DX, $0x40
   2536 	JB   memmove_emit_remainder_encodeBlockAsm4MB
   2537 	JMP  memmove_long_emit_remainder_encodeBlockAsm4MB
   2538 
   2539 one_byte_emit_remainder_encodeBlockAsm4MB:
   2540 	SHLB $0x02, DL
   2541 	MOVB DL, (AX)
   2542 	ADDQ $0x01, AX
   2543 
   2544 memmove_emit_remainder_encodeBlockAsm4MB:
   2545 	LEAQ (AX)(SI*1), DX
   2546 	MOVL SI, BX
   2547 
   2548 	// genMemMoveShort
   2549 	CMPQ BX, $0x03
   2550 	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_1or2
   2551 	JE   emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_3
   2552 	CMPQ BX, $0x08
   2553 	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_4through7
   2554 	CMPQ BX, $0x10
   2555 	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_8through16
   2556 	CMPQ BX, $0x20
   2557 	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_17through32
   2558 	JMP  emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_33through64
   2559 
   2560 emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_1or2:
   2561 	MOVB (CX), SI
   2562 	MOVB -1(CX)(BX*1), CL
   2563 	MOVB SI, (AX)
   2564 	MOVB CL, -1(AX)(BX*1)
   2565 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm4MB
   2566 
   2567 emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_3:
   2568 	MOVW (CX), SI
   2569 	MOVB 2(CX), CL
   2570 	MOVW SI, (AX)
   2571 	MOVB CL, 2(AX)
   2572 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm4MB
   2573 
   2574 emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_4through7:
   2575 	MOVL (CX), SI
   2576 	MOVL -4(CX)(BX*1), CX
   2577 	MOVL SI, (AX)
   2578 	MOVL CX, -4(AX)(BX*1)
   2579 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm4MB
   2580 
   2581 emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_8through16:
   2582 	MOVQ (CX), SI
   2583 	MOVQ -8(CX)(BX*1), CX
   2584 	MOVQ SI, (AX)
   2585 	MOVQ CX, -8(AX)(BX*1)
   2586 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm4MB
   2587 
   2588 emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_17through32:
   2589 	MOVOU (CX), X0
   2590 	MOVOU -16(CX)(BX*1), X1
   2591 	MOVOU X0, (AX)
   2592 	MOVOU X1, -16(AX)(BX*1)
   2593 	JMP   memmove_end_copy_emit_remainder_encodeBlockAsm4MB
   2594 
   2595 emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_33through64:
   2596 	MOVOU (CX), X0
   2597 	MOVOU 16(CX), X1
   2598 	MOVOU -32(CX)(BX*1), X2
   2599 	MOVOU -16(CX)(BX*1), X3
   2600 	MOVOU X0, (AX)
   2601 	MOVOU X1, 16(AX)
   2602 	MOVOU X2, -32(AX)(BX*1)
   2603 	MOVOU X3, -16(AX)(BX*1)
   2604 
   2605 memmove_end_copy_emit_remainder_encodeBlockAsm4MB:
   2606 	MOVQ DX, AX
   2607 	JMP  emit_literal_done_emit_remainder_encodeBlockAsm4MB
   2608 
   2609 memmove_long_emit_remainder_encodeBlockAsm4MB:
   2610 	LEAQ (AX)(SI*1), DX
   2611 	MOVL SI, BX
   2612 
   2613 	// genMemMoveLong
   2614 	MOVOU (CX), X0
   2615 	MOVOU 16(CX), X1
   2616 	MOVOU -32(CX)(BX*1), X2
   2617 	MOVOU -16(CX)(BX*1), X3
   2618 	MOVQ  BX, DI
   2619 	SHRQ  $0x05, DI
   2620 	MOVQ  AX, SI
   2621 	ANDL  $0x0000001f, SI
   2622 	MOVQ  $0x00000040, R8
   2623 	SUBQ  SI, R8
   2624 	DECQ  DI
   2625 	JA    emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32
   2626 	LEAQ  -32(CX)(R8*1), SI
   2627 	LEAQ  -32(AX)(R8*1), R9
   2628 
   2629 emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back:
   2630 	MOVOU (SI), X4
   2631 	MOVOU 16(SI), X5
   2632 	MOVOA X4, (R9)
   2633 	MOVOA X5, 16(R9)
   2634 	ADDQ  $0x20, R9
   2635 	ADDQ  $0x20, SI
   2636 	ADDQ  $0x20, R8
   2637 	DECQ  DI
   2638 	JNA   emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back
   2639 
   2640 emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32:
   2641 	MOVOU -32(CX)(R8*1), X4
   2642 	MOVOU -16(CX)(R8*1), X5
   2643 	MOVOA X4, -32(AX)(R8*1)
   2644 	MOVOA X5, -16(AX)(R8*1)
   2645 	ADDQ  $0x20, R8
   2646 	CMPQ  BX, R8
   2647 	JAE   emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32
   2648 	MOVOU X0, (AX)
   2649 	MOVOU X1, 16(AX)
   2650 	MOVOU X2, -32(AX)(BX*1)
   2651 	MOVOU X3, -16(AX)(BX*1)
   2652 	MOVQ  DX, AX
   2653 
   2654 emit_literal_done_emit_remainder_encodeBlockAsm4MB:
   2655 	MOVQ dst_base+0(FP), CX
   2656 	SUBQ CX, AX
   2657 	MOVQ AX, ret+48(FP)
   2658 	RET
   2659 
   2660 // func encodeBlockAsm12B(dst []byte, src []byte) int
   2661 // Requires: BMI, SSE2
   2662 TEXT ·encodeBlockAsm12B(SB), $16408-56
   2663 	MOVQ dst_base+0(FP), AX
   2664 	MOVQ $0x00000080, CX
   2665 	LEAQ 24(SP), DX
   2666 	PXOR X0, X0
   2667 
   2668 zero_loop_encodeBlockAsm12B:
   2669 	MOVOU X0, (DX)
   2670 	MOVOU X0, 16(DX)
   2671 	MOVOU X0, 32(DX)
   2672 	MOVOU X0, 48(DX)
   2673 	MOVOU X0, 64(DX)
   2674 	MOVOU X0, 80(DX)
   2675 	MOVOU X0, 96(DX)
   2676 	MOVOU X0, 112(DX)
   2677 	ADDQ  $0x80, DX
   2678 	DECQ  CX
   2679 	JNZ   zero_loop_encodeBlockAsm12B
   2680 	MOVL  $0x00000000, 12(SP)
   2681 	MOVQ  src_len+32(FP), CX
   2682 	LEAQ  -9(CX), DX
   2683 	LEAQ  -8(CX), BX
   2684 	MOVL  BX, 8(SP)
   2685 	SHRQ  $0x05, CX
   2686 	SUBL  CX, DX
   2687 	LEAQ  (AX)(DX*1), DX
   2688 	MOVQ  DX, (SP)
   2689 	MOVL  $0x00000001, CX
   2690 	MOVL  CX, 16(SP)
   2691 	MOVQ  src_base+24(FP), DX
   2692 
   2693 search_loop_encodeBlockAsm12B:
   2694 	MOVL  CX, BX
   2695 	SUBL  12(SP), BX
   2696 	SHRL  $0x05, BX
   2697 	LEAL  4(CX)(BX*1), BX
   2698 	CMPL  BX, 8(SP)
   2699 	JAE   emit_remainder_encodeBlockAsm12B
   2700 	MOVQ  (DX)(CX*1), SI
   2701 	MOVL  BX, 20(SP)
   2702 	MOVQ  $0x000000cf1bbcdcbb, R8
   2703 	MOVQ  SI, R9
   2704 	MOVQ  SI, R10
   2705 	SHRQ  $0x08, R10
   2706 	SHLQ  $0x18, R9
   2707 	IMULQ R8, R9
   2708 	SHRQ  $0x34, R9
   2709 	SHLQ  $0x18, R10
   2710 	IMULQ R8, R10
   2711 	SHRQ  $0x34, R10
   2712 	MOVL  24(SP)(R9*4), BX
   2713 	MOVL  24(SP)(R10*4), DI
   2714 	MOVL  CX, 24(SP)(R9*4)
   2715 	LEAL  1(CX), R9
   2716 	MOVL  R9, 24(SP)(R10*4)
   2717 	MOVQ  SI, R9
   2718 	SHRQ  $0x10, R9
   2719 	SHLQ  $0x18, R9
   2720 	IMULQ R8, R9
   2721 	SHRQ  $0x34, R9
   2722 	MOVL  CX, R8
   2723 	SUBL  16(SP), R8
   2724 	MOVL  1(DX)(R8*1), R10
   2725 	MOVQ  SI, R8
   2726 	SHRQ  $0x08, R8
   2727 	CMPL  R8, R10
   2728 	JNE   no_repeat_found_encodeBlockAsm12B
   2729 	LEAL  1(CX), SI
   2730 	MOVL  12(SP), DI
   2731 	MOVL  SI, BX
   2732 	SUBL  16(SP), BX
   2733 	JZ    repeat_extend_back_end_encodeBlockAsm12B
   2734 
   2735 repeat_extend_back_loop_encodeBlockAsm12B:
   2736 	CMPL SI, DI
   2737 	JBE  repeat_extend_back_end_encodeBlockAsm12B
   2738 	MOVB -1(DX)(BX*1), R8
   2739 	MOVB -1(DX)(SI*1), R9
   2740 	CMPB R8, R9
   2741 	JNE  repeat_extend_back_end_encodeBlockAsm12B
   2742 	LEAL -1(SI), SI
   2743 	DECL BX
   2744 	JNZ  repeat_extend_back_loop_encodeBlockAsm12B
   2745 
   2746 repeat_extend_back_end_encodeBlockAsm12B:
   2747 	MOVL 12(SP), BX
   2748 	CMPL BX, SI
   2749 	JEQ  emit_literal_done_repeat_emit_encodeBlockAsm12B
   2750 	MOVL SI, R8
   2751 	MOVL SI, 12(SP)
   2752 	LEAQ (DX)(BX*1), R9
   2753 	SUBL BX, R8
   2754 	LEAL -1(R8), BX
   2755 	CMPL BX, $0x3c
   2756 	JB   one_byte_repeat_emit_encodeBlockAsm12B
   2757 	CMPL BX, $0x00000100
   2758 	JB   two_bytes_repeat_emit_encodeBlockAsm12B
   2759 	JB   three_bytes_repeat_emit_encodeBlockAsm12B
   2760 
   2761 three_bytes_repeat_emit_encodeBlockAsm12B:
   2762 	MOVB $0xf4, (AX)
   2763 	MOVW BX, 1(AX)
   2764 	ADDQ $0x03, AX
   2765 	JMP  memmove_long_repeat_emit_encodeBlockAsm12B
   2766 
   2767 two_bytes_repeat_emit_encodeBlockAsm12B:
   2768 	MOVB $0xf0, (AX)
   2769 	MOVB BL, 1(AX)
   2770 	ADDQ $0x02, AX
   2771 	CMPL BX, $0x40
   2772 	JB   memmove_repeat_emit_encodeBlockAsm12B
   2773 	JMP  memmove_long_repeat_emit_encodeBlockAsm12B
   2774 
   2775 one_byte_repeat_emit_encodeBlockAsm12B:
   2776 	SHLB $0x02, BL
   2777 	MOVB BL, (AX)
   2778 	ADDQ $0x01, AX
   2779 
   2780 memmove_repeat_emit_encodeBlockAsm12B:
   2781 	LEAQ (AX)(R8*1), BX
   2782 
   2783 	// genMemMoveShort
   2784 	CMPQ R8, $0x08
   2785 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8
   2786 	CMPQ R8, $0x10
   2787 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16
   2788 	CMPQ R8, $0x20
   2789 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32
   2790 	JMP  emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64
   2791 
   2792 emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8:
   2793 	MOVQ (R9), R10
   2794 	MOVQ R10, (AX)
   2795 	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm12B
   2796 
   2797 emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16:
   2798 	MOVQ (R9), R10
   2799 	MOVQ -8(R9)(R8*1), R9
   2800 	MOVQ R10, (AX)
   2801 	MOVQ R9, -8(AX)(R8*1)
   2802 	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm12B
   2803 
   2804 emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32:
   2805 	MOVOU (R9), X0
   2806 	MOVOU -16(R9)(R8*1), X1
   2807 	MOVOU X0, (AX)
   2808 	MOVOU X1, -16(AX)(R8*1)
   2809 	JMP   memmove_end_copy_repeat_emit_encodeBlockAsm12B
   2810 
   2811 emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64:
   2812 	MOVOU (R9), X0
   2813 	MOVOU 16(R9), X1
   2814 	MOVOU -32(R9)(R8*1), X2
   2815 	MOVOU -16(R9)(R8*1), X3
   2816 	MOVOU X0, (AX)
   2817 	MOVOU X1, 16(AX)
   2818 	MOVOU X2, -32(AX)(R8*1)
   2819 	MOVOU X3, -16(AX)(R8*1)
   2820 
   2821 memmove_end_copy_repeat_emit_encodeBlockAsm12B:
   2822 	MOVQ BX, AX
   2823 	JMP  emit_literal_done_repeat_emit_encodeBlockAsm12B
   2824 
   2825 memmove_long_repeat_emit_encodeBlockAsm12B:
   2826 	LEAQ (AX)(R8*1), BX
   2827 
   2828 	// genMemMoveLong
   2829 	MOVOU (R9), X0
   2830 	MOVOU 16(R9), X1
   2831 	MOVOU -32(R9)(R8*1), X2
   2832 	MOVOU -16(R9)(R8*1), X3
   2833 	MOVQ  R8, R11
   2834 	SHRQ  $0x05, R11
   2835 	MOVQ  AX, R10
   2836 	ANDL  $0x0000001f, R10
   2837 	MOVQ  $0x00000040, R12
   2838 	SUBQ  R10, R12
   2839 	DECQ  R11
   2840 	JA    emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
   2841 	LEAQ  -32(R9)(R12*1), R10
   2842 	LEAQ  -32(AX)(R12*1), R13
   2843 
   2844 emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back:
   2845 	MOVOU (R10), X4
   2846 	MOVOU 16(R10), X5
   2847 	MOVOA X4, (R13)
   2848 	MOVOA X5, 16(R13)
   2849 	ADDQ  $0x20, R13
   2850 	ADDQ  $0x20, R10
   2851 	ADDQ  $0x20, R12
   2852 	DECQ  R11
   2853 	JNA   emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back
   2854 
   2855 emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32:
   2856 	MOVOU -32(R9)(R12*1), X4
   2857 	MOVOU -16(R9)(R12*1), X5
   2858 	MOVOA X4, -32(AX)(R12*1)
   2859 	MOVOA X5, -16(AX)(R12*1)
   2860 	ADDQ  $0x20, R12
   2861 	CMPQ  R8, R12
   2862 	JAE   emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
   2863 	MOVOU X0, (AX)
   2864 	MOVOU X1, 16(AX)
   2865 	MOVOU X2, -32(AX)(R8*1)
   2866 	MOVOU X3, -16(AX)(R8*1)
   2867 	MOVQ  BX, AX
   2868 
   2869 emit_literal_done_repeat_emit_encodeBlockAsm12B:
   2870 	ADDL $0x05, CX
   2871 	MOVL CX, BX
   2872 	SUBL 16(SP), BX
   2873 	MOVQ src_len+32(FP), R8
   2874 	SUBL CX, R8
   2875 	LEAQ (DX)(CX*1), R9
   2876 	LEAQ (DX)(BX*1), BX
   2877 
   2878 	// matchLen
   2879 	XORL R11, R11
   2880 	CMPL R8, $0x08
   2881 	JB   matchlen_match4_repeat_extend_encodeBlockAsm12B
   2882 
   2883 matchlen_loopback_repeat_extend_encodeBlockAsm12B:
   2884 	MOVQ  (R9)(R11*1), R10
   2885 	XORQ  (BX)(R11*1), R10
   2886 	TESTQ R10, R10
   2887 	JZ    matchlen_loop_repeat_extend_encodeBlockAsm12B
   2888 
   2889 #ifdef GOAMD64_v3
   2890 	TZCNTQ R10, R10
   2891 
   2892 #else
   2893 	BSFQ R10, R10
   2894 
   2895 #endif
   2896 	SARQ $0x03, R10
   2897 	LEAL (R11)(R10*1), R11
   2898 	JMP  repeat_extend_forward_end_encodeBlockAsm12B
   2899 
   2900 matchlen_loop_repeat_extend_encodeBlockAsm12B:
   2901 	LEAL -8(R8), R8
   2902 	LEAL 8(R11), R11
   2903 	CMPL R8, $0x08
   2904 	JAE  matchlen_loopback_repeat_extend_encodeBlockAsm12B
   2905 	JZ   repeat_extend_forward_end_encodeBlockAsm12B
   2906 
   2907 matchlen_match4_repeat_extend_encodeBlockAsm12B:
   2908 	CMPL R8, $0x04
   2909 	JB   matchlen_match2_repeat_extend_encodeBlockAsm12B
   2910 	MOVL (R9)(R11*1), R10
   2911 	CMPL (BX)(R11*1), R10
   2912 	JNE  matchlen_match2_repeat_extend_encodeBlockAsm12B
   2913 	SUBL $0x04, R8
   2914 	LEAL 4(R11), R11
   2915 
   2916 matchlen_match2_repeat_extend_encodeBlockAsm12B:
   2917 	CMPL R8, $0x02
   2918 	JB   matchlen_match1_repeat_extend_encodeBlockAsm12B
   2919 	MOVW (R9)(R11*1), R10
   2920 	CMPW (BX)(R11*1), R10
   2921 	JNE  matchlen_match1_repeat_extend_encodeBlockAsm12B
   2922 	SUBL $0x02, R8
   2923 	LEAL 2(R11), R11
   2924 
   2925 matchlen_match1_repeat_extend_encodeBlockAsm12B:
   2926 	CMPL R8, $0x01
   2927 	JB   repeat_extend_forward_end_encodeBlockAsm12B
   2928 	MOVB (R9)(R11*1), R10
   2929 	CMPB (BX)(R11*1), R10
   2930 	JNE  repeat_extend_forward_end_encodeBlockAsm12B
   2931 	LEAL 1(R11), R11
   2932 
   2933 repeat_extend_forward_end_encodeBlockAsm12B:
   2934 	ADDL  R11, CX
   2935 	MOVL  CX, BX
   2936 	SUBL  SI, BX
   2937 	MOVL  16(SP), SI
   2938 	TESTL DI, DI
   2939 	JZ    repeat_as_copy_encodeBlockAsm12B
   2940 
   2941 	// emitRepeat
   2942 	MOVL BX, DI
   2943 	LEAL -4(BX), BX
   2944 	CMPL DI, $0x08
   2945 	JBE  repeat_two_match_repeat_encodeBlockAsm12B
   2946 	CMPL DI, $0x0c
   2947 	JAE  cant_repeat_two_offset_match_repeat_encodeBlockAsm12B
   2948 	CMPL SI, $0x00000800
   2949 	JB   repeat_two_offset_match_repeat_encodeBlockAsm12B
   2950 
   2951 cant_repeat_two_offset_match_repeat_encodeBlockAsm12B:
   2952 	CMPL BX, $0x00000104
   2953 	JB   repeat_three_match_repeat_encodeBlockAsm12B
   2954 	LEAL -256(BX), BX
   2955 	MOVW $0x0019, (AX)
   2956 	MOVW BX, 2(AX)
   2957 	ADDQ $0x04, AX
   2958 	JMP  repeat_end_emit_encodeBlockAsm12B
   2959 
   2960 repeat_three_match_repeat_encodeBlockAsm12B:
   2961 	LEAL -4(BX), BX
   2962 	MOVW $0x0015, (AX)
   2963 	MOVB BL, 2(AX)
   2964 	ADDQ $0x03, AX
   2965 	JMP  repeat_end_emit_encodeBlockAsm12B
   2966 
   2967 repeat_two_match_repeat_encodeBlockAsm12B:
   2968 	SHLL $0x02, BX
   2969 	ORL  $0x01, BX
   2970 	MOVW BX, (AX)
   2971 	ADDQ $0x02, AX
   2972 	JMP  repeat_end_emit_encodeBlockAsm12B
   2973 
   2974 repeat_two_offset_match_repeat_encodeBlockAsm12B:
   2975 	XORQ DI, DI
   2976 	LEAL 1(DI)(BX*4), BX
   2977 	MOVB SI, 1(AX)
   2978 	SARL $0x08, SI
   2979 	SHLL $0x05, SI
   2980 	ORL  SI, BX
   2981 	MOVB BL, (AX)
   2982 	ADDQ $0x02, AX
   2983 	JMP  repeat_end_emit_encodeBlockAsm12B
   2984 
   2985 repeat_as_copy_encodeBlockAsm12B:
   2986 	// emitCopy
   2987 	CMPL BX, $0x40
   2988 	JBE  two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B
   2989 	CMPL SI, $0x00000800
   2990 	JAE  long_offset_short_repeat_as_copy_encodeBlockAsm12B
   2991 	MOVL $0x00000001, DI
   2992 	LEAL 16(DI), DI
   2993 	MOVB SI, 1(AX)
   2994 	SHRL $0x08, SI
   2995 	SHLL $0x05, SI
   2996 	ORL  SI, DI
   2997 	MOVB DI, (AX)
   2998 	ADDQ $0x02, AX
   2999 	SUBL $0x08, BX
   3000 
   3001 	// emitRepeat
   3002 	LEAL -4(BX), BX
   3003 	JMP  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b
   3004 	MOVL BX, DI
   3005 	LEAL -4(BX), BX
   3006 	CMPL DI, $0x08
   3007 	JBE  repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b
   3008 	CMPL DI, $0x0c
   3009 	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b
   3010 	CMPL SI, $0x00000800
   3011 	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b
   3012 
   3013 cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b:
   3014 	CMPL BX, $0x00000104
   3015 	JB   repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b
   3016 	LEAL -256(BX), BX
   3017 	MOVW $0x0019, (AX)
   3018 	MOVW BX, 2(AX)
   3019 	ADDQ $0x04, AX
   3020 	JMP  repeat_end_emit_encodeBlockAsm12B
   3021 
   3022 repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b:
   3023 	LEAL -4(BX), BX
   3024 	MOVW $0x0015, (AX)
   3025 	MOVB BL, 2(AX)
   3026 	ADDQ $0x03, AX
   3027 	JMP  repeat_end_emit_encodeBlockAsm12B
   3028 
   3029 repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b:
   3030 	SHLL $0x02, BX
   3031 	ORL  $0x01, BX
   3032 	MOVW BX, (AX)
   3033 	ADDQ $0x02, AX
   3034 	JMP  repeat_end_emit_encodeBlockAsm12B
   3035 
   3036 repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b:
   3037 	XORQ DI, DI
   3038 	LEAL 1(DI)(BX*4), BX
   3039 	MOVB SI, 1(AX)
   3040 	SARL $0x08, SI
   3041 	SHLL $0x05, SI
   3042 	ORL  SI, BX
   3043 	MOVB BL, (AX)
   3044 	ADDQ $0x02, AX
   3045 	JMP  repeat_end_emit_encodeBlockAsm12B
   3046 
   3047 long_offset_short_repeat_as_copy_encodeBlockAsm12B:
   3048 	MOVB $0xee, (AX)
   3049 	MOVW SI, 1(AX)
   3050 	LEAL -60(BX), BX
   3051 	ADDQ $0x03, AX
   3052 
   3053 	// emitRepeat
   3054 	MOVL BX, DI
   3055 	LEAL -4(BX), BX
   3056 	CMPL DI, $0x08
   3057 	JBE  repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
   3058 	CMPL DI, $0x0c
   3059 	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
   3060 	CMPL SI, $0x00000800
   3061 	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
   3062 
   3063 cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
   3064 	CMPL BX, $0x00000104
   3065 	JB   repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
   3066 	LEAL -256(BX), BX
   3067 	MOVW $0x0019, (AX)
   3068 	MOVW BX, 2(AX)
   3069 	ADDQ $0x04, AX
   3070 	JMP  repeat_end_emit_encodeBlockAsm12B
   3071 
   3072 repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
   3073 	LEAL -4(BX), BX
   3074 	MOVW $0x0015, (AX)
   3075 	MOVB BL, 2(AX)
   3076 	ADDQ $0x03, AX
   3077 	JMP  repeat_end_emit_encodeBlockAsm12B
   3078 
   3079 repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
   3080 	SHLL $0x02, BX
   3081 	ORL  $0x01, BX
   3082 	MOVW BX, (AX)
   3083 	ADDQ $0x02, AX
   3084 	JMP  repeat_end_emit_encodeBlockAsm12B
   3085 
   3086 repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
   3087 	XORQ DI, DI
   3088 	LEAL 1(DI)(BX*4), BX
   3089 	MOVB SI, 1(AX)
   3090 	SARL $0x08, SI
   3091 	SHLL $0x05, SI
   3092 	ORL  SI, BX
   3093 	MOVB BL, (AX)
   3094 	ADDQ $0x02, AX
   3095 	JMP  repeat_end_emit_encodeBlockAsm12B
   3096 
   3097 two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B:
   3098 	MOVL BX, DI
   3099 	SHLL $0x02, DI
   3100 	CMPL BX, $0x0c
   3101 	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm12B
   3102 	CMPL SI, $0x00000800
   3103 	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm12B
   3104 	LEAL -15(DI), DI
   3105 	MOVB SI, 1(AX)
   3106 	SHRL $0x08, SI
   3107 	SHLL $0x05, SI
   3108 	ORL  SI, DI
   3109 	MOVB DI, (AX)
   3110 	ADDQ $0x02, AX
   3111 	JMP  repeat_end_emit_encodeBlockAsm12B
   3112 
   3113 emit_copy_three_repeat_as_copy_encodeBlockAsm12B:
   3114 	LEAL -2(DI), DI
   3115 	MOVB DI, (AX)
   3116 	MOVW SI, 1(AX)
   3117 	ADDQ $0x03, AX
   3118 
   3119 repeat_end_emit_encodeBlockAsm12B:
   3120 	MOVL CX, 12(SP)
   3121 	JMP  search_loop_encodeBlockAsm12B
   3122 
   3123 no_repeat_found_encodeBlockAsm12B:
   3124 	CMPL (DX)(BX*1), SI
   3125 	JEQ  candidate_match_encodeBlockAsm12B
   3126 	SHRQ $0x08, SI
   3127 	MOVL 24(SP)(R9*4), BX
   3128 	LEAL 2(CX), R8
   3129 	CMPL (DX)(DI*1), SI
   3130 	JEQ  candidate2_match_encodeBlockAsm12B
   3131 	MOVL R8, 24(SP)(R9*4)
   3132 	SHRQ $0x08, SI
   3133 	CMPL (DX)(BX*1), SI
   3134 	JEQ  candidate3_match_encodeBlockAsm12B
   3135 	MOVL 20(SP), CX
   3136 	JMP  search_loop_encodeBlockAsm12B
   3137 
   3138 candidate3_match_encodeBlockAsm12B:
   3139 	ADDL $0x02, CX
   3140 	JMP  candidate_match_encodeBlockAsm12B
   3141 
   3142 candidate2_match_encodeBlockAsm12B:
   3143 	MOVL R8, 24(SP)(R9*4)
   3144 	INCL CX
   3145 	MOVL DI, BX
   3146 
   3147 candidate_match_encodeBlockAsm12B:
   3148 	MOVL  12(SP), SI
   3149 	TESTL BX, BX
   3150 	JZ    match_extend_back_end_encodeBlockAsm12B
   3151 
   3152 match_extend_back_loop_encodeBlockAsm12B:
   3153 	CMPL CX, SI
   3154 	JBE  match_extend_back_end_encodeBlockAsm12B
   3155 	MOVB -1(DX)(BX*1), DI
   3156 	MOVB -1(DX)(CX*1), R8
   3157 	CMPB DI, R8
   3158 	JNE  match_extend_back_end_encodeBlockAsm12B
   3159 	LEAL -1(CX), CX
   3160 	DECL BX
   3161 	JZ   match_extend_back_end_encodeBlockAsm12B
   3162 	JMP  match_extend_back_loop_encodeBlockAsm12B
   3163 
   3164 match_extend_back_end_encodeBlockAsm12B:
   3165 	MOVL CX, SI
   3166 	SUBL 12(SP), SI
   3167 	LEAQ 3(AX)(SI*1), SI
   3168 	CMPQ SI, (SP)
   3169 	JB   match_dst_size_check_encodeBlockAsm12B
   3170 	MOVQ $0x00000000, ret+48(FP)
   3171 	RET
   3172 
   3173 match_dst_size_check_encodeBlockAsm12B:
   3174 	MOVL CX, SI
   3175 	MOVL 12(SP), DI
   3176 	CMPL DI, SI
   3177 	JEQ  emit_literal_done_match_emit_encodeBlockAsm12B
   3178 	MOVL SI, R8
   3179 	MOVL SI, 12(SP)
   3180 	LEAQ (DX)(DI*1), SI
   3181 	SUBL DI, R8
   3182 	LEAL -1(R8), DI
   3183 	CMPL DI, $0x3c
   3184 	JB   one_byte_match_emit_encodeBlockAsm12B
   3185 	CMPL DI, $0x00000100
   3186 	JB   two_bytes_match_emit_encodeBlockAsm12B
   3187 	JB   three_bytes_match_emit_encodeBlockAsm12B
   3188 
   3189 three_bytes_match_emit_encodeBlockAsm12B:
   3190 	MOVB $0xf4, (AX)
   3191 	MOVW DI, 1(AX)
   3192 	ADDQ $0x03, AX
   3193 	JMP  memmove_long_match_emit_encodeBlockAsm12B
   3194 
   3195 two_bytes_match_emit_encodeBlockAsm12B:
   3196 	MOVB $0xf0, (AX)
   3197 	MOVB DI, 1(AX)
   3198 	ADDQ $0x02, AX
   3199 	CMPL DI, $0x40
   3200 	JB   memmove_match_emit_encodeBlockAsm12B
   3201 	JMP  memmove_long_match_emit_encodeBlockAsm12B
   3202 
   3203 one_byte_match_emit_encodeBlockAsm12B:
   3204 	SHLB $0x02, DI
   3205 	MOVB DI, (AX)
   3206 	ADDQ $0x01, AX
   3207 
   3208 memmove_match_emit_encodeBlockAsm12B:
   3209 	LEAQ (AX)(R8*1), DI
   3210 
   3211 	// genMemMoveShort
   3212 	CMPQ R8, $0x08
   3213 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8
   3214 	CMPQ R8, $0x10
   3215 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16
   3216 	CMPQ R8, $0x20
   3217 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32
   3218 	JMP  emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64
   3219 
   3220 emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8:
   3221 	MOVQ (SI), R9
   3222 	MOVQ R9, (AX)
   3223 	JMP  memmove_end_copy_match_emit_encodeBlockAsm12B
   3224 
   3225 emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16:
   3226 	MOVQ (SI), R9
   3227 	MOVQ -8(SI)(R8*1), SI
   3228 	MOVQ R9, (AX)
   3229 	MOVQ SI, -8(AX)(R8*1)
   3230 	JMP  memmove_end_copy_match_emit_encodeBlockAsm12B
   3231 
   3232 emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32:
   3233 	MOVOU (SI), X0
   3234 	MOVOU -16(SI)(R8*1), X1
   3235 	MOVOU X0, (AX)
   3236 	MOVOU X1, -16(AX)(R8*1)
   3237 	JMP   memmove_end_copy_match_emit_encodeBlockAsm12B
   3238 
   3239 emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64:
   3240 	MOVOU (SI), X0
   3241 	MOVOU 16(SI), X1
   3242 	MOVOU -32(SI)(R8*1), X2
   3243 	MOVOU -16(SI)(R8*1), X3
   3244 	MOVOU X0, (AX)
   3245 	MOVOU X1, 16(AX)
   3246 	MOVOU X2, -32(AX)(R8*1)
   3247 	MOVOU X3, -16(AX)(R8*1)
   3248 
   3249 memmove_end_copy_match_emit_encodeBlockAsm12B:
   3250 	MOVQ DI, AX
   3251 	JMP  emit_literal_done_match_emit_encodeBlockAsm12B
   3252 
   3253 memmove_long_match_emit_encodeBlockAsm12B:
   3254 	LEAQ (AX)(R8*1), DI
   3255 
   3256 	// genMemMoveLong
   3257 	MOVOU (SI), X0
   3258 	MOVOU 16(SI), X1
   3259 	MOVOU -32(SI)(R8*1), X2
   3260 	MOVOU -16(SI)(R8*1), X3
   3261 	MOVQ  R8, R10
   3262 	SHRQ  $0x05, R10
   3263 	MOVQ  AX, R9
   3264 	ANDL  $0x0000001f, R9
   3265 	MOVQ  $0x00000040, R11
   3266 	SUBQ  R9, R11
   3267 	DECQ  R10
   3268 	JA    emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
   3269 	LEAQ  -32(SI)(R11*1), R9
   3270 	LEAQ  -32(AX)(R11*1), R12
   3271 
   3272 emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back:
   3273 	MOVOU (R9), X4
   3274 	MOVOU 16(R9), X5
   3275 	MOVOA X4, (R12)
   3276 	MOVOA X5, 16(R12)
   3277 	ADDQ  $0x20, R12
   3278 	ADDQ  $0x20, R9
   3279 	ADDQ  $0x20, R11
   3280 	DECQ  R10
   3281 	JNA   emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back
   3282 
   3283 emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32:
   3284 	MOVOU -32(SI)(R11*1), X4
   3285 	MOVOU -16(SI)(R11*1), X5
   3286 	MOVOA X4, -32(AX)(R11*1)
   3287 	MOVOA X5, -16(AX)(R11*1)
   3288 	ADDQ  $0x20, R11
   3289 	CMPQ  R8, R11
   3290 	JAE   emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
   3291 	MOVOU X0, (AX)
   3292 	MOVOU X1, 16(AX)
   3293 	MOVOU X2, -32(AX)(R8*1)
   3294 	MOVOU X3, -16(AX)(R8*1)
   3295 	MOVQ  DI, AX
   3296 
   3297 emit_literal_done_match_emit_encodeBlockAsm12B:
   3298 match_nolit_loop_encodeBlockAsm12B:
   3299 	MOVL CX, SI
   3300 	SUBL BX, SI
   3301 	MOVL SI, 16(SP)
   3302 	ADDL $0x04, CX
   3303 	ADDL $0x04, BX
   3304 	MOVQ src_len+32(FP), SI
   3305 	SUBL CX, SI
   3306 	LEAQ (DX)(CX*1), DI
   3307 	LEAQ (DX)(BX*1), BX
   3308 
   3309 	// matchLen
   3310 	XORL R9, R9
   3311 	CMPL SI, $0x08
   3312 	JB   matchlen_match4_match_nolit_encodeBlockAsm12B
   3313 
   3314 matchlen_loopback_match_nolit_encodeBlockAsm12B:
   3315 	MOVQ  (DI)(R9*1), R8
   3316 	XORQ  (BX)(R9*1), R8
   3317 	TESTQ R8, R8
   3318 	JZ    matchlen_loop_match_nolit_encodeBlockAsm12B
   3319 
   3320 #ifdef GOAMD64_v3
   3321 	TZCNTQ R8, R8
   3322 
   3323 #else
   3324 	BSFQ R8, R8
   3325 
   3326 #endif
   3327 	SARQ $0x03, R8
   3328 	LEAL (R9)(R8*1), R9
   3329 	JMP  match_nolit_end_encodeBlockAsm12B
   3330 
   3331 matchlen_loop_match_nolit_encodeBlockAsm12B:
   3332 	LEAL -8(SI), SI
   3333 	LEAL 8(R9), R9
   3334 	CMPL SI, $0x08
   3335 	JAE  matchlen_loopback_match_nolit_encodeBlockAsm12B
   3336 	JZ   match_nolit_end_encodeBlockAsm12B
   3337 
   3338 matchlen_match4_match_nolit_encodeBlockAsm12B:
   3339 	CMPL SI, $0x04
   3340 	JB   matchlen_match2_match_nolit_encodeBlockAsm12B
   3341 	MOVL (DI)(R9*1), R8
   3342 	CMPL (BX)(R9*1), R8
   3343 	JNE  matchlen_match2_match_nolit_encodeBlockAsm12B
   3344 	SUBL $0x04, SI
   3345 	LEAL 4(R9), R9
   3346 
   3347 matchlen_match2_match_nolit_encodeBlockAsm12B:
   3348 	CMPL SI, $0x02
   3349 	JB   matchlen_match1_match_nolit_encodeBlockAsm12B
   3350 	MOVW (DI)(R9*1), R8
   3351 	CMPW (BX)(R9*1), R8
   3352 	JNE  matchlen_match1_match_nolit_encodeBlockAsm12B
   3353 	SUBL $0x02, SI
   3354 	LEAL 2(R9), R9
   3355 
   3356 matchlen_match1_match_nolit_encodeBlockAsm12B:
   3357 	CMPL SI, $0x01
   3358 	JB   match_nolit_end_encodeBlockAsm12B
   3359 	MOVB (DI)(R9*1), R8
   3360 	CMPB (BX)(R9*1), R8
   3361 	JNE  match_nolit_end_encodeBlockAsm12B
   3362 	LEAL 1(R9), R9
   3363 
   3364 match_nolit_end_encodeBlockAsm12B:
   3365 	ADDL R9, CX
   3366 	MOVL 16(SP), BX
   3367 	ADDL $0x04, R9
   3368 	MOVL CX, 12(SP)
   3369 
   3370 	// emitCopy
   3371 	CMPL R9, $0x40
   3372 	JBE  two_byte_offset_short_match_nolit_encodeBlockAsm12B
   3373 	CMPL BX, $0x00000800
   3374 	JAE  long_offset_short_match_nolit_encodeBlockAsm12B
   3375 	MOVL $0x00000001, SI
   3376 	LEAL 16(SI), SI
   3377 	MOVB BL, 1(AX)
   3378 	SHRL $0x08, BX
   3379 	SHLL $0x05, BX
   3380 	ORL  BX, SI
   3381 	MOVB SI, (AX)
   3382 	ADDQ $0x02, AX
   3383 	SUBL $0x08, R9
   3384 
   3385 	// emitRepeat
   3386 	LEAL -4(R9), R9
   3387 	JMP  cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b
   3388 	MOVL R9, SI
   3389 	LEAL -4(R9), R9
   3390 	CMPL SI, $0x08
   3391 	JBE  repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short_2b
   3392 	CMPL SI, $0x0c
   3393 	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b
   3394 	CMPL BX, $0x00000800
   3395 	JB   repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b
   3396 
   3397 cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b:
   3398 	CMPL R9, $0x00000104
   3399 	JB   repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short_2b
   3400 	LEAL -256(R9), R9
   3401 	MOVW $0x0019, (AX)
   3402 	MOVW R9, 2(AX)
   3403 	ADDQ $0x04, AX
   3404 	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
   3405 
   3406 repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short_2b:
   3407 	LEAL -4(R9), R9
   3408 	MOVW $0x0015, (AX)
   3409 	MOVB R9, 2(AX)
   3410 	ADDQ $0x03, AX
   3411 	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
   3412 
   3413 repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short_2b:
   3414 	SHLL $0x02, R9
   3415 	ORL  $0x01, R9
   3416 	MOVW R9, (AX)
   3417 	ADDQ $0x02, AX
   3418 	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
   3419 
   3420 repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b:
   3421 	XORQ SI, SI
   3422 	LEAL 1(SI)(R9*4), R9
   3423 	MOVB BL, 1(AX)
   3424 	SARL $0x08, BX
   3425 	SHLL $0x05, BX
   3426 	ORL  BX, R9
   3427 	MOVB R9, (AX)
   3428 	ADDQ $0x02, AX
   3429 	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
   3430 
   3431 long_offset_short_match_nolit_encodeBlockAsm12B:
   3432 	MOVB $0xee, (AX)
   3433 	MOVW BX, 1(AX)
   3434 	LEAL -60(R9), R9
   3435 	ADDQ $0x03, AX
   3436 
   3437 	// emitRepeat
   3438 	MOVL R9, SI
   3439 	LEAL -4(R9), R9
   3440 	CMPL SI, $0x08
   3441 	JBE  repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short
   3442 	CMPL SI, $0x0c
   3443 	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short
   3444 	CMPL BX, $0x00000800
   3445 	JB   repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short
   3446 
   3447 cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short:
   3448 	CMPL R9, $0x00000104
   3449 	JB   repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short
   3450 	LEAL -256(R9), R9
   3451 	MOVW $0x0019, (AX)
   3452 	MOVW R9, 2(AX)
   3453 	ADDQ $0x04, AX
   3454 	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
   3455 
   3456 repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short:
   3457 	LEAL -4(R9), R9
   3458 	MOVW $0x0015, (AX)
   3459 	MOVB R9, 2(AX)
   3460 	ADDQ $0x03, AX
   3461 	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
   3462 
   3463 repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short:
   3464 	SHLL $0x02, R9
   3465 	ORL  $0x01, R9
   3466 	MOVW R9, (AX)
   3467 	ADDQ $0x02, AX
   3468 	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
   3469 
   3470 repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short:
   3471 	XORQ SI, SI
   3472 	LEAL 1(SI)(R9*4), R9
   3473 	MOVB BL, 1(AX)
   3474 	SARL $0x08, BX
   3475 	SHLL $0x05, BX
   3476 	ORL  BX, R9
   3477 	MOVB R9, (AX)
   3478 	ADDQ $0x02, AX
   3479 	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
   3480 
   3481 two_byte_offset_short_match_nolit_encodeBlockAsm12B:
   3482 	MOVL R9, SI
   3483 	SHLL $0x02, SI
   3484 	CMPL R9, $0x0c
   3485 	JAE  emit_copy_three_match_nolit_encodeBlockAsm12B
   3486 	CMPL BX, $0x00000800
   3487 	JAE  emit_copy_three_match_nolit_encodeBlockAsm12B
   3488 	LEAL -15(SI), SI
   3489 	MOVB BL, 1(AX)
   3490 	SHRL $0x08, BX
   3491 	SHLL $0x05, BX
   3492 	ORL  BX, SI
   3493 	MOVB SI, (AX)
   3494 	ADDQ $0x02, AX
   3495 	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
   3496 
   3497 emit_copy_three_match_nolit_encodeBlockAsm12B:
   3498 	LEAL -2(SI), SI
   3499 	MOVB SI, (AX)
   3500 	MOVW BX, 1(AX)
   3501 	ADDQ $0x03, AX
   3502 
   3503 match_nolit_emitcopy_end_encodeBlockAsm12B:
   3504 	CMPL CX, 8(SP)
   3505 	JAE  emit_remainder_encodeBlockAsm12B
   3506 	MOVQ -2(DX)(CX*1), SI
   3507 	CMPQ AX, (SP)
   3508 	JB   match_nolit_dst_ok_encodeBlockAsm12B
   3509 	MOVQ $0x00000000, ret+48(FP)
   3510 	RET
   3511 
   3512 match_nolit_dst_ok_encodeBlockAsm12B:
   3513 	MOVQ  $0x000000cf1bbcdcbb, R8
   3514 	MOVQ  SI, DI
   3515 	SHRQ  $0x10, SI
   3516 	MOVQ  SI, BX
   3517 	SHLQ  $0x18, DI
   3518 	IMULQ R8, DI
   3519 	SHRQ  $0x34, DI
   3520 	SHLQ  $0x18, BX
   3521 	IMULQ R8, BX
   3522 	SHRQ  $0x34, BX
   3523 	LEAL  -2(CX), R8
   3524 	LEAQ  24(SP)(BX*4), R9
   3525 	MOVL  (R9), BX
   3526 	MOVL  R8, 24(SP)(DI*4)
   3527 	MOVL  CX, (R9)
   3528 	CMPL  (DX)(BX*1), SI
   3529 	JEQ   match_nolit_loop_encodeBlockAsm12B
   3530 	INCL  CX
   3531 	JMP   search_loop_encodeBlockAsm12B
   3532 
   3533 emit_remainder_encodeBlockAsm12B:
   3534 	MOVQ src_len+32(FP), CX
   3535 	SUBL 12(SP), CX
   3536 	LEAQ 3(AX)(CX*1), CX
   3537 	CMPQ CX, (SP)
   3538 	JB   emit_remainder_ok_encodeBlockAsm12B
   3539 	MOVQ $0x00000000, ret+48(FP)
   3540 	RET
   3541 
   3542 emit_remainder_ok_encodeBlockAsm12B:
   3543 	MOVQ src_len+32(FP), CX
   3544 	MOVL 12(SP), BX
   3545 	CMPL BX, CX
   3546 	JEQ  emit_literal_done_emit_remainder_encodeBlockAsm12B
   3547 	MOVL CX, SI
   3548 	MOVL CX, 12(SP)
   3549 	LEAQ (DX)(BX*1), CX
   3550 	SUBL BX, SI
   3551 	LEAL -1(SI), DX
   3552 	CMPL DX, $0x3c
   3553 	JB   one_byte_emit_remainder_encodeBlockAsm12B
   3554 	CMPL DX, $0x00000100
   3555 	JB   two_bytes_emit_remainder_encodeBlockAsm12B
   3556 	JB   three_bytes_emit_remainder_encodeBlockAsm12B
   3557 
   3558 three_bytes_emit_remainder_encodeBlockAsm12B:
   3559 	MOVB $0xf4, (AX)
   3560 	MOVW DX, 1(AX)
   3561 	ADDQ $0x03, AX
   3562 	JMP  memmove_long_emit_remainder_encodeBlockAsm12B
   3563 
   3564 two_bytes_emit_remainder_encodeBlockAsm12B:
   3565 	MOVB $0xf0, (AX)
   3566 	MOVB DL, 1(AX)
   3567 	ADDQ $0x02, AX
   3568 	CMPL DX, $0x40
   3569 	JB   memmove_emit_remainder_encodeBlockAsm12B
   3570 	JMP  memmove_long_emit_remainder_encodeBlockAsm12B
   3571 
   3572 one_byte_emit_remainder_encodeBlockAsm12B:
   3573 	SHLB $0x02, DL
   3574 	MOVB DL, (AX)
   3575 	ADDQ $0x01, AX
   3576 
   3577 memmove_emit_remainder_encodeBlockAsm12B:
   3578 	LEAQ (AX)(SI*1), DX
   3579 	MOVL SI, BX
   3580 
   3581 	// genMemMoveShort
   3582 	CMPQ BX, $0x03
   3583 	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2
   3584 	JE   emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3
   3585 	CMPQ BX, $0x08
   3586 	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4through7
   3587 	CMPQ BX, $0x10
   3588 	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8through16
   3589 	CMPQ BX, $0x20
   3590 	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32
   3591 	JMP  emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64
   3592 
   3593 emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2:
   3594 	MOVB (CX), SI
   3595 	MOVB -1(CX)(BX*1), CL
   3596 	MOVB SI, (AX)
   3597 	MOVB CL, -1(AX)(BX*1)
   3598 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm12B
   3599 
   3600 emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3:
   3601 	MOVW (CX), SI
   3602 	MOVB 2(CX), CL
   3603 	MOVW SI, (AX)
   3604 	MOVB CL, 2(AX)
   3605 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm12B
   3606 
   3607 emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4through7:
   3608 	MOVL (CX), SI
   3609 	MOVL -4(CX)(BX*1), CX
   3610 	MOVL SI, (AX)
   3611 	MOVL CX, -4(AX)(BX*1)
   3612 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm12B
   3613 
   3614 emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8through16:
   3615 	MOVQ (CX), SI
   3616 	MOVQ -8(CX)(BX*1), CX
   3617 	MOVQ SI, (AX)
   3618 	MOVQ CX, -8(AX)(BX*1)
   3619 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm12B
   3620 
   3621 emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32:
   3622 	MOVOU (CX), X0
   3623 	MOVOU -16(CX)(BX*1), X1
   3624 	MOVOU X0, (AX)
   3625 	MOVOU X1, -16(AX)(BX*1)
   3626 	JMP   memmove_end_copy_emit_remainder_encodeBlockAsm12B
   3627 
   3628 emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64:
   3629 	MOVOU (CX), X0
   3630 	MOVOU 16(CX), X1
   3631 	MOVOU -32(CX)(BX*1), X2
   3632 	MOVOU -16(CX)(BX*1), X3
   3633 	MOVOU X0, (AX)
   3634 	MOVOU X1, 16(AX)
   3635 	MOVOU X2, -32(AX)(BX*1)
   3636 	MOVOU X3, -16(AX)(BX*1)
   3637 
   3638 memmove_end_copy_emit_remainder_encodeBlockAsm12B:
   3639 	MOVQ DX, AX
   3640 	JMP  emit_literal_done_emit_remainder_encodeBlockAsm12B
   3641 
   3642 memmove_long_emit_remainder_encodeBlockAsm12B:
   3643 	LEAQ (AX)(SI*1), DX
   3644 	MOVL SI, BX
   3645 
   3646 	// genMemMoveLong
   3647 	MOVOU (CX), X0
   3648 	MOVOU 16(CX), X1
   3649 	MOVOU -32(CX)(BX*1), X2
   3650 	MOVOU -16(CX)(BX*1), X3
   3651 	MOVQ  BX, DI
   3652 	SHRQ  $0x05, DI
   3653 	MOVQ  AX, SI
   3654 	ANDL  $0x0000001f, SI
   3655 	MOVQ  $0x00000040, R8
   3656 	SUBQ  SI, R8
   3657 	DECQ  DI
   3658 	JA    emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32
   3659 	LEAQ  -32(CX)(R8*1), SI
   3660 	LEAQ  -32(AX)(R8*1), R9
   3661 
   3662 emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back:
   3663 	MOVOU (SI), X4
   3664 	MOVOU 16(SI), X5
   3665 	MOVOA X4, (R9)
   3666 	MOVOA X5, 16(R9)
   3667 	ADDQ  $0x20, R9
   3668 	ADDQ  $0x20, SI
   3669 	ADDQ  $0x20, R8
   3670 	DECQ  DI
   3671 	JNA   emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back
   3672 
   3673 emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32:
   3674 	MOVOU -32(CX)(R8*1), X4
   3675 	MOVOU -16(CX)(R8*1), X5
   3676 	MOVOA X4, -32(AX)(R8*1)
   3677 	MOVOA X5, -16(AX)(R8*1)
   3678 	ADDQ  $0x20, R8
   3679 	CMPQ  BX, R8
   3680 	JAE   emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32
   3681 	MOVOU X0, (AX)
   3682 	MOVOU X1, 16(AX)
   3683 	MOVOU X2, -32(AX)(BX*1)
   3684 	MOVOU X3, -16(AX)(BX*1)
   3685 	MOVQ  DX, AX
   3686 
   3687 emit_literal_done_emit_remainder_encodeBlockAsm12B:
   3688 	MOVQ dst_base+0(FP), CX
   3689 	SUBQ CX, AX
   3690 	MOVQ AX, ret+48(FP)
   3691 	RET
   3692 
   3693 // func encodeBlockAsm10B(dst []byte, src []byte) int
   3694 // Requires: BMI, SSE2
   3695 TEXT ·encodeBlockAsm10B(SB), $4120-56
   3696 	MOVQ dst_base+0(FP), AX
   3697 	MOVQ $0x00000020, CX
   3698 	LEAQ 24(SP), DX
   3699 	PXOR X0, X0
   3700 
   3701 zero_loop_encodeBlockAsm10B:
   3702 	MOVOU X0, (DX)
   3703 	MOVOU X0, 16(DX)
   3704 	MOVOU X0, 32(DX)
   3705 	MOVOU X0, 48(DX)
   3706 	MOVOU X0, 64(DX)
   3707 	MOVOU X0, 80(DX)
   3708 	MOVOU X0, 96(DX)
   3709 	MOVOU X0, 112(DX)
   3710 	ADDQ  $0x80, DX
   3711 	DECQ  CX
   3712 	JNZ   zero_loop_encodeBlockAsm10B
   3713 	MOVL  $0x00000000, 12(SP)
   3714 	MOVQ  src_len+32(FP), CX
   3715 	LEAQ  -9(CX), DX
   3716 	LEAQ  -8(CX), BX
   3717 	MOVL  BX, 8(SP)
   3718 	SHRQ  $0x05, CX
   3719 	SUBL  CX, DX
   3720 	LEAQ  (AX)(DX*1), DX
   3721 	MOVQ  DX, (SP)
   3722 	MOVL  $0x00000001, CX
   3723 	MOVL  CX, 16(SP)
   3724 	MOVQ  src_base+24(FP), DX
   3725 
   3726 search_loop_encodeBlockAsm10B:
   3727 	MOVL  CX, BX
   3728 	SUBL  12(SP), BX
   3729 	SHRL  $0x05, BX
   3730 	LEAL  4(CX)(BX*1), BX
   3731 	CMPL  BX, 8(SP)
   3732 	JAE   emit_remainder_encodeBlockAsm10B
   3733 	MOVQ  (DX)(CX*1), SI
   3734 	MOVL  BX, 20(SP)
   3735 	MOVQ  $0x9e3779b1, R8
   3736 	MOVQ  SI, R9
   3737 	MOVQ  SI, R10
   3738 	SHRQ  $0x08, R10
   3739 	SHLQ  $0x20, R9
   3740 	IMULQ R8, R9
   3741 	SHRQ  $0x36, R9
   3742 	SHLQ  $0x20, R10
   3743 	IMULQ R8, R10
   3744 	SHRQ  $0x36, R10
   3745 	MOVL  24(SP)(R9*4), BX
   3746 	MOVL  24(SP)(R10*4), DI
   3747 	MOVL  CX, 24(SP)(R9*4)
   3748 	LEAL  1(CX), R9
   3749 	MOVL  R9, 24(SP)(R10*4)
   3750 	MOVQ  SI, R9
   3751 	SHRQ  $0x10, R9
   3752 	SHLQ  $0x20, R9
   3753 	IMULQ R8, R9
   3754 	SHRQ  $0x36, R9
   3755 	MOVL  CX, R8
   3756 	SUBL  16(SP), R8
   3757 	MOVL  1(DX)(R8*1), R10
   3758 	MOVQ  SI, R8
   3759 	SHRQ  $0x08, R8
   3760 	CMPL  R8, R10
   3761 	JNE   no_repeat_found_encodeBlockAsm10B
   3762 	LEAL  1(CX), SI
   3763 	MOVL  12(SP), DI
   3764 	MOVL  SI, BX
   3765 	SUBL  16(SP), BX
   3766 	JZ    repeat_extend_back_end_encodeBlockAsm10B
   3767 
   3768 repeat_extend_back_loop_encodeBlockAsm10B:
   3769 	CMPL SI, DI
   3770 	JBE  repeat_extend_back_end_encodeBlockAsm10B
   3771 	MOVB -1(DX)(BX*1), R8
   3772 	MOVB -1(DX)(SI*1), R9
   3773 	CMPB R8, R9
   3774 	JNE  repeat_extend_back_end_encodeBlockAsm10B
   3775 	LEAL -1(SI), SI
   3776 	DECL BX
   3777 	JNZ  repeat_extend_back_loop_encodeBlockAsm10B
   3778 
   3779 repeat_extend_back_end_encodeBlockAsm10B:
   3780 	MOVL 12(SP), BX
   3781 	CMPL BX, SI
   3782 	JEQ  emit_literal_done_repeat_emit_encodeBlockAsm10B
   3783 	MOVL SI, R8
   3784 	MOVL SI, 12(SP)
   3785 	LEAQ (DX)(BX*1), R9
   3786 	SUBL BX, R8
   3787 	LEAL -1(R8), BX
   3788 	CMPL BX, $0x3c
   3789 	JB   one_byte_repeat_emit_encodeBlockAsm10B
   3790 	CMPL BX, $0x00000100
   3791 	JB   two_bytes_repeat_emit_encodeBlockAsm10B
   3792 	JB   three_bytes_repeat_emit_encodeBlockAsm10B
   3793 
   3794 three_bytes_repeat_emit_encodeBlockAsm10B:
   3795 	MOVB $0xf4, (AX)
   3796 	MOVW BX, 1(AX)
   3797 	ADDQ $0x03, AX
   3798 	JMP  memmove_long_repeat_emit_encodeBlockAsm10B
   3799 
   3800 two_bytes_repeat_emit_encodeBlockAsm10B:
   3801 	MOVB $0xf0, (AX)
   3802 	MOVB BL, 1(AX)
   3803 	ADDQ $0x02, AX
   3804 	CMPL BX, $0x40
   3805 	JB   memmove_repeat_emit_encodeBlockAsm10B
   3806 	JMP  memmove_long_repeat_emit_encodeBlockAsm10B
   3807 
   3808 one_byte_repeat_emit_encodeBlockAsm10B:
   3809 	SHLB $0x02, BL
   3810 	MOVB BL, (AX)
   3811 	ADDQ $0x01, AX
   3812 
   3813 memmove_repeat_emit_encodeBlockAsm10B:
   3814 	LEAQ (AX)(R8*1), BX
   3815 
   3816 	// genMemMoveShort
   3817 	CMPQ R8, $0x08
   3818 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8
   3819 	CMPQ R8, $0x10
   3820 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16
   3821 	CMPQ R8, $0x20
   3822 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32
   3823 	JMP  emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64
   3824 
   3825 emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8:
   3826 	MOVQ (R9), R10
   3827 	MOVQ R10, (AX)
   3828 	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm10B
   3829 
   3830 emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16:
   3831 	MOVQ (R9), R10
   3832 	MOVQ -8(R9)(R8*1), R9
   3833 	MOVQ R10, (AX)
   3834 	MOVQ R9, -8(AX)(R8*1)
   3835 	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm10B
   3836 
   3837 emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32:
   3838 	MOVOU (R9), X0
   3839 	MOVOU -16(R9)(R8*1), X1
   3840 	MOVOU X0, (AX)
   3841 	MOVOU X1, -16(AX)(R8*1)
   3842 	JMP   memmove_end_copy_repeat_emit_encodeBlockAsm10B
   3843 
   3844 emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64:
   3845 	MOVOU (R9), X0
   3846 	MOVOU 16(R9), X1
   3847 	MOVOU -32(R9)(R8*1), X2
   3848 	MOVOU -16(R9)(R8*1), X3
   3849 	MOVOU X0, (AX)
   3850 	MOVOU X1, 16(AX)
   3851 	MOVOU X2, -32(AX)(R8*1)
   3852 	MOVOU X3, -16(AX)(R8*1)
   3853 
   3854 memmove_end_copy_repeat_emit_encodeBlockAsm10B:
   3855 	MOVQ BX, AX
   3856 	JMP  emit_literal_done_repeat_emit_encodeBlockAsm10B
   3857 
   3858 memmove_long_repeat_emit_encodeBlockAsm10B:
   3859 	LEAQ (AX)(R8*1), BX
   3860 
   3861 	// genMemMoveLong
   3862 	MOVOU (R9), X0
   3863 	MOVOU 16(R9), X1
   3864 	MOVOU -32(R9)(R8*1), X2
   3865 	MOVOU -16(R9)(R8*1), X3
   3866 	MOVQ  R8, R11
   3867 	SHRQ  $0x05, R11
   3868 	MOVQ  AX, R10
   3869 	ANDL  $0x0000001f, R10
   3870 	MOVQ  $0x00000040, R12
   3871 	SUBQ  R10, R12
   3872 	DECQ  R11
   3873 	JA    emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
   3874 	LEAQ  -32(R9)(R12*1), R10
   3875 	LEAQ  -32(AX)(R12*1), R13
   3876 
   3877 emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back:
   3878 	MOVOU (R10), X4
   3879 	MOVOU 16(R10), X5
   3880 	MOVOA X4, (R13)
   3881 	MOVOA X5, 16(R13)
   3882 	ADDQ  $0x20, R13
   3883 	ADDQ  $0x20, R10
   3884 	ADDQ  $0x20, R12
   3885 	DECQ  R11
   3886 	JNA   emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back
   3887 
   3888 emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32:
   3889 	MOVOU -32(R9)(R12*1), X4
   3890 	MOVOU -16(R9)(R12*1), X5
   3891 	MOVOA X4, -32(AX)(R12*1)
   3892 	MOVOA X5, -16(AX)(R12*1)
   3893 	ADDQ  $0x20, R12
   3894 	CMPQ  R8, R12
   3895 	JAE   emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
   3896 	MOVOU X0, (AX)
   3897 	MOVOU X1, 16(AX)
   3898 	MOVOU X2, -32(AX)(R8*1)
   3899 	MOVOU X3, -16(AX)(R8*1)
   3900 	MOVQ  BX, AX
   3901 
   3902 emit_literal_done_repeat_emit_encodeBlockAsm10B:
   3903 	ADDL $0x05, CX
   3904 	MOVL CX, BX
   3905 	SUBL 16(SP), BX
   3906 	MOVQ src_len+32(FP), R8
   3907 	SUBL CX, R8
   3908 	LEAQ (DX)(CX*1), R9
   3909 	LEAQ (DX)(BX*1), BX
   3910 
   3911 	// matchLen
   3912 	XORL R11, R11
   3913 	CMPL R8, $0x08
   3914 	JB   matchlen_match4_repeat_extend_encodeBlockAsm10B
   3915 
   3916 matchlen_loopback_repeat_extend_encodeBlockAsm10B:
   3917 	MOVQ  (R9)(R11*1), R10
   3918 	XORQ  (BX)(R11*1), R10
   3919 	TESTQ R10, R10
   3920 	JZ    matchlen_loop_repeat_extend_encodeBlockAsm10B
   3921 
   3922 #ifdef GOAMD64_v3
   3923 	TZCNTQ R10, R10
   3924 
   3925 #else
   3926 	BSFQ R10, R10
   3927 
   3928 #endif
   3929 	SARQ $0x03, R10
   3930 	LEAL (R11)(R10*1), R11
   3931 	JMP  repeat_extend_forward_end_encodeBlockAsm10B
   3932 
   3933 matchlen_loop_repeat_extend_encodeBlockAsm10B:
   3934 	LEAL -8(R8), R8
   3935 	LEAL 8(R11), R11
   3936 	CMPL R8, $0x08
   3937 	JAE  matchlen_loopback_repeat_extend_encodeBlockAsm10B
   3938 	JZ   repeat_extend_forward_end_encodeBlockAsm10B
   3939 
   3940 matchlen_match4_repeat_extend_encodeBlockAsm10B:
   3941 	CMPL R8, $0x04
   3942 	JB   matchlen_match2_repeat_extend_encodeBlockAsm10B
   3943 	MOVL (R9)(R11*1), R10
   3944 	CMPL (BX)(R11*1), R10
   3945 	JNE  matchlen_match2_repeat_extend_encodeBlockAsm10B
   3946 	SUBL $0x04, R8
   3947 	LEAL 4(R11), R11
   3948 
   3949 matchlen_match2_repeat_extend_encodeBlockAsm10B:
   3950 	CMPL R8, $0x02
   3951 	JB   matchlen_match1_repeat_extend_encodeBlockAsm10B
   3952 	MOVW (R9)(R11*1), R10
   3953 	CMPW (BX)(R11*1), R10
   3954 	JNE  matchlen_match1_repeat_extend_encodeBlockAsm10B
   3955 	SUBL $0x02, R8
   3956 	LEAL 2(R11), R11
   3957 
   3958 matchlen_match1_repeat_extend_encodeBlockAsm10B:
   3959 	CMPL R8, $0x01
   3960 	JB   repeat_extend_forward_end_encodeBlockAsm10B
   3961 	MOVB (R9)(R11*1), R10
   3962 	CMPB (BX)(R11*1), R10
   3963 	JNE  repeat_extend_forward_end_encodeBlockAsm10B
   3964 	LEAL 1(R11), R11
   3965 
   3966 repeat_extend_forward_end_encodeBlockAsm10B:
   3967 	ADDL  R11, CX
   3968 	MOVL  CX, BX
   3969 	SUBL  SI, BX
   3970 	MOVL  16(SP), SI
   3971 	TESTL DI, DI
   3972 	JZ    repeat_as_copy_encodeBlockAsm10B
   3973 
   3974 	// emitRepeat
   3975 	MOVL BX, DI
   3976 	LEAL -4(BX), BX
   3977 	CMPL DI, $0x08
   3978 	JBE  repeat_two_match_repeat_encodeBlockAsm10B
   3979 	CMPL DI, $0x0c
   3980 	JAE  cant_repeat_two_offset_match_repeat_encodeBlockAsm10B
   3981 	CMPL SI, $0x00000800
   3982 	JB   repeat_two_offset_match_repeat_encodeBlockAsm10B
   3983 
   3984 cant_repeat_two_offset_match_repeat_encodeBlockAsm10B:
   3985 	CMPL BX, $0x00000104
   3986 	JB   repeat_three_match_repeat_encodeBlockAsm10B
   3987 	LEAL -256(BX), BX
   3988 	MOVW $0x0019, (AX)
   3989 	MOVW BX, 2(AX)
   3990 	ADDQ $0x04, AX
   3991 	JMP  repeat_end_emit_encodeBlockAsm10B
   3992 
   3993 repeat_three_match_repeat_encodeBlockAsm10B:
   3994 	LEAL -4(BX), BX
   3995 	MOVW $0x0015, (AX)
   3996 	MOVB BL, 2(AX)
   3997 	ADDQ $0x03, AX
   3998 	JMP  repeat_end_emit_encodeBlockAsm10B
   3999 
   4000 repeat_two_match_repeat_encodeBlockAsm10B:
   4001 	SHLL $0x02, BX
   4002 	ORL  $0x01, BX
   4003 	MOVW BX, (AX)
   4004 	ADDQ $0x02, AX
   4005 	JMP  repeat_end_emit_encodeBlockAsm10B
   4006 
   4007 repeat_two_offset_match_repeat_encodeBlockAsm10B:
   4008 	XORQ DI, DI
   4009 	LEAL 1(DI)(BX*4), BX
   4010 	MOVB SI, 1(AX)
   4011 	SARL $0x08, SI
   4012 	SHLL $0x05, SI
   4013 	ORL  SI, BX
   4014 	MOVB BL, (AX)
   4015 	ADDQ $0x02, AX
   4016 	JMP  repeat_end_emit_encodeBlockAsm10B
   4017 
   4018 repeat_as_copy_encodeBlockAsm10B:
   4019 	// emitCopy
   4020 	CMPL BX, $0x40
   4021 	JBE  two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B
   4022 	CMPL SI, $0x00000800
   4023 	JAE  long_offset_short_repeat_as_copy_encodeBlockAsm10B
   4024 	MOVL $0x00000001, DI
   4025 	LEAL 16(DI), DI
   4026 	MOVB SI, 1(AX)
   4027 	SHRL $0x08, SI
   4028 	SHLL $0x05, SI
   4029 	ORL  SI, DI
   4030 	MOVB DI, (AX)
   4031 	ADDQ $0x02, AX
   4032 	SUBL $0x08, BX
   4033 
   4034 	// emitRepeat
   4035 	LEAL -4(BX), BX
   4036 	JMP  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b
   4037 	MOVL BX, DI
   4038 	LEAL -4(BX), BX
   4039 	CMPL DI, $0x08
   4040 	JBE  repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b
   4041 	CMPL DI, $0x0c
   4042 	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b
   4043 	CMPL SI, $0x00000800
   4044 	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b
   4045 
   4046 cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b:
   4047 	CMPL BX, $0x00000104
   4048 	JB   repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b
   4049 	LEAL -256(BX), BX
   4050 	MOVW $0x0019, (AX)
   4051 	MOVW BX, 2(AX)
   4052 	ADDQ $0x04, AX
   4053 	JMP  repeat_end_emit_encodeBlockAsm10B
   4054 
   4055 repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b:
   4056 	LEAL -4(BX), BX
   4057 	MOVW $0x0015, (AX)
   4058 	MOVB BL, 2(AX)
   4059 	ADDQ $0x03, AX
   4060 	JMP  repeat_end_emit_encodeBlockAsm10B
   4061 
   4062 repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b:
   4063 	SHLL $0x02, BX
   4064 	ORL  $0x01, BX
   4065 	MOVW BX, (AX)
   4066 	ADDQ $0x02, AX
   4067 	JMP  repeat_end_emit_encodeBlockAsm10B
   4068 
   4069 repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b:
   4070 	XORQ DI, DI
   4071 	LEAL 1(DI)(BX*4), BX
   4072 	MOVB SI, 1(AX)
   4073 	SARL $0x08, SI
   4074 	SHLL $0x05, SI
   4075 	ORL  SI, BX
   4076 	MOVB BL, (AX)
   4077 	ADDQ $0x02, AX
   4078 	JMP  repeat_end_emit_encodeBlockAsm10B
   4079 
   4080 long_offset_short_repeat_as_copy_encodeBlockAsm10B:
   4081 	MOVB $0xee, (AX)
   4082 	MOVW SI, 1(AX)
   4083 	LEAL -60(BX), BX
   4084 	ADDQ $0x03, AX
   4085 
   4086 	// emitRepeat
   4087 	MOVL BX, DI
   4088 	LEAL -4(BX), BX
   4089 	CMPL DI, $0x08
   4090 	JBE  repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
   4091 	CMPL DI, $0x0c
   4092 	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
   4093 	CMPL SI, $0x00000800
   4094 	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
   4095 
   4096 cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
   4097 	CMPL BX, $0x00000104
   4098 	JB   repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
   4099 	LEAL -256(BX), BX
   4100 	MOVW $0x0019, (AX)
   4101 	MOVW BX, 2(AX)
   4102 	ADDQ $0x04, AX
   4103 	JMP  repeat_end_emit_encodeBlockAsm10B
   4104 
   4105 repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
   4106 	LEAL -4(BX), BX
   4107 	MOVW $0x0015, (AX)
   4108 	MOVB BL, 2(AX)
   4109 	ADDQ $0x03, AX
   4110 	JMP  repeat_end_emit_encodeBlockAsm10B
   4111 
   4112 repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
   4113 	SHLL $0x02, BX
   4114 	ORL  $0x01, BX
   4115 	MOVW BX, (AX)
   4116 	ADDQ $0x02, AX
   4117 	JMP  repeat_end_emit_encodeBlockAsm10B
   4118 
   4119 repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
   4120 	XORQ DI, DI
   4121 	LEAL 1(DI)(BX*4), BX
   4122 	MOVB SI, 1(AX)
   4123 	SARL $0x08, SI
   4124 	SHLL $0x05, SI
   4125 	ORL  SI, BX
   4126 	MOVB BL, (AX)
   4127 	ADDQ $0x02, AX
   4128 	JMP  repeat_end_emit_encodeBlockAsm10B
   4129 
   4130 two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B:
   4131 	MOVL BX, DI
   4132 	SHLL $0x02, DI
   4133 	CMPL BX, $0x0c
   4134 	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm10B
   4135 	CMPL SI, $0x00000800
   4136 	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm10B
   4137 	LEAL -15(DI), DI
   4138 	MOVB SI, 1(AX)
   4139 	SHRL $0x08, SI
   4140 	SHLL $0x05, SI
   4141 	ORL  SI, DI
   4142 	MOVB DI, (AX)
   4143 	ADDQ $0x02, AX
   4144 	JMP  repeat_end_emit_encodeBlockAsm10B
   4145 
   4146 emit_copy_three_repeat_as_copy_encodeBlockAsm10B:
   4147 	LEAL -2(DI), DI
   4148 	MOVB DI, (AX)
   4149 	MOVW SI, 1(AX)
   4150 	ADDQ $0x03, AX
   4151 
   4152 repeat_end_emit_encodeBlockAsm10B:
   4153 	MOVL CX, 12(SP)
   4154 	JMP  search_loop_encodeBlockAsm10B
   4155 
   4156 no_repeat_found_encodeBlockAsm10B:
   4157 	CMPL (DX)(BX*1), SI
   4158 	JEQ  candidate_match_encodeBlockAsm10B
   4159 	SHRQ $0x08, SI
   4160 	MOVL 24(SP)(R9*4), BX
   4161 	LEAL 2(CX), R8
   4162 	CMPL (DX)(DI*1), SI
   4163 	JEQ  candidate2_match_encodeBlockAsm10B
   4164 	MOVL R8, 24(SP)(R9*4)
   4165 	SHRQ $0x08, SI
   4166 	CMPL (DX)(BX*1), SI
   4167 	JEQ  candidate3_match_encodeBlockAsm10B
   4168 	MOVL 20(SP), CX
   4169 	JMP  search_loop_encodeBlockAsm10B
   4170 
   4171 candidate3_match_encodeBlockAsm10B:
   4172 	ADDL $0x02, CX
   4173 	JMP  candidate_match_encodeBlockAsm10B
   4174 
   4175 candidate2_match_encodeBlockAsm10B:
   4176 	MOVL R8, 24(SP)(R9*4)
   4177 	INCL CX
   4178 	MOVL DI, BX
   4179 
   4180 candidate_match_encodeBlockAsm10B:
   4181 	MOVL  12(SP), SI
   4182 	TESTL BX, BX
   4183 	JZ    match_extend_back_end_encodeBlockAsm10B
   4184 
   4185 match_extend_back_loop_encodeBlockAsm10B:
   4186 	CMPL CX, SI
   4187 	JBE  match_extend_back_end_encodeBlockAsm10B
   4188 	MOVB -1(DX)(BX*1), DI
   4189 	MOVB -1(DX)(CX*1), R8
   4190 	CMPB DI, R8
   4191 	JNE  match_extend_back_end_encodeBlockAsm10B
   4192 	LEAL -1(CX), CX
   4193 	DECL BX
   4194 	JZ   match_extend_back_end_encodeBlockAsm10B
   4195 	JMP  match_extend_back_loop_encodeBlockAsm10B
   4196 
   4197 match_extend_back_end_encodeBlockAsm10B:
   4198 	MOVL CX, SI
   4199 	SUBL 12(SP), SI
   4200 	LEAQ 3(AX)(SI*1), SI
   4201 	CMPQ SI, (SP)
   4202 	JB   match_dst_size_check_encodeBlockAsm10B
   4203 	MOVQ $0x00000000, ret+48(FP)
   4204 	RET
   4205 
   4206 match_dst_size_check_encodeBlockAsm10B:
   4207 	MOVL CX, SI
   4208 	MOVL 12(SP), DI
   4209 	CMPL DI, SI
   4210 	JEQ  emit_literal_done_match_emit_encodeBlockAsm10B
   4211 	MOVL SI, R8
   4212 	MOVL SI, 12(SP)
   4213 	LEAQ (DX)(DI*1), SI
   4214 	SUBL DI, R8
   4215 	LEAL -1(R8), DI
   4216 	CMPL DI, $0x3c
   4217 	JB   one_byte_match_emit_encodeBlockAsm10B
   4218 	CMPL DI, $0x00000100
   4219 	JB   two_bytes_match_emit_encodeBlockAsm10B
   4220 	JB   three_bytes_match_emit_encodeBlockAsm10B
   4221 
   4222 three_bytes_match_emit_encodeBlockAsm10B:
   4223 	MOVB $0xf4, (AX)
   4224 	MOVW DI, 1(AX)
   4225 	ADDQ $0x03, AX
   4226 	JMP  memmove_long_match_emit_encodeBlockAsm10B
   4227 
   4228 two_bytes_match_emit_encodeBlockAsm10B:
   4229 	MOVB $0xf0, (AX)
   4230 	MOVB DI, 1(AX)
   4231 	ADDQ $0x02, AX
   4232 	CMPL DI, $0x40
   4233 	JB   memmove_match_emit_encodeBlockAsm10B
   4234 	JMP  memmove_long_match_emit_encodeBlockAsm10B
   4235 
   4236 one_byte_match_emit_encodeBlockAsm10B:
   4237 	SHLB $0x02, DI
   4238 	MOVB DI, (AX)
   4239 	ADDQ $0x01, AX
   4240 
   4241 memmove_match_emit_encodeBlockAsm10B:
   4242 	LEAQ (AX)(R8*1), DI
   4243 
   4244 	// genMemMoveShort
   4245 	CMPQ R8, $0x08
   4246 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8
   4247 	CMPQ R8, $0x10
   4248 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16
   4249 	CMPQ R8, $0x20
   4250 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32
   4251 	JMP  emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64
   4252 
   4253 emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8:
   4254 	MOVQ (SI), R9
   4255 	MOVQ R9, (AX)
   4256 	JMP  memmove_end_copy_match_emit_encodeBlockAsm10B
   4257 
   4258 emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16:
   4259 	MOVQ (SI), R9
   4260 	MOVQ -8(SI)(R8*1), SI
   4261 	MOVQ R9, (AX)
   4262 	MOVQ SI, -8(AX)(R8*1)
   4263 	JMP  memmove_end_copy_match_emit_encodeBlockAsm10B
   4264 
   4265 emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32:
   4266 	MOVOU (SI), X0
   4267 	MOVOU -16(SI)(R8*1), X1
   4268 	MOVOU X0, (AX)
   4269 	MOVOU X1, -16(AX)(R8*1)
   4270 	JMP   memmove_end_copy_match_emit_encodeBlockAsm10B
   4271 
   4272 emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64:
   4273 	MOVOU (SI), X0
   4274 	MOVOU 16(SI), X1
   4275 	MOVOU -32(SI)(R8*1), X2
   4276 	MOVOU -16(SI)(R8*1), X3
   4277 	MOVOU X0, (AX)
   4278 	MOVOU X1, 16(AX)
   4279 	MOVOU X2, -32(AX)(R8*1)
   4280 	MOVOU X3, -16(AX)(R8*1)
   4281 
   4282 memmove_end_copy_match_emit_encodeBlockAsm10B:
   4283 	MOVQ DI, AX
   4284 	JMP  emit_literal_done_match_emit_encodeBlockAsm10B
   4285 
   4286 memmove_long_match_emit_encodeBlockAsm10B:
   4287 	LEAQ (AX)(R8*1), DI
   4288 
   4289 	// genMemMoveLong
   4290 	MOVOU (SI), X0
   4291 	MOVOU 16(SI), X1
   4292 	MOVOU -32(SI)(R8*1), X2
   4293 	MOVOU -16(SI)(R8*1), X3
   4294 	MOVQ  R8, R10
   4295 	SHRQ  $0x05, R10
   4296 	MOVQ  AX, R9
   4297 	ANDL  $0x0000001f, R9
   4298 	MOVQ  $0x00000040, R11
   4299 	SUBQ  R9, R11
   4300 	DECQ  R10
   4301 	JA    emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
   4302 	LEAQ  -32(SI)(R11*1), R9
   4303 	LEAQ  -32(AX)(R11*1), R12
   4304 
   4305 emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back:
   4306 	MOVOU (R9), X4
   4307 	MOVOU 16(R9), X5
   4308 	MOVOA X4, (R12)
   4309 	MOVOA X5, 16(R12)
   4310 	ADDQ  $0x20, R12
   4311 	ADDQ  $0x20, R9
   4312 	ADDQ  $0x20, R11
   4313 	DECQ  R10
   4314 	JNA   emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back
   4315 
   4316 emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32:
   4317 	MOVOU -32(SI)(R11*1), X4
   4318 	MOVOU -16(SI)(R11*1), X5
   4319 	MOVOA X4, -32(AX)(R11*1)
   4320 	MOVOA X5, -16(AX)(R11*1)
   4321 	ADDQ  $0x20, R11
   4322 	CMPQ  R8, R11
   4323 	JAE   emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
   4324 	MOVOU X0, (AX)
   4325 	MOVOU X1, 16(AX)
   4326 	MOVOU X2, -32(AX)(R8*1)
   4327 	MOVOU X3, -16(AX)(R8*1)
   4328 	MOVQ  DI, AX
   4329 
   4330 emit_literal_done_match_emit_encodeBlockAsm10B:
   4331 match_nolit_loop_encodeBlockAsm10B:
   4332 	MOVL CX, SI
   4333 	SUBL BX, SI
   4334 	MOVL SI, 16(SP)
   4335 	ADDL $0x04, CX
   4336 	ADDL $0x04, BX
   4337 	MOVQ src_len+32(FP), SI
   4338 	SUBL CX, SI
   4339 	LEAQ (DX)(CX*1), DI
   4340 	LEAQ (DX)(BX*1), BX
   4341 
   4342 	// matchLen
   4343 	XORL R9, R9
   4344 	CMPL SI, $0x08
   4345 	JB   matchlen_match4_match_nolit_encodeBlockAsm10B
   4346 
   4347 matchlen_loopback_match_nolit_encodeBlockAsm10B:
   4348 	MOVQ  (DI)(R9*1), R8
   4349 	XORQ  (BX)(R9*1), R8
   4350 	TESTQ R8, R8
   4351 	JZ    matchlen_loop_match_nolit_encodeBlockAsm10B
   4352 
   4353 #ifdef GOAMD64_v3
   4354 	TZCNTQ R8, R8
   4355 
   4356 #else
   4357 	BSFQ R8, R8
   4358 
   4359 #endif
   4360 	SARQ $0x03, R8
   4361 	LEAL (R9)(R8*1), R9
   4362 	JMP  match_nolit_end_encodeBlockAsm10B
   4363 
   4364 matchlen_loop_match_nolit_encodeBlockAsm10B:
   4365 	LEAL -8(SI), SI
   4366 	LEAL 8(R9), R9
   4367 	CMPL SI, $0x08
   4368 	JAE  matchlen_loopback_match_nolit_encodeBlockAsm10B
   4369 	JZ   match_nolit_end_encodeBlockAsm10B
   4370 
   4371 matchlen_match4_match_nolit_encodeBlockAsm10B:
   4372 	CMPL SI, $0x04
   4373 	JB   matchlen_match2_match_nolit_encodeBlockAsm10B
   4374 	MOVL (DI)(R9*1), R8
   4375 	CMPL (BX)(R9*1), R8
   4376 	JNE  matchlen_match2_match_nolit_encodeBlockAsm10B
   4377 	SUBL $0x04, SI
   4378 	LEAL 4(R9), R9
   4379 
   4380 matchlen_match2_match_nolit_encodeBlockAsm10B:
   4381 	CMPL SI, $0x02
   4382 	JB   matchlen_match1_match_nolit_encodeBlockAsm10B
   4383 	MOVW (DI)(R9*1), R8
   4384 	CMPW (BX)(R9*1), R8
   4385 	JNE  matchlen_match1_match_nolit_encodeBlockAsm10B
   4386 	SUBL $0x02, SI
   4387 	LEAL 2(R9), R9
   4388 
   4389 matchlen_match1_match_nolit_encodeBlockAsm10B:
   4390 	CMPL SI, $0x01
   4391 	JB   match_nolit_end_encodeBlockAsm10B
   4392 	MOVB (DI)(R9*1), R8
   4393 	CMPB (BX)(R9*1), R8
   4394 	JNE  match_nolit_end_encodeBlockAsm10B
   4395 	LEAL 1(R9), R9
   4396 
   4397 match_nolit_end_encodeBlockAsm10B:
   4398 	ADDL R9, CX
   4399 	MOVL 16(SP), BX
   4400 	ADDL $0x04, R9
   4401 	MOVL CX, 12(SP)
   4402 
   4403 	// emitCopy
   4404 	CMPL R9, $0x40
   4405 	JBE  two_byte_offset_short_match_nolit_encodeBlockAsm10B
   4406 	CMPL BX, $0x00000800
   4407 	JAE  long_offset_short_match_nolit_encodeBlockAsm10B
   4408 	MOVL $0x00000001, SI
   4409 	LEAL 16(SI), SI
   4410 	MOVB BL, 1(AX)
   4411 	SHRL $0x08, BX
   4412 	SHLL $0x05, BX
   4413 	ORL  BX, SI
   4414 	MOVB SI, (AX)
   4415 	ADDQ $0x02, AX
   4416 	SUBL $0x08, R9
   4417 
   4418 	// emitRepeat
   4419 	LEAL -4(R9), R9
   4420 	JMP  cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b
   4421 	MOVL R9, SI
   4422 	LEAL -4(R9), R9
   4423 	CMPL SI, $0x08
   4424 	JBE  repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short_2b
   4425 	CMPL SI, $0x0c
   4426 	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b
   4427 	CMPL BX, $0x00000800
   4428 	JB   repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b
   4429 
   4430 cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b:
   4431 	CMPL R9, $0x00000104
   4432 	JB   repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short_2b
   4433 	LEAL -256(R9), R9
   4434 	MOVW $0x0019, (AX)
   4435 	MOVW R9, 2(AX)
   4436 	ADDQ $0x04, AX
   4437 	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
   4438 
   4439 repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short_2b:
   4440 	LEAL -4(R9), R9
   4441 	MOVW $0x0015, (AX)
   4442 	MOVB R9, 2(AX)
   4443 	ADDQ $0x03, AX
   4444 	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
   4445 
   4446 repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short_2b:
   4447 	SHLL $0x02, R9
   4448 	ORL  $0x01, R9
   4449 	MOVW R9, (AX)
   4450 	ADDQ $0x02, AX
   4451 	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
   4452 
   4453 repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b:
   4454 	XORQ SI, SI
   4455 	LEAL 1(SI)(R9*4), R9
   4456 	MOVB BL, 1(AX)
   4457 	SARL $0x08, BX
   4458 	SHLL $0x05, BX
   4459 	ORL  BX, R9
   4460 	MOVB R9, (AX)
   4461 	ADDQ $0x02, AX
   4462 	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
   4463 
   4464 long_offset_short_match_nolit_encodeBlockAsm10B:
   4465 	MOVB $0xee, (AX)
   4466 	MOVW BX, 1(AX)
   4467 	LEAL -60(R9), R9
   4468 	ADDQ $0x03, AX
   4469 
   4470 	// emitRepeat
   4471 	MOVL R9, SI
   4472 	LEAL -4(R9), R9
   4473 	CMPL SI, $0x08
   4474 	JBE  repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short
   4475 	CMPL SI, $0x0c
   4476 	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short
   4477 	CMPL BX, $0x00000800
   4478 	JB   repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short
   4479 
   4480 cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short:
   4481 	CMPL R9, $0x00000104
   4482 	JB   repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short
   4483 	LEAL -256(R9), R9
   4484 	MOVW $0x0019, (AX)
   4485 	MOVW R9, 2(AX)
   4486 	ADDQ $0x04, AX
   4487 	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
   4488 
   4489 repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short:
   4490 	LEAL -4(R9), R9
   4491 	MOVW $0x0015, (AX)
   4492 	MOVB R9, 2(AX)
   4493 	ADDQ $0x03, AX
   4494 	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
   4495 
   4496 repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short:
   4497 	SHLL $0x02, R9
   4498 	ORL  $0x01, R9
   4499 	MOVW R9, (AX)
   4500 	ADDQ $0x02, AX
   4501 	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
   4502 
   4503 repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short:
   4504 	XORQ SI, SI
   4505 	LEAL 1(SI)(R9*4), R9
   4506 	MOVB BL, 1(AX)
   4507 	SARL $0x08, BX
   4508 	SHLL $0x05, BX
   4509 	ORL  BX, R9
   4510 	MOVB R9, (AX)
   4511 	ADDQ $0x02, AX
   4512 	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
   4513 
   4514 two_byte_offset_short_match_nolit_encodeBlockAsm10B:
   4515 	MOVL R9, SI
   4516 	SHLL $0x02, SI
   4517 	CMPL R9, $0x0c
   4518 	JAE  emit_copy_three_match_nolit_encodeBlockAsm10B
   4519 	CMPL BX, $0x00000800
   4520 	JAE  emit_copy_three_match_nolit_encodeBlockAsm10B
   4521 	LEAL -15(SI), SI
   4522 	MOVB BL, 1(AX)
   4523 	SHRL $0x08, BX
   4524 	SHLL $0x05, BX
   4525 	ORL  BX, SI
   4526 	MOVB SI, (AX)
   4527 	ADDQ $0x02, AX
   4528 	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
   4529 
   4530 emit_copy_three_match_nolit_encodeBlockAsm10B:
   4531 	LEAL -2(SI), SI
   4532 	MOVB SI, (AX)
   4533 	MOVW BX, 1(AX)
   4534 	ADDQ $0x03, AX
   4535 
   4536 match_nolit_emitcopy_end_encodeBlockAsm10B:
   4537 	CMPL CX, 8(SP)
   4538 	JAE  emit_remainder_encodeBlockAsm10B
   4539 	MOVQ -2(DX)(CX*1), SI
   4540 	CMPQ AX, (SP)
   4541 	JB   match_nolit_dst_ok_encodeBlockAsm10B
   4542 	MOVQ $0x00000000, ret+48(FP)
   4543 	RET
   4544 
   4545 match_nolit_dst_ok_encodeBlockAsm10B:
   4546 	MOVQ  $0x9e3779b1, R8
   4547 	MOVQ  SI, DI
   4548 	SHRQ  $0x10, SI
   4549 	MOVQ  SI, BX
   4550 	SHLQ  $0x20, DI
   4551 	IMULQ R8, DI
   4552 	SHRQ  $0x36, DI
   4553 	SHLQ  $0x20, BX
   4554 	IMULQ R8, BX
   4555 	SHRQ  $0x36, BX
   4556 	LEAL  -2(CX), R8
   4557 	LEAQ  24(SP)(BX*4), R9
   4558 	MOVL  (R9), BX
   4559 	MOVL  R8, 24(SP)(DI*4)
   4560 	MOVL  CX, (R9)
   4561 	CMPL  (DX)(BX*1), SI
   4562 	JEQ   match_nolit_loop_encodeBlockAsm10B
   4563 	INCL  CX
   4564 	JMP   search_loop_encodeBlockAsm10B
   4565 
   4566 emit_remainder_encodeBlockAsm10B:
   4567 	MOVQ src_len+32(FP), CX
   4568 	SUBL 12(SP), CX
   4569 	LEAQ 3(AX)(CX*1), CX
   4570 	CMPQ CX, (SP)
   4571 	JB   emit_remainder_ok_encodeBlockAsm10B
   4572 	MOVQ $0x00000000, ret+48(FP)
   4573 	RET
   4574 
   4575 emit_remainder_ok_encodeBlockAsm10B:
   4576 	MOVQ src_len+32(FP), CX
   4577 	MOVL 12(SP), BX
   4578 	CMPL BX, CX
   4579 	JEQ  emit_literal_done_emit_remainder_encodeBlockAsm10B
   4580 	MOVL CX, SI
   4581 	MOVL CX, 12(SP)
   4582 	LEAQ (DX)(BX*1), CX
   4583 	SUBL BX, SI
   4584 	LEAL -1(SI), DX
   4585 	CMPL DX, $0x3c
   4586 	JB   one_byte_emit_remainder_encodeBlockAsm10B
   4587 	CMPL DX, $0x00000100
   4588 	JB   two_bytes_emit_remainder_encodeBlockAsm10B
   4589 	JB   three_bytes_emit_remainder_encodeBlockAsm10B
   4590 
   4591 three_bytes_emit_remainder_encodeBlockAsm10B:
   4592 	MOVB $0xf4, (AX)
   4593 	MOVW DX, 1(AX)
   4594 	ADDQ $0x03, AX
   4595 	JMP  memmove_long_emit_remainder_encodeBlockAsm10B
   4596 
   4597 two_bytes_emit_remainder_encodeBlockAsm10B:
   4598 	MOVB $0xf0, (AX)
   4599 	MOVB DL, 1(AX)
   4600 	ADDQ $0x02, AX
   4601 	CMPL DX, $0x40
   4602 	JB   memmove_emit_remainder_encodeBlockAsm10B
   4603 	JMP  memmove_long_emit_remainder_encodeBlockAsm10B
   4604 
   4605 one_byte_emit_remainder_encodeBlockAsm10B:
   4606 	SHLB $0x02, DL
   4607 	MOVB DL, (AX)
   4608 	ADDQ $0x01, AX
   4609 
   4610 memmove_emit_remainder_encodeBlockAsm10B:
   4611 	LEAQ (AX)(SI*1), DX
   4612 	MOVL SI, BX
   4613 
   4614 	// genMemMoveShort
   4615 	CMPQ BX, $0x03
   4616 	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_1or2
   4617 	JE   emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_3
   4618 	CMPQ BX, $0x08
   4619 	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_4through7
   4620 	CMPQ BX, $0x10
   4621 	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8through16
   4622 	CMPQ BX, $0x20
   4623 	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_17through32
   4624 	JMP  emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64
   4625 
   4626 emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_1or2:
   4627 	MOVB (CX), SI
   4628 	MOVB -1(CX)(BX*1), CL
   4629 	MOVB SI, (AX)
   4630 	MOVB CL, -1(AX)(BX*1)
   4631 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm10B
   4632 
   4633 emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_3:
   4634 	MOVW (CX), SI
   4635 	MOVB 2(CX), CL
   4636 	MOVW SI, (AX)
   4637 	MOVB CL, 2(AX)
   4638 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm10B
   4639 
   4640 emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_4through7:
   4641 	MOVL (CX), SI
   4642 	MOVL -4(CX)(BX*1), CX
   4643 	MOVL SI, (AX)
   4644 	MOVL CX, -4(AX)(BX*1)
   4645 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm10B
   4646 
   4647 emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8through16:
   4648 	MOVQ (CX), SI
   4649 	MOVQ -8(CX)(BX*1), CX
   4650 	MOVQ SI, (AX)
   4651 	MOVQ CX, -8(AX)(BX*1)
   4652 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm10B
   4653 
   4654 emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_17through32:
   4655 	MOVOU (CX), X0
   4656 	MOVOU -16(CX)(BX*1), X1
   4657 	MOVOU X0, (AX)
   4658 	MOVOU X1, -16(AX)(BX*1)
   4659 	JMP   memmove_end_copy_emit_remainder_encodeBlockAsm10B
   4660 
   4661 emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64:
   4662 	MOVOU (CX), X0
   4663 	MOVOU 16(CX), X1
   4664 	MOVOU -32(CX)(BX*1), X2
   4665 	MOVOU -16(CX)(BX*1), X3
   4666 	MOVOU X0, (AX)
   4667 	MOVOU X1, 16(AX)
   4668 	MOVOU X2, -32(AX)(BX*1)
   4669 	MOVOU X3, -16(AX)(BX*1)
   4670 
   4671 memmove_end_copy_emit_remainder_encodeBlockAsm10B:
   4672 	MOVQ DX, AX
   4673 	JMP  emit_literal_done_emit_remainder_encodeBlockAsm10B
   4674 
   4675 memmove_long_emit_remainder_encodeBlockAsm10B:
   4676 	LEAQ (AX)(SI*1), DX
   4677 	MOVL SI, BX
   4678 
   4679 	// genMemMoveLong
   4680 	MOVOU (CX), X0
   4681 	MOVOU 16(CX), X1
   4682 	MOVOU -32(CX)(BX*1), X2
   4683 	MOVOU -16(CX)(BX*1), X3
   4684 	MOVQ  BX, DI
   4685 	SHRQ  $0x05, DI
   4686 	MOVQ  AX, SI
   4687 	ANDL  $0x0000001f, SI
   4688 	MOVQ  $0x00000040, R8
   4689 	SUBQ  SI, R8
   4690 	DECQ  DI
   4691 	JA    emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32
   4692 	LEAQ  -32(CX)(R8*1), SI
   4693 	LEAQ  -32(AX)(R8*1), R9
   4694 
   4695 emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back:
   4696 	MOVOU (SI), X4
   4697 	MOVOU 16(SI), X5
   4698 	MOVOA X4, (R9)
   4699 	MOVOA X5, 16(R9)
   4700 	ADDQ  $0x20, R9
   4701 	ADDQ  $0x20, SI
   4702 	ADDQ  $0x20, R8
   4703 	DECQ  DI
   4704 	JNA   emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back
   4705 
   4706 emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32:
   4707 	MOVOU -32(CX)(R8*1), X4
   4708 	MOVOU -16(CX)(R8*1), X5
   4709 	MOVOA X4, -32(AX)(R8*1)
   4710 	MOVOA X5, -16(AX)(R8*1)
   4711 	ADDQ  $0x20, R8
   4712 	CMPQ  BX, R8
   4713 	JAE   emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32
   4714 	MOVOU X0, (AX)
   4715 	MOVOU X1, 16(AX)
   4716 	MOVOU X2, -32(AX)(BX*1)
   4717 	MOVOU X3, -16(AX)(BX*1)
   4718 	MOVQ  DX, AX
   4719 
   4720 emit_literal_done_emit_remainder_encodeBlockAsm10B:
   4721 	MOVQ dst_base+0(FP), CX
   4722 	SUBQ CX, AX
   4723 	MOVQ AX, ret+48(FP)
   4724 	RET
   4725 
   4726 // func encodeBlockAsm8B(dst []byte, src []byte) int
   4727 // Requires: BMI, SSE2
   4728 TEXT ·encodeBlockAsm8B(SB), $1048-56
   4729 	MOVQ dst_base+0(FP), AX
   4730 	MOVQ $0x00000008, CX
   4731 	LEAQ 24(SP), DX
   4732 	PXOR X0, X0
   4733 
   4734 zero_loop_encodeBlockAsm8B:
   4735 	MOVOU X0, (DX)
   4736 	MOVOU X0, 16(DX)
   4737 	MOVOU X0, 32(DX)
   4738 	MOVOU X0, 48(DX)
   4739 	MOVOU X0, 64(DX)
   4740 	MOVOU X0, 80(DX)
   4741 	MOVOU X0, 96(DX)
   4742 	MOVOU X0, 112(DX)
   4743 	ADDQ  $0x80, DX
   4744 	DECQ  CX
   4745 	JNZ   zero_loop_encodeBlockAsm8B
   4746 	MOVL  $0x00000000, 12(SP)
   4747 	MOVQ  src_len+32(FP), CX
   4748 	LEAQ  -9(CX), DX
   4749 	LEAQ  -8(CX), BX
   4750 	MOVL  BX, 8(SP)
   4751 	SHRQ  $0x05, CX
   4752 	SUBL  CX, DX
   4753 	LEAQ  (AX)(DX*1), DX
   4754 	MOVQ  DX, (SP)
   4755 	MOVL  $0x00000001, CX
   4756 	MOVL  CX, 16(SP)
   4757 	MOVQ  src_base+24(FP), DX
   4758 
   4759 search_loop_encodeBlockAsm8B:
   4760 	MOVL  CX, BX
   4761 	SUBL  12(SP), BX
   4762 	SHRL  $0x04, BX
   4763 	LEAL  4(CX)(BX*1), BX
   4764 	CMPL  BX, 8(SP)
   4765 	JAE   emit_remainder_encodeBlockAsm8B
   4766 	MOVQ  (DX)(CX*1), SI
   4767 	MOVL  BX, 20(SP)
   4768 	MOVQ  $0x9e3779b1, R8
   4769 	MOVQ  SI, R9
   4770 	MOVQ  SI, R10
   4771 	SHRQ  $0x08, R10
   4772 	SHLQ  $0x20, R9
   4773 	IMULQ R8, R9
   4774 	SHRQ  $0x38, R9
   4775 	SHLQ  $0x20, R10
   4776 	IMULQ R8, R10
   4777 	SHRQ  $0x38, R10
   4778 	MOVL  24(SP)(R9*4), BX
   4779 	MOVL  24(SP)(R10*4), DI
   4780 	MOVL  CX, 24(SP)(R9*4)
   4781 	LEAL  1(CX), R9
   4782 	MOVL  R9, 24(SP)(R10*4)
   4783 	MOVQ  SI, R9
   4784 	SHRQ  $0x10, R9
   4785 	SHLQ  $0x20, R9
   4786 	IMULQ R8, R9
   4787 	SHRQ  $0x38, R9
   4788 	MOVL  CX, R8
   4789 	SUBL  16(SP), R8
   4790 	MOVL  1(DX)(R8*1), R10
   4791 	MOVQ  SI, R8
   4792 	SHRQ  $0x08, R8
   4793 	CMPL  R8, R10
   4794 	JNE   no_repeat_found_encodeBlockAsm8B
   4795 	LEAL  1(CX), SI
   4796 	MOVL  12(SP), DI
   4797 	MOVL  SI, BX
   4798 	SUBL  16(SP), BX
   4799 	JZ    repeat_extend_back_end_encodeBlockAsm8B
   4800 
   4801 repeat_extend_back_loop_encodeBlockAsm8B:
   4802 	CMPL SI, DI
   4803 	JBE  repeat_extend_back_end_encodeBlockAsm8B
   4804 	MOVB -1(DX)(BX*1), R8
   4805 	MOVB -1(DX)(SI*1), R9
   4806 	CMPB R8, R9
   4807 	JNE  repeat_extend_back_end_encodeBlockAsm8B
   4808 	LEAL -1(SI), SI
   4809 	DECL BX
   4810 	JNZ  repeat_extend_back_loop_encodeBlockAsm8B
   4811 
   4812 repeat_extend_back_end_encodeBlockAsm8B:
   4813 	MOVL 12(SP), BX
   4814 	CMPL BX, SI
   4815 	JEQ  emit_literal_done_repeat_emit_encodeBlockAsm8B
   4816 	MOVL SI, R8
   4817 	MOVL SI, 12(SP)
   4818 	LEAQ (DX)(BX*1), R9
   4819 	SUBL BX, R8
   4820 	LEAL -1(R8), BX
   4821 	CMPL BX, $0x3c
   4822 	JB   one_byte_repeat_emit_encodeBlockAsm8B
   4823 	CMPL BX, $0x00000100
   4824 	JB   two_bytes_repeat_emit_encodeBlockAsm8B
   4825 	JB   three_bytes_repeat_emit_encodeBlockAsm8B
   4826 
   4827 three_bytes_repeat_emit_encodeBlockAsm8B:
   4828 	MOVB $0xf4, (AX)
   4829 	MOVW BX, 1(AX)
   4830 	ADDQ $0x03, AX
   4831 	JMP  memmove_long_repeat_emit_encodeBlockAsm8B
   4832 
   4833 two_bytes_repeat_emit_encodeBlockAsm8B:
   4834 	MOVB $0xf0, (AX)
   4835 	MOVB BL, 1(AX)
   4836 	ADDQ $0x02, AX
   4837 	CMPL BX, $0x40
   4838 	JB   memmove_repeat_emit_encodeBlockAsm8B
   4839 	JMP  memmove_long_repeat_emit_encodeBlockAsm8B
   4840 
   4841 one_byte_repeat_emit_encodeBlockAsm8B:
   4842 	SHLB $0x02, BL
   4843 	MOVB BL, (AX)
   4844 	ADDQ $0x01, AX
   4845 
   4846 memmove_repeat_emit_encodeBlockAsm8B:
   4847 	LEAQ (AX)(R8*1), BX
   4848 
   4849 	// genMemMoveShort
   4850 	CMPQ R8, $0x08
   4851 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8
   4852 	CMPQ R8, $0x10
   4853 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16
   4854 	CMPQ R8, $0x20
   4855 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32
   4856 	JMP  emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64
   4857 
   4858 emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8:
   4859 	MOVQ (R9), R10
   4860 	MOVQ R10, (AX)
   4861 	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm8B
   4862 
   4863 emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16:
   4864 	MOVQ (R9), R10
   4865 	MOVQ -8(R9)(R8*1), R9
   4866 	MOVQ R10, (AX)
   4867 	MOVQ R9, -8(AX)(R8*1)
   4868 	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm8B
   4869 
   4870 emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32:
   4871 	MOVOU (R9), X0
   4872 	MOVOU -16(R9)(R8*1), X1
   4873 	MOVOU X0, (AX)
   4874 	MOVOU X1, -16(AX)(R8*1)
   4875 	JMP   memmove_end_copy_repeat_emit_encodeBlockAsm8B
   4876 
   4877 emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64:
   4878 	MOVOU (R9), X0
   4879 	MOVOU 16(R9), X1
   4880 	MOVOU -32(R9)(R8*1), X2
   4881 	MOVOU -16(R9)(R8*1), X3
   4882 	MOVOU X0, (AX)
   4883 	MOVOU X1, 16(AX)
   4884 	MOVOU X2, -32(AX)(R8*1)
   4885 	MOVOU X3, -16(AX)(R8*1)
   4886 
   4887 memmove_end_copy_repeat_emit_encodeBlockAsm8B:
   4888 	MOVQ BX, AX
   4889 	JMP  emit_literal_done_repeat_emit_encodeBlockAsm8B
   4890 
   4891 memmove_long_repeat_emit_encodeBlockAsm8B:
   4892 	LEAQ (AX)(R8*1), BX
   4893 
   4894 	// genMemMoveLong
   4895 	MOVOU (R9), X0
   4896 	MOVOU 16(R9), X1
   4897 	MOVOU -32(R9)(R8*1), X2
   4898 	MOVOU -16(R9)(R8*1), X3
   4899 	MOVQ  R8, R11
   4900 	SHRQ  $0x05, R11
   4901 	MOVQ  AX, R10
   4902 	ANDL  $0x0000001f, R10
   4903 	MOVQ  $0x00000040, R12
   4904 	SUBQ  R10, R12
   4905 	DECQ  R11
   4906 	JA    emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
   4907 	LEAQ  -32(R9)(R12*1), R10
   4908 	LEAQ  -32(AX)(R12*1), R13
   4909 
   4910 emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back:
   4911 	MOVOU (R10), X4
   4912 	MOVOU 16(R10), X5
   4913 	MOVOA X4, (R13)
   4914 	MOVOA X5, 16(R13)
   4915 	ADDQ  $0x20, R13
   4916 	ADDQ  $0x20, R10
   4917 	ADDQ  $0x20, R12
   4918 	DECQ  R11
   4919 	JNA   emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back
   4920 
   4921 emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32:
   4922 	MOVOU -32(R9)(R12*1), X4
   4923 	MOVOU -16(R9)(R12*1), X5
   4924 	MOVOA X4, -32(AX)(R12*1)
   4925 	MOVOA X5, -16(AX)(R12*1)
   4926 	ADDQ  $0x20, R12
   4927 	CMPQ  R8, R12
   4928 	JAE   emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
   4929 	MOVOU X0, (AX)
   4930 	MOVOU X1, 16(AX)
   4931 	MOVOU X2, -32(AX)(R8*1)
   4932 	MOVOU X3, -16(AX)(R8*1)
   4933 	MOVQ  BX, AX
   4934 
   4935 emit_literal_done_repeat_emit_encodeBlockAsm8B:
   4936 	ADDL $0x05, CX
   4937 	MOVL CX, BX
   4938 	SUBL 16(SP), BX
   4939 	MOVQ src_len+32(FP), R8
   4940 	SUBL CX, R8
   4941 	LEAQ (DX)(CX*1), R9
   4942 	LEAQ (DX)(BX*1), BX
   4943 
   4944 	// matchLen
   4945 	XORL R11, R11
   4946 	CMPL R8, $0x08
   4947 	JB   matchlen_match4_repeat_extend_encodeBlockAsm8B
   4948 
   4949 matchlen_loopback_repeat_extend_encodeBlockAsm8B:
   4950 	MOVQ  (R9)(R11*1), R10
   4951 	XORQ  (BX)(R11*1), R10
   4952 	TESTQ R10, R10
   4953 	JZ    matchlen_loop_repeat_extend_encodeBlockAsm8B
   4954 
   4955 #ifdef GOAMD64_v3
   4956 	TZCNTQ R10, R10
   4957 
   4958 #else
   4959 	BSFQ R10, R10
   4960 
   4961 #endif
   4962 	SARQ $0x03, R10
   4963 	LEAL (R11)(R10*1), R11
   4964 	JMP  repeat_extend_forward_end_encodeBlockAsm8B
   4965 
   4966 matchlen_loop_repeat_extend_encodeBlockAsm8B:
   4967 	LEAL -8(R8), R8
   4968 	LEAL 8(R11), R11
   4969 	CMPL R8, $0x08
   4970 	JAE  matchlen_loopback_repeat_extend_encodeBlockAsm8B
   4971 	JZ   repeat_extend_forward_end_encodeBlockAsm8B
   4972 
   4973 matchlen_match4_repeat_extend_encodeBlockAsm8B:
   4974 	CMPL R8, $0x04
   4975 	JB   matchlen_match2_repeat_extend_encodeBlockAsm8B
   4976 	MOVL (R9)(R11*1), R10
   4977 	CMPL (BX)(R11*1), R10
   4978 	JNE  matchlen_match2_repeat_extend_encodeBlockAsm8B
   4979 	SUBL $0x04, R8
   4980 	LEAL 4(R11), R11
   4981 
   4982 matchlen_match2_repeat_extend_encodeBlockAsm8B:
   4983 	CMPL R8, $0x02
   4984 	JB   matchlen_match1_repeat_extend_encodeBlockAsm8B
   4985 	MOVW (R9)(R11*1), R10
   4986 	CMPW (BX)(R11*1), R10
   4987 	JNE  matchlen_match1_repeat_extend_encodeBlockAsm8B
   4988 	SUBL $0x02, R8
   4989 	LEAL 2(R11), R11
   4990 
   4991 matchlen_match1_repeat_extend_encodeBlockAsm8B:
   4992 	CMPL R8, $0x01
   4993 	JB   repeat_extend_forward_end_encodeBlockAsm8B
   4994 	MOVB (R9)(R11*1), R10
   4995 	CMPB (BX)(R11*1), R10
   4996 	JNE  repeat_extend_forward_end_encodeBlockAsm8B
   4997 	LEAL 1(R11), R11
   4998 
   4999 repeat_extend_forward_end_encodeBlockAsm8B:
   5000 	ADDL  R11, CX
   5001 	MOVL  CX, BX
   5002 	SUBL  SI, BX
   5003 	MOVL  16(SP), SI
   5004 	TESTL DI, DI
   5005 	JZ    repeat_as_copy_encodeBlockAsm8B
   5006 
   5007 	// emitRepeat
   5008 	MOVL BX, SI
   5009 	LEAL -4(BX), BX
   5010 	CMPL SI, $0x08
   5011 	JBE  repeat_two_match_repeat_encodeBlockAsm8B
   5012 	CMPL SI, $0x0c
   5013 	JAE  cant_repeat_two_offset_match_repeat_encodeBlockAsm8B
   5014 
   5015 cant_repeat_two_offset_match_repeat_encodeBlockAsm8B:
   5016 	CMPL BX, $0x00000104
   5017 	JB   repeat_three_match_repeat_encodeBlockAsm8B
   5018 	LEAL -256(BX), BX
   5019 	MOVW $0x0019, (AX)
   5020 	MOVW BX, 2(AX)
   5021 	ADDQ $0x04, AX
   5022 	JMP  repeat_end_emit_encodeBlockAsm8B
   5023 
   5024 repeat_three_match_repeat_encodeBlockAsm8B:
   5025 	LEAL -4(BX), BX
   5026 	MOVW $0x0015, (AX)
   5027 	MOVB BL, 2(AX)
   5028 	ADDQ $0x03, AX
   5029 	JMP  repeat_end_emit_encodeBlockAsm8B
   5030 
   5031 repeat_two_match_repeat_encodeBlockAsm8B:
   5032 	SHLL $0x02, BX
   5033 	ORL  $0x01, BX
   5034 	MOVW BX, (AX)
   5035 	ADDQ $0x02, AX
   5036 	JMP  repeat_end_emit_encodeBlockAsm8B
   5037 	XORQ DI, DI
   5038 	LEAL 1(DI)(BX*4), BX
   5039 	MOVB SI, 1(AX)
   5040 	SARL $0x08, SI
   5041 	SHLL $0x05, SI
   5042 	ORL  SI, BX
   5043 	MOVB BL, (AX)
   5044 	ADDQ $0x02, AX
   5045 	JMP  repeat_end_emit_encodeBlockAsm8B
   5046 
   5047 repeat_as_copy_encodeBlockAsm8B:
   5048 	// emitCopy
   5049 	CMPL BX, $0x40
   5050 	JBE  two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B
   5051 	CMPL SI, $0x00000800
   5052 	JAE  long_offset_short_repeat_as_copy_encodeBlockAsm8B
   5053 	MOVL $0x00000001, DI
   5054 	LEAL 16(DI), DI
   5055 	MOVB SI, 1(AX)
   5056 	SHRL $0x08, SI
   5057 	SHLL $0x05, SI
   5058 	ORL  SI, DI
   5059 	MOVB DI, (AX)
   5060 	ADDQ $0x02, AX
   5061 	SUBL $0x08, BX
   5062 
   5063 	// emitRepeat
   5064 	LEAL -4(BX), BX
   5065 	JMP  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b
   5066 	MOVL BX, SI
   5067 	LEAL -4(BX), BX
   5068 	CMPL SI, $0x08
   5069 	JBE  repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b
   5070 	CMPL SI, $0x0c
   5071 	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b
   5072 
   5073 cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b:
   5074 	CMPL BX, $0x00000104
   5075 	JB   repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b
   5076 	LEAL -256(BX), BX
   5077 	MOVW $0x0019, (AX)
   5078 	MOVW BX, 2(AX)
   5079 	ADDQ $0x04, AX
   5080 	JMP  repeat_end_emit_encodeBlockAsm8B
   5081 
   5082 repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b:
   5083 	LEAL -4(BX), BX
   5084 	MOVW $0x0015, (AX)
   5085 	MOVB BL, 2(AX)
   5086 	ADDQ $0x03, AX
   5087 	JMP  repeat_end_emit_encodeBlockAsm8B
   5088 
   5089 repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b:
   5090 	SHLL $0x02, BX
   5091 	ORL  $0x01, BX
   5092 	MOVW BX, (AX)
   5093 	ADDQ $0x02, AX
   5094 	JMP  repeat_end_emit_encodeBlockAsm8B
   5095 	XORQ DI, DI
   5096 	LEAL 1(DI)(BX*4), BX
   5097 	MOVB SI, 1(AX)
   5098 	SARL $0x08, SI
   5099 	SHLL $0x05, SI
   5100 	ORL  SI, BX
   5101 	MOVB BL, (AX)
   5102 	ADDQ $0x02, AX
   5103 	JMP  repeat_end_emit_encodeBlockAsm8B
   5104 
   5105 long_offset_short_repeat_as_copy_encodeBlockAsm8B:
   5106 	MOVB $0xee, (AX)
   5107 	MOVW SI, 1(AX)
   5108 	LEAL -60(BX), BX
   5109 	ADDQ $0x03, AX
   5110 
   5111 	// emitRepeat
   5112 	MOVL BX, SI
   5113 	LEAL -4(BX), BX
   5114 	CMPL SI, $0x08
   5115 	JBE  repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
   5116 	CMPL SI, $0x0c
   5117 	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
   5118 
   5119 cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
   5120 	CMPL BX, $0x00000104
   5121 	JB   repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
   5122 	LEAL -256(BX), BX
   5123 	MOVW $0x0019, (AX)
   5124 	MOVW BX, 2(AX)
   5125 	ADDQ $0x04, AX
   5126 	JMP  repeat_end_emit_encodeBlockAsm8B
   5127 
   5128 repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
   5129 	LEAL -4(BX), BX
   5130 	MOVW $0x0015, (AX)
   5131 	MOVB BL, 2(AX)
   5132 	ADDQ $0x03, AX
   5133 	JMP  repeat_end_emit_encodeBlockAsm8B
   5134 
   5135 repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
   5136 	SHLL $0x02, BX
   5137 	ORL  $0x01, BX
   5138 	MOVW BX, (AX)
   5139 	ADDQ $0x02, AX
   5140 	JMP  repeat_end_emit_encodeBlockAsm8B
   5141 	XORQ DI, DI
   5142 	LEAL 1(DI)(BX*4), BX
   5143 	MOVB SI, 1(AX)
   5144 	SARL $0x08, SI
   5145 	SHLL $0x05, SI
   5146 	ORL  SI, BX
   5147 	MOVB BL, (AX)
   5148 	ADDQ $0x02, AX
   5149 	JMP  repeat_end_emit_encodeBlockAsm8B
   5150 
   5151 two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B:
   5152 	MOVL BX, DI
   5153 	SHLL $0x02, DI
   5154 	CMPL BX, $0x0c
   5155 	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm8B
   5156 	LEAL -15(DI), DI
   5157 	MOVB SI, 1(AX)
   5158 	SHRL $0x08, SI
   5159 	SHLL $0x05, SI
   5160 	ORL  SI, DI
   5161 	MOVB DI, (AX)
   5162 	ADDQ $0x02, AX
   5163 	JMP  repeat_end_emit_encodeBlockAsm8B
   5164 
   5165 emit_copy_three_repeat_as_copy_encodeBlockAsm8B:
   5166 	LEAL -2(DI), DI
   5167 	MOVB DI, (AX)
   5168 	MOVW SI, 1(AX)
   5169 	ADDQ $0x03, AX
   5170 
   5171 repeat_end_emit_encodeBlockAsm8B:
   5172 	MOVL CX, 12(SP)
   5173 	JMP  search_loop_encodeBlockAsm8B
   5174 
   5175 no_repeat_found_encodeBlockAsm8B:
   5176 	CMPL (DX)(BX*1), SI
   5177 	JEQ  candidate_match_encodeBlockAsm8B
   5178 	SHRQ $0x08, SI
   5179 	MOVL 24(SP)(R9*4), BX
   5180 	LEAL 2(CX), R8
   5181 	CMPL (DX)(DI*1), SI
   5182 	JEQ  candidate2_match_encodeBlockAsm8B
   5183 	MOVL R8, 24(SP)(R9*4)
   5184 	SHRQ $0x08, SI
   5185 	CMPL (DX)(BX*1), SI
   5186 	JEQ  candidate3_match_encodeBlockAsm8B
   5187 	MOVL 20(SP), CX
   5188 	JMP  search_loop_encodeBlockAsm8B
   5189 
   5190 candidate3_match_encodeBlockAsm8B:
   5191 	ADDL $0x02, CX
   5192 	JMP  candidate_match_encodeBlockAsm8B
   5193 
   5194 candidate2_match_encodeBlockAsm8B:
   5195 	MOVL R8, 24(SP)(R9*4)
   5196 	INCL CX
   5197 	MOVL DI, BX
   5198 
   5199 candidate_match_encodeBlockAsm8B:
   5200 	MOVL  12(SP), SI
   5201 	TESTL BX, BX
   5202 	JZ    match_extend_back_end_encodeBlockAsm8B
   5203 
   5204 match_extend_back_loop_encodeBlockAsm8B:
   5205 	CMPL CX, SI
   5206 	JBE  match_extend_back_end_encodeBlockAsm8B
   5207 	MOVB -1(DX)(BX*1), DI
   5208 	MOVB -1(DX)(CX*1), R8
   5209 	CMPB DI, R8
   5210 	JNE  match_extend_back_end_encodeBlockAsm8B
   5211 	LEAL -1(CX), CX
   5212 	DECL BX
   5213 	JZ   match_extend_back_end_encodeBlockAsm8B
   5214 	JMP  match_extend_back_loop_encodeBlockAsm8B
   5215 
   5216 match_extend_back_end_encodeBlockAsm8B:
   5217 	MOVL CX, SI
   5218 	SUBL 12(SP), SI
   5219 	LEAQ 3(AX)(SI*1), SI
   5220 	CMPQ SI, (SP)
   5221 	JB   match_dst_size_check_encodeBlockAsm8B
   5222 	MOVQ $0x00000000, ret+48(FP)
   5223 	RET
   5224 
   5225 match_dst_size_check_encodeBlockAsm8B:
   5226 	MOVL CX, SI
   5227 	MOVL 12(SP), DI
   5228 	CMPL DI, SI
   5229 	JEQ  emit_literal_done_match_emit_encodeBlockAsm8B
   5230 	MOVL SI, R8
   5231 	MOVL SI, 12(SP)
   5232 	LEAQ (DX)(DI*1), SI
   5233 	SUBL DI, R8
   5234 	LEAL -1(R8), DI
   5235 	CMPL DI, $0x3c
   5236 	JB   one_byte_match_emit_encodeBlockAsm8B
   5237 	CMPL DI, $0x00000100
   5238 	JB   two_bytes_match_emit_encodeBlockAsm8B
   5239 	JB   three_bytes_match_emit_encodeBlockAsm8B
   5240 
   5241 three_bytes_match_emit_encodeBlockAsm8B:
   5242 	MOVB $0xf4, (AX)
   5243 	MOVW DI, 1(AX)
   5244 	ADDQ $0x03, AX
   5245 	JMP  memmove_long_match_emit_encodeBlockAsm8B
   5246 
   5247 two_bytes_match_emit_encodeBlockAsm8B:
   5248 	MOVB $0xf0, (AX)
   5249 	MOVB DI, 1(AX)
   5250 	ADDQ $0x02, AX
   5251 	CMPL DI, $0x40
   5252 	JB   memmove_match_emit_encodeBlockAsm8B
   5253 	JMP  memmove_long_match_emit_encodeBlockAsm8B
   5254 
   5255 one_byte_match_emit_encodeBlockAsm8B:
   5256 	SHLB $0x02, DI
   5257 	MOVB DI, (AX)
   5258 	ADDQ $0x01, AX
   5259 
   5260 memmove_match_emit_encodeBlockAsm8B:
   5261 	LEAQ (AX)(R8*1), DI
   5262 
   5263 	// genMemMoveShort
   5264 	CMPQ R8, $0x08
   5265 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8
   5266 	CMPQ R8, $0x10
   5267 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16
   5268 	CMPQ R8, $0x20
   5269 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32
   5270 	JMP  emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64
   5271 
   5272 emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8:
   5273 	MOVQ (SI), R9
   5274 	MOVQ R9, (AX)
   5275 	JMP  memmove_end_copy_match_emit_encodeBlockAsm8B
   5276 
   5277 emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16:
   5278 	MOVQ (SI), R9
   5279 	MOVQ -8(SI)(R8*1), SI
   5280 	MOVQ R9, (AX)
   5281 	MOVQ SI, -8(AX)(R8*1)
   5282 	JMP  memmove_end_copy_match_emit_encodeBlockAsm8B
   5283 
   5284 emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32:
   5285 	MOVOU (SI), X0
   5286 	MOVOU -16(SI)(R8*1), X1
   5287 	MOVOU X0, (AX)
   5288 	MOVOU X1, -16(AX)(R8*1)
   5289 	JMP   memmove_end_copy_match_emit_encodeBlockAsm8B
   5290 
   5291 emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64:
   5292 	MOVOU (SI), X0
   5293 	MOVOU 16(SI), X1
   5294 	MOVOU -32(SI)(R8*1), X2
   5295 	MOVOU -16(SI)(R8*1), X3
   5296 	MOVOU X0, (AX)
   5297 	MOVOU X1, 16(AX)
   5298 	MOVOU X2, -32(AX)(R8*1)
   5299 	MOVOU X3, -16(AX)(R8*1)
   5300 
   5301 memmove_end_copy_match_emit_encodeBlockAsm8B:
   5302 	MOVQ DI, AX
   5303 	JMP  emit_literal_done_match_emit_encodeBlockAsm8B
   5304 
   5305 memmove_long_match_emit_encodeBlockAsm8B:
   5306 	LEAQ (AX)(R8*1), DI
   5307 
   5308 	// genMemMoveLong
   5309 	MOVOU (SI), X0
   5310 	MOVOU 16(SI), X1
   5311 	MOVOU -32(SI)(R8*1), X2
   5312 	MOVOU -16(SI)(R8*1), X3
   5313 	MOVQ  R8, R10
   5314 	SHRQ  $0x05, R10
   5315 	MOVQ  AX, R9
   5316 	ANDL  $0x0000001f, R9
   5317 	MOVQ  $0x00000040, R11
   5318 	SUBQ  R9, R11
   5319 	DECQ  R10
   5320 	JA    emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
   5321 	LEAQ  -32(SI)(R11*1), R9
   5322 	LEAQ  -32(AX)(R11*1), R12
   5323 
   5324 emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back:
   5325 	MOVOU (R9), X4
   5326 	MOVOU 16(R9), X5
   5327 	MOVOA X4, (R12)
   5328 	MOVOA X5, 16(R12)
   5329 	ADDQ  $0x20, R12
   5330 	ADDQ  $0x20, R9
   5331 	ADDQ  $0x20, R11
   5332 	DECQ  R10
   5333 	JNA   emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back
   5334 
   5335 emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32:
   5336 	MOVOU -32(SI)(R11*1), X4
   5337 	MOVOU -16(SI)(R11*1), X5
   5338 	MOVOA X4, -32(AX)(R11*1)
   5339 	MOVOA X5, -16(AX)(R11*1)
   5340 	ADDQ  $0x20, R11
   5341 	CMPQ  R8, R11
   5342 	JAE   emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
   5343 	MOVOU X0, (AX)
   5344 	MOVOU X1, 16(AX)
   5345 	MOVOU X2, -32(AX)(R8*1)
   5346 	MOVOU X3, -16(AX)(R8*1)
   5347 	MOVQ  DI, AX
   5348 
   5349 emit_literal_done_match_emit_encodeBlockAsm8B:
   5350 match_nolit_loop_encodeBlockAsm8B:
   5351 	MOVL CX, SI
   5352 	SUBL BX, SI
   5353 	MOVL SI, 16(SP)
   5354 	ADDL $0x04, CX
   5355 	ADDL $0x04, BX
   5356 	MOVQ src_len+32(FP), SI
   5357 	SUBL CX, SI
   5358 	LEAQ (DX)(CX*1), DI
   5359 	LEAQ (DX)(BX*1), BX
   5360 
   5361 	// matchLen
   5362 	XORL R9, R9
   5363 	CMPL SI, $0x08
   5364 	JB   matchlen_match4_match_nolit_encodeBlockAsm8B
   5365 
   5366 matchlen_loopback_match_nolit_encodeBlockAsm8B:
   5367 	MOVQ  (DI)(R9*1), R8
   5368 	XORQ  (BX)(R9*1), R8
   5369 	TESTQ R8, R8
   5370 	JZ    matchlen_loop_match_nolit_encodeBlockAsm8B
   5371 
   5372 #ifdef GOAMD64_v3
   5373 	TZCNTQ R8, R8
   5374 
   5375 #else
   5376 	BSFQ R8, R8
   5377 
   5378 #endif
   5379 	SARQ $0x03, R8
   5380 	LEAL (R9)(R8*1), R9
   5381 	JMP  match_nolit_end_encodeBlockAsm8B
   5382 
   5383 matchlen_loop_match_nolit_encodeBlockAsm8B:
   5384 	LEAL -8(SI), SI
   5385 	LEAL 8(R9), R9
   5386 	CMPL SI, $0x08
   5387 	JAE  matchlen_loopback_match_nolit_encodeBlockAsm8B
   5388 	JZ   match_nolit_end_encodeBlockAsm8B
   5389 
   5390 matchlen_match4_match_nolit_encodeBlockAsm8B:
   5391 	CMPL SI, $0x04
   5392 	JB   matchlen_match2_match_nolit_encodeBlockAsm8B
   5393 	MOVL (DI)(R9*1), R8
   5394 	CMPL (BX)(R9*1), R8
   5395 	JNE  matchlen_match2_match_nolit_encodeBlockAsm8B
   5396 	SUBL $0x04, SI
   5397 	LEAL 4(R9), R9
   5398 
   5399 matchlen_match2_match_nolit_encodeBlockAsm8B:
   5400 	CMPL SI, $0x02
   5401 	JB   matchlen_match1_match_nolit_encodeBlockAsm8B
   5402 	MOVW (DI)(R9*1), R8
   5403 	CMPW (BX)(R9*1), R8
   5404 	JNE  matchlen_match1_match_nolit_encodeBlockAsm8B
   5405 	SUBL $0x02, SI
   5406 	LEAL 2(R9), R9
   5407 
   5408 matchlen_match1_match_nolit_encodeBlockAsm8B:
   5409 	CMPL SI, $0x01
   5410 	JB   match_nolit_end_encodeBlockAsm8B
   5411 	MOVB (DI)(R9*1), R8
   5412 	CMPB (BX)(R9*1), R8
   5413 	JNE  match_nolit_end_encodeBlockAsm8B
   5414 	LEAL 1(R9), R9
   5415 
   5416 match_nolit_end_encodeBlockAsm8B:
   5417 	ADDL R9, CX
   5418 	MOVL 16(SP), BX
   5419 	ADDL $0x04, R9
   5420 	MOVL CX, 12(SP)
   5421 
   5422 	// emitCopy
   5423 	CMPL R9, $0x40
   5424 	JBE  two_byte_offset_short_match_nolit_encodeBlockAsm8B
   5425 	CMPL BX, $0x00000800
   5426 	JAE  long_offset_short_match_nolit_encodeBlockAsm8B
   5427 	MOVL $0x00000001, SI
   5428 	LEAL 16(SI), SI
   5429 	MOVB BL, 1(AX)
   5430 	SHRL $0x08, BX
   5431 	SHLL $0x05, BX
   5432 	ORL  BX, SI
   5433 	MOVB SI, (AX)
   5434 	ADDQ $0x02, AX
   5435 	SUBL $0x08, R9
   5436 
   5437 	// emitRepeat
   5438 	LEAL -4(R9), R9
   5439 	JMP  cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b
   5440 	MOVL R9, BX
   5441 	LEAL -4(R9), R9
   5442 	CMPL BX, $0x08
   5443 	JBE  repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short_2b
   5444 	CMPL BX, $0x0c
   5445 	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b
   5446 
   5447 cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b:
   5448 	CMPL R9, $0x00000104
   5449 	JB   repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short_2b
   5450 	LEAL -256(R9), R9
   5451 	MOVW $0x0019, (AX)
   5452 	MOVW R9, 2(AX)
   5453 	ADDQ $0x04, AX
   5454 	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
   5455 
   5456 repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short_2b:
   5457 	LEAL -4(R9), R9
   5458 	MOVW $0x0015, (AX)
   5459 	MOVB R9, 2(AX)
   5460 	ADDQ $0x03, AX
   5461 	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
   5462 
   5463 repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short_2b:
   5464 	SHLL $0x02, R9
   5465 	ORL  $0x01, R9
   5466 	MOVW R9, (AX)
   5467 	ADDQ $0x02, AX
   5468 	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
   5469 	XORQ SI, SI
   5470 	LEAL 1(SI)(R9*4), R9
   5471 	MOVB BL, 1(AX)
   5472 	SARL $0x08, BX
   5473 	SHLL $0x05, BX
   5474 	ORL  BX, R9
   5475 	MOVB R9, (AX)
   5476 	ADDQ $0x02, AX
   5477 	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
   5478 
   5479 long_offset_short_match_nolit_encodeBlockAsm8B:
   5480 	MOVB $0xee, (AX)
   5481 	MOVW BX, 1(AX)
   5482 	LEAL -60(R9), R9
   5483 	ADDQ $0x03, AX
   5484 
   5485 	// emitRepeat
   5486 	MOVL R9, BX
   5487 	LEAL -4(R9), R9
   5488 	CMPL BX, $0x08
   5489 	JBE  repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short
   5490 	CMPL BX, $0x0c
   5491 	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short
   5492 
   5493 cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short:
   5494 	CMPL R9, $0x00000104
   5495 	JB   repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short
   5496 	LEAL -256(R9), R9
   5497 	MOVW $0x0019, (AX)
   5498 	MOVW R9, 2(AX)
   5499 	ADDQ $0x04, AX
   5500 	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
   5501 
   5502 repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short:
   5503 	LEAL -4(R9), R9
   5504 	MOVW $0x0015, (AX)
   5505 	MOVB R9, 2(AX)
   5506 	ADDQ $0x03, AX
   5507 	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
   5508 
   5509 repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short:
   5510 	SHLL $0x02, R9
   5511 	ORL  $0x01, R9
   5512 	MOVW R9, (AX)
   5513 	ADDQ $0x02, AX
   5514 	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
   5515 	XORQ SI, SI
   5516 	LEAL 1(SI)(R9*4), R9
   5517 	MOVB BL, 1(AX)
   5518 	SARL $0x08, BX
   5519 	SHLL $0x05, BX
   5520 	ORL  BX, R9
   5521 	MOVB R9, (AX)
   5522 	ADDQ $0x02, AX
   5523 	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
   5524 
   5525 two_byte_offset_short_match_nolit_encodeBlockAsm8B:
   5526 	MOVL R9, SI
   5527 	SHLL $0x02, SI
   5528 	CMPL R9, $0x0c
   5529 	JAE  emit_copy_three_match_nolit_encodeBlockAsm8B
   5530 	LEAL -15(SI), SI
   5531 	MOVB BL, 1(AX)
   5532 	SHRL $0x08, BX
   5533 	SHLL $0x05, BX
   5534 	ORL  BX, SI
   5535 	MOVB SI, (AX)
   5536 	ADDQ $0x02, AX
   5537 	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
   5538 
   5539 emit_copy_three_match_nolit_encodeBlockAsm8B:
   5540 	LEAL -2(SI), SI
   5541 	MOVB SI, (AX)
   5542 	MOVW BX, 1(AX)
   5543 	ADDQ $0x03, AX
   5544 
   5545 match_nolit_emitcopy_end_encodeBlockAsm8B:
   5546 	CMPL CX, 8(SP)
   5547 	JAE  emit_remainder_encodeBlockAsm8B
   5548 	MOVQ -2(DX)(CX*1), SI
   5549 	CMPQ AX, (SP)
   5550 	JB   match_nolit_dst_ok_encodeBlockAsm8B
   5551 	MOVQ $0x00000000, ret+48(FP)
   5552 	RET
   5553 
   5554 match_nolit_dst_ok_encodeBlockAsm8B:
   5555 	MOVQ  $0x9e3779b1, R8
   5556 	MOVQ  SI, DI
   5557 	SHRQ  $0x10, SI
   5558 	MOVQ  SI, BX
   5559 	SHLQ  $0x20, DI
   5560 	IMULQ R8, DI
   5561 	SHRQ  $0x38, DI
   5562 	SHLQ  $0x20, BX
   5563 	IMULQ R8, BX
   5564 	SHRQ  $0x38, BX
   5565 	LEAL  -2(CX), R8
   5566 	LEAQ  24(SP)(BX*4), R9
   5567 	MOVL  (R9), BX
   5568 	MOVL  R8, 24(SP)(DI*4)
   5569 	MOVL  CX, (R9)
   5570 	CMPL  (DX)(BX*1), SI
   5571 	JEQ   match_nolit_loop_encodeBlockAsm8B
   5572 	INCL  CX
   5573 	JMP   search_loop_encodeBlockAsm8B
   5574 
   5575 emit_remainder_encodeBlockAsm8B:
   5576 	MOVQ src_len+32(FP), CX
   5577 	SUBL 12(SP), CX
   5578 	LEAQ 3(AX)(CX*1), CX
   5579 	CMPQ CX, (SP)
   5580 	JB   emit_remainder_ok_encodeBlockAsm8B
   5581 	MOVQ $0x00000000, ret+48(FP)
   5582 	RET
   5583 
   5584 emit_remainder_ok_encodeBlockAsm8B:
   5585 	MOVQ src_len+32(FP), CX
   5586 	MOVL 12(SP), BX
   5587 	CMPL BX, CX
   5588 	JEQ  emit_literal_done_emit_remainder_encodeBlockAsm8B
   5589 	MOVL CX, SI
   5590 	MOVL CX, 12(SP)
   5591 	LEAQ (DX)(BX*1), CX
   5592 	SUBL BX, SI
   5593 	LEAL -1(SI), DX
   5594 	CMPL DX, $0x3c
   5595 	JB   one_byte_emit_remainder_encodeBlockAsm8B
   5596 	CMPL DX, $0x00000100
   5597 	JB   two_bytes_emit_remainder_encodeBlockAsm8B
   5598 	JB   three_bytes_emit_remainder_encodeBlockAsm8B
   5599 
   5600 three_bytes_emit_remainder_encodeBlockAsm8B:
   5601 	MOVB $0xf4, (AX)
   5602 	MOVW DX, 1(AX)
   5603 	ADDQ $0x03, AX
   5604 	JMP  memmove_long_emit_remainder_encodeBlockAsm8B
   5605 
   5606 two_bytes_emit_remainder_encodeBlockAsm8B:
   5607 	MOVB $0xf0, (AX)
   5608 	MOVB DL, 1(AX)
   5609 	ADDQ $0x02, AX
   5610 	CMPL DX, $0x40
   5611 	JB   memmove_emit_remainder_encodeBlockAsm8B
   5612 	JMP  memmove_long_emit_remainder_encodeBlockAsm8B
   5613 
   5614 one_byte_emit_remainder_encodeBlockAsm8B:
   5615 	SHLB $0x02, DL
   5616 	MOVB DL, (AX)
   5617 	ADDQ $0x01, AX
   5618 
   5619 memmove_emit_remainder_encodeBlockAsm8B:
   5620 	LEAQ (AX)(SI*1), DX
   5621 	MOVL SI, BX
   5622 
   5623 	// genMemMoveShort
   5624 	CMPQ BX, $0x03
   5625 	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_1or2
   5626 	JE   emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_3
   5627 	CMPQ BX, $0x08
   5628 	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_4through7
   5629 	CMPQ BX, $0x10
   5630 	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8through16
   5631 	CMPQ BX, $0x20
   5632 	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_17through32
   5633 	JMP  emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64
   5634 
   5635 emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_1or2:
   5636 	MOVB (CX), SI
   5637 	MOVB -1(CX)(BX*1), CL
   5638 	MOVB SI, (AX)
   5639 	MOVB CL, -1(AX)(BX*1)
   5640 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm8B
   5641 
   5642 emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_3:
   5643 	MOVW (CX), SI
   5644 	MOVB 2(CX), CL
   5645 	MOVW SI, (AX)
   5646 	MOVB CL, 2(AX)
   5647 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm8B
   5648 
   5649 emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_4through7:
   5650 	MOVL (CX), SI
   5651 	MOVL -4(CX)(BX*1), CX
   5652 	MOVL SI, (AX)
   5653 	MOVL CX, -4(AX)(BX*1)
   5654 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm8B
   5655 
   5656 emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8through16:
   5657 	MOVQ (CX), SI
   5658 	MOVQ -8(CX)(BX*1), CX
   5659 	MOVQ SI, (AX)
   5660 	MOVQ CX, -8(AX)(BX*1)
   5661 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm8B
   5662 
   5663 emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_17through32:
   5664 	MOVOU (CX), X0
   5665 	MOVOU -16(CX)(BX*1), X1
   5666 	MOVOU X0, (AX)
   5667 	MOVOU X1, -16(AX)(BX*1)
   5668 	JMP   memmove_end_copy_emit_remainder_encodeBlockAsm8B
   5669 
   5670 emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64:
   5671 	MOVOU (CX), X0
   5672 	MOVOU 16(CX), X1
   5673 	MOVOU -32(CX)(BX*1), X2
   5674 	MOVOU -16(CX)(BX*1), X3
   5675 	MOVOU X0, (AX)
   5676 	MOVOU X1, 16(AX)
   5677 	MOVOU X2, -32(AX)(BX*1)
   5678 	MOVOU X3, -16(AX)(BX*1)
   5679 
   5680 memmove_end_copy_emit_remainder_encodeBlockAsm8B:
   5681 	MOVQ DX, AX
   5682 	JMP  emit_literal_done_emit_remainder_encodeBlockAsm8B
   5683 
   5684 memmove_long_emit_remainder_encodeBlockAsm8B:
   5685 	LEAQ (AX)(SI*1), DX
   5686 	MOVL SI, BX
   5687 
   5688 	// genMemMoveLong
   5689 	MOVOU (CX), X0
   5690 	MOVOU 16(CX), X1
   5691 	MOVOU -32(CX)(BX*1), X2
   5692 	MOVOU -16(CX)(BX*1), X3
   5693 	MOVQ  BX, DI
   5694 	SHRQ  $0x05, DI
   5695 	MOVQ  AX, SI
   5696 	ANDL  $0x0000001f, SI
   5697 	MOVQ  $0x00000040, R8
   5698 	SUBQ  SI, R8
   5699 	DECQ  DI
   5700 	JA    emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32
   5701 	LEAQ  -32(CX)(R8*1), SI
   5702 	LEAQ  -32(AX)(R8*1), R9
   5703 
   5704 emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back:
   5705 	MOVOU (SI), X4
   5706 	MOVOU 16(SI), X5
   5707 	MOVOA X4, (R9)
   5708 	MOVOA X5, 16(R9)
   5709 	ADDQ  $0x20, R9
   5710 	ADDQ  $0x20, SI
   5711 	ADDQ  $0x20, R8
   5712 	DECQ  DI
   5713 	JNA   emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back
   5714 
   5715 emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32:
   5716 	MOVOU -32(CX)(R8*1), X4
   5717 	MOVOU -16(CX)(R8*1), X5
   5718 	MOVOA X4, -32(AX)(R8*1)
   5719 	MOVOA X5, -16(AX)(R8*1)
   5720 	ADDQ  $0x20, R8
   5721 	CMPQ  BX, R8
   5722 	JAE   emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32
   5723 	MOVOU X0, (AX)
   5724 	MOVOU X1, 16(AX)
   5725 	MOVOU X2, -32(AX)(BX*1)
   5726 	MOVOU X3, -16(AX)(BX*1)
   5727 	MOVQ  DX, AX
   5728 
   5729 emit_literal_done_emit_remainder_encodeBlockAsm8B:
   5730 	MOVQ dst_base+0(FP), CX
   5731 	SUBQ CX, AX
   5732 	MOVQ AX, ret+48(FP)
   5733 	RET
   5734 
   5735 // func encodeBetterBlockAsm(dst []byte, src []byte) int
   5736 // Requires: BMI, SSE2
   5737 TEXT ·encodeBetterBlockAsm(SB), $589848-56
   5738 	MOVQ dst_base+0(FP), AX
   5739 	MOVQ $0x00001200, CX
   5740 	LEAQ 24(SP), DX
   5741 	PXOR X0, X0
   5742 
   5743 zero_loop_encodeBetterBlockAsm:
   5744 	MOVOU X0, (DX)
   5745 	MOVOU X0, 16(DX)
   5746 	MOVOU X0, 32(DX)
   5747 	MOVOU X0, 48(DX)
   5748 	MOVOU X0, 64(DX)
   5749 	MOVOU X0, 80(DX)
   5750 	MOVOU X0, 96(DX)
   5751 	MOVOU X0, 112(DX)
   5752 	ADDQ  $0x80, DX
   5753 	DECQ  CX
   5754 	JNZ   zero_loop_encodeBetterBlockAsm
   5755 	MOVL  $0x00000000, 12(SP)
   5756 	MOVQ  src_len+32(FP), CX
   5757 	LEAQ  -6(CX), DX
   5758 	LEAQ  -8(CX), BX
   5759 	MOVL  BX, 8(SP)
   5760 	SHRQ  $0x05, CX
   5761 	SUBL  CX, DX
   5762 	LEAQ  (AX)(DX*1), DX
   5763 	MOVQ  DX, (SP)
   5764 	MOVL  $0x00000001, CX
   5765 	MOVL  $0x00000000, 16(SP)
   5766 	MOVQ  src_base+24(FP), DX
   5767 
   5768 search_loop_encodeBetterBlockAsm:
   5769 	MOVL CX, BX
   5770 	SUBL 12(SP), BX
   5771 	SHRL $0x07, BX
   5772 	CMPL BX, $0x63
   5773 	JBE  check_maxskip_ok_encodeBetterBlockAsm
   5774 	LEAL 100(CX), BX
   5775 	JMP  check_maxskip_cont_encodeBetterBlockAsm
   5776 
   5777 check_maxskip_ok_encodeBetterBlockAsm:
   5778 	LEAL 1(CX)(BX*1), BX
   5779 
   5780 check_maxskip_cont_encodeBetterBlockAsm:
   5781 	CMPL  BX, 8(SP)
   5782 	JAE   emit_remainder_encodeBetterBlockAsm
   5783 	MOVQ  (DX)(CX*1), SI
   5784 	MOVL  BX, 20(SP)
   5785 	MOVQ  $0x00cf1bbcdcbfa563, R8
   5786 	MOVQ  $0x9e3779b1, BX
   5787 	MOVQ  SI, R9
   5788 	MOVQ  SI, R10
   5789 	SHLQ  $0x08, R9
   5790 	IMULQ R8, R9
   5791 	SHRQ  $0x2f, R9
   5792 	SHLQ  $0x20, R10
   5793 	IMULQ BX, R10
   5794 	SHRQ  $0x32, R10
   5795 	MOVL  24(SP)(R9*4), BX
   5796 	MOVL  524312(SP)(R10*4), DI
   5797 	MOVL  CX, 24(SP)(R9*4)
   5798 	MOVL  CX, 524312(SP)(R10*4)
   5799 	MOVQ  (DX)(BX*1), R9
   5800 	MOVQ  (DX)(DI*1), R10
   5801 	CMPQ  R9, SI
   5802 	JEQ   candidate_match_encodeBetterBlockAsm
   5803 	CMPQ  R10, SI
   5804 	JNE   no_short_found_encodeBetterBlockAsm
   5805 	MOVL  DI, BX
   5806 	JMP   candidate_match_encodeBetterBlockAsm
   5807 
   5808 no_short_found_encodeBetterBlockAsm:
   5809 	CMPL R9, SI
   5810 	JEQ  candidate_match_encodeBetterBlockAsm
   5811 	CMPL R10, SI
   5812 	JEQ  candidateS_match_encodeBetterBlockAsm
   5813 	MOVL 20(SP), CX
   5814 	JMP  search_loop_encodeBetterBlockAsm
   5815 
   5816 candidateS_match_encodeBetterBlockAsm:
   5817 	SHRQ  $0x08, SI
   5818 	MOVQ  SI, R9
   5819 	SHLQ  $0x08, R9
   5820 	IMULQ R8, R9
   5821 	SHRQ  $0x2f, R9
   5822 	MOVL  24(SP)(R9*4), BX
   5823 	INCL  CX
   5824 	MOVL  CX, 24(SP)(R9*4)
   5825 	CMPL  (DX)(BX*1), SI
   5826 	JEQ   candidate_match_encodeBetterBlockAsm
   5827 	DECL  CX
   5828 	MOVL  DI, BX
   5829 
   5830 candidate_match_encodeBetterBlockAsm:
   5831 	MOVL  12(SP), SI
   5832 	TESTL BX, BX
   5833 	JZ    match_extend_back_end_encodeBetterBlockAsm
   5834 
   5835 match_extend_back_loop_encodeBetterBlockAsm:
   5836 	CMPL CX, SI
   5837 	JBE  match_extend_back_end_encodeBetterBlockAsm
   5838 	MOVB -1(DX)(BX*1), DI
   5839 	MOVB -1(DX)(CX*1), R8
   5840 	CMPB DI, R8
   5841 	JNE  match_extend_back_end_encodeBetterBlockAsm
   5842 	LEAL -1(CX), CX
   5843 	DECL BX
   5844 	JZ   match_extend_back_end_encodeBetterBlockAsm
   5845 	JMP  match_extend_back_loop_encodeBetterBlockAsm
   5846 
   5847 match_extend_back_end_encodeBetterBlockAsm:
   5848 	MOVL CX, SI
   5849 	SUBL 12(SP), SI
   5850 	LEAQ 5(AX)(SI*1), SI
   5851 	CMPQ SI, (SP)
   5852 	JB   match_dst_size_check_encodeBetterBlockAsm
   5853 	MOVQ $0x00000000, ret+48(FP)
   5854 	RET
   5855 
   5856 match_dst_size_check_encodeBetterBlockAsm:
   5857 	MOVL CX, SI
   5858 	ADDL $0x04, CX
   5859 	ADDL $0x04, BX
   5860 	MOVQ src_len+32(FP), DI
   5861 	SUBL CX, DI
   5862 	LEAQ (DX)(CX*1), R8
   5863 	LEAQ (DX)(BX*1), R9
   5864 
   5865 	// matchLen
   5866 	XORL R11, R11
   5867 	CMPL DI, $0x08
   5868 	JB   matchlen_match4_match_nolit_encodeBetterBlockAsm
   5869 
   5870 matchlen_loopback_match_nolit_encodeBetterBlockAsm:
   5871 	MOVQ  (R8)(R11*1), R10
   5872 	XORQ  (R9)(R11*1), R10
   5873 	TESTQ R10, R10
   5874 	JZ    matchlen_loop_match_nolit_encodeBetterBlockAsm
   5875 
   5876 #ifdef GOAMD64_v3
   5877 	TZCNTQ R10, R10
   5878 
   5879 #else
   5880 	BSFQ R10, R10
   5881 
   5882 #endif
   5883 	SARQ $0x03, R10
   5884 	LEAL (R11)(R10*1), R11
   5885 	JMP  match_nolit_end_encodeBetterBlockAsm
   5886 
   5887 matchlen_loop_match_nolit_encodeBetterBlockAsm:
   5888 	LEAL -8(DI), DI
   5889 	LEAL 8(R11), R11
   5890 	CMPL DI, $0x08
   5891 	JAE  matchlen_loopback_match_nolit_encodeBetterBlockAsm
   5892 	JZ   match_nolit_end_encodeBetterBlockAsm
   5893 
   5894 matchlen_match4_match_nolit_encodeBetterBlockAsm:
   5895 	CMPL DI, $0x04
   5896 	JB   matchlen_match2_match_nolit_encodeBetterBlockAsm
   5897 	MOVL (R8)(R11*1), R10
   5898 	CMPL (R9)(R11*1), R10
   5899 	JNE  matchlen_match2_match_nolit_encodeBetterBlockAsm
   5900 	SUBL $0x04, DI
   5901 	LEAL 4(R11), R11
   5902 
   5903 matchlen_match2_match_nolit_encodeBetterBlockAsm:
   5904 	CMPL DI, $0x02
   5905 	JB   matchlen_match1_match_nolit_encodeBetterBlockAsm
   5906 	MOVW (R8)(R11*1), R10
   5907 	CMPW (R9)(R11*1), R10
   5908 	JNE  matchlen_match1_match_nolit_encodeBetterBlockAsm
   5909 	SUBL $0x02, DI
   5910 	LEAL 2(R11), R11
   5911 
   5912 matchlen_match1_match_nolit_encodeBetterBlockAsm:
   5913 	CMPL DI, $0x01
   5914 	JB   match_nolit_end_encodeBetterBlockAsm
   5915 	MOVB (R8)(R11*1), R10
   5916 	CMPB (R9)(R11*1), R10
   5917 	JNE  match_nolit_end_encodeBetterBlockAsm
   5918 	LEAL 1(R11), R11
   5919 
   5920 match_nolit_end_encodeBetterBlockAsm:
   5921 	MOVL CX, DI
   5922 	SUBL BX, DI
   5923 
   5924 	// Check if repeat
   5925 	CMPL 16(SP), DI
   5926 	JEQ  match_is_repeat_encodeBetterBlockAsm
   5927 	CMPL R11, $0x01
   5928 	JA   match_length_ok_encodeBetterBlockAsm
   5929 	CMPL DI, $0x0000ffff
   5930 	JBE  match_length_ok_encodeBetterBlockAsm
   5931 	MOVL 20(SP), CX
   5932 	INCL CX
   5933 	JMP  search_loop_encodeBetterBlockAsm
   5934 
   5935 match_length_ok_encodeBetterBlockAsm:
   5936 	MOVL DI, 16(SP)
   5937 	MOVL 12(SP), BX
   5938 	CMPL BX, SI
   5939 	JEQ  emit_literal_done_match_emit_encodeBetterBlockAsm
   5940 	MOVL SI, R8
   5941 	MOVL SI, 12(SP)
   5942 	LEAQ (DX)(BX*1), R9
   5943 	SUBL BX, R8
   5944 	LEAL -1(R8), BX
   5945 	CMPL BX, $0x3c
   5946 	JB   one_byte_match_emit_encodeBetterBlockAsm
   5947 	CMPL BX, $0x00000100
   5948 	JB   two_bytes_match_emit_encodeBetterBlockAsm
   5949 	CMPL BX, $0x00010000
   5950 	JB   three_bytes_match_emit_encodeBetterBlockAsm
   5951 	CMPL BX, $0x01000000
   5952 	JB   four_bytes_match_emit_encodeBetterBlockAsm
   5953 	MOVB $0xfc, (AX)
   5954 	MOVL BX, 1(AX)
   5955 	ADDQ $0x05, AX
   5956 	JMP  memmove_long_match_emit_encodeBetterBlockAsm
   5957 
   5958 four_bytes_match_emit_encodeBetterBlockAsm:
   5959 	MOVL BX, R10
   5960 	SHRL $0x10, R10
   5961 	MOVB $0xf8, (AX)
   5962 	MOVW BX, 1(AX)
   5963 	MOVB R10, 3(AX)
   5964 	ADDQ $0x04, AX
   5965 	JMP  memmove_long_match_emit_encodeBetterBlockAsm
   5966 
   5967 three_bytes_match_emit_encodeBetterBlockAsm:
   5968 	MOVB $0xf4, (AX)
   5969 	MOVW BX, 1(AX)
   5970 	ADDQ $0x03, AX
   5971 	JMP  memmove_long_match_emit_encodeBetterBlockAsm
   5972 
   5973 two_bytes_match_emit_encodeBetterBlockAsm:
   5974 	MOVB $0xf0, (AX)
   5975 	MOVB BL, 1(AX)
   5976 	ADDQ $0x02, AX
   5977 	CMPL BX, $0x40
   5978 	JB   memmove_match_emit_encodeBetterBlockAsm
   5979 	JMP  memmove_long_match_emit_encodeBetterBlockAsm
   5980 
   5981 one_byte_match_emit_encodeBetterBlockAsm:
   5982 	SHLB $0x02, BL
   5983 	MOVB BL, (AX)
   5984 	ADDQ $0x01, AX
   5985 
   5986 memmove_match_emit_encodeBetterBlockAsm:
   5987 	LEAQ (AX)(R8*1), BX
   5988 
   5989 	// genMemMoveShort
   5990 	CMPQ R8, $0x04
   5991 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4
   5992 	CMPQ R8, $0x08
   5993 	JB   emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7
   5994 	CMPQ R8, $0x10
   5995 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16
   5996 	CMPQ R8, $0x20
   5997 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32
   5998 	JMP  emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64
   5999 
   6000 emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4:
   6001 	MOVL (R9), R10
   6002 	MOVL R10, (AX)
   6003 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm
   6004 
   6005 emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7:
   6006 	MOVL (R9), R10
   6007 	MOVL -4(R9)(R8*1), R9
   6008 	MOVL R10, (AX)
   6009 	MOVL R9, -4(AX)(R8*1)
   6010 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm
   6011 
   6012 emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16:
   6013 	MOVQ (R9), R10
   6014 	MOVQ -8(R9)(R8*1), R9
   6015 	MOVQ R10, (AX)
   6016 	MOVQ R9, -8(AX)(R8*1)
   6017 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm
   6018 
   6019 emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32:
   6020 	MOVOU (R9), X0
   6021 	MOVOU -16(R9)(R8*1), X1
   6022 	MOVOU X0, (AX)
   6023 	MOVOU X1, -16(AX)(R8*1)
   6024 	JMP   memmove_end_copy_match_emit_encodeBetterBlockAsm
   6025 
   6026 emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64:
   6027 	MOVOU (R9), X0
   6028 	MOVOU 16(R9), X1
   6029 	MOVOU -32(R9)(R8*1), X2
   6030 	MOVOU -16(R9)(R8*1), X3
   6031 	MOVOU X0, (AX)
   6032 	MOVOU X1, 16(AX)
   6033 	MOVOU X2, -32(AX)(R8*1)
   6034 	MOVOU X3, -16(AX)(R8*1)
   6035 
   6036 memmove_end_copy_match_emit_encodeBetterBlockAsm:
   6037 	MOVQ BX, AX
   6038 	JMP  emit_literal_done_match_emit_encodeBetterBlockAsm
   6039 
   6040 memmove_long_match_emit_encodeBetterBlockAsm:
   6041 	LEAQ (AX)(R8*1), BX
   6042 
   6043 	// genMemMoveLong
   6044 	MOVOU (R9), X0
   6045 	MOVOU 16(R9), X1
   6046 	MOVOU -32(R9)(R8*1), X2
   6047 	MOVOU -16(R9)(R8*1), X3
   6048 	MOVQ  R8, R12
   6049 	SHRQ  $0x05, R12
   6050 	MOVQ  AX, R10
   6051 	ANDL  $0x0000001f, R10
   6052 	MOVQ  $0x00000040, R13
   6053 	SUBQ  R10, R13
   6054 	DECQ  R12
   6055 	JA    emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32
   6056 	LEAQ  -32(R9)(R13*1), R10
   6057 	LEAQ  -32(AX)(R13*1), R14
   6058 
   6059 emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back:
   6060 	MOVOU (R10), X4
   6061 	MOVOU 16(R10), X5
   6062 	MOVOA X4, (R14)
   6063 	MOVOA X5, 16(R14)
   6064 	ADDQ  $0x20, R14
   6065 	ADDQ  $0x20, R10
   6066 	ADDQ  $0x20, R13
   6067 	DECQ  R12
   6068 	JNA   emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back
   6069 
   6070 emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32:
   6071 	MOVOU -32(R9)(R13*1), X4
   6072 	MOVOU -16(R9)(R13*1), X5
   6073 	MOVOA X4, -32(AX)(R13*1)
   6074 	MOVOA X5, -16(AX)(R13*1)
   6075 	ADDQ  $0x20, R13
   6076 	CMPQ  R8, R13
   6077 	JAE   emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32
   6078 	MOVOU X0, (AX)
   6079 	MOVOU X1, 16(AX)
   6080 	MOVOU X2, -32(AX)(R8*1)
   6081 	MOVOU X3, -16(AX)(R8*1)
   6082 	MOVQ  BX, AX
   6083 
   6084 emit_literal_done_match_emit_encodeBetterBlockAsm:
   6085 	ADDL R11, CX
   6086 	ADDL $0x04, R11
   6087 	MOVL CX, 12(SP)
   6088 
   6089 	// emitCopy
   6090 	CMPL DI, $0x00010000
   6091 	JB   two_byte_offset_match_nolit_encodeBetterBlockAsm
   6092 	CMPL R11, $0x40
   6093 	JBE  four_bytes_remain_match_nolit_encodeBetterBlockAsm
   6094 	MOVB $0xff, (AX)
   6095 	MOVL DI, 1(AX)
   6096 	LEAL -64(R11), R11
   6097 	ADDQ $0x05, AX
   6098 	CMPL R11, $0x04
   6099 	JB   four_bytes_remain_match_nolit_encodeBetterBlockAsm
   6100 
   6101 	// emitRepeat
   6102 emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy:
   6103 	MOVL R11, BX
   6104 	LEAL -4(R11), R11
   6105 	CMPL BX, $0x08
   6106 	JBE  repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy
   6107 	CMPL BX, $0x0c
   6108 	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy
   6109 	CMPL DI, $0x00000800
   6110 	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy
   6111 
   6112 cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy:
   6113 	CMPL R11, $0x00000104
   6114 	JB   repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy
   6115 	CMPL R11, $0x00010100
   6116 	JB   repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy
   6117 	CMPL R11, $0x0100ffff
   6118 	JB   repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy
   6119 	LEAL -16842747(R11), R11
   6120 	MOVL $0xfffb001d, (AX)
   6121 	MOVB $0xff, 4(AX)
   6122 	ADDQ $0x05, AX
   6123 	JMP  emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy
   6124 
   6125 repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy:
   6126 	LEAL -65536(R11), R11
   6127 	MOVL R11, DI
   6128 	MOVW $0x001d, (AX)
   6129 	MOVW R11, 2(AX)
   6130 	SARL $0x10, DI
   6131 	MOVB DI, 4(AX)
   6132 	ADDQ $0x05, AX
   6133 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
   6134 
   6135 repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy:
   6136 	LEAL -256(R11), R11
   6137 	MOVW $0x0019, (AX)
   6138 	MOVW R11, 2(AX)
   6139 	ADDQ $0x04, AX
   6140 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
   6141 
   6142 repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy:
   6143 	LEAL -4(R11), R11
   6144 	MOVW $0x0015, (AX)
   6145 	MOVB R11, 2(AX)
   6146 	ADDQ $0x03, AX
   6147 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
   6148 
   6149 repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy:
   6150 	SHLL $0x02, R11
   6151 	ORL  $0x01, R11
   6152 	MOVW R11, (AX)
   6153 	ADDQ $0x02, AX
   6154 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
   6155 
   6156 repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy:
   6157 	XORQ BX, BX
   6158 	LEAL 1(BX)(R11*4), R11
   6159 	MOVB DI, 1(AX)
   6160 	SARL $0x08, DI
   6161 	SHLL $0x05, DI
   6162 	ORL  DI, R11
   6163 	MOVB R11, (AX)
   6164 	ADDQ $0x02, AX
   6165 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
   6166 
   6167 four_bytes_remain_match_nolit_encodeBetterBlockAsm:
   6168 	TESTL R11, R11
   6169 	JZ    match_nolit_emitcopy_end_encodeBetterBlockAsm
   6170 	XORL  BX, BX
   6171 	LEAL  -1(BX)(R11*4), R11
   6172 	MOVB  R11, (AX)
   6173 	MOVL  DI, 1(AX)
   6174 	ADDQ  $0x05, AX
   6175 	JMP   match_nolit_emitcopy_end_encodeBetterBlockAsm
   6176 
   6177 two_byte_offset_match_nolit_encodeBetterBlockAsm:
   6178 	CMPL R11, $0x40
   6179 	JBE  two_byte_offset_short_match_nolit_encodeBetterBlockAsm
   6180 	CMPL DI, $0x00000800
   6181 	JAE  long_offset_short_match_nolit_encodeBetterBlockAsm
   6182 	MOVL $0x00000001, BX
   6183 	LEAL 16(BX), BX
   6184 	MOVB DI, 1(AX)
   6185 	MOVL DI, R8
   6186 	SHRL $0x08, R8
   6187 	SHLL $0x05, R8
   6188 	ORL  R8, BX
   6189 	MOVB BL, (AX)
   6190 	ADDQ $0x02, AX
   6191 	SUBL $0x08, R11
   6192 
   6193 	// emitRepeat
   6194 	LEAL -4(R11), R11
   6195 	JMP  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
   6196 
   6197 emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
   6198 	MOVL R11, BX
   6199 	LEAL -4(R11), R11
   6200 	CMPL BX, $0x08
   6201 	JBE  repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
   6202 	CMPL BX, $0x0c
   6203 	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
   6204 	CMPL DI, $0x00000800
   6205 	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
   6206 
   6207 cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
   6208 	CMPL R11, $0x00000104
   6209 	JB   repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
   6210 	CMPL R11, $0x00010100
   6211 	JB   repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
   6212 	CMPL R11, $0x0100ffff
   6213 	JB   repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
   6214 	LEAL -16842747(R11), R11
   6215 	MOVL $0xfffb001d, (AX)
   6216 	MOVB $0xff, 4(AX)
   6217 	ADDQ $0x05, AX
   6218 	JMP  emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
   6219 
   6220 repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
   6221 	LEAL -65536(R11), R11
   6222 	MOVL R11, DI
   6223 	MOVW $0x001d, (AX)
   6224 	MOVW R11, 2(AX)
   6225 	SARL $0x10, DI
   6226 	MOVB DI, 4(AX)
   6227 	ADDQ $0x05, AX
   6228 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
   6229 
   6230 repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
   6231 	LEAL -256(R11), R11
   6232 	MOVW $0x0019, (AX)
   6233 	MOVW R11, 2(AX)
   6234 	ADDQ $0x04, AX
   6235 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
   6236 
   6237 repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
   6238 	LEAL -4(R11), R11
   6239 	MOVW $0x0015, (AX)
   6240 	MOVB R11, 2(AX)
   6241 	ADDQ $0x03, AX
   6242 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
   6243 
   6244 repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
   6245 	SHLL $0x02, R11
   6246 	ORL  $0x01, R11
   6247 	MOVW R11, (AX)
   6248 	ADDQ $0x02, AX
   6249 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
   6250 
   6251 repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
   6252 	XORQ BX, BX
   6253 	LEAL 1(BX)(R11*4), R11
   6254 	MOVB DI, 1(AX)
   6255 	SARL $0x08, DI
   6256 	SHLL $0x05, DI
   6257 	ORL  DI, R11
   6258 	MOVB R11, (AX)
   6259 	ADDQ $0x02, AX
   6260 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
   6261 
   6262 long_offset_short_match_nolit_encodeBetterBlockAsm:
   6263 	MOVB $0xee, (AX)
   6264 	MOVW DI, 1(AX)
   6265 	LEAL -60(R11), R11
   6266 	ADDQ $0x03, AX
   6267 
   6268 	// emitRepeat
   6269 emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short:
   6270 	MOVL R11, BX
   6271 	LEAL -4(R11), R11
   6272 	CMPL BX, $0x08
   6273 	JBE  repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short
   6274 	CMPL BX, $0x0c
   6275 	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short
   6276 	CMPL DI, $0x00000800
   6277 	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short
   6278 
   6279 cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short:
   6280 	CMPL R11, $0x00000104
   6281 	JB   repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short
   6282 	CMPL R11, $0x00010100
   6283 	JB   repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short
   6284 	CMPL R11, $0x0100ffff
   6285 	JB   repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short
   6286 	LEAL -16842747(R11), R11
   6287 	MOVL $0xfffb001d, (AX)
   6288 	MOVB $0xff, 4(AX)
   6289 	ADDQ $0x05, AX
   6290 	JMP  emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short
   6291 
   6292 repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short:
   6293 	LEAL -65536(R11), R11
   6294 	MOVL R11, DI
   6295 	MOVW $0x001d, (AX)
   6296 	MOVW R11, 2(AX)
   6297 	SARL $0x10, DI
   6298 	MOVB DI, 4(AX)
   6299 	ADDQ $0x05, AX
   6300 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
   6301 
   6302 repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short:
   6303 	LEAL -256(R11), R11
   6304 	MOVW $0x0019, (AX)
   6305 	MOVW R11, 2(AX)
   6306 	ADDQ $0x04, AX
   6307 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
   6308 
   6309 repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short:
   6310 	LEAL -4(R11), R11
   6311 	MOVW $0x0015, (AX)
   6312 	MOVB R11, 2(AX)
   6313 	ADDQ $0x03, AX
   6314 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
   6315 
   6316 repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short:
   6317 	SHLL $0x02, R11
   6318 	ORL  $0x01, R11
   6319 	MOVW R11, (AX)
   6320 	ADDQ $0x02, AX
   6321 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
   6322 
   6323 repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short:
   6324 	XORQ BX, BX
   6325 	LEAL 1(BX)(R11*4), R11
   6326 	MOVB DI, 1(AX)
   6327 	SARL $0x08, DI
   6328 	SHLL $0x05, DI
   6329 	ORL  DI, R11
   6330 	MOVB R11, (AX)
   6331 	ADDQ $0x02, AX
   6332 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
   6333 
   6334 two_byte_offset_short_match_nolit_encodeBetterBlockAsm:
   6335 	MOVL R11, BX
   6336 	SHLL $0x02, BX
   6337 	CMPL R11, $0x0c
   6338 	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm
   6339 	CMPL DI, $0x00000800
   6340 	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm
   6341 	LEAL -15(BX), BX
   6342 	MOVB DI, 1(AX)
   6343 	SHRL $0x08, DI
   6344 	SHLL $0x05, DI
   6345 	ORL  DI, BX
   6346 	MOVB BL, (AX)
   6347 	ADDQ $0x02, AX
   6348 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
   6349 
   6350 emit_copy_three_match_nolit_encodeBetterBlockAsm:
   6351 	LEAL -2(BX), BX
   6352 	MOVB BL, (AX)
   6353 	MOVW DI, 1(AX)
   6354 	ADDQ $0x03, AX
   6355 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
   6356 
   6357 match_is_repeat_encodeBetterBlockAsm:
   6358 	MOVL 12(SP), BX
   6359 	CMPL BX, SI
   6360 	JEQ  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm
   6361 	MOVL SI, R8
   6362 	MOVL SI, 12(SP)
   6363 	LEAQ (DX)(BX*1), R9
   6364 	SUBL BX, R8
   6365 	LEAL -1(R8), BX
   6366 	CMPL BX, $0x3c
   6367 	JB   one_byte_match_emit_repeat_encodeBetterBlockAsm
   6368 	CMPL BX, $0x00000100
   6369 	JB   two_bytes_match_emit_repeat_encodeBetterBlockAsm
   6370 	CMPL BX, $0x00010000
   6371 	JB   three_bytes_match_emit_repeat_encodeBetterBlockAsm
   6372 	CMPL BX, $0x01000000
   6373 	JB   four_bytes_match_emit_repeat_encodeBetterBlockAsm
   6374 	MOVB $0xfc, (AX)
   6375 	MOVL BX, 1(AX)
   6376 	ADDQ $0x05, AX
   6377 	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm
   6378 
   6379 four_bytes_match_emit_repeat_encodeBetterBlockAsm:
   6380 	MOVL BX, R10
   6381 	SHRL $0x10, R10
   6382 	MOVB $0xf8, (AX)
   6383 	MOVW BX, 1(AX)
   6384 	MOVB R10, 3(AX)
   6385 	ADDQ $0x04, AX
   6386 	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm
   6387 
   6388 three_bytes_match_emit_repeat_encodeBetterBlockAsm:
   6389 	MOVB $0xf4, (AX)
   6390 	MOVW BX, 1(AX)
   6391 	ADDQ $0x03, AX
   6392 	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm
   6393 
   6394 two_bytes_match_emit_repeat_encodeBetterBlockAsm:
   6395 	MOVB $0xf0, (AX)
   6396 	MOVB BL, 1(AX)
   6397 	ADDQ $0x02, AX
   6398 	CMPL BX, $0x40
   6399 	JB   memmove_match_emit_repeat_encodeBetterBlockAsm
   6400 	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm
   6401 
   6402 one_byte_match_emit_repeat_encodeBetterBlockAsm:
   6403 	SHLB $0x02, BL
   6404 	MOVB BL, (AX)
   6405 	ADDQ $0x01, AX
   6406 
   6407 memmove_match_emit_repeat_encodeBetterBlockAsm:
   6408 	LEAQ (AX)(R8*1), BX
   6409 
   6410 	// genMemMoveShort
   6411 	CMPQ R8, $0x04
   6412 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4
   6413 	CMPQ R8, $0x08
   6414 	JB   emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7
   6415 	CMPQ R8, $0x10
   6416 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16
   6417 	CMPQ R8, $0x20
   6418 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32
   6419 	JMP  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64
   6420 
   6421 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4:
   6422 	MOVL (R9), R10
   6423 	MOVL R10, (AX)
   6424 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
   6425 
   6426 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7:
   6427 	MOVL (R9), R10
   6428 	MOVL -4(R9)(R8*1), R9
   6429 	MOVL R10, (AX)
   6430 	MOVL R9, -4(AX)(R8*1)
   6431 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
   6432 
   6433 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16:
   6434 	MOVQ (R9), R10
   6435 	MOVQ -8(R9)(R8*1), R9
   6436 	MOVQ R10, (AX)
   6437 	MOVQ R9, -8(AX)(R8*1)
   6438 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
   6439 
   6440 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32:
   6441 	MOVOU (R9), X0
   6442 	MOVOU -16(R9)(R8*1), X1
   6443 	MOVOU X0, (AX)
   6444 	MOVOU X1, -16(AX)(R8*1)
   6445 	JMP   memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
   6446 
   6447 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64:
   6448 	MOVOU (R9), X0
   6449 	MOVOU 16(R9), X1
   6450 	MOVOU -32(R9)(R8*1), X2
   6451 	MOVOU -16(R9)(R8*1), X3
   6452 	MOVOU X0, (AX)
   6453 	MOVOU X1, 16(AX)
   6454 	MOVOU X2, -32(AX)(R8*1)
   6455 	MOVOU X3, -16(AX)(R8*1)
   6456 
   6457 memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm:
   6458 	MOVQ BX, AX
   6459 	JMP  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm
   6460 
   6461 memmove_long_match_emit_repeat_encodeBetterBlockAsm:
   6462 	LEAQ (AX)(R8*1), BX
   6463 
   6464 	// genMemMoveLong
   6465 	MOVOU (R9), X0
   6466 	MOVOU 16(R9), X1
   6467 	MOVOU -32(R9)(R8*1), X2
   6468 	MOVOU -16(R9)(R8*1), X3
   6469 	MOVQ  R8, R12
   6470 	SHRQ  $0x05, R12
   6471 	MOVQ  AX, R10
   6472 	ANDL  $0x0000001f, R10
   6473 	MOVQ  $0x00000040, R13
   6474 	SUBQ  R10, R13
   6475 	DECQ  R12
   6476 	JA    emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32
   6477 	LEAQ  -32(R9)(R13*1), R10
   6478 	LEAQ  -32(AX)(R13*1), R14
   6479 
   6480 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back:
   6481 	MOVOU (R10), X4
   6482 	MOVOU 16(R10), X5
   6483 	MOVOA X4, (R14)
   6484 	MOVOA X5, 16(R14)
   6485 	ADDQ  $0x20, R14
   6486 	ADDQ  $0x20, R10
   6487 	ADDQ  $0x20, R13
   6488 	DECQ  R12
   6489 	JNA   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back
   6490 
   6491 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32:
   6492 	MOVOU -32(R9)(R13*1), X4
   6493 	MOVOU -16(R9)(R13*1), X5
   6494 	MOVOA X4, -32(AX)(R13*1)
   6495 	MOVOA X5, -16(AX)(R13*1)
   6496 	ADDQ  $0x20, R13
   6497 	CMPQ  R8, R13
   6498 	JAE   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32
   6499 	MOVOU X0, (AX)
   6500 	MOVOU X1, 16(AX)
   6501 	MOVOU X2, -32(AX)(R8*1)
   6502 	MOVOU X3, -16(AX)(R8*1)
   6503 	MOVQ  BX, AX
   6504 
   6505 emit_literal_done_match_emit_repeat_encodeBetterBlockAsm:
   6506 	ADDL R11, CX
   6507 	ADDL $0x04, R11
   6508 	MOVL CX, 12(SP)
   6509 
   6510 	// emitRepeat
   6511 emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm:
   6512 	MOVL R11, BX
   6513 	LEAL -4(R11), R11
   6514 	CMPL BX, $0x08
   6515 	JBE  repeat_two_match_nolit_repeat_encodeBetterBlockAsm
   6516 	CMPL BX, $0x0c
   6517 	JAE  cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm
   6518 	CMPL DI, $0x00000800
   6519 	JB   repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm
   6520 
   6521 cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm:
   6522 	CMPL R11, $0x00000104
   6523 	JB   repeat_three_match_nolit_repeat_encodeBetterBlockAsm
   6524 	CMPL R11, $0x00010100
   6525 	JB   repeat_four_match_nolit_repeat_encodeBetterBlockAsm
   6526 	CMPL R11, $0x0100ffff
   6527 	JB   repeat_five_match_nolit_repeat_encodeBetterBlockAsm
   6528 	LEAL -16842747(R11), R11
   6529 	MOVL $0xfffb001d, (AX)
   6530 	MOVB $0xff, 4(AX)
   6531 	ADDQ $0x05, AX
   6532 	JMP  emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm
   6533 
   6534 repeat_five_match_nolit_repeat_encodeBetterBlockAsm:
   6535 	LEAL -65536(R11), R11
   6536 	MOVL R11, DI
   6537 	MOVW $0x001d, (AX)
   6538 	MOVW R11, 2(AX)
   6539 	SARL $0x10, DI
   6540 	MOVB DI, 4(AX)
   6541 	ADDQ $0x05, AX
   6542 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
   6543 
   6544 repeat_four_match_nolit_repeat_encodeBetterBlockAsm:
   6545 	LEAL -256(R11), R11
   6546 	MOVW $0x0019, (AX)
   6547 	MOVW R11, 2(AX)
   6548 	ADDQ $0x04, AX
   6549 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
   6550 
   6551 repeat_three_match_nolit_repeat_encodeBetterBlockAsm:
   6552 	LEAL -4(R11), R11
   6553 	MOVW $0x0015, (AX)
   6554 	MOVB R11, 2(AX)
   6555 	ADDQ $0x03, AX
   6556 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
   6557 
   6558 repeat_two_match_nolit_repeat_encodeBetterBlockAsm:
   6559 	SHLL $0x02, R11
   6560 	ORL  $0x01, R11
   6561 	MOVW R11, (AX)
   6562 	ADDQ $0x02, AX
   6563 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
   6564 
   6565 repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm:
   6566 	XORQ BX, BX
   6567 	LEAL 1(BX)(R11*4), R11
   6568 	MOVB DI, 1(AX)
   6569 	SARL $0x08, DI
   6570 	SHLL $0x05, DI
   6571 	ORL  DI, R11
   6572 	MOVB R11, (AX)
   6573 	ADDQ $0x02, AX
   6574 
   6575 match_nolit_emitcopy_end_encodeBetterBlockAsm:
   6576 	CMPL CX, 8(SP)
   6577 	JAE  emit_remainder_encodeBetterBlockAsm
   6578 	CMPQ AX, (SP)
   6579 	JB   match_nolit_dst_ok_encodeBetterBlockAsm
   6580 	MOVQ $0x00000000, ret+48(FP)
   6581 	RET
   6582 
   6583 match_nolit_dst_ok_encodeBetterBlockAsm:
   6584 	MOVQ  $0x00cf1bbcdcbfa563, BX
   6585 	MOVQ  $0x9e3779b1, DI
   6586 	LEAQ  1(SI), SI
   6587 	LEAQ  -2(CX), R8
   6588 	MOVQ  (DX)(SI*1), R9
   6589 	MOVQ  1(DX)(SI*1), R10
   6590 	MOVQ  (DX)(R8*1), R11
   6591 	MOVQ  1(DX)(R8*1), R12
   6592 	SHLQ  $0x08, R9
   6593 	IMULQ BX, R9
   6594 	SHRQ  $0x2f, R9
   6595 	SHLQ  $0x20, R10
   6596 	IMULQ DI, R10
   6597 	SHRQ  $0x32, R10
   6598 	SHLQ  $0x08, R11
   6599 	IMULQ BX, R11
   6600 	SHRQ  $0x2f, R11
   6601 	SHLQ  $0x20, R12
   6602 	IMULQ DI, R12
   6603 	SHRQ  $0x32, R12
   6604 	LEAQ  1(SI), DI
   6605 	LEAQ  1(R8), R13
   6606 	MOVL  SI, 24(SP)(R9*4)
   6607 	MOVL  R8, 24(SP)(R11*4)
   6608 	MOVL  DI, 524312(SP)(R10*4)
   6609 	MOVL  R13, 524312(SP)(R12*4)
   6610 	ADDQ  $0x01, SI
   6611 	SUBQ  $0x01, R8
   6612 
   6613 index_loop_encodeBetterBlockAsm:
   6614 	CMPQ  SI, R8
   6615 	JAE   search_loop_encodeBetterBlockAsm
   6616 	MOVQ  (DX)(SI*1), DI
   6617 	MOVQ  (DX)(R8*1), R9
   6618 	SHLQ  $0x08, DI
   6619 	IMULQ BX, DI
   6620 	SHRQ  $0x2f, DI
   6621 	SHLQ  $0x08, R9
   6622 	IMULQ BX, R9
   6623 	SHRQ  $0x2f, R9
   6624 	MOVL  SI, 24(SP)(DI*4)
   6625 	MOVL  R8, 24(SP)(R9*4)
   6626 	ADDQ  $0x02, SI
   6627 	SUBQ  $0x02, R8
   6628 	JMP   index_loop_encodeBetterBlockAsm
   6629 
   6630 emit_remainder_encodeBetterBlockAsm:
   6631 	MOVQ src_len+32(FP), CX
   6632 	SUBL 12(SP), CX
   6633 	LEAQ 5(AX)(CX*1), CX
   6634 	CMPQ CX, (SP)
   6635 	JB   emit_remainder_ok_encodeBetterBlockAsm
   6636 	MOVQ $0x00000000, ret+48(FP)
   6637 	RET
   6638 
   6639 emit_remainder_ok_encodeBetterBlockAsm:
   6640 	MOVQ src_len+32(FP), CX
   6641 	MOVL 12(SP), BX
   6642 	CMPL BX, CX
   6643 	JEQ  emit_literal_done_emit_remainder_encodeBetterBlockAsm
   6644 	MOVL CX, SI
   6645 	MOVL CX, 12(SP)
   6646 	LEAQ (DX)(BX*1), CX
   6647 	SUBL BX, SI
   6648 	LEAL -1(SI), DX
   6649 	CMPL DX, $0x3c
   6650 	JB   one_byte_emit_remainder_encodeBetterBlockAsm
   6651 	CMPL DX, $0x00000100
   6652 	JB   two_bytes_emit_remainder_encodeBetterBlockAsm
   6653 	CMPL DX, $0x00010000
   6654 	JB   three_bytes_emit_remainder_encodeBetterBlockAsm
   6655 	CMPL DX, $0x01000000
   6656 	JB   four_bytes_emit_remainder_encodeBetterBlockAsm
   6657 	MOVB $0xfc, (AX)
   6658 	MOVL DX, 1(AX)
   6659 	ADDQ $0x05, AX
   6660 	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm
   6661 
   6662 four_bytes_emit_remainder_encodeBetterBlockAsm:
   6663 	MOVL DX, BX
   6664 	SHRL $0x10, BX
   6665 	MOVB $0xf8, (AX)
   6666 	MOVW DX, 1(AX)
   6667 	MOVB BL, 3(AX)
   6668 	ADDQ $0x04, AX
   6669 	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm
   6670 
   6671 three_bytes_emit_remainder_encodeBetterBlockAsm:
   6672 	MOVB $0xf4, (AX)
   6673 	MOVW DX, 1(AX)
   6674 	ADDQ $0x03, AX
   6675 	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm
   6676 
   6677 two_bytes_emit_remainder_encodeBetterBlockAsm:
   6678 	MOVB $0xf0, (AX)
   6679 	MOVB DL, 1(AX)
   6680 	ADDQ $0x02, AX
   6681 	CMPL DX, $0x40
   6682 	JB   memmove_emit_remainder_encodeBetterBlockAsm
   6683 	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm
   6684 
   6685 one_byte_emit_remainder_encodeBetterBlockAsm:
   6686 	SHLB $0x02, DL
   6687 	MOVB DL, (AX)
   6688 	ADDQ $0x01, AX
   6689 
   6690 memmove_emit_remainder_encodeBetterBlockAsm:
   6691 	LEAQ (AX)(SI*1), DX
   6692 	MOVL SI, BX
   6693 
   6694 	// genMemMoveShort
   6695 	CMPQ BX, $0x03
   6696 	JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_1or2
   6697 	JE   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_3
   6698 	CMPQ BX, $0x08
   6699 	JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4through7
   6700 	CMPQ BX, $0x10
   6701 	JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_8through16
   6702 	CMPQ BX, $0x20
   6703 	JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_17through32
   6704 	JMP  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64
   6705 
   6706 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_1or2:
   6707 	MOVB (CX), SI
   6708 	MOVB -1(CX)(BX*1), CL
   6709 	MOVB SI, (AX)
   6710 	MOVB CL, -1(AX)(BX*1)
   6711 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm
   6712 
   6713 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_3:
   6714 	MOVW (CX), SI
   6715 	MOVB 2(CX), CL
   6716 	MOVW SI, (AX)
   6717 	MOVB CL, 2(AX)
   6718 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm
   6719 
   6720 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4through7:
   6721 	MOVL (CX), SI
   6722 	MOVL -4(CX)(BX*1), CX
   6723 	MOVL SI, (AX)
   6724 	MOVL CX, -4(AX)(BX*1)
   6725 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm
   6726 
   6727 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_8through16:
   6728 	MOVQ (CX), SI
   6729 	MOVQ -8(CX)(BX*1), CX
   6730 	MOVQ SI, (AX)
   6731 	MOVQ CX, -8(AX)(BX*1)
   6732 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm
   6733 
   6734 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_17through32:
   6735 	MOVOU (CX), X0
   6736 	MOVOU -16(CX)(BX*1), X1
   6737 	MOVOU X0, (AX)
   6738 	MOVOU X1, -16(AX)(BX*1)
   6739 	JMP   memmove_end_copy_emit_remainder_encodeBetterBlockAsm
   6740 
   6741 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64:
   6742 	MOVOU (CX), X0
   6743 	MOVOU 16(CX), X1
   6744 	MOVOU -32(CX)(BX*1), X2
   6745 	MOVOU -16(CX)(BX*1), X3
   6746 	MOVOU X0, (AX)
   6747 	MOVOU X1, 16(AX)
   6748 	MOVOU X2, -32(AX)(BX*1)
   6749 	MOVOU X3, -16(AX)(BX*1)
   6750 
   6751 memmove_end_copy_emit_remainder_encodeBetterBlockAsm:
   6752 	MOVQ DX, AX
   6753 	JMP  emit_literal_done_emit_remainder_encodeBetterBlockAsm
   6754 
   6755 memmove_long_emit_remainder_encodeBetterBlockAsm:
   6756 	LEAQ (AX)(SI*1), DX
   6757 	MOVL SI, BX
   6758 
   6759 	// genMemMoveLong
   6760 	MOVOU (CX), X0
   6761 	MOVOU 16(CX), X1
   6762 	MOVOU -32(CX)(BX*1), X2
   6763 	MOVOU -16(CX)(BX*1), X3
   6764 	MOVQ  BX, DI
   6765 	SHRQ  $0x05, DI
   6766 	MOVQ  AX, SI
   6767 	ANDL  $0x0000001f, SI
   6768 	MOVQ  $0x00000040, R8
   6769 	SUBQ  SI, R8
   6770 	DECQ  DI
   6771 	JA    emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32
   6772 	LEAQ  -32(CX)(R8*1), SI
   6773 	LEAQ  -32(AX)(R8*1), R9
   6774 
   6775 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back:
   6776 	MOVOU (SI), X4
   6777 	MOVOU 16(SI), X5
   6778 	MOVOA X4, (R9)
   6779 	MOVOA X5, 16(R9)
   6780 	ADDQ  $0x20, R9
   6781 	ADDQ  $0x20, SI
   6782 	ADDQ  $0x20, R8
   6783 	DECQ  DI
   6784 	JNA   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back
   6785 
   6786 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32:
   6787 	MOVOU -32(CX)(R8*1), X4
   6788 	MOVOU -16(CX)(R8*1), X5
   6789 	MOVOA X4, -32(AX)(R8*1)
   6790 	MOVOA X5, -16(AX)(R8*1)
   6791 	ADDQ  $0x20, R8
   6792 	CMPQ  BX, R8
   6793 	JAE   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32
   6794 	MOVOU X0, (AX)
   6795 	MOVOU X1, 16(AX)
   6796 	MOVOU X2, -32(AX)(BX*1)
   6797 	MOVOU X3, -16(AX)(BX*1)
   6798 	MOVQ  DX, AX
   6799 
   6800 emit_literal_done_emit_remainder_encodeBetterBlockAsm:
   6801 	MOVQ dst_base+0(FP), CX
   6802 	SUBQ CX, AX
   6803 	MOVQ AX, ret+48(FP)
   6804 	RET
   6805 
   6806 // func encodeBetterBlockAsm4MB(dst []byte, src []byte) int
   6807 // Requires: BMI, SSE2
        //
        // Stack frame as used by the visible code:
        //   (SP)         dst limit pointer: dst_base + src_len - 6 - src_len/32
        //   8(SP)        src_len - 8; positions at or past it branch to emit_remainder
        //   12(SP)       src offset of the first byte not yet written to dst
        //   16(SP)       offset (distance) of the most recently accepted match
        //   20(SP)       src offset to resume searching from when no match is found
        //   24(SP)       table of 4-byte src offsets indexed by a 17-bit hash
        //   524312(SP)   table of 4-byte src offsets indexed by a 14-bit hash
        // Registers: AX = dst write cursor, DX = src base, CX = current src offset.
        // Paths that would reach the dst limit store 0 in ret and return.
   6808 TEXT ·encodeBetterBlockAsm4MB(SB), $589848-56
   6809 	MOVQ dst_base+0(FP), AX // AX = dst write cursor
   6810 	MOVQ $0x00001200, CX // 0x1200 passes x 128 bytes = 589824 bytes: both tables
   6811 	LEAQ 24(SP), DX // first byte of the tables
   6812 	PXOR X0, X0
   6813 
   6814 zero_loop_encodeBetterBlockAsm4MB:
        // Clear 128 bytes per pass with eight 16-byte stores.
   6815 	MOVOU X0, (DX)
   6816 	MOVOU X0, 16(DX)
   6817 	MOVOU X0, 32(DX)
   6818 	MOVOU X0, 48(DX)
   6819 	MOVOU X0, 64(DX)
   6820 	MOVOU X0, 80(DX)
   6821 	MOVOU X0, 96(DX)
   6822 	MOVOU X0, 112(DX)
   6823 	ADDQ  $0x80, DX
   6824 	DECQ  CX
   6825 	JNZ   zero_loop_encodeBetterBlockAsm4MB
   6826 	MOVL  $0x00000000, 12(SP) // nothing emitted yet
   6827 	MOVQ  src_len+32(FP), CX
   6828 	LEAQ  -6(CX), DX
   6829 	LEAQ  -8(CX), BX
   6830 	MOVL  BX, 8(SP) // search limit = src_len - 8
   6831 	SHRQ  $0x05, CX
   6832 	SUBL  CX, DX // DX = src_len - 6 - src_len/32
   6833 	LEAQ  (AX)(DX*1), DX
   6834 	MOVQ  DX, (SP) // dst limit pointer
   6835 	MOVL  $0x00000001, CX // searching starts at src offset 1
   6836 	MOVL  $0x00000000, 16(SP) // no previous match offset
   6837 	MOVQ  src_base+24(FP), DX // DX = src base for the rest of the search
   6838 
   6839 search_loop_encodeBetterBlockAsm4MB:
        // Probe src at offset CX. The position to try after a miss is
        // CX + 1 + ((CX - 12(SP)) >> 7), with the step capped at 100.
   6840 	MOVL CX, BX
   6841 	SUBL 12(SP), BX
   6842 	SHRL $0x07, BX
   6843 	CMPL BX, $0x63
   6844 	JBE  check_maxskip_ok_encodeBetterBlockAsm4MB
   6845 	LEAL 100(CX), BX
   6846 	JMP  check_maxskip_cont_encodeBetterBlockAsm4MB
   6847 
   6848 check_maxskip_ok_encodeBetterBlockAsm4MB:
   6849 	LEAL 1(CX)(BX*1), BX
   6850 
   6851 check_maxskip_cont_encodeBetterBlockAsm4MB:
   6852 	CMPL  BX, 8(SP) // next position at or past src_len - 8: stop searching
   6853 	JAE   emit_remainder_encodeBetterBlockAsm4MB
   6854 	MOVQ  (DX)(CX*1), SI // SI = 8 bytes of src at CX
   6855 	MOVL  BX, 20(SP) // remember the next position
   6856 	MOVQ  $0x00cf1bbcdcbfa563, R8 // multiplier for the 17-bit hash
   6857 	MOVQ  $0x9e3779b1, BX // multiplier for the 14-bit hash
   6858 	MOVQ  SI, R9
   6859 	MOVQ  SI, R10
   6860 	SHLQ  $0x08, R9 // drop the top byte: hash the low 7 bytes
   6861 	IMULQ R8, R9
   6862 	SHRQ  $0x2f, R9 // keep the top 17 bits
   6863 	SHLQ  $0x20, R10 // drop the top 4 bytes: hash the low 4 bytes
   6864 	IMULQ BX, R10
   6865 	SHRQ  $0x32, R10 // keep the top 14 bits
   6866 	MOVL  24(SP)(R9*4), BX // BX = candidate from the 17-bit table
   6867 	MOVL  524312(SP)(R10*4), DI // DI = candidate from the 14-bit table
   6868 	MOVL  CX, 24(SP)(R9*4) // both slots now point at the current position
   6869 	MOVL  CX, 524312(SP)(R10*4)
   6870 	MOVQ  (DX)(BX*1), R9 // 8 bytes at each candidate
   6871 	MOVQ  (DX)(DI*1), R10
   6872 	CMPQ  R9, SI // all 8 bytes equal at BX: use BX
   6873 	JEQ   candidate_match_encodeBetterBlockAsm4MB
   6874 	CMPQ  R10, SI // all 8 bytes equal at DI: use DI
   6875 	JNE   no_short_found_encodeBetterBlockAsm4MB
   6876 	MOVL  DI, BX
   6877 	JMP   candidate_match_encodeBetterBlockAsm4MB
   6878 
   6879 no_short_found_encodeBetterBlockAsm4MB:
        // Neither candidate matched on 8 bytes; retry comparing only the low 4 bytes.
   6880 	CMPL R9, SI
   6881 	JEQ  candidate_match_encodeBetterBlockAsm4MB
   6882 	CMPL R10, SI
   6883 	JEQ  candidateS_match_encodeBetterBlockAsm4MB
   6884 	MOVL 20(SP), CX // no match: continue at the saved next position
   6885 	JMP  search_loop_encodeBetterBlockAsm4MB
   6886 
   6887 candidateS_match_encodeBetterBlockAsm4MB:
        // The 14-bit-table candidate (DI) matched on 4 bytes. Before using it,
        // look up the 17-bit table for the bytes starting one position later.
   6888 	SHRQ  $0x08, SI // SI = the loaded bytes from CX+1 onward
   6889 	MOVQ  SI, R9
   6890 	SHLQ  $0x08, R9
   6891 	IMULQ R8, R9
   6892 	SHRQ  $0x2f, R9
   6893 	MOVL  24(SP)(R9*4), BX
   6894 	INCL  CX
   6895 	MOVL  CX, 24(SP)(R9*4) // record CX+1 in the 17-bit table
   6896 	CMPL  (DX)(BX*1), SI // 4 bytes at that candidate vs. 4 bytes at CX+1
   6897 	JEQ   candidate_match_encodeBetterBlockAsm4MB
   6898 	DECL  CX // lookup failed: restore CX and use the DI candidate
   6899 	MOVL  DI, BX
   6900 
   6901 candidate_match_encodeBetterBlockAsm4MB:
        // CX = current position, BX = candidate position. Walk both backwards
        // while the preceding bytes are equal, without going below 12(SP) for CX
        // or below 0 for BX.
   6902 	MOVL  12(SP), SI
   6903 	TESTL BX, BX
   6904 	JZ    match_extend_back_end_encodeBetterBlockAsm4MB
   6905 
   6906 match_extend_back_loop_encodeBetterBlockAsm4MB:
   6907 	CMPL CX, SI // stop once CX reaches the first unemitted byte
   6908 	JBE  match_extend_back_end_encodeBetterBlockAsm4MB
   6909 	MOVB -1(DX)(BX*1), DI
   6910 	MOVB -1(DX)(CX*1), R8
   6911 	CMPB DI, R8
   6912 	JNE  match_extend_back_end_encodeBetterBlockAsm4MB
   6913 	LEAL -1(CX), CX
   6914 	DECL BX
   6915 	JZ   match_extend_back_end_encodeBetterBlockAsm4MB
   6916 	JMP  match_extend_back_loop_encodeBetterBlockAsm4MB
   6917 
   6918 match_extend_back_end_encodeBetterBlockAsm4MB:
        // Bound check: AX + 4 + (CX - 12(SP)) must stay below the dst limit in (SP);
        // otherwise the function returns 0.
   6919 	MOVL CX, SI
   6920 	SUBL 12(SP), SI
   6921 	LEAQ 4(AX)(SI*1), SI
   6922 	CMPQ SI, (SP)
   6923 	JB   match_dst_size_check_encodeBetterBlockAsm4MB
   6924 	MOVQ $0x00000000, ret+48(FP)
   6925 	RET
   6926 
   6927 match_dst_size_check_encodeBetterBlockAsm4MB:
        // SI keeps the match start. CX and BX are advanced by 4 before counting,
        // so R11 below counts equal bytes starting 4 bytes into the match.
   6928 	MOVL CX, SI
   6929 	ADDL $0x04, CX
   6930 	ADDL $0x04, BX
   6931 	MOVQ src_len+32(FP), DI
   6932 	SUBL CX, DI // DI = bytes left in src after CX
   6933 	LEAQ (DX)(CX*1), R8 // R8 = &src[CX]
   6934 	LEAQ (DX)(BX*1), R9 // R9 = &src[BX]
   6935 
   6936 	// matchLen: R11 = number of equal bytes at R8 and R9, at most DI
   6937 	XORL R11, R11
   6938 	CMPL DI, $0x08
   6939 	JB   matchlen_match4_match_nolit_encodeBetterBlockAsm4MB
   6940 
   6941 matchlen_loopback_match_nolit_encodeBetterBlockAsm4MB:
        // Compare 8 bytes at a time; a non-zero XOR marks the first difference.
   6942 	MOVQ  (R8)(R11*1), R10
   6943 	XORQ  (R9)(R11*1), R10
   6944 	TESTQ R10, R10
   6945 	JZ    matchlen_loop_match_nolit_encodeBetterBlockAsm4MB
   6946 
   6947 #ifdef GOAMD64_v3
   6948 	TZCNTQ R10, R10
   6949 
   6950 #else
   6951 	BSFQ R10, R10
   6952 
   6953 #endif
   6954 	SARQ $0x03, R10 // index of lowest differing bit / 8 = equal bytes in this word
   6955 	LEAL (R11)(R10*1), R11
   6956 	JMP  match_nolit_end_encodeBetterBlockAsm4MB
   6957 
   6958 matchlen_loop_match_nolit_encodeBetterBlockAsm4MB:
   6959 	LEAL -8(DI), DI
   6960 	LEAL 8(R11), R11
   6961 	CMPL DI, $0x08
   6962 	JAE  matchlen_loopback_match_nolit_encodeBetterBlockAsm4MB
   6963 	JZ   match_nolit_end_encodeBetterBlockAsm4MB // NOTE(review): ZF here means DI == 8, which the JAE above already takes
   6964 
   6965 matchlen_match4_match_nolit_encodeBetterBlockAsm4MB:
        // Fewer than 8 bytes left: try 4, then 2, then 1.
   6966 	CMPL DI, $0x04
   6967 	JB   matchlen_match2_match_nolit_encodeBetterBlockAsm4MB
   6968 	MOVL (R8)(R11*1), R10
   6969 	CMPL (R9)(R11*1), R10
   6970 	JNE  matchlen_match2_match_nolit_encodeBetterBlockAsm4MB
   6971 	SUBL $0x04, DI
   6972 	LEAL 4(R11), R11
   6973 
   6974 matchlen_match2_match_nolit_encodeBetterBlockAsm4MB:
   6975 	CMPL DI, $0x02
   6976 	JB   matchlen_match1_match_nolit_encodeBetterBlockAsm4MB
   6977 	MOVW (R8)(R11*1), R10
   6978 	CMPW (R9)(R11*1), R10
   6979 	JNE  matchlen_match1_match_nolit_encodeBetterBlockAsm4MB
   6980 	SUBL $0x02, DI
   6981 	LEAL 2(R11), R11
   6982 
   6983 matchlen_match1_match_nolit_encodeBetterBlockAsm4MB:
   6984 	CMPL DI, $0x01
   6985 	JB   match_nolit_end_encodeBetterBlockAsm4MB
   6986 	MOVB (R8)(R11*1), R10
   6987 	CMPB (R9)(R11*1), R10
   6988 	JNE  match_nolit_end_encodeBetterBlockAsm4MB
   6989 	LEAL 1(R11), R11
   6990 
   6991 match_nolit_end_encodeBetterBlockAsm4MB:
        // DI = match offset (distance between current and candidate positions).
   6992 	MOVL CX, DI
   6993 	SUBL BX, DI
   6994 
   6995 	// Check if repeat: same offset as the previous match stored in 16(SP)
   6996 	CMPL 16(SP), DI
   6997 	JEQ  match_is_repeat_encodeBetterBlockAsm4MB
        // Reject the match only when R11 <= 1 and the offset exceeds 0xffff;
        // searching then resumes one past the saved next position.
   6998 	CMPL R11, $0x01
   6999 	JA   match_length_ok_encodeBetterBlockAsm4MB
   7000 	CMPL DI, $0x0000ffff
   7001 	JBE  match_length_ok_encodeBetterBlockAsm4MB
   7002 	MOVL 20(SP), CX
   7003 	INCL CX
   7004 	JMP  search_loop_encodeBetterBlockAsm4MB
   7005 
   7006 match_length_ok_encodeBetterBlockAsm4MB:
        // Accept the match: remember its offset, then write the pending literal
        // bytes src[12(SP) .. SI) to dst. SI is the match start.
   7007 	MOVL DI, 16(SP)
   7008 	MOVL 12(SP), BX
   7009 	CMPL BX, SI // nothing pending: skip literal emission
   7010 	JEQ  emit_literal_done_match_emit_encodeBetterBlockAsm4MB
   7011 	MOVL SI, R8
   7012 	MOVL SI, 12(SP)
   7013 	LEAQ (DX)(BX*1), R9 // R9 = first literal byte in src
   7014 	SUBL BX, R8 // R8 = literal length
   7015 	LEAL -1(R8), BX // BX = length - 1, the value stored in the header
   7016 	CMPL BX, $0x3c
   7017 	JB   one_byte_match_emit_encodeBetterBlockAsm4MB
   7018 	CMPL BX, $0x00000100
   7019 	JB   two_bytes_match_emit_encodeBetterBlockAsm4MB
   7020 	CMPL BX, $0x00010000
   7021 	JB   three_bytes_match_emit_encodeBetterBlockAsm4MB
        // 4-byte header: 0xf8 followed by the low 24 bits of length - 1.
   7022 	MOVL BX, R10
   7023 	SHRL $0x10, R10
   7024 	MOVB $0xf8, (AX)
   7025 	MOVW BX, 1(AX)
   7026 	MOVB R10, 3(AX)
   7027 	ADDQ $0x04, AX
   7028 	JMP  memmove_long_match_emit_encodeBetterBlockAsm4MB
   7029 
   7030 three_bytes_match_emit_encodeBetterBlockAsm4MB:
        // 3-byte header: 0xf4 followed by length - 1 as 16 bits.
   7031 	MOVB $0xf4, (AX)
   7032 	MOVW BX, 1(AX)
   7033 	ADDQ $0x03, AX
   7034 	JMP  memmove_long_match_emit_encodeBetterBlockAsm4MB
   7035 
   7036 two_bytes_match_emit_encodeBetterBlockAsm4MB:
        // 2-byte header: 0xf0 followed by length - 1 as 8 bits.
   7037 	MOVB $0xf0, (AX)
   7038 	MOVB BL, 1(AX)
   7039 	ADDQ $0x02, AX
   7040 	CMPL BX, $0x40 // length - 1 below 64 still uses the short copy
   7041 	JB   memmove_match_emit_encodeBetterBlockAsm4MB
   7042 	JMP  memmove_long_match_emit_encodeBetterBlockAsm4MB
   7043 
   7044 one_byte_match_emit_encodeBetterBlockAsm4MB:
        // 1-byte header: (length - 1) << 2.
   7045 	SHLB $0x02, BL
   7046 	MOVB BL, (AX)
   7047 	ADDQ $0x01, AX
   7048 
   7049 memmove_match_emit_encodeBetterBlockAsm4MB:
   7050 	LEAQ (AX)(R8*1), BX // BX = dst cursor after the literal bytes
   7051 
   7052 	// genMemMoveShort: copy R8 bytes from R9 to AX using size-class specific moves
   7053 	CMPQ R8, $0x04
   7054 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4
   7055 	CMPQ R8, $0x08
   7056 	JB   emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4through7
   7057 	CMPQ R8, $0x10
   7058 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_8through16
   7059 	CMPQ R8, $0x20
   7060 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_17through32
   7061 	JMP  emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_33through64
   7062 
   7063 emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4:
        // Always moves 4 bytes, also when R8 < 4; the cursor still advances by R8 only.
   7064 	MOVL (R9), R10
   7065 	MOVL R10, (AX)
   7066 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm4MB
   7067 
   7068 emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4through7:
        // Two 4-byte moves anchored at the start and at the end; they may overlap.
   7069 	MOVL (R9), R10
   7070 	MOVL -4(R9)(R8*1), R9
   7071 	MOVL R10, (AX)
   7072 	MOVL R9, -4(AX)(R8*1)
   7073 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm4MB
   7074 
   7075 emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_8through16:
   7076 	MOVQ (R9), R10
   7077 	MOVQ -8(R9)(R8*1), R9
   7078 	MOVQ R10, (AX)
   7079 	MOVQ R9, -8(AX)(R8*1)
   7080 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm4MB
   7081 
   7082 emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_17through32:
   7083 	MOVOU (R9), X0
   7084 	MOVOU -16(R9)(R8*1), X1
   7085 	MOVOU X0, (AX)
   7086 	MOVOU X1, -16(AX)(R8*1)
   7087 	JMP   memmove_end_copy_match_emit_encodeBetterBlockAsm4MB
   7088 
   7089 emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_33through64:
   7090 	MOVOU (R9), X0
   7091 	MOVOU 16(R9), X1
   7092 	MOVOU -32(R9)(R8*1), X2
   7093 	MOVOU -16(R9)(R8*1), X3
   7094 	MOVOU X0, (AX)
   7095 	MOVOU X1, 16(AX)
   7096 	MOVOU X2, -32(AX)(R8*1)
   7097 	MOVOU X3, -16(AX)(R8*1)
   7098 
   7099 memmove_end_copy_match_emit_encodeBetterBlockAsm4MB:
   7100 	MOVQ BX, AX // advance the dst cursor past the literal
   7101 	JMP  emit_literal_done_match_emit_encodeBetterBlockAsm4MB
   7102 
   7103 memmove_long_match_emit_encodeBetterBlockAsm4MB:
   7104 	LEAQ (AX)(R8*1), BX // BX = dst cursor after the literal bytes
   7105 
   7106 	// genMemMoveLong: first and last 32 bytes are held in X0-X3 and stored last
   7107 	MOVOU (R9), X0
   7108 	MOVOU 16(R9), X1
   7109 	MOVOU -32(R9)(R8*1), X2
   7110 	MOVOU -16(R9)(R8*1), X3
   7111 	MOVQ  R8, R12
   7112 	SHRQ  $0x05, R12 // R12 = length / 32
   7113 	MOVQ  AX, R10
   7114 	ANDL  $0x0000001f, R10
   7115 	MOVQ  $0x00000040, R13
   7116 	SUBQ  R10, R13 // R13 = 64 - (AX & 31), so AX + R13 - 32 is 32-byte aligned
   7117 	DECQ  R12
   7118 	JA    emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
   7119 	LEAQ  -32(R9)(R13*1), R10
   7120 	LEAQ  -32(AX)(R13*1), R14
   7121 
   7122 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_big_loop_back:
   7123 	MOVOU (R10), X4
   7124 	MOVOU 16(R10), X5
   7125 	MOVOA X4, (R14)
   7126 	MOVOA X5, 16(R14)
   7127 	ADDQ  $0x20, R14
   7128 	ADDQ  $0x20, R10
   7129 	ADDQ  $0x20, R13
   7130 	DECQ  R12
   7131 	JNA   emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_big_loop_back
   7132 
   7133 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32:
        // 32 bytes per pass: unaligned loads from src, aligned stores to dst.
   7134 	MOVOU -32(R9)(R13*1), X4
   7135 	MOVOU -16(R9)(R13*1), X5
   7136 	MOVOA X4, -32(AX)(R13*1)
   7137 	MOVOA X5, -16(AX)(R13*1)
   7138 	ADDQ  $0x20, R13
   7139 	CMPQ  R8, R13
   7140 	JAE   emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
   7141 	MOVOU X0, (AX) // finally store the saved head and tail blocks
   7142 	MOVOU X1, 16(AX)
   7143 	MOVOU X2, -32(AX)(R8*1)
   7144 	MOVOU X3, -16(AX)(R8*1)
   7145 	MOVQ  BX, AX
   7146 
   7147 emit_literal_done_match_emit_encodeBetterBlockAsm4MB:
        // CX was already 4 bytes into the match, so adding R11 moves it to the match end.
        // R11 + 4 is the full match length; DI is the match offset.
   7148 	ADDL R11, CX
   7149 	ADDL $0x04, R11
   7150 	MOVL CX, 12(SP) // everything up to the match end counts as emitted
   7151 
   7152 	// emitCopy: encode a copy of R11 bytes at offset DI
   7153 	CMPL DI, $0x00010000
   7154 	JB   two_byte_offset_match_nolit_encodeBetterBlockAsm4MB
   7155 	CMPL R11, $0x40
   7156 	JBE  four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB
        // Offset >= 65536 and length > 64: write 0xff plus the 32-bit offset
        // (0xff = 4*64 - 1, the tag four_bytes_remain computes for length 64),
        // then encode the remaining length.
   7157 	MOVB $0xff, (AX)
   7158 	MOVL DI, 1(AX)
   7159 	LEAL -64(R11), R11
   7160 	ADDQ $0x05, AX
   7161 	CMPL R11, $0x04
   7162 	JB   four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB
   7163 
   7164 	// emitRepeat: BX = remaining length, R11 = remaining length - 4
   7165 	MOVL R11, BX
   7166 	LEAL -4(R11), R11
   7167 	CMPL BX, $0x08
   7168 	JBE  repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy
   7169 	CMPL BX, $0x0c
   7170 	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy
   7171 	CMPL DI, $0x00000800
   7172 	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy
   7173 
   7174 cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
   7175 	CMPL R11, $0x00000104
   7176 	JB   repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy
   7177 	CMPL R11, $0x00010100
   7178 	JB   repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy
        // 5 bytes: word 0x001d, then R11 - 65536 as 24 bits.
   7179 	LEAL -65536(R11), R11
   7180 	MOVL R11, DI
   7181 	MOVW $0x001d, (AX)
   7182 	MOVW R11, 2(AX)
   7183 	SARL $0x10, DI
   7184 	MOVB DI, 4(AX)
   7185 	ADDQ $0x05, AX
   7186 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
   7187 
   7188 repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
        // 4 bytes: word 0x0019, then R11 - 256 as 16 bits.
   7189 	LEAL -256(R11), R11
   7190 	MOVW $0x0019, (AX)
   7191 	MOVW R11, 2(AX)
   7192 	ADDQ $0x04, AX
   7193 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
   7194 
   7195 repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
        // 3 bytes: word 0x0015, then R11 - 4 as 8 bits.
   7196 	LEAL -4(R11), R11
   7197 	MOVW $0x0015, (AX)
   7198 	MOVB R11, 2(AX)
   7199 	ADDQ $0x03, AX
   7200 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
   7201 
   7202 repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
        // 2 bytes: (R11 << 2) | 1 written as a 16-bit word.
   7203 	SHLL $0x02, R11
   7204 	ORL  $0x01, R11
   7205 	MOVW R11, (AX)
   7206 	ADDQ $0x02, AX
   7207 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
   7208 
   7209 repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
        // 2 bytes: tag = 1 + R11*4 with (DI >> 8) << 5 OR-ed in, then the low byte of DI.
   7210 	XORQ BX, BX
   7211 	LEAL 1(BX)(R11*4), R11
   7212 	MOVB DI, 1(AX)
   7213 	SARL $0x08, DI
   7214 	SHLL $0x05, DI
   7215 	ORL  DI, R11
   7216 	MOVB R11, (AX)
   7217 	ADDQ $0x02, AX
   7218 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
   7219 
   7220 four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB:
        // 5 bytes: tag = R11*4 - 1, then the 32-bit offset. Nothing is written when R11 == 0.
   7221 	TESTL R11, R11
   7222 	JZ    match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
   7223 	XORL  BX, BX
   7224 	LEAL  -1(BX)(R11*4), R11
   7225 	MOVB  R11, (AX)
   7226 	MOVL  DI, 1(AX)
   7227 	ADDQ  $0x05, AX
   7228 	JMP   match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
   7229 
   7230 two_byte_offset_match_nolit_encodeBetterBlockAsm4MB:
        // Offset < 65536.
   7231 	CMPL R11, $0x40
   7232 	JBE  two_byte_offset_short_match_nolit_encodeBetterBlockAsm4MB
   7233 	CMPL DI, $0x00000800
   7234 	JAE  long_offset_short_match_nolit_encodeBetterBlockAsm4MB
        // Length > 64 and offset < 2048: write a 2-byte copy with tag base 17,
        // subtract 8 from the length and encode the rest as a repeat.
   7235 	MOVL $0x00000001, BX
   7236 	LEAL 16(BX), BX
   7237 	MOVB DI, 1(AX)
   7238 	SHRL $0x08, DI
   7239 	SHLL $0x05, DI
   7240 	ORL  DI, BX
   7241 	MOVB BL, (AX)
   7242 	ADDQ $0x02, AX
   7243 	SUBL $0x08, R11
   7244 
   7245 	// emitRepeat
   7246 	LEAL -4(R11), R11
   7247 	JMP  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
        // NOTE(review): the unconditional JMP above makes the instructions from here
        // to the next label unreachable; in the visible code only they reference the
        // repeat_two_*_emit_copy_short_2b labels further down.
   7248 	MOVL R11, BX
   7249 	LEAL -4(R11), R11
   7250 	CMPL BX, $0x08
   7251 	JBE  repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
   7252 	CMPL BX, $0x0c
   7253 	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
   7254 	CMPL DI, $0x00000800
   7255 	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
   7256 
   7257 cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b:
   7258 	CMPL R11, $0x00000104
   7259 	JB   repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
   7260 	CMPL R11, $0x00010100
   7261 	JB   repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
   7262 	LEAL -65536(R11), R11
   7263 	MOVL R11, DI
   7264 	MOVW $0x001d, (AX)
   7265 	MOVW R11, 2(AX)
   7266 	SARL $0x10, DI
   7267 	MOVB DI, 4(AX)
   7268 	ADDQ $0x05, AX
   7269 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
   7270 
   7271 repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b:
   7272 	LEAL -256(R11), R11
   7273 	MOVW $0x0019, (AX)
   7274 	MOVW R11, 2(AX)
   7275 	ADDQ $0x04, AX
   7276 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
   7277 
   7278 repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b:
   7279 	LEAL -4(R11), R11
   7280 	MOVW $0x0015, (AX)
   7281 	MOVB R11, 2(AX)
   7282 	ADDQ $0x03, AX
   7283 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
   7284 
   7285 repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b:
   7286 	SHLL $0x02, R11
   7287 	ORL  $0x01, R11
   7288 	MOVW R11, (AX)
   7289 	ADDQ $0x02, AX
   7290 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
   7291 
   7292 repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b:
   7293 	XORQ BX, BX
   7294 	LEAL 1(BX)(R11*4), R11
   7295 	MOVB DI, 1(AX)
   7296 	SARL $0x08, DI
   7297 	SHLL $0x05, DI
   7298 	ORL  DI, R11
   7299 	MOVB R11, (AX)
   7300 	ADDQ $0x02, AX
   7301 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
   7302 
   7303 long_offset_short_match_nolit_encodeBetterBlockAsm4MB:
        // Length > 64 and 2048 <= offset < 65536: write 0xee plus the 16-bit offset
        // (0xee = 4*60 - 2, the tag emit_copy_three computes for length 60),
        // subtract 60 from the length and encode the rest as a repeat.
   7304 	MOVB $0xee, (AX)
   7305 	MOVW DI, 1(AX)
   7306 	LEAL -60(R11), R11
   7307 	ADDQ $0x03, AX
   7308 
   7309 	// emitRepeat
   7310 	MOVL R11, BX
   7311 	LEAL -4(R11), R11
   7312 	CMPL BX, $0x08
   7313 	JBE  repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
   7314 	CMPL BX, $0x0c
   7315 	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
   7316 	CMPL DI, $0x00000800
   7317 	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
   7318 
   7319 cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
   7320 	CMPL R11, $0x00000104
   7321 	JB   repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
   7322 	CMPL R11, $0x00010100
   7323 	JB   repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
   7324 	LEAL -65536(R11), R11
   7325 	MOVL R11, DI
   7326 	MOVW $0x001d, (AX)
   7327 	MOVW R11, 2(AX)
   7328 	SARL $0x10, DI
   7329 	MOVB DI, 4(AX)
   7330 	ADDQ $0x05, AX
   7331 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
   7332 
   7333 repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
   7334 	LEAL -256(R11), R11
   7335 	MOVW $0x0019, (AX)
   7336 	MOVW R11, 2(AX)
   7337 	ADDQ $0x04, AX
   7338 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
   7339 
   7340 repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
   7341 	LEAL -4(R11), R11
   7342 	MOVW $0x0015, (AX)
   7343 	MOVB R11, 2(AX)
   7344 	ADDQ $0x03, AX
   7345 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
   7346 
   7347 repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
   7348 	SHLL $0x02, R11
   7349 	ORL  $0x01, R11
   7350 	MOVW R11, (AX)
   7351 	ADDQ $0x02, AX
   7352 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
   7353 
   7354 repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
   7355 	XORQ BX, BX
   7356 	LEAL 1(BX)(R11*4), R11
   7357 	MOVB DI, 1(AX)
   7358 	SARL $0x08, DI
   7359 	SHLL $0x05, DI
   7360 	ORL  DI, R11
   7361 	MOVB R11, (AX)
   7362 	ADDQ $0x02, AX
   7363 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
   7364 
   7365 two_byte_offset_short_match_nolit_encodeBetterBlockAsm4MB:
        // Length <= 64, offset < 65536. BX = R11*4 is the base for both tag forms.
   7366 	MOVL R11, BX
   7367 	SHLL $0x02, BX
   7368 	CMPL R11, $0x0c
   7369 	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm4MB
   7370 	CMPL DI, $0x00000800
   7371 	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm4MB
        // Length < 12 and offset < 2048: 2 bytes, tag = R11*4 - 15 with
        // (DI >> 8) << 5 OR-ed in, then the low byte of DI.
   7372 	LEAL -15(BX), BX
   7373 	MOVB DI, 1(AX)
   7374 	SHRL $0x08, DI
   7375 	SHLL $0x05, DI
   7376 	ORL  DI, BX
   7377 	MOVB BL, (AX)
   7378 	ADDQ $0x02, AX
   7379 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
   7380 
   7381 emit_copy_three_match_nolit_encodeBetterBlockAsm4MB:
        // 3 bytes: tag = R11*4 - 2, then the 16-bit offset.
   7382 	LEAL -2(BX), BX
   7383 	MOVB BL, (AX)
   7384 	MOVW DI, 1(AX)
   7385 	ADDQ $0x03, AX
   7386 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
   7387 
   7388 match_is_repeat_encodeBetterBlockAsm4MB:
        // The match offset DI equals the previous offset in 16(SP). Write the pending
        // literal bytes src[12(SP) .. SI), then encode the match as a repeat.
   7389 	MOVL 12(SP), BX
   7390 	CMPL BX, SI // nothing pending: skip literal emission
   7391 	JEQ  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB
   7392 	MOVL SI, R8
   7393 	MOVL SI, 12(SP)
   7394 	LEAQ (DX)(BX*1), R9 // R9 = first literal byte in src
   7395 	SUBL BX, R8 // R8 = literal length
   7396 	LEAL -1(R8), BX // BX = length - 1, the value stored in the header
   7397 	CMPL BX, $0x3c
   7398 	JB   one_byte_match_emit_repeat_encodeBetterBlockAsm4MB
   7399 	CMPL BX, $0x00000100
   7400 	JB   two_bytes_match_emit_repeat_encodeBetterBlockAsm4MB
   7401 	CMPL BX, $0x00010000
   7402 	JB   three_bytes_match_emit_repeat_encodeBetterBlockAsm4MB
        // 4-byte header: 0xf8 followed by the low 24 bits of length - 1.
   7403 	MOVL BX, R10
   7404 	SHRL $0x10, R10
   7405 	MOVB $0xf8, (AX)
   7406 	MOVW BX, 1(AX)
   7407 	MOVB R10, 3(AX)
   7408 	ADDQ $0x04, AX
   7409 	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB
   7410 
   7411 three_bytes_match_emit_repeat_encodeBetterBlockAsm4MB:
        // 3-byte header: 0xf4 followed by length - 1 as 16 bits.
   7412 	MOVB $0xf4, (AX)
   7413 	MOVW BX, 1(AX)
   7414 	ADDQ $0x03, AX
   7415 	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB
   7416 
   7417 two_bytes_match_emit_repeat_encodeBetterBlockAsm4MB:
        // 2-byte header: 0xf0 followed by length - 1 as 8 bits.
   7418 	MOVB $0xf0, (AX)
   7419 	MOVB BL, 1(AX)
   7420 	ADDQ $0x02, AX
   7421 	CMPL BX, $0x40 // length - 1 below 64 still uses the short copy
   7422 	JB   memmove_match_emit_repeat_encodeBetterBlockAsm4MB
   7423 	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB
   7424 
   7425 one_byte_match_emit_repeat_encodeBetterBlockAsm4MB:
        // 1-byte header: (length - 1) << 2.
   7426 	SHLB $0x02, BL
   7427 	MOVB BL, (AX)
   7428 	ADDQ $0x01, AX
   7429 
   7430 memmove_match_emit_repeat_encodeBetterBlockAsm4MB:
   7431 	LEAQ (AX)(R8*1), BX // BX = dst cursor after the literal bytes
   7432 
   7433 	// genMemMoveShort: copy R8 bytes from R9 to AX using size-class specific moves
   7434 	CMPQ R8, $0x04
   7435 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4
   7436 	CMPQ R8, $0x08
   7437 	JB   emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4through7
   7438 	CMPQ R8, $0x10
   7439 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_8through16
   7440 	CMPQ R8, $0x20
   7441 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_17through32
   7442 	JMP  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_33through64
   7443 
   7444 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4:
        // Always moves 4 bytes, also when R8 < 4; the cursor still advances by R8 only.
   7445 	MOVL (R9), R10
   7446 	MOVL R10, (AX)
   7447 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB
   7448 
   7449 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4through7:
        // Two 4-byte moves anchored at the start and at the end; they may overlap.
   7450 	MOVL (R9), R10
   7451 	MOVL -4(R9)(R8*1), R9
   7452 	MOVL R10, (AX)
   7453 	MOVL R9, -4(AX)(R8*1)
   7454 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB
   7455 
   7456 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_8through16:
   7457 	MOVQ (R9), R10
   7458 	MOVQ -8(R9)(R8*1), R9
   7459 	MOVQ R10, (AX)
   7460 	MOVQ R9, -8(AX)(R8*1)
   7461 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB
   7462 
   7463 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_17through32:
   7464 	MOVOU (R9), X0
   7465 	MOVOU -16(R9)(R8*1), X1
   7466 	MOVOU X0, (AX)
   7467 	MOVOU X1, -16(AX)(R8*1)
   7468 	JMP   memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB
   7469 
   7470 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_33through64:
   7471 	MOVOU (R9), X0
   7472 	MOVOU 16(R9), X1
   7473 	MOVOU -32(R9)(R8*1), X2
   7474 	MOVOU -16(R9)(R8*1), X3
   7475 	MOVOU X0, (AX)
   7476 	MOVOU X1, 16(AX)
   7477 	MOVOU X2, -32(AX)(R8*1)
   7478 	MOVOU X3, -16(AX)(R8*1)
   7479 
   7480 memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB:
   7481 	MOVQ BX, AX // advance the dst cursor past the literal
   7482 	JMP  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB
   7483 
   7484 memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB:
   7485 	LEAQ (AX)(R8*1), BX // BX = dst cursor after the literal bytes
   7486 
   7487 	// genMemMoveLong: first and last 32 bytes are held in X0-X3 and stored last
   7488 	MOVOU (R9), X0
   7489 	MOVOU 16(R9), X1
   7490 	MOVOU -32(R9)(R8*1), X2
   7491 	MOVOU -16(R9)(R8*1), X3
   7492 	MOVQ  R8, R12
   7493 	SHRQ  $0x05, R12 // R12 = length / 32
   7494 	MOVQ  AX, R10
   7495 	ANDL  $0x0000001f, R10
   7496 	MOVQ  $0x00000040, R13
   7497 	SUBQ  R10, R13 // R13 = 64 - (AX & 31), so AX + R13 - 32 is 32-byte aligned
   7498 	DECQ  R12
   7499 	JA    emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
   7500 	LEAQ  -32(R9)(R13*1), R10
   7501 	LEAQ  -32(AX)(R13*1), R14
   7502 
   7503 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_big_loop_back:
   7504 	MOVOU (R10), X4
   7505 	MOVOU 16(R10), X5
   7506 	MOVOA X4, (R14)
   7507 	MOVOA X5, 16(R14)
   7508 	ADDQ  $0x20, R14
   7509 	ADDQ  $0x20, R10
   7510 	ADDQ  $0x20, R13
   7511 	DECQ  R12
   7512 	JNA   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_big_loop_back
   7513 
   7514 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32:
        // 32 bytes per pass: unaligned loads from src, aligned stores to dst.
   7515 	MOVOU -32(R9)(R13*1), X4
   7516 	MOVOU -16(R9)(R13*1), X5
   7517 	MOVOA X4, -32(AX)(R13*1)
   7518 	MOVOA X5, -16(AX)(R13*1)
   7519 	ADDQ  $0x20, R13
   7520 	CMPQ  R8, R13
   7521 	JAE   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
   7522 	MOVOU X0, (AX) // finally store the saved head and tail blocks
   7523 	MOVOU X1, 16(AX)
   7524 	MOVOU X2, -32(AX)(R8*1)
   7525 	MOVOU X3, -16(AX)(R8*1)
   7526 	MOVQ  BX, AX
   7527 
   7528 emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB:
        // CX was already 4 bytes into the match, so adding R11 moves it to the match end.
        // R11 + 4 is the full match length.
   7529 	ADDL R11, CX
   7530 	ADDL $0x04, R11
   7531 	MOVL CX, 12(SP) // everything up to the match end counts as emitted
   7532 
   7533 	// emitRepeat: BX = length, R11 = length - 4
   7534 	MOVL R11, BX
   7535 	LEAL -4(R11), R11
   7536 	CMPL BX, $0x08
   7537 	JBE  repeat_two_match_nolit_repeat_encodeBetterBlockAsm4MB
   7538 	CMPL BX, $0x0c
   7539 	JAE  cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB
   7540 	CMPL DI, $0x00000800
   7541 	JB   repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB
   7542 
   7543 cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB:
   7544 	CMPL R11, $0x00000104
   7545 	JB   repeat_three_match_nolit_repeat_encodeBetterBlockAsm4MB
   7546 	CMPL R11, $0x00010100
   7547 	JB   repeat_four_match_nolit_repeat_encodeBetterBlockAsm4MB
        // 5 bytes: word 0x001d, then R11 - 65536 as 24 bits.
   7548 	LEAL -65536(R11), R11
   7549 	MOVL R11, DI
   7550 	MOVW $0x001d, (AX)
   7551 	MOVW R11, 2(AX)
   7552 	SARL $0x10, DI
   7553 	MOVB DI, 4(AX)
   7554 	ADDQ $0x05, AX
   7555 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
   7556 
   7557 repeat_four_match_nolit_repeat_encodeBetterBlockAsm4MB:
        // 4 bytes: word 0x0019, then R11 - 256 as 16 bits.
   7558 	LEAL -256(R11), R11
   7559 	MOVW $0x0019, (AX)
   7560 	MOVW R11, 2(AX)
   7561 	ADDQ $0x04, AX
   7562 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
   7563 
   7564 repeat_three_match_nolit_repeat_encodeBetterBlockAsm4MB:
        // 3 bytes: word 0x0015, then R11 - 4 as 8 bits.
   7565 	LEAL -4(R11), R11
   7566 	MOVW $0x0015, (AX)
   7567 	MOVB R11, 2(AX)
   7568 	ADDQ $0x03, AX
   7569 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
   7570 
   7571 repeat_two_match_nolit_repeat_encodeBetterBlockAsm4MB:
        // 2 bytes: (R11 << 2) | 1 written as a 16-bit word.
   7572 	SHLL $0x02, R11
   7573 	ORL  $0x01, R11
   7574 	MOVW R11, (AX)
   7575 	ADDQ $0x02, AX
   7576 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
   7577 
   7578 repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB:
        // 2 bytes: tag = 1 + R11*4 with (DI >> 8) << 5 OR-ed in, then the low byte of DI.
        // Falls through to match_nolit_emitcopy_end.
   7579 	XORQ BX, BX
   7580 	LEAL 1(BX)(R11*4), R11
   7581 	MOVB DI, 1(AX)
   7582 	SARL $0x08, DI
   7583 	SHLL $0x05, DI
   7584 	ORL  DI, R11
   7585 	MOVB R11, (AX)
   7586 	ADDQ $0x02, AX
   7587 
   7588 match_nolit_emitcopy_end_encodeBetterBlockAsm4MB:
        // After a match: stop searching if CX reached the search limit, and
        // return 0 if the dst cursor reached the dst limit.
   7589 	CMPL CX, 8(SP)
   7590 	JAE  emit_remainder_encodeBetterBlockAsm4MB
   7591 	CMPQ AX, (SP)
   7592 	JB   match_nolit_dst_ok_encodeBetterBlockAsm4MB
   7593 	MOVQ $0x00000000, ret+48(FP)
   7594 	RET
   7595 
   7596 match_nolit_dst_ok_encodeBetterBlockAsm4MB:
        // Record positions inside the match in both tables. SI is the match start
        // and CX the match end; indexing begins at SI+1 and CX-2 and works inward.
   7597 	MOVQ  $0x00cf1bbcdcbfa563, BX // multiplier for the 17-bit hash
   7598 	MOVQ  $0x9e3779b1, DI // multiplier for the 14-bit hash
   7599 	LEAQ  1(SI), SI
   7600 	LEAQ  -2(CX), R8
   7601 	MOVQ  (DX)(SI*1), R9
   7602 	MOVQ  1(DX)(SI*1), R10
   7603 	MOVQ  (DX)(R8*1), R11
   7604 	MOVQ  1(DX)(R8*1), R12
   7605 	SHLQ  $0x08, R9 // 17-bit hash of the 7 bytes at SI
   7606 	IMULQ BX, R9
   7607 	SHRQ  $0x2f, R9
   7608 	SHLQ  $0x20, R10 // 14-bit hash of the 4 bytes at SI+1
   7609 	IMULQ DI, R10
   7610 	SHRQ  $0x32, R10
   7611 	SHLQ  $0x08, R11 // 17-bit hash of the 7 bytes at R8
   7612 	IMULQ BX, R11
   7613 	SHRQ  $0x2f, R11
   7614 	SHLQ  $0x20, R12 // 14-bit hash of the 4 bytes at R8+1
   7615 	IMULQ DI, R12
   7616 	SHRQ  $0x32, R12
   7617 	LEAQ  1(SI), DI // DI is reused as SI+1 now that the multiplier is no longer needed
   7618 	LEAQ  1(R8), R13
   7619 	MOVL  SI, 24(SP)(R9*4)
   7620 	MOVL  R8, 24(SP)(R11*4)
   7621 	MOVL  DI, 524312(SP)(R10*4)
   7622 	MOVL  R13, 524312(SP)(R12*4)
   7623 	ADDQ  $0x01, SI
   7624 	SUBQ  $0x01, R8
   7625 
   7626 index_loop_encodeBetterBlockAsm4MB:
        // Store 17-bit-table entries for SI and R8, moving the two positions toward
        // each other by 2 per pass; resume searching once they meet or cross.
   7627 	CMPQ  SI, R8
   7628 	JAE   search_loop_encodeBetterBlockAsm4MB
   7629 	MOVQ  (DX)(SI*1), DI
   7630 	MOVQ  (DX)(R8*1), R9
   7631 	SHLQ  $0x08, DI
   7632 	IMULQ BX, DI
   7633 	SHRQ  $0x2f, DI
   7634 	SHLQ  $0x08, R9
   7635 	IMULQ BX, R9
   7636 	SHRQ  $0x2f, R9
   7637 	MOVL  SI, 24(SP)(DI*4)
   7638 	MOVL  R8, 24(SP)(R9*4)
   7639 	ADDQ  $0x02, SI
   7640 	SUBQ  $0x02, R8
   7641 	JMP   index_loop_encodeBetterBlockAsm4MB
   7642 
   7643 emit_remainder_encodeBetterBlockAsm4MB:
        // Write the unemitted tail src[12(SP) .. src_len) as one literal.
        // Bound check first: AX + 4 + (src_len - 12(SP)) must stay below the dst
        // limit in (SP); otherwise the function returns 0.
   7644 	MOVQ src_len+32(FP), CX
   7645 	SUBL 12(SP), CX
   7646 	LEAQ 4(AX)(CX*1), CX
   7647 	CMPQ CX, (SP)
   7648 	JB   emit_remainder_ok_encodeBetterBlockAsm4MB
   7649 	MOVQ $0x00000000, ret+48(FP)
   7650 	RET
   7651 
   7652 emit_remainder_ok_encodeBetterBlockAsm4MB:
   7653 	MOVQ src_len+32(FP), CX
   7654 	MOVL 12(SP), BX
   7655 	CMPL BX, CX // nothing left to emit
   7656 	JEQ  emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB
   7657 	MOVL CX, SI
   7658 	MOVL CX, 12(SP)
   7659 	LEAQ (DX)(BX*1), CX // CX = first literal byte in src
   7660 	SUBL BX, SI // SI = literal length
   7661 	LEAL -1(SI), DX // DX = length - 1; the src base in DX is overwritten here
   7662 	CMPL DX, $0x3c
   7663 	JB   one_byte_emit_remainder_encodeBetterBlockAsm4MB
   7664 	CMPL DX, $0x00000100
   7665 	JB   two_bytes_emit_remainder_encodeBetterBlockAsm4MB
   7666 	CMPL DX, $0x00010000
   7667 	JB   three_bytes_emit_remainder_encodeBetterBlockAsm4MB
        // 4-byte header: 0xf8 followed by the low 24 bits of length - 1.
   7668 	MOVL DX, BX
   7669 	SHRL $0x10, BX
   7670 	MOVB $0xf8, (AX)
   7671 	MOVW DX, 1(AX)
   7672 	MOVB BL, 3(AX)
   7673 	ADDQ $0x04, AX
   7674 	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm4MB
   7675 
   7676 three_bytes_emit_remainder_encodeBetterBlockAsm4MB:
        // 3-byte header: 0xf4 followed by length - 1 as 16 bits.
   7677 	MOVB $0xf4, (AX)
   7678 	MOVW DX, 1(AX)
   7679 	ADDQ $0x03, AX
   7680 	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm4MB
   7681 
   7682 two_bytes_emit_remainder_encodeBetterBlockAsm4MB:
        // 2-byte header: 0xf0 followed by length - 1 as 8 bits.
   7683 	MOVB $0xf0, (AX)
   7684 	MOVB DL, 1(AX)
   7685 	ADDQ $0x02, AX
   7686 	CMPL DX, $0x40 // length - 1 below 64 still uses the short copy
   7687 	JB   memmove_emit_remainder_encodeBetterBlockAsm4MB
   7688 	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm4MB
   7689 
   7690 one_byte_emit_remainder_encodeBetterBlockAsm4MB:
        // 1-byte header: (length - 1) << 2.
   7691 	SHLB $0x02, DL
   7692 	MOVB DL, (AX)
   7693 	ADDQ $0x01, AX
   7694 
   7695 memmove_emit_remainder_encodeBetterBlockAsm4MB:
   7696 	LEAQ (AX)(SI*1), DX // DX = dst cursor after the literal bytes
   7697 	MOVL SI, BX // BX = byte count for the copy
   7698 
   7699 	// genMemMoveShort: copy exactly BX bytes from CX to AX
   7700 	CMPQ BX, $0x03
   7701 	JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_1or2
   7702 	JE   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_3
   7703 	CMPQ BX, $0x08
   7704 	JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_4through7
   7705 	CMPQ BX, $0x10
   7706 	JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_8through16
   7707 	CMPQ BX, $0x20
   7708 	JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_17through32
   7709 	JMP  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_33through64
   7710 
   7711 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_1or2:
        // First byte and last byte; for a 1-byte copy both refer to the same byte.
   7712 	MOVB (CX), SI
   7713 	MOVB -1(CX)(BX*1), CL
   7714 	MOVB SI, (AX)
   7715 	MOVB CL, -1(AX)(BX*1)
   7716 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
   7717 
   7718 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_3:
   7719 	MOVW (CX), SI
   7720 	MOVB 2(CX), CL
   7721 	MOVW SI, (AX)
   7722 	MOVB CL, 2(AX)
   7723 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
   7724 
   7725 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_4through7:
        // Two 4-byte moves anchored at the start and at the end; they may overlap.
   7726 	MOVL (CX), SI
   7727 	MOVL -4(CX)(BX*1), CX
   7728 	MOVL SI, (AX)
   7729 	MOVL CX, -4(AX)(BX*1)
   7730 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
   7731 
   7732 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_8through16:
   7733 	MOVQ (CX), SI
   7734 	MOVQ -8(CX)(BX*1), CX
   7735 	MOVQ SI, (AX)
   7736 	MOVQ CX, -8(AX)(BX*1)
   7737 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
   7738 
   7739 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_17through32:
   7740 	MOVOU (CX), X0
   7741 	MOVOU -16(CX)(BX*1), X1
   7742 	MOVOU X0, (AX)
   7743 	MOVOU X1, -16(AX)(BX*1)
   7744 	JMP   memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
   7745 
   7746 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_33through64:
   7747 	MOVOU (CX), X0
   7748 	MOVOU 16(CX), X1
   7749 	MOVOU -32(CX)(BX*1), X2
   7750 	MOVOU -16(CX)(BX*1), X3
   7751 	MOVOU X0, (AX)
   7752 	MOVOU X1, 16(AX)
   7753 	MOVOU X2, -32(AX)(BX*1)
   7754 	MOVOU X3, -16(AX)(BX*1)
   7755 
   7756 memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB:
   7757 	MOVQ DX, AX // advance the dst cursor past the literal
   7758 	JMP  emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB
   7759 
   7760 memmove_long_emit_remainder_encodeBetterBlockAsm4MB:
   7761 	LEAQ (AX)(SI*1), DX
   7762 	MOVL SI, BX
   7763 
   7764 	// genMemMoveLong
   7765 	MOVOU (CX), X0
   7766 	MOVOU 16(CX), X1
   7767 	MOVOU -32(CX)(BX*1), X2
   7768 	MOVOU -16(CX)(BX*1), X3
   7769 	MOVQ  BX, DI
   7770 	SHRQ  $0x05, DI
   7771 	MOVQ  AX, SI
   7772 	ANDL  $0x0000001f, SI
   7773 	MOVQ  $0x00000040, R8
   7774 	SUBQ  SI, R8
   7775 	DECQ  DI
   7776 	JA    emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
   7777 	LEAQ  -32(CX)(R8*1), SI
   7778 	LEAQ  -32(AX)(R8*1), R9
   7779 
   7780 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back:
   7781 	MOVOU (SI), X4
   7782 	MOVOU 16(SI), X5
   7783 	MOVOA X4, (R9)
   7784 	MOVOA X5, 16(R9)
   7785 	ADDQ  $0x20, R9
   7786 	ADDQ  $0x20, SI
   7787 	ADDQ  $0x20, R8
   7788 	DECQ  DI
   7789 	JNA   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back
   7790 
   7791 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32:
   7792 	MOVOU -32(CX)(R8*1), X4
   7793 	MOVOU -16(CX)(R8*1), X5
   7794 	MOVOA X4, -32(AX)(R8*1)
   7795 	MOVOA X5, -16(AX)(R8*1)
   7796 	ADDQ  $0x20, R8
   7797 	CMPQ  BX, R8
   7798 	JAE   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
   7799 	MOVOU X0, (AX)
   7800 	MOVOU X1, 16(AX)
   7801 	MOVOU X2, -32(AX)(BX*1)
   7802 	MOVOU X3, -16(AX)(BX*1)
   7803 	MOVQ  DX, AX
   7804 
   7805 emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB:
   7806 	MOVQ dst_base+0(FP), CX
   7807 	SUBQ CX, AX
   7808 	MOVQ AX, ret+48(FP)
   7809 	RET
   7810 
   7811 // func encodeBetterBlockAsm12B(dst []byte, src []byte) int
   7812 // Requires: BMI, SSE2
        //
        // Compresses src into dst and returns the number of bytes written
        // (final dst pointer minus dst_base). Returns 0 as soon as one of the
        // output bound checks against the limit stored at (SP) fails.
        //
        // Stack frame as used by the code below:
        //   (SP)       8 bytes      dst limit = dst_base + src_len - 6 - (src_len >> 5)
        //   8(SP)      4 bytes      search limit = src_len - 8
        //   12(SP)     4 bytes      src offset of the first byte not yet emitted
        //   16(SP)     4 bytes      offset (distance) of the most recently stored match
        //   20(SP)     4 bytes      next search position, computed at the top of each search step
        //   24(SP)     65536 bytes  "long" table: 4-byte src offsets, 14-bit index, hash of 6 bytes
        //   65560(SP)  16384 bytes  "short" table: 4-byte src offsets, 12-bit index, hash of 4 bytes
        //
        // Register roles in the main loop: AX = dst write pointer, DX = src base,
        // CX = current src offset; everything else is scratch.
        //
        // NOTE(review): nothing here validates src_len or dst_len; presumably the Go
        // caller guarantees a minimum src length and a sufficiently large dst - confirm
        // against the Go stub before relying on it.
        // NOTE(review): this file is generated ("DO NOT EDIT" in the file header); code
        // changes belong in the generator, comments added here will be lost on regeneration.
   7813 TEXT ·encodeBetterBlockAsm12B(SB), $81944-56
   7814 	MOVQ dst_base+0(FP), AX // AX = dst write pointer
   7815 	MOVQ $0x00000280, CX // 0x280 iterations x 128 bytes = 81920 bytes (both tables)
   7816 	LEAQ 24(SP), DX // DX = start of the table area
   7817 	PXOR X0, X0 // X0 = 0, the fill value
   7818 
   7819 zero_loop_encodeBetterBlockAsm12B:
   7820 	MOVOU X0, (DX) // clear 128 bytes per iteration
   7821 	MOVOU X0, 16(DX)
   7822 	MOVOU X0, 32(DX)
   7823 	MOVOU X0, 48(DX)
   7824 	MOVOU X0, 64(DX)
   7825 	MOVOU X0, 80(DX)
   7826 	MOVOU X0, 96(DX)
   7827 	MOVOU X0, 112(DX)
   7828 	ADDQ  $0x80, DX
   7829 	DECQ  CX
   7830 	JNZ   zero_loop_encodeBetterBlockAsm12B
   7831 	MOVL  $0x00000000, 12(SP) // nothing emitted yet
   7832 	MOVQ  src_len+32(FP), CX
   7833 	LEAQ  -6(CX), DX
   7834 	LEAQ  -8(CX), BX
   7835 	MOVL  BX, 8(SP) // search limit = src_len - 8
   7836 	SHRQ  $0x05, CX
   7837 	SUBL  CX, DX // DX = src_len - 6 - (src_len >> 5)
   7838 	LEAQ  (AX)(DX*1), DX
   7839 	MOVQ  DX, (SP) // dst limit: reaching it makes the function return 0
   7840 	MOVL  $0x00000001, CX // searching starts at src offset 1
   7841 	MOVL  $0x00000000, 16(SP) // no previous match offset
   7842 	MOVQ  src_base+24(FP), DX // DX = src base until emit_remainder_ok reuses it
   7843 
   7844 search_loop_encodeBetterBlockAsm12B:
   7845 	MOVL  CX, BX
   7846 	SUBL  12(SP), BX
   7847 	SHRL  $0x06, BX
   7848 	LEAL  1(CX)(BX*1), BX // BX = CX + 1 + ((CX - unemitted start) >> 6): step grows with the unmatched run
   7849 	CMPL  BX, 8(SP)
   7850 	JAE   emit_remainder_encodeBetterBlockAsm12B // next position at/after the search limit: stop searching
   7851 	MOVQ  (DX)(CX*1), SI // SI = 8 src bytes at the current position
   7852 	MOVL  BX, 20(SP) // remember the next position
   7853 	MOVQ  $0x0000cf1bbcdcbf9b, R8 // multiplier for the long-table hash
   7854 	MOVQ  $0x9e3779b1, BX // multiplier for the short-table hash
   7855 	MOVQ  SI, R9
   7856 	MOVQ  SI, R10
   7857 	SHLQ  $0x10, R9 // keep the low 48 bits (first 6 src bytes)
   7858 	IMULQ R8, R9
   7859 	SHRQ  $0x32, R9 // top 14 bits -> long-table index
   7860 	SHLQ  $0x20, R10 // keep the low 32 bits (first 4 src bytes)
   7861 	IMULQ BX, R10
   7862 	SHRQ  $0x34, R10 // top 12 bits -> short-table index
   7863 	MOVL  24(SP)(R9*4), BX // BX = offset previously stored for the long hash
   7864 	MOVL  65560(SP)(R10*4), DI // DI = offset previously stored for the short hash
   7865 	MOVL  CX, 24(SP)(R9*4) // record the current offset in both tables
   7866 	MOVL  CX, 65560(SP)(R10*4)
   7867 	MOVQ  (DX)(BX*1), R9 // 8 bytes at the long candidate
   7868 	MOVQ  (DX)(DI*1), R10 // 8 bytes at the short candidate
   7869 	CMPQ  R9, SI
   7870 	JEQ   candidate_match_encodeBetterBlockAsm12B // long candidate equals all 8 bytes
   7871 	CMPQ  R10, SI
   7872 	JNE   no_short_found_encodeBetterBlockAsm12B
   7873 	MOVL  DI, BX // short candidate equals all 8 bytes: use it as the candidate
   7874 	JMP   candidate_match_encodeBetterBlockAsm12B
   7875 
   7876 no_short_found_encodeBetterBlockAsm12B:
   7877 	CMPL R9, SI // no 8-byte match: fall back to 4-byte comparisons
   7878 	JEQ  candidate_match_encodeBetterBlockAsm12B
   7879 	CMPL R10, SI
   7880 	JEQ  candidateS_match_encodeBetterBlockAsm12B
   7881 	MOVL 20(SP), CX // nothing matched: advance to the saved next position
   7882 	JMP  search_loop_encodeBetterBlockAsm12B
   7883 
   7884 candidateS_match_encodeBetterBlockAsm12B:
   7885 	SHRQ  $0x08, SI // SI = the loaded bytes as seen from position CX+1
   7886 	MOVQ  SI, R9
   7887 	SHLQ  $0x10, R9
   7888 	IMULQ R8, R9
   7889 	SHRQ  $0x32, R9 // long-table index for position CX+1
   7890 	MOVL  24(SP)(R9*4), BX
   7891 	INCL  CX
   7892 	MOVL  CX, 24(SP)(R9*4) // record CX+1 in the long table
   7893 	CMPL  (DX)(BX*1), SI
   7894 	JEQ   candidate_match_encodeBetterBlockAsm12B // long candidate for CX+1 matches 4 bytes: use it
   7895 	DECL  CX // otherwise return to the original position
   7896 	MOVL  DI, BX // and use the short candidate
   7897 
   7898 candidate_match_encodeBetterBlockAsm12B:
   7899 	MOVL  12(SP), SI // SI = start of the unemitted bytes
   7900 	TESTL BX, BX
   7901 	JZ    match_extend_back_end_encodeBetterBlockAsm12B // candidate at offset 0 cannot move backwards
   7902 
   7903 match_extend_back_loop_encodeBetterBlockAsm12B:
   7904 	CMPL CX, SI
   7905 	JBE  match_extend_back_end_encodeBetterBlockAsm12B // do not extend into already emitted bytes
   7906 	MOVB -1(DX)(BX*1), DI
   7907 	MOVB -1(DX)(CX*1), R8
   7908 	CMPB DI, R8
   7909 	JNE  match_extend_back_end_encodeBetterBlockAsm12B // preceding bytes differ: stop
   7910 	LEAL -1(CX), CX // move both positions one byte back
   7911 	DECL BX
   7912 	JZ   match_extend_back_end_encodeBetterBlockAsm12B
   7913 	JMP  match_extend_back_loop_encodeBetterBlockAsm12B
   7914 
   7915 match_extend_back_end_encodeBetterBlockAsm12B:
   7916 	MOVL CX, SI
   7917 	SUBL 12(SP), SI // SI = number of unemitted bytes before the match
   7918 	LEAQ 3(AX)(SI*1), SI // dst pointer after those bytes plus 3
   7919 	CMPQ SI, (SP)
   7920 	JB   match_dst_size_check_encodeBetterBlockAsm12B
   7921 	MOVQ $0x00000000, ret+48(FP) // would reach the dst limit: return 0
   7922 	RET
   7923 
   7924 match_dst_size_check_encodeBetterBlockAsm12B:
   7925 	MOVL CX, SI // SI = match start; kept until match_nolit_dst_ok
   7926 	ADDL $0x04, CX // skip 4 bytes on both sides before measuring the rest
   7927 	ADDL $0x04, BX
   7928 	MOVQ src_len+32(FP), DI
   7929 	SUBL CX, DI // DI = src bytes remaining after CX
   7930 	LEAQ (DX)(CX*1), R8 // R8 = pointer to current data
   7931 	LEAQ (DX)(BX*1), R9 // R9 = pointer to candidate data
   7932 
   7933 	// matchLen: R11 = number of equal bytes at R8/R9, at most DI
   7934 	XORL R11, R11
   7935 	CMPL DI, $0x08
   7936 	JB   matchlen_match4_match_nolit_encodeBetterBlockAsm12B
   7937 
   7938 matchlen_loopback_match_nolit_encodeBetterBlockAsm12B:
   7939 	MOVQ  (R8)(R11*1), R10
   7940 	XORQ  (R9)(R11*1), R10 // non-zero bits mark differing bytes
   7941 	TESTQ R10, R10
   7942 	JZ    matchlen_loop_match_nolit_encodeBetterBlockAsm12B // all 8 bytes equal
   7943 
   7944 #ifdef GOAMD64_v3
   7945 	TZCNTQ R10, R10
   7946 
   7947 #else
   7948 	BSFQ R10, R10
   7949 
   7950 #endif
   7951 	SARQ $0x03, R10 // index of lowest set bit -> index of first differing byte
   7952 	LEAL (R11)(R10*1), R11
   7953 	JMP  match_nolit_end_encodeBetterBlockAsm12B
   7954 
   7955 matchlen_loop_match_nolit_encodeBetterBlockAsm12B:
   7956 	LEAL -8(DI), DI
   7957 	LEAL 8(R11), R11
   7958 	CMPL DI, $0x08
   7959 	JAE  matchlen_loopback_match_nolit_encodeBetterBlockAsm12B
   7960 	JZ   match_nolit_end_encodeBetterBlockAsm12B // NOTE(review): DI == 8 is already taken by the JAE above, so this looks never taken
   7961 
   7962 matchlen_match4_match_nolit_encodeBetterBlockAsm12B:
   7963 	CMPL DI, $0x04 // fewer than 8 bytes left: try 4, then 2, then 1
   7964 	JB   matchlen_match2_match_nolit_encodeBetterBlockAsm12B
   7965 	MOVL (R8)(R11*1), R10
   7966 	CMPL (R9)(R11*1), R10
   7967 	JNE  matchlen_match2_match_nolit_encodeBetterBlockAsm12B
   7968 	SUBL $0x04, DI
   7969 	LEAL 4(R11), R11
   7970 
   7971 matchlen_match2_match_nolit_encodeBetterBlockAsm12B:
   7972 	CMPL DI, $0x02
   7973 	JB   matchlen_match1_match_nolit_encodeBetterBlockAsm12B
   7974 	MOVW (R8)(R11*1), R10
   7975 	CMPW (R9)(R11*1), R10
   7976 	JNE  matchlen_match1_match_nolit_encodeBetterBlockAsm12B
   7977 	SUBL $0x02, DI
   7978 	LEAL 2(R11), R11
   7979 
   7980 matchlen_match1_match_nolit_encodeBetterBlockAsm12B:
   7981 	CMPL DI, $0x01
   7982 	JB   match_nolit_end_encodeBetterBlockAsm12B
   7983 	MOVB (R8)(R11*1), R10
   7984 	CMPB (R9)(R11*1), R10
   7985 	JNE  match_nolit_end_encodeBetterBlockAsm12B
   7986 	LEAL 1(R11), R11
   7987 
   7988 match_nolit_end_encodeBetterBlockAsm12B:
   7989 	MOVL CX, DI
   7990 	SUBL BX, DI // DI = distance between current position and candidate
   7991 
   7992 	// Check if repeat: same distance as the one stored at 16(SP)
   7993 	CMPL 16(SP), DI
   7994 	JEQ  match_is_repeat_encodeBetterBlockAsm12B
   7995 	MOVL DI, 16(SP) // remember the new distance
   7996 	MOVL 12(SP), BX
   7997 	CMPL BX, SI
   7998 	JEQ  emit_literal_done_match_emit_encodeBetterBlockAsm12B // no unemitted bytes before the match
   7999 	MOVL SI, R8
   8000 	MOVL SI, 12(SP)
   8001 	LEAQ (DX)(BX*1), R9 // R9 = source of the bytes to copy out
   8002 	SUBL BX, R8 // R8 = their length
   8003 	LEAL -1(R8), BX // BX = length - 1, the value encoded in the header
   8004 	CMPL BX, $0x3c
   8005 	JB   one_byte_match_emit_encodeBetterBlockAsm12B
   8006 	CMPL BX, $0x00000100
   8007 	JB   two_bytes_match_emit_encodeBetterBlockAsm12B
   8008 	JB   three_bytes_match_emit_encodeBetterBlockAsm12B // NOTE(review): same flags as the JB above, never taken; target is the fall-through anyway
   8009 
   8010 three_bytes_match_emit_encodeBetterBlockAsm12B:
   8011 	MOVB $0xf4, (AX) // header: 0xf4 followed by length-1 as 16 bits
   8012 	MOVW BX, 1(AX)
   8013 	ADDQ $0x03, AX
   8014 	JMP  memmove_long_match_emit_encodeBetterBlockAsm12B
   8015 
   8016 two_bytes_match_emit_encodeBetterBlockAsm12B:
   8017 	MOVB $0xf0, (AX) // header: 0xf0 followed by length-1 as 8 bits
   8018 	MOVB BL, 1(AX)
   8019 	ADDQ $0x02, AX
   8020 	CMPL BX, $0x40
   8021 	JB   memmove_match_emit_encodeBetterBlockAsm12B // length <= 64: short copy
   8022 	JMP  memmove_long_match_emit_encodeBetterBlockAsm12B
   8023 
   8024 one_byte_match_emit_encodeBetterBlockAsm12B:
   8025 	SHLB $0x02, BL // header: single byte (length-1) << 2
   8026 	MOVB BL, (AX)
   8027 	ADDQ $0x01, AX
   8028 
   8029 memmove_match_emit_encodeBetterBlockAsm12B:
   8030 	LEAQ (AX)(R8*1), BX // BX = dst pointer after the copy
   8031 
   8032 	// genMemMoveShort: copy R8 (<= 64) bytes from R9 to AX using overlapping loads/stores
   8033 	CMPQ R8, $0x04
   8034 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4
   8035 	CMPQ R8, $0x08
   8036 	JB   emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4through7
   8037 	CMPQ R8, $0x10
   8038 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_8through16
   8039 	CMPQ R8, $0x20
   8040 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_17through32
   8041 	JMP  emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_33through64
   8042 
   8043 emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4:
   8044 	MOVL (R9), R10 // always moves 4 bytes, even when R8 < 4; AX is still advanced by R8 only
   8045 	MOVL R10, (AX)
   8046 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm12B
   8047 
   8048 emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4through7:
   8049 	MOVL (R9), R10 // first 4 and last 4 bytes (may overlap)
   8050 	MOVL -4(R9)(R8*1), R9
   8051 	MOVL R10, (AX)
   8052 	MOVL R9, -4(AX)(R8*1)
   8053 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm12B
   8054 
   8055 emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_8through16:
   8056 	MOVQ (R9), R10 // first 8 and last 8 bytes (may overlap)
   8057 	MOVQ -8(R9)(R8*1), R9
   8058 	MOVQ R10, (AX)
   8059 	MOVQ R9, -8(AX)(R8*1)
   8060 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm12B
   8061 
   8062 emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_17through32:
   8063 	MOVOU (R9), X0 // first 16 and last 16 bytes (may overlap)
   8064 	MOVOU -16(R9)(R8*1), X1
   8065 	MOVOU X0, (AX)
   8066 	MOVOU X1, -16(AX)(R8*1)
   8067 	JMP   memmove_end_copy_match_emit_encodeBetterBlockAsm12B
   8068 
   8069 emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_33through64:
   8070 	MOVOU (R9), X0 // first 32 and last 32 bytes (may overlap)
   8071 	MOVOU 16(R9), X1
   8072 	MOVOU -32(R9)(R8*1), X2
   8073 	MOVOU -16(R9)(R8*1), X3
   8074 	MOVOU X0, (AX)
   8075 	MOVOU X1, 16(AX)
   8076 	MOVOU X2, -32(AX)(R8*1)
   8077 	MOVOU X3, -16(AX)(R8*1)
   8078 
   8079 memmove_end_copy_match_emit_encodeBetterBlockAsm12B:
   8080 	MOVQ BX, AX // advance dst past the copied bytes
   8081 	JMP  emit_literal_done_match_emit_encodeBetterBlockAsm12B
   8082 
   8083 memmove_long_match_emit_encodeBetterBlockAsm12B:
   8084 	LEAQ (AX)(R8*1), BX // BX = dst pointer after the copy
   8085 
   8086 	// genMemMoveLong: copy R8 (> 64) bytes from R9 to AX
   8087 	MOVOU (R9), X0 // save the first and last 32 source bytes; they are stored last
   8088 	MOVOU 16(R9), X1
   8089 	MOVOU -32(R9)(R8*1), X2
   8090 	MOVOU -16(R9)(R8*1), X3
   8091 	MOVQ  R8, R12
   8092 	SHRQ  $0x05, R12 // R12 = number of whole 32-byte chunks
   8093 	MOVQ  AX, R10
   8094 	ANDL  $0x0000001f, R10 // R10 = dst misalignment within 32 bytes
   8095 	MOVQ  $0x00000040, R13
   8096 	SUBQ  R10, R13 // R13 = 64 - misalignment, so AX + R13 - 32 is 32-byte aligned
   8097 	DECQ  R12
   8098 	JA    emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 // NOTE(review): JA also reads CF, which DECQ does not write (left by the SUBQ above) - verify against the generator before touching
   8099 	LEAQ  -32(R9)(R13*1), R10
   8100 	LEAQ  -32(AX)(R13*1), R14
   8101 
   8102 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_big_loop_back:
   8103 	MOVOU (R10), X4
   8104 	MOVOU 16(R10), X5
   8105 	MOVOA X4, (R14) // aligned stores to dst
   8106 	MOVOA X5, 16(R14)
   8107 	ADDQ  $0x20, R14
   8108 	ADDQ  $0x20, R10
   8109 	ADDQ  $0x20, R13
   8110 	DECQ  R12
   8111 	JNA   emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_big_loop_back
   8112 
   8113 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32:
   8114 	MOVOU -32(R9)(R13*1), X4 // copy 32 bytes ending at offset R13
   8115 	MOVOU -16(R9)(R13*1), X5
   8116 	MOVOA X4, -32(AX)(R13*1) // aligned stores to dst
   8117 	MOVOA X5, -16(AX)(R13*1)
   8118 	ADDQ  $0x20, R13
   8119 	CMPQ  R8, R13
   8120 	JAE   emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 // continue while length >= R13
   8121 	MOVOU X0, (AX) // finally store the saved head and tail with unaligned stores
   8122 	MOVOU X1, 16(AX)
   8123 	MOVOU X2, -32(AX)(R8*1)
   8124 	MOVOU X3, -16(AX)(R8*1)
   8125 	MOVQ  BX, AX // advance dst past the copied bytes
   8126 
   8127 emit_literal_done_match_emit_encodeBetterBlockAsm12B:
   8128 	ADDL R11, CX // CX = position just after the match
   8129 	ADDL $0x04, R11 // R11 = total match length (4 + extension)
   8130 	MOVL CX, 12(SP) // everything before CX is now accounted for
   8131 
   8132 	// emitCopy: R11 = length, DI = distance
   8133 	CMPL R11, $0x40
   8134 	JBE  two_byte_offset_short_match_nolit_encodeBetterBlockAsm12B // length <= 64: one copy op is enough
   8135 	CMPL DI, $0x00000800
   8136 	JAE  long_offset_short_match_nolit_encodeBetterBlockAsm12B
   8137 	MOVL $0x00000001, BX // length > 64, distance < 2048: 2-byte op covering 8 bytes first
   8138 	LEAL 16(BX), BX // BX = 17
   8139 	MOVB DI, 1(AX) // second byte = low 8 bits of the distance
   8140 	SHRL $0x08, DI
   8141 	SHLL $0x05, DI
   8142 	ORL  DI, BX // first byte = 17 | (distance >> 8) << 5
   8143 	MOVB BL, (AX)
   8144 	ADDQ $0x02, AX
   8145 	SUBL $0x08, R11 // 8 bytes of the match are covered
   8146 
   8147 	// emitRepeat: encode the remaining R11 bytes as a repeat
   8148 	LEAL -4(R11), R11
   8149 	JMP  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b
   8150 	MOVL R11, BX // NOTE(review): unlabelled code after an unconditional JMP - unreachable up to the next label
   8151 	LEAL -4(R11), R11
   8152 	CMPL BX, $0x08
   8153 	JBE  repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b
   8154 	CMPL BX, $0x0c
   8155 	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b
   8156 	CMPL DI, $0x00000800
   8157 	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b
   8158 
   8159 cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b:
   8160 	CMPL R11, $0x00000104
   8161 	JB   repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b
   8162 	LEAL -256(R11), R11
   8163 	MOVW $0x0019, (AX) // 4-byte form: 0x19, 0x00, then R11 as 16 bits
   8164 	MOVW R11, 2(AX)
   8165 	ADDQ $0x04, AX
   8166 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
   8167 
   8168 repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b:
   8169 	LEAL -4(R11), R11
   8170 	MOVW $0x0015, (AX) // 3-byte form: 0x15, 0x00, then R11 as 8 bits
   8171 	MOVB R11, 2(AX)
   8172 	ADDQ $0x03, AX
   8173 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
   8174 
   8175 repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b:
   8176 	SHLL $0x02, R11 // only referenced from the unreachable run above
   8177 	ORL  $0x01, R11
   8178 	MOVW R11, (AX)
   8179 	ADDQ $0x02, AX
   8180 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
   8181 
   8182 repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b:
   8183 	XORQ BX, BX // only referenced from the unreachable run above
   8184 	LEAL 1(BX)(R11*4), R11
   8185 	MOVB DI, 1(AX)
   8186 	SARL $0x08, DI
   8187 	SHLL $0x05, DI
   8188 	ORL  DI, R11
   8189 	MOVB R11, (AX)
   8190 	ADDQ $0x02, AX
   8191 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
   8192 
   8193 long_offset_short_match_nolit_encodeBetterBlockAsm12B:
   8194 	MOVB $0xee, (AX) // length > 64, distance >= 2048: 3-byte op 0xee + 16-bit distance first
   8195 	MOVW DI, 1(AX)
   8196 	LEAL -60(R11), R11 // 60 bytes of the match are covered
   8197 	ADDQ $0x03, AX
   8198 
   8199 	// emitRepeat: encode the remaining R11 bytes as a repeat
   8200 	MOVL R11, BX
   8201 	LEAL -4(R11), R11
   8202 	CMPL BX, $0x08
   8203 	JBE  repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short // remaining <= 8: 2-byte form
   8204 	CMPL BX, $0x0c
   8205 	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short
   8206 	CMPL DI, $0x00000800
   8207 	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short // remaining < 12 and distance < 2048
   8208 
   8209 cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short:
   8210 	CMPL R11, $0x00000104
   8211 	JB   repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short
   8212 	LEAL -256(R11), R11
   8213 	MOVW $0x0019, (AX) // 4-byte form: 0x19, 0x00, then R11 as 16 bits
   8214 	MOVW R11, 2(AX)
   8215 	ADDQ $0x04, AX
   8216 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
   8217 
   8218 repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short:
   8219 	LEAL -4(R11), R11
   8220 	MOVW $0x0015, (AX) // 3-byte form: 0x15, 0x00, then R11 as 8 bits
   8221 	MOVB R11, 2(AX)
   8222 	ADDQ $0x03, AX
   8223 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
   8224 
   8225 repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short:
   8226 	SHLL $0x02, R11 // 2-byte form: 16-bit word (R11 << 2) | 1
   8227 	ORL  $0x01, R11
   8228 	MOVW R11, (AX)
   8229 	ADDQ $0x02, AX
   8230 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
   8231 
   8232 repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short:
   8233 	XORQ BX, BX
   8234 	LEAL 1(BX)(R11*4), R11 // first byte = 1 | R11 << 2 | (distance >> 8) << 5
   8235 	MOVB DI, 1(AX) // second byte = low 8 bits of the distance
   8236 	SARL $0x08, DI
   8237 	SHLL $0x05, DI
   8238 	ORL  DI, R11
   8239 	MOVB R11, (AX)
   8240 	ADDQ $0x02, AX
   8241 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
   8242 
   8243 two_byte_offset_short_match_nolit_encodeBetterBlockAsm12B:
   8244 	MOVL R11, BX
   8245 	SHLL $0x02, BX // BX = length << 2
   8246 	CMPL R11, $0x0c
   8247 	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm12B // length >= 12 needs the 3-byte op
   8248 	CMPL DI, $0x00000800
   8249 	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm12B // distance >= 2048 needs the 3-byte op
   8250 	LEAL -15(BX), BX // 2-byte op: first byte = (length << 2) - 15, ORed with (distance >> 8) << 5
   8251 	MOVB DI, 1(AX) // second byte = low 8 bits of the distance
   8252 	SHRL $0x08, DI
   8253 	SHLL $0x05, DI
   8254 	ORL  DI, BX
   8255 	MOVB BL, (AX)
   8256 	ADDQ $0x02, AX
   8257 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
   8258 
   8259 emit_copy_three_match_nolit_encodeBetterBlockAsm12B:
   8260 	LEAL -2(BX), BX // 3-byte op: first byte = (length << 2) - 2, then 16-bit distance
   8261 	MOVB BL, (AX)
   8262 	MOVW DI, 1(AX)
   8263 	ADDQ $0x03, AX
   8264 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
   8265 
   8266 match_is_repeat_encodeBetterBlockAsm12B:
   8267 	MOVL 12(SP), BX // same distance as the stored one: emit pending bytes, then a repeat
   8268 	CMPL BX, SI
   8269 	JEQ  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B // no unemitted bytes before the match
   8270 	MOVL SI, R8
   8271 	MOVL SI, 12(SP)
   8272 	LEAQ (DX)(BX*1), R9 // R9 = source of the bytes to copy out
   8273 	SUBL BX, R8 // R8 = their length
   8274 	LEAL -1(R8), BX // BX = length - 1, the value encoded in the header
   8275 	CMPL BX, $0x3c
   8276 	JB   one_byte_match_emit_repeat_encodeBetterBlockAsm12B
   8277 	CMPL BX, $0x00000100
   8278 	JB   two_bytes_match_emit_repeat_encodeBetterBlockAsm12B
   8279 	JB   three_bytes_match_emit_repeat_encodeBetterBlockAsm12B // NOTE(review): same flags as the JB above, never taken; target is the fall-through anyway
   8280 
   8281 three_bytes_match_emit_repeat_encodeBetterBlockAsm12B:
   8282 	MOVB $0xf4, (AX) // header: 0xf4 followed by length-1 as 16 bits
   8283 	MOVW BX, 1(AX)
   8284 	ADDQ $0x03, AX
   8285 	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm12B
   8286 
   8287 two_bytes_match_emit_repeat_encodeBetterBlockAsm12B:
   8288 	MOVB $0xf0, (AX) // header: 0xf0 followed by length-1 as 8 bits
   8289 	MOVB BL, 1(AX)
   8290 	ADDQ $0x02, AX
   8291 	CMPL BX, $0x40
   8292 	JB   memmove_match_emit_repeat_encodeBetterBlockAsm12B // length <= 64: short copy
   8293 	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm12B
   8294 
   8295 one_byte_match_emit_repeat_encodeBetterBlockAsm12B:
   8296 	SHLB $0x02, BL // header: single byte (length-1) << 2
   8297 	MOVB BL, (AX)
   8298 	ADDQ $0x01, AX
   8299 
   8300 memmove_match_emit_repeat_encodeBetterBlockAsm12B:
   8301 	LEAQ (AX)(R8*1), BX // BX = dst pointer after the copy
   8302 
   8303 	// genMemMoveShort: copy R8 (<= 64) bytes from R9 to AX using overlapping loads/stores
   8304 	CMPQ R8, $0x04
   8305 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4
   8306 	CMPQ R8, $0x08
   8307 	JB   emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4through7
   8308 	CMPQ R8, $0x10
   8309 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_8through16
   8310 	CMPQ R8, $0x20
   8311 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_17through32
   8312 	JMP  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_33through64
   8313 
   8314 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4:
   8315 	MOVL (R9), R10 // always moves 4 bytes, even when R8 < 4; AX is still advanced by R8 only
   8316 	MOVL R10, (AX)
   8317 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B
   8318 
   8319 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4through7:
   8320 	MOVL (R9), R10 // first 4 and last 4 bytes (may overlap)
   8321 	MOVL -4(R9)(R8*1), R9
   8322 	MOVL R10, (AX)
   8323 	MOVL R9, -4(AX)(R8*1)
   8324 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B
   8325 
   8326 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_8through16:
   8327 	MOVQ (R9), R10 // first 8 and last 8 bytes (may overlap)
   8328 	MOVQ -8(R9)(R8*1), R9
   8329 	MOVQ R10, (AX)
   8330 	MOVQ R9, -8(AX)(R8*1)
   8331 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B
   8332 
   8333 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_17through32:
   8334 	MOVOU (R9), X0 // first 16 and last 16 bytes (may overlap)
   8335 	MOVOU -16(R9)(R8*1), X1
   8336 	MOVOU X0, (AX)
   8337 	MOVOU X1, -16(AX)(R8*1)
   8338 	JMP   memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B
   8339 
   8340 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_33through64:
   8341 	MOVOU (R9), X0 // first 32 and last 32 bytes (may overlap)
   8342 	MOVOU 16(R9), X1
   8343 	MOVOU -32(R9)(R8*1), X2
   8344 	MOVOU -16(R9)(R8*1), X3
   8345 	MOVOU X0, (AX)
   8346 	MOVOU X1, 16(AX)
   8347 	MOVOU X2, -32(AX)(R8*1)
   8348 	MOVOU X3, -16(AX)(R8*1)
   8349 
   8350 memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B:
   8351 	MOVQ BX, AX // advance dst past the copied bytes
   8352 	JMP  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B
   8353 
   8354 memmove_long_match_emit_repeat_encodeBetterBlockAsm12B:
   8355 	LEAQ (AX)(R8*1), BX // BX = dst pointer after the copy
   8356 
   8357 	// genMemMoveLong: copy R8 (> 64) bytes from R9 to AX
   8358 	MOVOU (R9), X0 // save the first and last 32 source bytes; they are stored last
   8359 	MOVOU 16(R9), X1
   8360 	MOVOU -32(R9)(R8*1), X2
   8361 	MOVOU -16(R9)(R8*1), X3
   8362 	MOVQ  R8, R12
   8363 	SHRQ  $0x05, R12 // R12 = number of whole 32-byte chunks
   8364 	MOVQ  AX, R10
   8365 	ANDL  $0x0000001f, R10 // R10 = dst misalignment within 32 bytes
   8366 	MOVQ  $0x00000040, R13
   8367 	SUBQ  R10, R13 // R13 = 64 - misalignment, so AX + R13 - 32 is 32-byte aligned
   8368 	DECQ  R12
   8369 	JA    emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 // NOTE(review): JA also reads CF, which DECQ does not write (left by the SUBQ above) - verify against the generator before touching
   8370 	LEAQ  -32(R9)(R13*1), R10
   8371 	LEAQ  -32(AX)(R13*1), R14
   8372 
   8373 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_big_loop_back:
   8374 	MOVOU (R10), X4
   8375 	MOVOU 16(R10), X5
   8376 	MOVOA X4, (R14) // aligned stores to dst
   8377 	MOVOA X5, 16(R14)
   8378 	ADDQ  $0x20, R14
   8379 	ADDQ  $0x20, R10
   8380 	ADDQ  $0x20, R13
   8381 	DECQ  R12
   8382 	JNA   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_big_loop_back
   8383 
   8384 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32:
   8385 	MOVOU -32(R9)(R13*1), X4 // copy 32 bytes ending at offset R13
   8386 	MOVOU -16(R9)(R13*1), X5
   8387 	MOVOA X4, -32(AX)(R13*1) // aligned stores to dst
   8388 	MOVOA X5, -16(AX)(R13*1)
   8389 	ADDQ  $0x20, R13
   8390 	CMPQ  R8, R13
   8391 	JAE   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 // continue while length >= R13
   8392 	MOVOU X0, (AX) // finally store the saved head and tail with unaligned stores
   8393 	MOVOU X1, 16(AX)
   8394 	MOVOU X2, -32(AX)(R8*1)
   8395 	MOVOU X3, -16(AX)(R8*1)
   8396 	MOVQ  BX, AX // advance dst past the copied bytes
   8397 
   8398 emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B:
   8399 	ADDL R11, CX // CX = position just after the match
   8400 	ADDL $0x04, R11 // R11 = total match length (4 + extension)
   8401 	MOVL CX, 12(SP) // everything before CX is now accounted for
   8402 
   8403 	// emitRepeat: R11 = length, DI = distance (equal to the one stored at 16(SP))
   8404 	MOVL R11, BX
   8405 	LEAL -4(R11), R11
   8406 	CMPL BX, $0x08
   8407 	JBE  repeat_two_match_nolit_repeat_encodeBetterBlockAsm12B // length <= 8: 2-byte form
   8408 	CMPL BX, $0x0c
   8409 	JAE  cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B
   8410 	CMPL DI, $0x00000800
   8411 	JB   repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B // length < 12 and distance < 2048
   8412 
   8413 cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B:
   8414 	CMPL R11, $0x00000104
   8415 	JB   repeat_three_match_nolit_repeat_encodeBetterBlockAsm12B
   8416 	LEAL -256(R11), R11
   8417 	MOVW $0x0019, (AX) // 4-byte form: 0x19, 0x00, then R11 as 16 bits
   8418 	MOVW R11, 2(AX)
   8419 	ADDQ $0x04, AX
   8420 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
   8421 
   8422 repeat_three_match_nolit_repeat_encodeBetterBlockAsm12B:
   8423 	LEAL -4(R11), R11
   8424 	MOVW $0x0015, (AX) // 3-byte form: 0x15, 0x00, then R11 as 8 bits
   8425 	MOVB R11, 2(AX)
   8426 	ADDQ $0x03, AX
   8427 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
   8428 
   8429 repeat_two_match_nolit_repeat_encodeBetterBlockAsm12B:
   8430 	SHLL $0x02, R11 // 2-byte form: 16-bit word (R11 << 2) | 1
   8431 	ORL  $0x01, R11
   8432 	MOVW R11, (AX)
   8433 	ADDQ $0x02, AX
   8434 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
   8435 
   8436 repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B:
   8437 	XORQ BX, BX
   8438 	LEAL 1(BX)(R11*4), R11 // first byte = 1 | R11 << 2 | (distance >> 8) << 5
   8439 	MOVB DI, 1(AX) // second byte = low 8 bits of the distance
   8440 	SARL $0x08, DI
   8441 	SHLL $0x05, DI
   8442 	ORL  DI, R11
   8443 	MOVB R11, (AX)
   8444 	ADDQ $0x02, AX
   8445 
   8446 match_nolit_emitcopy_end_encodeBetterBlockAsm12B:
   8447 	CMPL CX, 8(SP)
   8448 	JAE  emit_remainder_encodeBetterBlockAsm12B // at/after the search limit: flush the tail
   8449 	CMPQ AX, (SP)
   8450 	JB   match_nolit_dst_ok_encodeBetterBlockAsm12B
   8451 	MOVQ $0x00000000, ret+48(FP) // dst limit reached: return 0
   8452 	RET
   8453 
   8454 match_nolit_dst_ok_encodeBetterBlockAsm12B:
   8455 	MOVQ  $0x0000cf1bbcdcbf9b, BX // long-table hash multiplier
   8456 	MOVQ  $0x9e3779b1, DI // short-table hash multiplier
   8457 	LEAQ  1(SI), SI // SI = match start + 1
   8458 	LEAQ  -2(CX), R8 // R8 = match end - 2
   8459 	MOVQ  (DX)(SI*1), R9
   8460 	MOVQ  1(DX)(SI*1), R10
   8461 	MOVQ  (DX)(R8*1), R11
   8462 	MOVQ  1(DX)(R8*1), R12
   8463 	SHLQ  $0x10, R9
   8464 	IMULQ BX, R9
   8465 	SHRQ  $0x32, R9 // long index for SI
   8466 	SHLQ  $0x20, R10
   8467 	IMULQ DI, R10
   8468 	SHRQ  $0x34, R10 // short index for SI+1
   8469 	SHLQ  $0x10, R11
   8470 	IMULQ BX, R11
   8471 	SHRQ  $0x32, R11 // long index for R8
   8472 	SHLQ  $0x20, R12
   8473 	IMULQ DI, R12
   8474 	SHRQ  $0x34, R12 // short index for R8+1
   8475 	LEAQ  1(SI), DI
   8476 	LEAQ  1(R8), R13
   8477 	MOVL  SI, 24(SP)(R9*4) // seed both tables near the start and end of the match
   8478 	MOVL  R8, 24(SP)(R11*4)
   8479 	MOVL  DI, 65560(SP)(R10*4)
   8480 	MOVL  R13, 65560(SP)(R12*4)
   8481 	ADDQ  $0x01, SI
   8482 	SUBQ  $0x01, R8
   8483 
   8484 index_loop_encodeBetterBlockAsm12B:
   8485 	CMPQ  SI, R8
   8486 	JAE   search_loop_encodeBetterBlockAsm12B // the two cursors met: resume searching at CX
   8487 	MOVQ  (DX)(SI*1), DI
   8488 	MOVQ  (DX)(R8*1), R9
   8489 	SHLQ  $0x10, DI
   8490 	IMULQ BX, DI
   8491 	SHRQ  $0x32, DI
   8492 	SHLQ  $0x10, R9
   8493 	IMULQ BX, R9
   8494 	SHRQ  $0x32, R9
   8495 	MOVL  SI, 24(SP)(DI*4) // long table only, every second position from both ends
   8496 	MOVL  R8, 24(SP)(R9*4)
   8497 	ADDQ  $0x02, SI
   8498 	SUBQ  $0x02, R8
   8499 	JMP   index_loop_encodeBetterBlockAsm12B
   8500 
   8501 emit_remainder_encodeBetterBlockAsm12B:
   8502 	MOVQ src_len+32(FP), CX
   8503 	SUBL 12(SP), CX // CX = number of src bytes still unemitted
   8504 	LEAQ 3(AX)(CX*1), CX // dst pointer after those bytes plus 3
   8505 	CMPQ CX, (SP)
   8506 	JB   emit_remainder_ok_encodeBetterBlockAsm12B
   8507 	MOVQ $0x00000000, ret+48(FP) // would reach the dst limit: return 0
   8508 	RET
   8509 
   8510 emit_remainder_ok_encodeBetterBlockAsm12B:
   8511 	MOVQ src_len+32(FP), CX
   8512 	MOVL 12(SP), BX
   8513 	CMPL BX, CX
   8514 	JEQ  emit_literal_done_emit_remainder_encodeBetterBlockAsm12B // nothing left to emit
   8515 	MOVL CX, SI
   8516 	MOVL CX, 12(SP)
   8517 	LEAQ (DX)(BX*1), CX // CX = source of the remaining bytes
   8518 	SUBL BX, SI // SI = their length
   8519 	LEAL -1(SI), DX // DX = length - 1 (src base is no longer needed)
   8520 	CMPL DX, $0x3c
   8521 	JB   one_byte_emit_remainder_encodeBetterBlockAsm12B
   8522 	CMPL DX, $0x00000100
   8523 	JB   two_bytes_emit_remainder_encodeBetterBlockAsm12B
   8524 	JB   three_bytes_emit_remainder_encodeBetterBlockAsm12B // NOTE(review): same flags as the JB above, never taken; target is the fall-through anyway
   8525 
   8526 three_bytes_emit_remainder_encodeBetterBlockAsm12B:
   8527 	MOVB $0xf4, (AX) // header: 0xf4 followed by length-1 as 16 bits
   8528 	MOVW DX, 1(AX)
   8529 	ADDQ $0x03, AX
   8530 	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm12B
   8531 
   8532 two_bytes_emit_remainder_encodeBetterBlockAsm12B:
   8533 	MOVB $0xf0, (AX) // header: 0xf0 followed by length-1 as 8 bits
   8534 	MOVB DL, 1(AX)
   8535 	ADDQ $0x02, AX
   8536 	CMPL DX, $0x40
   8537 	JB   memmove_emit_remainder_encodeBetterBlockAsm12B // length <= 64: short copy
   8538 	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm12B
   8539 
   8540 one_byte_emit_remainder_encodeBetterBlockAsm12B:
   8541 	SHLB $0x02, DL // header: single byte (length-1) << 2
   8542 	MOVB DL, (AX)
   8543 	ADDQ $0x01, AX
   8544 
   8545 memmove_emit_remainder_encodeBetterBlockAsm12B:
   8546 	LEAQ (AX)(SI*1), DX // DX = dst pointer after the copy
   8547 	MOVL SI, BX // BX = length
   8548 
   8549 	// genMemMoveShort: copy BX (<= 64) bytes from CX to AX; unlike the match paths, sizes 1-3 are copied exactly
   8550 	CMPQ BX, $0x03
   8551 	JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_1or2
   8552 	JE   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_3
   8553 	CMPQ BX, $0x08
   8554 	JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_4through7
   8555 	CMPQ BX, $0x10
   8556 	JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_8through16
   8557 	CMPQ BX, $0x20
   8558 	JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_17through32
   8559 	JMP  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_33through64
   8560 
   8561 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_1or2:
   8562 	MOVB (CX), SI // first and last byte (the same byte when length is 1)
   8563 	MOVB -1(CX)(BX*1), CL
   8564 	MOVB SI, (AX)
   8565 	MOVB CL, -1(AX)(BX*1)
   8566 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
   8567 
   8568 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_3:
   8569 	MOVW (CX), SI // 2 bytes + 1 byte
   8570 	MOVB 2(CX), CL
   8571 	MOVW SI, (AX)
   8572 	MOVB CL, 2(AX)
   8573 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
   8574 
   8575 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_4through7:
   8576 	MOVL (CX), SI // first 4 and last 4 bytes (may overlap)
   8577 	MOVL -4(CX)(BX*1), CX
   8578 	MOVL SI, (AX)
   8579 	MOVL CX, -4(AX)(BX*1)
   8580 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
   8581 
   8582 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_8through16:
   8583 	MOVQ (CX), SI // first 8 and last 8 bytes (may overlap)
   8584 	MOVQ -8(CX)(BX*1), CX
   8585 	MOVQ SI, (AX)
   8586 	MOVQ CX, -8(AX)(BX*1)
   8587 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
   8588 
   8589 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_17through32:
   8590 	MOVOU (CX), X0 // first 16 and last 16 bytes (may overlap)
   8591 	MOVOU -16(CX)(BX*1), X1
   8592 	MOVOU X0, (AX)
   8593 	MOVOU X1, -16(AX)(BX*1)
   8594 	JMP   memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
   8595 
   8596 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_33through64:
   8597 	MOVOU (CX), X0 // first 32 and last 32 bytes (may overlap)
   8598 	MOVOU 16(CX), X1
   8599 	MOVOU -32(CX)(BX*1), X2
   8600 	MOVOU -16(CX)(BX*1), X3
   8601 	MOVOU X0, (AX)
   8602 	MOVOU X1, 16(AX)
   8603 	MOVOU X2, -32(AX)(BX*1)
   8604 	MOVOU X3, -16(AX)(BX*1)
   8605 
   8606 memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B:
   8607 	MOVQ DX, AX // advance dst past the copied bytes
   8608 	JMP  emit_literal_done_emit_remainder_encodeBetterBlockAsm12B
   8609 
   8610 memmove_long_emit_remainder_encodeBetterBlockAsm12B:
   8611 	LEAQ (AX)(SI*1), DX // DX = dst pointer after the copy
   8612 	MOVL SI, BX // BX = length
   8613 
   8614 	// genMemMoveLong: copy BX (> 64) bytes from CX to AX
   8615 	MOVOU (CX), X0 // save the first and last 32 source bytes; they are stored last
   8616 	MOVOU 16(CX), X1
   8617 	MOVOU -32(CX)(BX*1), X2
   8618 	MOVOU -16(CX)(BX*1), X3
   8619 	MOVQ  BX, DI
   8620 	SHRQ  $0x05, DI // DI = number of whole 32-byte chunks
   8621 	MOVQ  AX, SI
   8622 	ANDL  $0x0000001f, SI // SI = dst misalignment within 32 bytes
   8623 	MOVQ  $0x00000040, R8
   8624 	SUBQ  SI, R8 // R8 = 64 - misalignment, so AX + R8 - 32 is 32-byte aligned
   8625 	DECQ  DI
   8626 	JA    emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 // NOTE(review): JA also reads CF, which DECQ does not write (left by the SUBQ above) - verify against the generator before touching
   8627 	LEAQ  -32(CX)(R8*1), SI
   8628 	LEAQ  -32(AX)(R8*1), R9
   8629 
   8630 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back:
   8631 	MOVOU (SI), X4
   8632 	MOVOU 16(SI), X5
   8633 	MOVOA X4, (R9) // aligned stores to dst
   8634 	MOVOA X5, 16(R9)
   8635 	ADDQ  $0x20, R9
   8636 	ADDQ  $0x20, SI
   8637 	ADDQ  $0x20, R8
   8638 	DECQ  DI
   8639 	JNA   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back
   8640 
   8641 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32:
   8642 	MOVOU -32(CX)(R8*1), X4 // copy 32 bytes ending at offset R8
   8643 	MOVOU -16(CX)(R8*1), X5
   8644 	MOVOA X4, -32(AX)(R8*1) // aligned stores to dst
   8645 	MOVOA X5, -16(AX)(R8*1)
   8646 	ADDQ  $0x20, R8
   8647 	CMPQ  BX, R8
   8648 	JAE   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 // continue while length >= R8
   8649 	MOVOU X0, (AX) // finally store the saved head and tail with unaligned stores
   8650 	MOVOU X1, 16(AX)
   8651 	MOVOU X2, -32(AX)(BX*1)
   8652 	MOVOU X3, -16(AX)(BX*1)
   8653 	MOVQ  DX, AX // advance dst past the copied bytes
   8654 
   8655 emit_literal_done_emit_remainder_encodeBetterBlockAsm12B:
   8656 	MOVQ dst_base+0(FP), CX
   8657 	SUBQ CX, AX // bytes written = dst pointer - dst base
   8658 	MOVQ AX, ret+48(FP)
   8659 	RET
   8660 
   8661 // func encodeBetterBlockAsm10B(dst []byte, src []byte) int
   8662 // Requires: BMI, SSE2
        //
        // Encodes src into dst as a sequence of literal, copy and repeat elements
        // and returns the number of bytes written to dst. Returns 0 as soon as the
        // output pointer would reach the limit dst_base + len(src) - len(src)/32 - 6.
        //
        // Stack layout:
        //   0(SP)      output limit pointer (see above)
        //   8(SP)      search limit: len(src) - 8
        //   12(SP)     src offset of the first byte not yet emitted
        //   16(SP)     offset (distance) of the previous match, 0 initially
        //   20(SP)     src offset to continue searching from when no match is found
        //   24(SP)     table of 4096 x 4 bytes, indexed by a 12-bit hash of 6 bytes
        //   16408(SP)  table of 1024 x 4 bytes, indexed by a 10-bit hash of 4 bytes
        //
        // Registers kept for most of the function: AX = dst write pointer,
        // CX = current src offset, DX = src base (reused after emit_remainder_ok).
   8663 TEXT ·encodeBetterBlockAsm10B(SB), $20504-56
   8664 	MOVQ dst_base+0(FP), AX // AX = dst write pointer
   8665 	MOVQ $0x000000a0, CX // 160 iterations x 128 bytes = 20480 bytes, i.e. both tables
   8666 	LEAQ 24(SP), DX // tables start at 24(SP)
   8667 	PXOR X0, X0
   8668 
   8669 zero_loop_encodeBetterBlockAsm10B: // clear both hash tables, 128 bytes per iteration
   8670 	MOVOU X0, (DX)
   8671 	MOVOU X0, 16(DX)
   8672 	MOVOU X0, 32(DX)
   8673 	MOVOU X0, 48(DX)
   8674 	MOVOU X0, 64(DX)
   8675 	MOVOU X0, 80(DX)
   8676 	MOVOU X0, 96(DX)
   8677 	MOVOU X0, 112(DX)
   8678 	ADDQ  $0x80, DX
   8679 	DECQ  CX
   8680 	JNZ   zero_loop_encodeBetterBlockAsm10B
   8681 	MOVL  $0x00000000, 12(SP) // nothing emitted yet
   8682 	MOVQ  src_len+32(FP), CX
   8683 	LEAQ  -6(CX), DX
   8684 	LEAQ  -8(CX), BX
   8685 	MOVL  BX, 8(SP) // search limit = len(src) - 8
   8686 	SHRQ  $0x05, CX
   8687 	SUBL  CX, DX // DX = len(src) - 6 - len(src)/32
   8688 	LEAQ  (AX)(DX*1), DX
   8689 	MOVQ  DX, (SP) // output limit pointer
   8690 	MOVL  $0x00000001, CX // searching starts at src offset 1
   8691 	MOVL  $0x00000000, 16(SP) // no previous match offset
   8692 	MOVQ  src_base+24(FP), DX // DX = src base
   8693 
   8694 search_loop_encodeBetterBlockAsm10B: // CX = src offset being examined
   8695 	MOVL  CX, BX
   8696 	SUBL  12(SP), BX
   8697 	SHRL  $0x05, BX
   8698 	LEAL  1(CX)(BX*1), BX // next offset = CX + 1 + (CX - unemitted start)/32
   8699 	CMPL  BX, 8(SP)
   8700 	JAE   emit_remainder_encodeBetterBlockAsm10B // next offset >= search limit: stop searching
   8701 	MOVQ  (DX)(CX*1), SI // SI = 8 bytes of src at CX
   8702 	MOVL  BX, 20(SP) // remember next offset
   8703 	MOVQ  $0x0000cf1bbcdcbf9b, R8 // multiplier for the 6-byte hash
   8704 	MOVQ  $0x9e3779b1, BX // multiplier for the 4-byte hash
   8705 	MOVQ  SI, R9
   8706 	MOVQ  SI, R10
   8707 	SHLQ  $0x10, R9 // drop the top 2 bytes: hash covers the low 6 bytes
   8708 	IMULQ R8, R9
   8709 	SHRQ  $0x34, R9 // keep the top 12 bits
   8710 	SHLQ  $0x20, R10 // drop the top 4 bytes: hash covers the low 4 bytes
   8711 	IMULQ BX, R10
   8712 	SHRQ  $0x36, R10 // keep the top 10 bits
   8713 	MOVL  24(SP)(R9*4), BX // BX = candidate from the 12-bit table
   8714 	MOVL  16408(SP)(R10*4), DI // DI = candidate from the 10-bit table
   8715 	MOVL  CX, 24(SP)(R9*4) // record the current offset in both tables
   8716 	MOVL  CX, 16408(SP)(R10*4)
   8717 	MOVQ  (DX)(BX*1), R9 // 8 bytes at each candidate
   8718 	MOVQ  (DX)(DI*1), R10
   8719 	CMPQ  R9, SI
   8720 	JEQ   candidate_match_encodeBetterBlockAsm10B // all 8 bytes equal at BX
   8721 	CMPQ  R10, SI
   8722 	JNE   no_short_found_encodeBetterBlockAsm10B
   8723 	MOVL  DI, BX // all 8 bytes equal at DI: use it as the candidate
   8724 	JMP   candidate_match_encodeBetterBlockAsm10B
   8725 
   8726 no_short_found_encodeBetterBlockAsm10B: // no 8-byte match; retry comparing only the low 4 bytes
   8727 	CMPL R9, SI
   8728 	JEQ  candidate_match_encodeBetterBlockAsm10B
   8729 	CMPL R10, SI
   8730 	JEQ  candidateS_match_encodeBetterBlockAsm10B
   8731 	MOVL 20(SP), CX // no match: advance to the remembered next offset
   8732 	JMP  search_loop_encodeBetterBlockAsm10B
   8733 
   8734 candidateS_match_encodeBetterBlockAsm10B: // 4-byte match at DI; first look up offset CX+1 in the 12-bit table
   8735 	SHRQ  $0x08, SI // SI = bytes of src starting at CX+1 (7 valid bytes)
   8736 	MOVQ  SI, R9
   8737 	SHLQ  $0x10, R9
   8738 	IMULQ R8, R9
   8739 	SHRQ  $0x34, R9
   8740 	MOVL  24(SP)(R9*4), BX
   8741 	INCL  CX
   8742 	MOVL  CX, 24(SP)(R9*4)
   8743 	CMPL  (DX)(BX*1), SI
   8744 	JEQ   candidate_match_encodeBetterBlockAsm10B // 4 bytes match at CX+1: keep CX+1 and BX
   8745 	DECL  CX // otherwise go back to CX with the 10-bit table candidate
   8746 	MOVL  DI, BX
   8747 
   8748 candidate_match_encodeBetterBlockAsm10B: // CX = match position, BX = candidate position
   8749 	MOVL  12(SP), SI
   8750 	TESTL BX, BX
   8751 	JZ    match_extend_back_end_encodeBetterBlockAsm10B // candidate at offset 0 cannot be extended backwards
   8752 
   8753 match_extend_back_loop_encodeBetterBlockAsm10B: // extend the match backwards one byte at a time
   8754 	CMPL CX, SI
   8755 	JBE  match_extend_back_end_encodeBetterBlockAsm10B // do not go below the first unemitted byte
   8756 	MOVB -1(DX)(BX*1), DI
   8757 	MOVB -1(DX)(CX*1), R8
   8758 	CMPB DI, R8
   8759 	JNE  match_extend_back_end_encodeBetterBlockAsm10B
   8760 	LEAL -1(CX), CX
   8761 	DECL BX
   8762 	JZ   match_extend_back_end_encodeBetterBlockAsm10B
   8763 	JMP  match_extend_back_loop_encodeBetterBlockAsm10B
   8764 
   8765 match_extend_back_end_encodeBetterBlockAsm10B:
   8766 	MOVL CX, SI
   8767 	SUBL 12(SP), SI // SI = number of pending literal bytes
   8768 	LEAQ 3(AX)(SI*1), SI // write pointer + literal bytes + 3
   8769 	CMPQ SI, (SP)
   8770 	JB   match_dst_size_check_encodeBetterBlockAsm10B // below the output limit: continue
   8771 	MOVQ $0x00000000, ret+48(FP) // would reach the output limit: return 0
   8772 	RET
   8773 
   8774 match_dst_size_check_encodeBetterBlockAsm10B:
   8775 	MOVL CX, SI // SI = match start
   8776 	ADDL $0x04, CX // skip the 4 bytes already known to match
   8777 	ADDL $0x04, BX
   8778 	MOVQ src_len+32(FP), DI
   8779 	SUBL CX, DI // DI = bytes of src left after CX
   8780 	LEAQ (DX)(CX*1), R8 // R8 = &src[CX]
   8781 	LEAQ (DX)(BX*1), R9 // R9 = &src[BX]
   8782 
   8783 	// matchLen
   8784 	XORL R11, R11 // R11 = count of additional matching bytes
   8785 	CMPL DI, $0x08
   8786 	JB   matchlen_match4_match_nolit_encodeBetterBlockAsm10B
   8787 
   8788 matchlen_loopback_match_nolit_encodeBetterBlockAsm10B: // compare 8 bytes at a time
   8789 	MOVQ  (R8)(R11*1), R10
   8790 	XORQ  (R9)(R11*1), R10
   8791 	TESTQ R10, R10
   8792 	JZ    matchlen_loop_match_nolit_encodeBetterBlockAsm10B
   8793 
   8794 #ifdef GOAMD64_v3
   8795 	TZCNTQ R10, R10
   8796 
   8797 #else
   8798 	BSFQ R10, R10
   8799 
   8800 #endif
   8801 	SARQ $0x03, R10 // index of lowest differing bit / 8 = number of equal bytes
   8802 	LEAL (R11)(R10*1), R11
   8803 	JMP  match_nolit_end_encodeBetterBlockAsm10B
   8804 
   8805 matchlen_loop_match_nolit_encodeBetterBlockAsm10B:
   8806 	LEAL -8(DI), DI
   8807 	LEAL 8(R11), R11
   8808 	CMPL DI, $0x08
   8809 	JAE  matchlen_loopback_match_nolit_encodeBetterBlockAsm10B
   8810 	JZ   match_nolit_end_encodeBetterBlockAsm10B // not taken: falling past the JAE means DI < 8, so ZF is clear
   8811 
   8812 matchlen_match4_match_nolit_encodeBetterBlockAsm10B: // fewer than 8 bytes left: try 4, then 2, then 1
   8813 	CMPL DI, $0x04
   8814 	JB   matchlen_match2_match_nolit_encodeBetterBlockAsm10B
   8815 	MOVL (R8)(R11*1), R10
   8816 	CMPL (R9)(R11*1), R10
   8817 	JNE  matchlen_match2_match_nolit_encodeBetterBlockAsm10B
   8818 	SUBL $0x04, DI
   8819 	LEAL 4(R11), R11
   8820 
   8821 matchlen_match2_match_nolit_encodeBetterBlockAsm10B:
   8822 	CMPL DI, $0x02
   8823 	JB   matchlen_match1_match_nolit_encodeBetterBlockAsm10B
   8824 	MOVW (R8)(R11*1), R10
   8825 	CMPW (R9)(R11*1), R10
   8826 	JNE  matchlen_match1_match_nolit_encodeBetterBlockAsm10B
   8827 	SUBL $0x02, DI
   8828 	LEAL 2(R11), R11
   8829 
   8830 matchlen_match1_match_nolit_encodeBetterBlockAsm10B:
   8831 	CMPL DI, $0x01
   8832 	JB   match_nolit_end_encodeBetterBlockAsm10B
   8833 	MOVB (R8)(R11*1), R10
   8834 	CMPB (R9)(R11*1), R10
   8835 	JNE  match_nolit_end_encodeBetterBlockAsm10B
   8836 	LEAL 1(R11), R11
   8837 
   8838 match_nolit_end_encodeBetterBlockAsm10B:
   8839 	MOVL CX, DI
   8840 	SUBL BX, DI // DI = match offset (distance back to the candidate)
   8841 
   8842 	// Check if repeat
   8843 	CMPL 16(SP), DI
   8844 	JEQ  match_is_repeat_encodeBetterBlockAsm10B // same offset as the previous match
   8845 	MOVL DI, 16(SP) // remember this offset for the next match
   8846 	MOVL 12(SP), BX
   8847 	CMPL BX, SI
   8848 	JEQ  emit_literal_done_match_emit_encodeBetterBlockAsm10B // no pending literals
   8849 	MOVL SI, R8
   8850 	MOVL SI, 12(SP)
   8851 	LEAQ (DX)(BX*1), R9 // R9 = start of the pending literals
   8852 	SUBL BX, R8 // R8 = literal length
   8853 	LEAL -1(R8), BX // BX = literal length - 1
   8854 	CMPL BX, $0x3c
   8855 	JB   one_byte_match_emit_encodeBetterBlockAsm10B // length-1 < 60: 1-byte tag
   8856 	CMPL BX, $0x00000100
   8857 	JB   two_bytes_match_emit_encodeBetterBlockAsm10B // length-1 < 256: tag + 1 length byte
   8858 	JB   three_bytes_match_emit_encodeBetterBlockAsm10B // same flags as the JB above, so never taken; falls through
   8859 
   8860 three_bytes_match_emit_encodeBetterBlockAsm10B: // tag 0xf4 followed by 16-bit (length-1)
   8861 	MOVB $0xf4, (AX)
   8862 	MOVW BX, 1(AX)
   8863 	ADDQ $0x03, AX
   8864 	JMP  memmove_long_match_emit_encodeBetterBlockAsm10B
   8865 
   8866 two_bytes_match_emit_encodeBetterBlockAsm10B: // tag 0xf0 followed by 8-bit (length-1)
   8867 	MOVB $0xf0, (AX)
   8868 	MOVB BL, 1(AX)
   8869 	ADDQ $0x02, AX
   8870 	CMPL BX, $0x40
   8871 	JB   memmove_match_emit_encodeBetterBlockAsm10B // length-1 < 64: short copy
   8872 	JMP  memmove_long_match_emit_encodeBetterBlockAsm10B
   8873 
   8874 one_byte_match_emit_encodeBetterBlockAsm10B: // tag = (length-1) << 2
   8875 	SHLB $0x02, BL
   8876 	MOVB BL, (AX)
   8877 	ADDQ $0x01, AX
   8878 
   8879 memmove_match_emit_encodeBetterBlockAsm10B:
   8880 	LEAQ (AX)(R8*1), BX // BX = write pointer after the literals
   8881 
   8882 	// genMemMoveShort
   8883 	CMPQ R8, $0x04
   8884 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4
   8885 	CMPQ R8, $0x08
   8886 	JB   emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4through7
   8887 	CMPQ R8, $0x10
   8888 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_8through16
   8889 	CMPQ R8, $0x20
   8890 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_17through32
   8891 	JMP  emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_33through64
   8892 
   8893 emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4: // length <= 4: always moves 4 bytes; AX is then set from BX
   8894 	MOVL (R9), R10
   8895 	MOVL R10, (AX)
   8896 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm10B
   8897 
   8898 emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4through7: // two overlapping 4-byte moves
   8899 	MOVL (R9), R10
   8900 	MOVL -4(R9)(R8*1), R9
   8901 	MOVL R10, (AX)
   8902 	MOVL R9, -4(AX)(R8*1)
   8903 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm10B
   8904 
   8905 emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_8through16: // two overlapping 8-byte moves
   8906 	MOVQ (R9), R10
   8907 	MOVQ -8(R9)(R8*1), R9
   8908 	MOVQ R10, (AX)
   8909 	MOVQ R9, -8(AX)(R8*1)
   8910 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm10B
   8911 
   8912 emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_17through32: // two overlapping 16-byte moves
   8913 	MOVOU (R9), X0
   8914 	MOVOU -16(R9)(R8*1), X1
   8915 	MOVOU X0, (AX)
   8916 	MOVOU X1, -16(AX)(R8*1)
   8917 	JMP   memmove_end_copy_match_emit_encodeBetterBlockAsm10B
   8918 
   8919 emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_33through64: // first 32 and last 32 bytes, possibly overlapping
   8920 	MOVOU (R9), X0
   8921 	MOVOU 16(R9), X1
   8922 	MOVOU -32(R9)(R8*1), X2
   8923 	MOVOU -16(R9)(R8*1), X3
   8924 	MOVOU X0, (AX)
   8925 	MOVOU X1, 16(AX)
   8926 	MOVOU X2, -32(AX)(R8*1)
   8927 	MOVOU X3, -16(AX)(R8*1)
   8928 
   8929 memmove_end_copy_match_emit_encodeBetterBlockAsm10B:
   8930 	MOVQ BX, AX // advance the write pointer past the literals
   8931 	JMP  emit_literal_done_match_emit_encodeBetterBlockAsm10B
   8932 
   8933 memmove_long_match_emit_encodeBetterBlockAsm10B:
   8934 	LEAQ (AX)(R8*1), BX // BX = write pointer after the literals
   8935 
   8936 	// genMemMoveLong
   8937 	MOVOU (R9), X0 // first and last 32 source bytes are loaded now and stored last
   8938 	MOVOU 16(R9), X1
   8939 	MOVOU -32(R9)(R8*1), X2
   8940 	MOVOU -16(R9)(R8*1), X3
   8941 	MOVQ  R8, R12
   8942 	SHRQ  $0x05, R12 // R12 = length / 32
   8943 	MOVQ  AX, R10
   8944 	ANDL  $0x0000001f, R10
   8945 	MOVQ  $0x00000040, R13
   8946 	SUBQ  R10, R13 // R13 = 64 - (AX & 31), so AX + R13 - 32 is 32-byte aligned
   8947 	DECQ  R12
   8948 	JA    emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
   8949 	LEAQ  -32(R9)(R13*1), R10
   8950 	LEAQ  -32(AX)(R13*1), R14
   8951 
   8952 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_big_loop_back:
   8953 	MOVOU (R10), X4
   8954 	MOVOU 16(R10), X5
   8955 	MOVOA X4, (R14) // aligned stores
   8956 	MOVOA X5, 16(R14)
   8957 	ADDQ  $0x20, R14
   8958 	ADDQ  $0x20, R10
   8959 	ADDQ  $0x20, R13
   8960 	DECQ  R12
   8961 	JNA   emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_big_loop_back
   8962 
   8963 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32: // 32 bytes per iteration while length >= R13
   8964 	MOVOU -32(R9)(R13*1), X4
   8965 	MOVOU -16(R9)(R13*1), X5
   8966 	MOVOA X4, -32(AX)(R13*1)
   8967 	MOVOA X5, -16(AX)(R13*1)
   8968 	ADDQ  $0x20, R13
   8969 	CMPQ  R8, R13
   8970 	JAE   emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
   8971 	MOVOU X0, (AX) // finally store the saved head and tail
   8972 	MOVOU X1, 16(AX)
   8973 	MOVOU X2, -32(AX)(R8*1)
   8974 	MOVOU X3, -16(AX)(R8*1)
   8975 	MOVQ  BX, AX
   8976 
   8977 emit_literal_done_match_emit_encodeBetterBlockAsm10B:
   8978 	ADDL R11, CX // CX = end of the match
   8979 	ADDL $0x04, R11 // R11 = full match length (includes the first 4 bytes)
   8980 	MOVL CX, 12(SP) // everything up to the match end is now emitted
   8981 
   8982 	// emitCopy
   8983 	CMPL R11, $0x40
   8984 	JBE  two_byte_offset_short_match_nolit_encodeBetterBlockAsm10B // length <= 64: one copy element suffices
   8985 	CMPL DI, $0x00000800
   8986 	JAE  long_offset_short_match_nolit_encodeBetterBlockAsm10B // offset >= 2048
   8987 	MOVL $0x00000001, BX
   8988 	LEAL 16(BX), BX // BX = 17: low tag bits of a 2-byte copy element of length 8
   8989 	MOVB DI, 1(AX) // low 8 bits of the offset
   8990 	SHRL $0x08, DI
   8991 	SHLL $0x05, DI // offset bits 8..10 go into tag bits 5..7
   8992 	ORL  DI, BX
   8993 	MOVB BL, (AX)
   8994 	ADDQ $0x02, AX
   8995 	SUBL $0x08, R11 // 8 bytes covered; the rest is emitted as a repeat
   8996 
   8997 	// emitRepeat
   8998 	LEAL -4(R11), R11
   8999 	JMP  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b
   9000 	MOVL R11, BX // unreachable: follows an unconditional JMP and carries no label (through the JB below)
   9001 	LEAL -4(R11), R11
   9002 	CMPL BX, $0x08
   9003 	JBE  repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b
   9004 	CMPL BX, $0x0c
   9005 	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b
   9006 	CMPL DI, $0x00000800
   9007 	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b
   9008 
   9009 cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b:
   9010 	CMPL R11, $0x00000104
   9011 	JB   repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b // R11 < 260: 3-byte repeat
   9012 	LEAL -256(R11), R11
   9013 	MOVW $0x0019, (AX) // 4-byte repeat: bytes 0x19, 0x00, then 16-bit (R11 - 256)
   9014 	MOVW R11, 2(AX)
   9015 	ADDQ $0x04, AX
   9016 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
   9017 
   9018 repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b:
   9019 	LEAL -4(R11), R11
   9020 	MOVW $0x0015, (AX) // 3-byte repeat: bytes 0x15, 0x00, then 8-bit (R11 - 4)
   9021 	MOVB R11, 2(AX)
   9022 	ADDQ $0x03, AX
   9023 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
   9024 
   9025 repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b: // only referenced from the unreachable instructions above
   9026 	SHLL $0x02, R11
   9027 	ORL  $0x01, R11
   9028 	MOVW R11, (AX)
   9029 	ADDQ $0x02, AX
   9030 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
   9031 
   9032 repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b: // only referenced from the unreachable instructions above
   9033 	XORQ BX, BX
   9034 	LEAL 1(BX)(R11*4), R11
   9035 	MOVB DI, 1(AX)
   9036 	SARL $0x08, DI
   9037 	SHLL $0x05, DI
   9038 	ORL  DI, R11
   9039 	MOVB R11, (AX)
   9040 	ADDQ $0x02, AX
   9041 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
   9042 
   9043 long_offset_short_match_nolit_encodeBetterBlockAsm10B: // length > 64, offset >= 2048
   9044 	MOVB $0xee, (AX) // 3-byte copy element: tag 0xee = ((60-1) << 2) | 2, i.e. length 60
   9045 	MOVW DI, 1(AX) // 16-bit offset
   9046 	LEAL -60(R11), R11 // 60 bytes covered; the rest is emitted as a repeat
   9047 	ADDQ $0x03, AX
   9048 
   9049 	// emitRepeat
   9050 	MOVL R11, BX // BX = remaining length
   9051 	LEAL -4(R11), R11
   9052 	CMPL BX, $0x08
   9053 	JBE  repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short // length <= 8
   9054 	CMPL BX, $0x0c
   9055 	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short // length >= 12
   9056 	CMPL DI, $0x00000800
   9057 	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short // length 9..11 and offset < 2048
   9058 
   9059 cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
   9060 	CMPL R11, $0x00000104
   9061 	JB   repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short // length-4 < 260: 3-byte repeat
   9062 	LEAL -256(R11), R11
   9063 	MOVW $0x0019, (AX) // 4-byte repeat: bytes 0x19, 0x00, then 16-bit (length - 260)
   9064 	MOVW R11, 2(AX)
   9065 	ADDQ $0x04, AX
   9066 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
   9067 
   9068 repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
   9069 	LEAL -4(R11), R11
   9070 	MOVW $0x0015, (AX) // 3-byte repeat: bytes 0x15, 0x00, then 8-bit (length - 8)
   9071 	MOVB R11, 2(AX)
   9072 	ADDQ $0x03, AX
   9073 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
   9074 
   9075 repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short: // 2-byte repeat: 16-bit ((length-4) << 2) | 1
   9076 	SHLL $0x02, R11
   9077 	ORL  $0x01, R11
   9078 	MOVW R11, (AX)
   9079 	ADDQ $0x02, AX
   9080 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
   9081 
   9082 repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short: // 2-byte element carrying the offset again
   9083 	XORQ BX, BX
   9084 	LEAL 1(BX)(R11*4), R11 // ((length-4) << 2) | 1
   9085 	MOVB DI, 1(AX) // low 8 bits of the offset
   9086 	SARL $0x08, DI
   9087 	SHLL $0x05, DI // offset bits 8..10 go into tag bits 5..7
   9088 	ORL  DI, R11
   9089 	MOVB R11, (AX)
   9090 	ADDQ $0x02, AX
   9091 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
   9092 
   9093 two_byte_offset_short_match_nolit_encodeBetterBlockAsm10B: // length <= 64
   9094 	MOVL R11, BX
   9095 	SHLL $0x02, BX // BX = length << 2
   9096 	CMPL R11, $0x0c
   9097 	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm10B // length >= 12
   9098 	CMPL DI, $0x00000800
   9099 	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm10B // offset >= 2048
   9100 	LEAL -15(BX), BX // (length << 2) - 15 = ((length-4) << 2) | 1
   9101 	MOVB DI, 1(AX) // low 8 bits of the offset
   9102 	SHRL $0x08, DI
   9103 	SHLL $0x05, DI // offset bits 8..10 go into tag bits 5..7
   9104 	ORL  DI, BX
   9105 	MOVB BL, (AX)
   9106 	ADDQ $0x02, AX
   9107 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
   9108 
   9109 emit_copy_three_match_nolit_encodeBetterBlockAsm10B: // 3-byte copy element: tag + 16-bit offset
   9110 	LEAL -2(BX), BX // (length << 2) - 2 = ((length-1) << 2) | 2
   9111 	MOVB BL, (AX)
   9112 	MOVW DI, 1(AX)
   9113 	ADDQ $0x03, AX
   9114 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
   9115 
   9116 match_is_repeat_encodeBetterBlockAsm10B: // offset equals the previous one: emit literals, then a repeat
   9117 	MOVL 12(SP), BX
   9118 	CMPL BX, SI
   9119 	JEQ  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B // no pending literals
   9120 	MOVL SI, R8
   9121 	MOVL SI, 12(SP)
   9122 	LEAQ (DX)(BX*1), R9 // R9 = start of the pending literals
   9123 	SUBL BX, R8 // R8 = literal length
   9124 	LEAL -1(R8), BX // BX = literal length - 1
   9125 	CMPL BX, $0x3c
   9126 	JB   one_byte_match_emit_repeat_encodeBetterBlockAsm10B // length-1 < 60: 1-byte tag
   9127 	CMPL BX, $0x00000100
   9128 	JB   two_bytes_match_emit_repeat_encodeBetterBlockAsm10B // length-1 < 256: tag + 1 length byte
   9129 	JB   three_bytes_match_emit_repeat_encodeBetterBlockAsm10B // same flags as the JB above, so never taken; falls through
   9130 
   9131 three_bytes_match_emit_repeat_encodeBetterBlockAsm10B: // tag 0xf4 followed by 16-bit (length-1)
   9132 	MOVB $0xf4, (AX)
   9133 	MOVW BX, 1(AX)
   9134 	ADDQ $0x03, AX
   9135 	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm10B
   9136 
   9137 two_bytes_match_emit_repeat_encodeBetterBlockAsm10B: // tag 0xf0 followed by 8-bit (length-1)
   9138 	MOVB $0xf0, (AX)
   9139 	MOVB BL, 1(AX)
   9140 	ADDQ $0x02, AX
   9141 	CMPL BX, $0x40
   9142 	JB   memmove_match_emit_repeat_encodeBetterBlockAsm10B // length-1 < 64: short copy
   9143 	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm10B
   9144 
   9145 one_byte_match_emit_repeat_encodeBetterBlockAsm10B: // tag = (length-1) << 2
   9146 	SHLB $0x02, BL
   9147 	MOVB BL, (AX)
   9148 	ADDQ $0x01, AX
   9149 
   9150 memmove_match_emit_repeat_encodeBetterBlockAsm10B:
   9151 	LEAQ (AX)(R8*1), BX // BX = write pointer after the literals
   9152 
   9153 	// genMemMoveShort
   9154 	CMPQ R8, $0x04
   9155 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4
   9156 	CMPQ R8, $0x08
   9157 	JB   emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4through7
   9158 	CMPQ R8, $0x10
   9159 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_8through16
   9160 	CMPQ R8, $0x20
   9161 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_17through32
   9162 	JMP  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_33through64
   9163 
   9164 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4: // length <= 4: always moves 4 bytes; AX is then set from BX
   9165 	MOVL (R9), R10
   9166 	MOVL R10, (AX)
   9167 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B
   9168 
   9169 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4through7: // two overlapping 4-byte moves
   9170 	MOVL (R9), R10
   9171 	MOVL -4(R9)(R8*1), R9
   9172 	MOVL R10, (AX)
   9173 	MOVL R9, -4(AX)(R8*1)
   9174 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B
   9175 
   9176 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_8through16: // two overlapping 8-byte moves
   9177 	MOVQ (R9), R10
   9178 	MOVQ -8(R9)(R8*1), R9
   9179 	MOVQ R10, (AX)
   9180 	MOVQ R9, -8(AX)(R8*1)
   9181 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B
   9182 
   9183 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_17through32: // two overlapping 16-byte moves
   9184 	MOVOU (R9), X0
   9185 	MOVOU -16(R9)(R8*1), X1
   9186 	MOVOU X0, (AX)
   9187 	MOVOU X1, -16(AX)(R8*1)
   9188 	JMP   memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B
   9189 
   9190 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_33through64: // first 32 and last 32 bytes, possibly overlapping
   9191 	MOVOU (R9), X0
   9192 	MOVOU 16(R9), X1
   9193 	MOVOU -32(R9)(R8*1), X2
   9194 	MOVOU -16(R9)(R8*1), X3
   9195 	MOVOU X0, (AX)
   9196 	MOVOU X1, 16(AX)
   9197 	MOVOU X2, -32(AX)(R8*1)
   9198 	MOVOU X3, -16(AX)(R8*1)
   9199 
   9200 memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B:
   9201 	MOVQ BX, AX // advance the write pointer past the literals
   9202 	JMP  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B
   9203 
   9204 memmove_long_match_emit_repeat_encodeBetterBlockAsm10B:
   9205 	LEAQ (AX)(R8*1), BX // BX = write pointer after the literals
   9206 
   9207 	// genMemMoveLong
   9208 	MOVOU (R9), X0 // first and last 32 source bytes are loaded now and stored last
   9209 	MOVOU 16(R9), X1
   9210 	MOVOU -32(R9)(R8*1), X2
   9211 	MOVOU -16(R9)(R8*1), X3
   9212 	MOVQ  R8, R12
   9213 	SHRQ  $0x05, R12 // R12 = length / 32
   9214 	MOVQ  AX, R10
   9215 	ANDL  $0x0000001f, R10
   9216 	MOVQ  $0x00000040, R13
   9217 	SUBQ  R10, R13 // R13 = 64 - (AX & 31), so AX + R13 - 32 is 32-byte aligned
   9218 	DECQ  R12
   9219 	JA    emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
   9220 	LEAQ  -32(R9)(R13*1), R10
   9221 	LEAQ  -32(AX)(R13*1), R14
   9222 
   9223 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_big_loop_back:
   9224 	MOVOU (R10), X4
   9225 	MOVOU 16(R10), X5
   9226 	MOVOA X4, (R14) // aligned stores
   9227 	MOVOA X5, 16(R14)
   9228 	ADDQ  $0x20, R14
   9229 	ADDQ  $0x20, R10
   9230 	ADDQ  $0x20, R13
   9231 	DECQ  R12
   9232 	JNA   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_big_loop_back
   9233 
   9234 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32: // 32 bytes per iteration while length >= R13
   9235 	MOVOU -32(R9)(R13*1), X4
   9236 	MOVOU -16(R9)(R13*1), X5
   9237 	MOVOA X4, -32(AX)(R13*1)
   9238 	MOVOA X5, -16(AX)(R13*1)
   9239 	ADDQ  $0x20, R13
   9240 	CMPQ  R8, R13
   9241 	JAE   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
   9242 	MOVOU X0, (AX) // finally store the saved head and tail
   9243 	MOVOU X1, 16(AX)
   9244 	MOVOU X2, -32(AX)(R8*1)
   9245 	MOVOU X3, -16(AX)(R8*1)
   9246 	MOVQ  BX, AX
   9247 
   9248 emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B:
   9249 	ADDL R11, CX // CX = end of the match
   9250 	ADDL $0x04, R11 // R11 = full match length (includes the first 4 bytes)
   9251 	MOVL CX, 12(SP) // everything up to the match end is now emitted
   9252 
   9253 	// emitRepeat
   9254 	MOVL R11, BX // BX = match length
   9255 	LEAL -4(R11), R11
   9256 	CMPL BX, $0x08
   9257 	JBE  repeat_two_match_nolit_repeat_encodeBetterBlockAsm10B // length <= 8
   9258 	CMPL BX, $0x0c
   9259 	JAE  cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B // length >= 12
   9260 	CMPL DI, $0x00000800
   9261 	JB   repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B // length 9..11 and offset < 2048
   9262 
   9263 cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B:
   9264 	CMPL R11, $0x00000104
   9265 	JB   repeat_three_match_nolit_repeat_encodeBetterBlockAsm10B // length-4 < 260: 3-byte repeat
   9266 	LEAL -256(R11), R11
   9267 	MOVW $0x0019, (AX) // 4-byte repeat: bytes 0x19, 0x00, then 16-bit (length - 260)
   9268 	MOVW R11, 2(AX)
   9269 	ADDQ $0x04, AX
   9270 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
   9271 
   9272 repeat_three_match_nolit_repeat_encodeBetterBlockAsm10B:
   9273 	LEAL -4(R11), R11
   9274 	MOVW $0x0015, (AX) // 3-byte repeat: bytes 0x15, 0x00, then 8-bit (length - 8)
   9275 	MOVB R11, 2(AX)
   9276 	ADDQ $0x03, AX
   9277 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
   9278 
   9279 repeat_two_match_nolit_repeat_encodeBetterBlockAsm10B: // 2-byte repeat: 16-bit ((length-4) << 2) | 1
   9280 	SHLL $0x02, R11
   9281 	ORL  $0x01, R11
   9282 	MOVW R11, (AX)
   9283 	ADDQ $0x02, AX
   9284 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
   9285 
   9286 repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B: // 2-byte element carrying the offset again
   9287 	XORQ BX, BX
   9288 	LEAL 1(BX)(R11*4), R11 // ((length-4) << 2) | 1
   9289 	MOVB DI, 1(AX) // low 8 bits of the offset
   9290 	SARL $0x08, DI
   9291 	SHLL $0x05, DI // offset bits 8..10 go into tag bits 5..7
   9292 	ORL  DI, R11
   9293 	MOVB R11, (AX)
   9294 	ADDQ $0x02, AX
   9295 
   9296 match_nolit_emitcopy_end_encodeBetterBlockAsm10B:
   9297 	CMPL CX, 8(SP)
   9298 	JAE  emit_remainder_encodeBetterBlockAsm10B // match ended at or past the search limit
   9299 	CMPQ AX, (SP)
   9300 	JB   match_nolit_dst_ok_encodeBetterBlockAsm10B // write pointer still below the output limit
   9301 	MOVQ $0x00000000, ret+48(FP) // output limit reached: return 0
   9302 	RET
   9303 
   9304 match_nolit_dst_ok_encodeBetterBlockAsm10B: // record positions inside the match in the tables
   9305 	MOVQ  $0x0000cf1bbcdcbf9b, BX // multiplier for the 6-byte hash
   9306 	MOVQ  $0x9e3779b1, DI // multiplier for the 4-byte hash
   9307 	LEAQ  1(SI), SI // SI = match start + 1
   9308 	LEAQ  -2(CX), R8 // R8 = match end - 2
   9309 	MOVQ  (DX)(SI*1), R9
   9310 	MOVQ  1(DX)(SI*1), R10
   9311 	MOVQ  (DX)(R8*1), R11
   9312 	MOVQ  1(DX)(R8*1), R12
   9313 	SHLQ  $0x10, R9
   9314 	IMULQ BX, R9
   9315 	SHRQ  $0x34, R9 // 12-bit hash at SI
   9316 	SHLQ  $0x20, R10
   9317 	IMULQ DI, R10
   9318 	SHRQ  $0x36, R10 // 10-bit hash at SI+1
   9319 	SHLQ  $0x10, R11
   9320 	IMULQ BX, R11
   9321 	SHRQ  $0x34, R11 // 12-bit hash at R8
   9322 	SHLQ  $0x20, R12
   9323 	IMULQ DI, R12
   9324 	SHRQ  $0x36, R12 // 10-bit hash at R8+1
   9325 	LEAQ  1(SI), DI
   9326 	LEAQ  1(R8), R13
   9327 	MOVL  SI, 24(SP)(R9*4)
   9328 	MOVL  R8, 24(SP)(R11*4)
   9329 	MOVL  DI, 16408(SP)(R10*4)
   9330 	MOVL  R13, 16408(SP)(R12*4)
   9331 	ADDQ  $0x01, SI
   9332 	SUBQ  $0x01, R8
   9333 
   9334 index_loop_encodeBetterBlockAsm10B: // fill the 12-bit table from both ends, stepping by 2, until SI >= R8
   9335 	CMPQ  SI, R8
   9336 	JAE   search_loop_encodeBetterBlockAsm10B
   9337 	MOVQ  (DX)(SI*1), DI
   9338 	MOVQ  (DX)(R8*1), R9
   9339 	SHLQ  $0x10, DI
   9340 	IMULQ BX, DI
   9341 	SHRQ  $0x34, DI
   9342 	SHLQ  $0x10, R9
   9343 	IMULQ BX, R9
   9344 	SHRQ  $0x34, R9
   9345 	MOVL  SI, 24(SP)(DI*4)
   9346 	MOVL  R8, 24(SP)(R9*4)
   9347 	ADDQ  $0x02, SI
   9348 	SUBQ  $0x02, R8
   9349 	JMP   index_loop_encodeBetterBlockAsm10B
   9350 
   9351 emit_remainder_encodeBetterBlockAsm10B: // emit everything from 12(SP) to the end of src as literals
   9352 	MOVQ src_len+32(FP), CX
   9353 	SUBL 12(SP), CX // CX = number of remaining bytes
   9354 	LEAQ 3(AX)(CX*1), CX // write pointer + remaining bytes + 3
   9355 	CMPQ CX, (SP)
   9356 	JB   emit_remainder_ok_encodeBetterBlockAsm10B // below the output limit: continue
   9357 	MOVQ $0x00000000, ret+48(FP) // would reach the output limit: return 0
   9358 	RET
   9359 
   9360 emit_remainder_ok_encodeBetterBlockAsm10B:
   9361 	MOVQ src_len+32(FP), CX
   9362 	MOVL 12(SP), BX
   9363 	CMPL BX, CX
   9364 	JEQ  emit_literal_done_emit_remainder_encodeBetterBlockAsm10B // nothing left to emit
   9365 	MOVL CX, SI
   9366 	MOVL CX, 12(SP)
   9367 	LEAQ (DX)(BX*1), CX // CX = start of the remaining bytes
   9368 	SUBL BX, SI // SI = literal length
   9369 	LEAL -1(SI), DX // DX = literal length - 1 (src base is no longer needed)
   9370 	CMPL DX, $0x3c
   9371 	JB   one_byte_emit_remainder_encodeBetterBlockAsm10B // length-1 < 60: 1-byte tag
   9372 	CMPL DX, $0x00000100
   9373 	JB   two_bytes_emit_remainder_encodeBetterBlockAsm10B // length-1 < 256: tag + 1 length byte
   9374 	JB   three_bytes_emit_remainder_encodeBetterBlockAsm10B // same flags as the JB above, so never taken; falls through
   9375 
   9376 three_bytes_emit_remainder_encodeBetterBlockAsm10B: // tag 0xf4 followed by 16-bit (length-1)
   9377 	MOVB $0xf4, (AX)
   9378 	MOVW DX, 1(AX)
   9379 	ADDQ $0x03, AX
   9380 	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm10B
   9381 
   9382 two_bytes_emit_remainder_encodeBetterBlockAsm10B: // tag 0xf0 followed by 8-bit (length-1)
   9383 	MOVB $0xf0, (AX)
   9384 	MOVB DL, 1(AX)
   9385 	ADDQ $0x02, AX
   9386 	CMPL DX, $0x40
   9387 	JB   memmove_emit_remainder_encodeBetterBlockAsm10B // length-1 < 64: short copy
   9388 	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm10B
   9389 
   9390 one_byte_emit_remainder_encodeBetterBlockAsm10B: // tag = (length-1) << 2
   9391 	SHLB $0x02, DL
   9392 	MOVB DL, (AX)
   9393 	ADDQ $0x01, AX
   9394 
   9395 memmove_emit_remainder_encodeBetterBlockAsm10B:
   9396 	LEAQ (AX)(SI*1), DX // DX = write pointer after the literals
   9397 	MOVL SI, BX // BX = literal length
   9398 
   9399 	// genMemMoveShort
   9400 	CMPQ BX, $0x03
   9401 	JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_1or2
   9402 	JE   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_3
   9403 	CMPQ BX, $0x08
   9404 	JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_4through7
   9405 	CMPQ BX, $0x10
   9406 	JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_8through16
   9407 	CMPQ BX, $0x20
   9408 	JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_17through32
   9409 	JMP  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_33through64
   9410 
   9411 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_1or2: // first byte and last byte (the same byte when length is 1)
   9412 	MOVB (CX), SI
   9413 	MOVB -1(CX)(BX*1), CL
   9414 	MOVB SI, (AX)
   9415 	MOVB CL, -1(AX)(BX*1)
   9416 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
   9417 
   9418 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_3: // exactly 3 bytes: a word plus a byte
   9419 	MOVW (CX), SI
   9420 	MOVB 2(CX), CL
   9421 	MOVW SI, (AX)
   9422 	MOVB CL, 2(AX)
   9423 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
   9424 
   9425 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_4through7: // two overlapping 4-byte moves
   9426 	MOVL (CX), SI
   9427 	MOVL -4(CX)(BX*1), CX
   9428 	MOVL SI, (AX)
   9429 	MOVL CX, -4(AX)(BX*1)
   9430 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
   9431 
   9432 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_8through16: // two overlapping 8-byte moves
   9433 	MOVQ (CX), SI
   9434 	MOVQ -8(CX)(BX*1), CX
   9435 	MOVQ SI, (AX)
   9436 	MOVQ CX, -8(AX)(BX*1)
   9437 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
   9438 
   9439 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_17through32: // two overlapping 16-byte moves
   9440 	MOVOU (CX), X0
   9441 	MOVOU -16(CX)(BX*1), X1
   9442 	MOVOU X0, (AX)
   9443 	MOVOU X1, -16(AX)(BX*1)
   9444 	JMP   memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
   9445 
   9446 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_33through64: // first 32 and last 32 bytes, possibly overlapping
   9447 	MOVOU (CX), X0
   9448 	MOVOU 16(CX), X1
   9449 	MOVOU -32(CX)(BX*1), X2
   9450 	MOVOU -16(CX)(BX*1), X3
   9451 	MOVOU X0, (AX)
   9452 	MOVOU X1, 16(AX)
   9453 	MOVOU X2, -32(AX)(BX*1)
   9454 	MOVOU X3, -16(AX)(BX*1)
   9455 
   9456 memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B:
   9457 	MOVQ DX, AX // advance the write pointer past the literals
   9458 	JMP  emit_literal_done_emit_remainder_encodeBetterBlockAsm10B
   9459 
   9460 memmove_long_emit_remainder_encodeBetterBlockAsm10B:
   9461 	LEAQ (AX)(SI*1), DX // DX = write pointer after the literals
   9462 	MOVL SI, BX // BX = literal length
   9463 
   9464 	// genMemMoveLong
   9465 	MOVOU (CX), X0 // first and last 32 source bytes are loaded now and stored last
   9466 	MOVOU 16(CX), X1
   9467 	MOVOU -32(CX)(BX*1), X2
   9468 	MOVOU -16(CX)(BX*1), X3
   9469 	MOVQ  BX, DI
   9470 	SHRQ  $0x05, DI // DI = length / 32
   9471 	MOVQ  AX, SI
   9472 	ANDL  $0x0000001f, SI
   9473 	MOVQ  $0x00000040, R8
   9474 	SUBQ  SI, R8 // R8 = 64 - (AX & 31), so AX + R8 - 32 is 32-byte aligned
   9475 	DECQ  DI
   9476 	JA    emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
   9477 	LEAQ  -32(CX)(R8*1), SI
   9478 	LEAQ  -32(AX)(R8*1), R9
   9479 
   9480 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back:
   9481 	MOVOU (SI), X4
   9482 	MOVOU 16(SI), X5
   9483 	MOVOA X4, (R9) // aligned stores
   9484 	MOVOA X5, 16(R9)
   9485 	ADDQ  $0x20, R9
   9486 	ADDQ  $0x20, SI
   9487 	ADDQ  $0x20, R8
   9488 	DECQ  DI
   9489 	JNA   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back
   9490 
   9491 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32: // 32 bytes per iteration while length >= R8
   9492 	MOVOU -32(CX)(R8*1), X4
   9493 	MOVOU -16(CX)(R8*1), X5
   9494 	MOVOA X4, -32(AX)(R8*1)
   9495 	MOVOA X5, -16(AX)(R8*1)
   9496 	ADDQ  $0x20, R8
   9497 	CMPQ  BX, R8
   9498 	JAE   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
   9499 	MOVOU X0, (AX) // finally store the saved head and tail
   9500 	MOVOU X1, 16(AX)
   9501 	MOVOU X2, -32(AX)(BX*1)
   9502 	MOVOU X3, -16(AX)(BX*1)
   9503 	MOVQ  DX, AX
   9504 
   9505 emit_literal_done_emit_remainder_encodeBetterBlockAsm10B:
   9506 	MOVQ dst_base+0(FP), CX
   9507 	SUBQ CX, AX // bytes written = write pointer - dst base
   9508 	MOVQ AX, ret+48(FP)
   9509 	RET
   9510 
   9511 // func encodeBetterBlockAsm8B(dst []byte, src []byte) int
   9512 // Requires: BMI, SSE2
   9513 TEXT ·encodeBetterBlockAsm8B(SB), $5144-56
   9514 	MOVQ dst_base+0(FP), AX
   9515 	MOVQ $0x00000028, CX
   9516 	LEAQ 24(SP), DX
   9517 	PXOR X0, X0
   9518 
   9519 zero_loop_encodeBetterBlockAsm8B:
   9520 	MOVOU X0, (DX)
   9521 	MOVOU X0, 16(DX)
   9522 	MOVOU X0, 32(DX)
   9523 	MOVOU X0, 48(DX)
   9524 	MOVOU X0, 64(DX)
   9525 	MOVOU X0, 80(DX)
   9526 	MOVOU X0, 96(DX)
   9527 	MOVOU X0, 112(DX)
   9528 	ADDQ  $0x80, DX
   9529 	DECQ  CX
   9530 	JNZ   zero_loop_encodeBetterBlockAsm8B
   9531 	MOVL  $0x00000000, 12(SP)
   9532 	MOVQ  src_len+32(FP), CX
   9533 	LEAQ  -6(CX), DX
   9534 	LEAQ  -8(CX), BX
   9535 	MOVL  BX, 8(SP)
   9536 	SHRQ  $0x05, CX
   9537 	SUBL  CX, DX
   9538 	LEAQ  (AX)(DX*1), DX
   9539 	MOVQ  DX, (SP)
   9540 	MOVL  $0x00000001, CX
   9541 	MOVL  $0x00000000, 16(SP)
   9542 	MOVQ  src_base+24(FP), DX
   9543 
   9544 search_loop_encodeBetterBlockAsm8B:
        	// Main search loop: CX = current position s in src, DX = src base.
        	// BX = s + 1 + ((s - 12(SP)) >> 4) is the position to try next if s
        	// yields no match.
   9545 	MOVL  CX, BX
   9546 	SUBL  12(SP), BX
   9547 	SHRL  $0x04, BX
   9548 	LEAL  1(CX)(BX*1), BX
        	// Leave the loop once that next position reaches the limit in 8(SP).
   9549 	CMPL  BX, 8(SP)
   9550 	JAE   emit_remainder_encodeBetterBlockAsm8B
        	// SI = 8 bytes of src at s; 20(SP) = saved next position.
   9551 	MOVQ  (DX)(CX*1), SI
   9552 	MOVL  BX, 20(SP)
        	// R9  = ((SI << 16) * R8) >> 54: 10-bit hash of the low 6 bytes of SI.
        	// R10 = ((SI << 32) * BX) >> 56: 8-bit hash of the low 4 bytes of SI.
   9553 	MOVQ  $0x0000cf1bbcdcbf9b, R8
   9554 	MOVQ  $0x9e3779b1, BX
   9555 	MOVQ  SI, R9
   9556 	MOVQ  SI, R10
   9557 	SHLQ  $0x10, R9
   9558 	IMULQ R8, R9
   9559 	SHRQ  $0x36, R9
   9560 	SHLQ  $0x20, R10
   9561 	IMULQ BX, R10
   9562 	SHRQ  $0x38, R10
        	// BX / DI = positions previously stored under those hashes in the
        	// 24(SP) and 4120(SP) tables; then record s in both slots.
   9563 	MOVL  24(SP)(R9*4), BX
   9564 	MOVL  4120(SP)(R10*4), DI
   9565 	MOVL  CX, 24(SP)(R9*4)
   9566 	MOVL  CX, 4120(SP)(R10*4)
        	// R9 / R10 = 8 bytes of src at each candidate position.
   9567 	MOVQ  (DX)(BX*1), R9
   9568 	MOVQ  (DX)(DI*1), R10
        	// An 8-byte match at BX is taken first, then an 8-byte match at DI
        	// (BX is overwritten with DI in that case).
   9569 	CMPQ  R9, SI
   9570 	JEQ   candidate_match_encodeBetterBlockAsm8B
   9571 	CMPQ  R10, SI
   9572 	JNE   no_short_found_encodeBetterBlockAsm8B
   9573 	MOVL  DI, BX
   9574 	JMP   candidate_match_encodeBetterBlockAsm8B
   9575 
   9576 no_short_found_encodeBetterBlockAsm8B:
        	// Neither candidate matched all 8 bytes; settle for equality of the
        	// low 4 bytes, again testing the BX candidate before the DI candidate.
   9577 	CMPL R9, SI
   9578 	JEQ  candidate_match_encodeBetterBlockAsm8B
   9579 	CMPL R10, SI
   9580 	JEQ  candidateS_match_encodeBetterBlockAsm8B
        	// No match at s: continue at the saved next position.
   9581 	MOVL 20(SP), CX
   9582 	JMP  search_loop_encodeBetterBlockAsm8B
   9583 
   9584 candidateS_match_encodeBetterBlockAsm8B:
        	// Only the DI candidate matched 4 bytes. Before using it, probe the
        	// 24(SP) table for position s+1: SI >>= 8 drops the first byte, the
        	// 10-bit hash of the next 6 bytes selects the slot, s+1 is stored
        	// there, and the old entry is accepted if its first 4 bytes equal
        	// the low 4 bytes of SI.
   9585 	SHRQ  $0x08, SI
   9586 	MOVQ  SI, R9
   9587 	SHLQ  $0x10, R9
   9588 	IMULQ R8, R9
   9589 	SHRQ  $0x36, R9
   9590 	MOVL  24(SP)(R9*4), BX
   9591 	INCL  CX
   9592 	MOVL  CX, 24(SP)(R9*4)
   9593 	CMPL  (DX)(BX*1), SI
   9594 	JEQ   candidate_match_encodeBetterBlockAsm8B
        	// Probe failed: go back to s and use the DI candidate.
   9595 	DECL  CX
   9596 	MOVL  DI, BX
   9597 
   9598 candidate_match_encodeBetterBlockAsm8B:
        	// Entered with CX = match position and BX = candidate position.
        	// SI = 12(SP) is the lowest value CX may be moved back to. A candidate
        	// at position 0 has no preceding byte, so the loop is skipped.
   9599 	MOVL  12(SP), SI
   9600 	TESTL BX, BX
   9601 	JZ    match_extend_back_end_encodeBetterBlockAsm8B
   9602 
   9603 match_extend_back_loop_encodeBetterBlockAsm8B:
        	// Move both positions back one byte at a time while CX is above SI,
        	// the bytes just before each position are equal, and BX is non-zero.
   9604 	CMPL CX, SI
   9605 	JBE  match_extend_back_end_encodeBetterBlockAsm8B
   9606 	MOVB -1(DX)(BX*1), DI
   9607 	MOVB -1(DX)(CX*1), R8
   9608 	CMPB DI, R8
   9609 	JNE  match_extend_back_end_encodeBetterBlockAsm8B
   9610 	LEAL -1(CX), CX
   9611 	DECL BX
   9612 	JZ   match_extend_back_end_encodeBetterBlockAsm8B
   9613 	JMP  match_extend_back_loop_encodeBetterBlockAsm8B
   9614 
   9615 match_extend_back_end_encodeBetterBlockAsm8B:
        	// Output-size check: continue only if AX + (CX - 12(SP)) + 3 is below
        	// the dst limit in (SP); otherwise return 0.
   9616 	MOVL CX, SI
   9617 	SUBL 12(SP), SI
   9618 	LEAQ 3(AX)(SI*1), SI
   9619 	CMPQ SI, (SP)
   9620 	JB   match_dst_size_check_encodeBetterBlockAsm8B
   9621 	MOVQ $0x00000000, ret+48(FP)
   9622 	RET
   9623 
   9624 match_dst_size_check_encodeBetterBlockAsm8B:
        	// SI keeps the match start. Both positions are advanced by 4 bytes
        	// before the length count begins. DI = len(src) - CX is the number
        	// of bytes that may still be compared; R8 / R9 point at src[CX] and
        	// src[BX].
   9625 	MOVL CX, SI
   9626 	ADDL $0x04, CX
   9627 	ADDL $0x04, BX
   9628 	MOVQ src_len+32(FP), DI
   9629 	SUBL CX, DI
   9630 	LEAQ (DX)(CX*1), R8
   9631 	LEAQ (DX)(BX*1), R9
   9632 
   9633 	// matchLen
        	// R11 = number of equal bytes at R8 and R9, bounded by DI.
   9634 	XORL R11, R11
   9635 	CMPL DI, $0x08
   9636 	JB   matchlen_match4_match_nolit_encodeBetterBlockAsm8B
   9637 
   9638 matchlen_loopback_match_nolit_encodeBetterBlockAsm8B:
        	// Compare 8 bytes at once; a zero XOR means all 8 are equal.
   9639 	MOVQ  (R8)(R11*1), R10
   9640 	XORQ  (R9)(R11*1), R10
   9641 	TESTQ R10, R10
   9642 	JZ    matchlen_loop_match_nolit_encodeBetterBlockAsm8B
   9643 
        	// Index of the lowest set bit of the XOR, divided by 8, is the number
        	// of equal leading bytes in this 8-byte word.
   9644 #ifdef GOAMD64_v3
   9645 	TZCNTQ R10, R10
   9646 
   9647 #else
   9648 	BSFQ R10, R10
   9649 
   9650 #endif
   9651 	SARQ $0x03, R10
   9652 	LEAL (R11)(R10*1), R11
   9653 	JMP  match_nolit_end_encodeBetterBlockAsm8B
   9654 
   9655 matchlen_loop_match_nolit_encodeBetterBlockAsm8B:
        	// Consume the 8 equal bytes and repeat while at least 8 remain.
        	// LEAL leaves the flags alone, so both jumps test the CMPL result;
        	// DI == 8 is already taken by JAE, so the JZ is never followed.
   9656 	LEAL -8(DI), DI
   9657 	LEAL 8(R11), R11
   9658 	CMPL DI, $0x08
   9659 	JAE  matchlen_loopback_match_nolit_encodeBetterBlockAsm8B
   9660 	JZ   match_nolit_end_encodeBetterBlockAsm8B
   9661 
   9662 matchlen_match4_match_nolit_encodeBetterBlockAsm8B:
        	// Fewer than 8 bytes left: try a 4-byte, then 2-byte, then 1-byte
        	// compare, each only if that many bytes remain.
   9663 	CMPL DI, $0x04
   9664 	JB   matchlen_match2_match_nolit_encodeBetterBlockAsm8B
   9665 	MOVL (R8)(R11*1), R10
   9666 	CMPL (R9)(R11*1), R10
   9667 	JNE  matchlen_match2_match_nolit_encodeBetterBlockAsm8B
   9668 	SUBL $0x04, DI
   9669 	LEAL 4(R11), R11
   9670 
   9671 matchlen_match2_match_nolit_encodeBetterBlockAsm8B:
   9672 	CMPL DI, $0x02
   9673 	JB   matchlen_match1_match_nolit_encodeBetterBlockAsm8B
   9674 	MOVW (R8)(R11*1), R10
   9675 	CMPW (R9)(R11*1), R10
   9676 	JNE  matchlen_match1_match_nolit_encodeBetterBlockAsm8B
   9677 	SUBL $0x02, DI
   9678 	LEAL 2(R11), R11
   9679 
   9680 matchlen_match1_match_nolit_encodeBetterBlockAsm8B:
   9681 	CMPL DI, $0x01
   9682 	JB   match_nolit_end_encodeBetterBlockAsm8B
   9683 	MOVB (R8)(R11*1), R10
   9684 	CMPB (R9)(R11*1), R10
   9685 	JNE  match_nolit_end_encodeBetterBlockAsm8B
   9686 	LEAL 1(R11), R11
   9687 
   9688 match_nolit_end_encodeBetterBlockAsm8B:
        	// DI = CX - BX: distance from the match position back to the candidate.
   9689 	MOVL CX, DI
   9690 	SUBL BX, DI
   9691 
   9692 	// Check if repeat
        	// Same offset as the one stored in 16(SP): take the repeat path.
        	// Otherwise store the new offset.
   9693 	CMPL 16(SP), DI
   9694 	JEQ  match_is_repeat_encodeBetterBlockAsm8B
   9695 	MOVL DI, 16(SP)
        	// Emit src[12(SP):SI] as literals unless that range is empty.
        	// R9 = source pointer, R8 = length, BX = length - 1; 12(SP) is
        	// updated to SI.
   9696 	MOVL 12(SP), BX
   9697 	CMPL BX, SI
   9698 	JEQ  emit_literal_done_match_emit_encodeBetterBlockAsm8B
   9699 	MOVL SI, R8
   9700 	MOVL SI, 12(SP)
   9701 	LEAQ (DX)(BX*1), R9
   9702 	SUBL BX, R8
   9703 	LEAL -1(R8), BX
        	// Choose the header size from length - 1: below 60 one byte, below
        	// 256 two bytes, otherwise three bytes. The second JB tests the same
        	// flags as the first, so it is never taken and execution falls
        	// through into the three-byte case.
   9704 	CMPL BX, $0x3c
   9705 	JB   one_byte_match_emit_encodeBetterBlockAsm8B
   9706 	CMPL BX, $0x00000100
   9707 	JB   two_bytes_match_emit_encodeBetterBlockAsm8B
   9708 	JB   three_bytes_match_emit_encodeBetterBlockAsm8B
   9709 
   9710 three_bytes_match_emit_encodeBetterBlockAsm8B:
        	// Header: 0xf4 followed by length - 1 as a 16-bit value.
   9711 	MOVB $0xf4, (AX)
   9712 	MOVW BX, 1(AX)
   9713 	ADDQ $0x03, AX
   9714 	JMP  memmove_long_match_emit_encodeBetterBlockAsm8B
   9715 
   9716 two_bytes_match_emit_encodeBetterBlockAsm8B:
        	// Header: 0xf0 followed by length - 1 as one byte. Lengths up to 64
        	// use the short copy, longer ones the bulk copy.
   9717 	MOVB $0xf0, (AX)
   9718 	MOVB BL, 1(AX)
   9719 	ADDQ $0x02, AX
   9720 	CMPL BX, $0x40
   9721 	JB   memmove_match_emit_encodeBetterBlockAsm8B
   9722 	JMP  memmove_long_match_emit_encodeBetterBlockAsm8B
   9723 
   9724 one_byte_match_emit_encodeBetterBlockAsm8B:
        	// Header: the single byte (length - 1) << 2.
   9725 	SHLB $0x02, BL
   9726 	MOVB BL, (AX)
   9727 	ADDQ $0x01, AX
   9728 
   9729 memmove_match_emit_encodeBetterBlockAsm8B:
        	// BX = dst pointer just past the literal bytes; AX is set to it once
        	// the copy is done, so bytes written beyond BX do not count as output.
   9730 	LEAQ (AX)(R8*1), BX
   9731 
   9732 	// genMemMoveShort
        	// Dispatch on the length R8 (at most 64 on this path).
   9733 	CMPQ R8, $0x04
   9734 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4
   9735 	CMPQ R8, $0x08
   9736 	JB   emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4through7
   9737 	CMPQ R8, $0x10
   9738 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_8through16
   9739 	CMPQ R8, $0x20
   9740 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_17through32
   9741 	JMP  emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_33through64
   9742 
   9743 emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4:
        	// Length <= 4: one 4-byte load and store, even when R8 is smaller.
   9744 	MOVL (R9), R10
   9745 	MOVL R10, (AX)
   9746 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm8B
   9747 
   9748 emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4through7:
        	// First 4 and last 4 bytes; the two moves may overlap.
   9749 	MOVL (R9), R10
   9750 	MOVL -4(R9)(R8*1), R9
   9751 	MOVL R10, (AX)
   9752 	MOVL R9, -4(AX)(R8*1)
   9753 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm8B
   9754 
   9755 emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_8through16:
        	// First 8 and last 8 bytes; the two moves may overlap.
   9756 	MOVQ (R9), R10
   9757 	MOVQ -8(R9)(R8*1), R9
   9758 	MOVQ R10, (AX)
   9759 	MOVQ R9, -8(AX)(R8*1)
   9760 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm8B
   9761 
   9762 emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_17through32:
        	// First 16 and last 16 bytes; the two moves may overlap.
   9763 	MOVOU (R9), X0
   9764 	MOVOU -16(R9)(R8*1), X1
   9765 	MOVOU X0, (AX)
   9766 	MOVOU X1, -16(AX)(R8*1)
   9767 	JMP   memmove_end_copy_match_emit_encodeBetterBlockAsm8B
   9768 
   9769 emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_33through64:
        	// First 32 and last 32 bytes; the two halves may overlap.
   9770 	MOVOU (R9), X0
   9771 	MOVOU 16(R9), X1
   9772 	MOVOU -32(R9)(R8*1), X2
   9773 	MOVOU -16(R9)(R8*1), X3
   9774 	MOVOU X0, (AX)
   9775 	MOVOU X1, 16(AX)
   9776 	MOVOU X2, -32(AX)(R8*1)
   9777 	MOVOU X3, -16(AX)(R8*1)
   9778 
   9779 memmove_end_copy_match_emit_encodeBetterBlockAsm8B:
   9780 	MOVQ BX, AX
   9781 	JMP  emit_literal_done_match_emit_encodeBetterBlockAsm8B
   9782 
   9783 memmove_long_match_emit_encodeBetterBlockAsm8B:
        	// Bulk copy of R8 bytes from (R9) to (AX). BX = dst pointer past the
        	// copy.
   9784 	LEAQ (AX)(R8*1), BX
   9785 
   9786 	// genMemMoveLong
        	// The first and last 32 source bytes are captured in X0-X3 now and
        	// stored last with unaligned moves, covering head and tail.
   9787 	MOVOU (R9), X0
   9788 	MOVOU 16(R9), X1
   9789 	MOVOU -32(R9)(R8*1), X2
   9790 	MOVOU -16(R9)(R8*1), X3
        	// R12 = R8 / 32. R13 = 64 - (AX & 31), so AX + R13 - 32 is 32-byte
        	// aligned; the MOVOA stores below depend on that alignment.
   9791 	MOVQ  R8, R12
   9792 	SHRQ  $0x05, R12
   9793 	MOVQ  AX, R10
   9794 	ANDL  $0x0000001f, R10
   9795 	MOVQ  $0x00000040, R13
   9796 	SUBQ  R10, R13
   9797 	DECQ  R12
   9798 	JA    emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
   9799 	LEAQ  -32(R9)(R13*1), R10
   9800 	LEAQ  -32(AX)(R13*1), R14
   9801 
   9802 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back:
        	// Copies one 32-byte chunk from (R10) to the aligned address (R14)
        	// and advances R10, R14 and the running offset R13 by 32.
   9803 	MOVOU (R10), X4
   9804 	MOVOU 16(R10), X5
   9805 	MOVOA X4, (R14)
   9806 	MOVOA X5, 16(R14)
   9807 	ADDQ  $0x20, R14
   9808 	ADDQ  $0x20, R10
   9809 	ADDQ  $0x20, R13
   9810 	DECQ  R12
   9811 	JNA   emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back
   9812 
   9813 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32:
        	// Copies 32-byte chunks addressed by the offset R13 for as long as
        	// R8 >= R13 after the increment.
   9814 	MOVOU -32(R9)(R13*1), X4
   9815 	MOVOU -16(R9)(R13*1), X5
   9816 	MOVOA X4, -32(AX)(R13*1)
   9817 	MOVOA X5, -16(AX)(R13*1)
   9818 	ADDQ  $0x20, R13
   9819 	CMPQ  R8, R13
   9820 	JAE   emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
        	// Write the saved head and tail, then move AX past the literals.
   9821 	MOVOU X0, (AX)
   9822 	MOVOU X1, 16(AX)
   9823 	MOVOU X2, -32(AX)(R8*1)
   9824 	MOVOU X3, -16(AX)(R8*1)
   9825 	MOVQ  BX, AX
   9826 
   9827 emit_literal_done_match_emit_encodeBetterBlockAsm8B:
        	// CX moves to the end of the match, R11 becomes the full match length
        	// (counted bytes plus the 4 skipped earlier), and 12(SP) = CX marks
        	// everything before the new position as emitted.
   9828 	ADDL R11, CX
   9829 	ADDL $0x04, R11
   9830 	MOVL CX, 12(SP)
   9831 
   9832 	// emitCopy
        	// R11 = length, DI = offset. Lengths up to 64 go to a single copy op.
        	// Longer matches emit a first op here and encode the remaining
        	// length with the emitRepeat sequences below.
   9833 	CMPL R11, $0x40
   9834 	JBE  two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B
   9835 	CMPL DI, $0x00000800
   9836 	JAE  long_offset_short_match_nolit_encodeBetterBlockAsm8B
        	// Offset below 2048: 2-byte op with first byte 17 | (offset>>8)<<5
        	// and second byte = low offset byte. 17 is the value that
        	// two_byte_offset_short computes for length 8, and 8 is subtracted
        	// from R11 accordingly.
   9837 	MOVL $0x00000001, BX
   9838 	LEAL 16(BX), BX
   9839 	MOVB DI, 1(AX)
   9840 	SHRL $0x08, DI
   9841 	SHLL $0x05, DI
   9842 	ORL  DI, BX
   9843 	MOVB BL, (AX)
   9844 	ADDQ $0x02, AX
   9845 	SUBL $0x08, R11
   9846 
   9847 	// emitRepeat
        	// The unconditional JMP makes the five instructions after it
        	// unreachable: no label targets them.
   9848 	LEAL -4(R11), R11
   9849 	JMP  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b
   9850 	MOVL R11, BX
   9851 	LEAL -4(R11), R11
   9852 	CMPL BX, $0x08
   9853 	JBE  repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b
   9854 	CMPL BX, $0x0c
   9855 	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b
   9856 
   9857 cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b:
        	// R11 = remaining length - 4. At 0x104 or above: 4-byte op made of
        	// the bytes 0x19, 0x00 and R11 - 256 as a 16-bit value.
   9858 	CMPL R11, $0x00000104
   9859 	JB   repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b
   9860 	LEAL -256(R11), R11
   9861 	MOVW $0x0019, (AX)
   9862 	MOVW R11, 2(AX)
   9863 	ADDQ $0x04, AX
   9864 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
   9865 
   9866 repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b:
        	// 3-byte op: the bytes 0x15, 0x00 and R11 - 4 as one byte.
   9867 	LEAL -4(R11), R11
   9868 	MOVW $0x0015, (AX)
   9869 	MOVB R11, 2(AX)
   9870 	ADDQ $0x03, AX
   9871 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
   9872 
   9873 repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b:
        	// 2-byte op: (R11 << 2) | 1 stored as a 16-bit value. The only branch
        	// to this label is the unreachable JBE above. The instructions after
        	// the JMP below are unreachable as well.
   9874 	SHLL $0x02, R11
   9875 	ORL  $0x01, R11
   9876 	MOVW R11, (AX)
   9877 	ADDQ $0x02, AX
   9878 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
   9879 	XORQ BX, BX
   9880 	LEAL 1(BX)(R11*4), R11
   9881 	MOVB DI, 1(AX)
   9882 	SARL $0x08, DI
   9883 	SHLL $0x05, DI
   9884 	ORL  DI, R11
   9885 	MOVB R11, (AX)
   9886 	ADDQ $0x02, AX
   9887 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
   9888 
   9889 long_offset_short_match_nolit_encodeBetterBlockAsm8B:
        	// Length above 64 and offset >= 2048: 3-byte op made of 0xee and the
        	// 16-bit offset. 0xee equals (60 << 2) - 2, the value that
        	// emit_copy_three computes for length 60, and 60 is subtracted from
        	// R11.
   9890 	MOVB $0xee, (AX)
   9891 	MOVW DI, 1(AX)
   9892 	LEAL -60(R11), R11
   9893 	ADDQ $0x03, AX
   9894 
   9895 	// emitRepeat
        	// BX = remaining length, R11 = remaining length - 4. Up to 8 bytes
        	// use the 2-byte form. The JAE leads to the label that immediately
        	// follows it, so both outcomes of the second compare continue there.
   9896 	MOVL R11, BX
   9897 	LEAL -4(R11), R11
   9898 	CMPL BX, $0x08
   9899 	JBE  repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short
   9900 	CMPL BX, $0x0c
   9901 	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short
   9902 
   9903 cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short:
        	// At 0x104 or above: 4-byte op made of the bytes 0x19, 0x00 and
        	// R11 - 256 as a 16-bit value.
   9904 	CMPL R11, $0x00000104
   9905 	JB   repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short
   9906 	LEAL -256(R11), R11
   9907 	MOVW $0x0019, (AX)
   9908 	MOVW R11, 2(AX)
   9909 	ADDQ $0x04, AX
   9910 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
   9911 
   9912 repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short:
        	// 3-byte op: the bytes 0x15, 0x00 and R11 - 4 as one byte.
   9913 	LEAL -4(R11), R11
   9914 	MOVW $0x0015, (AX)
   9915 	MOVB R11, 2(AX)
   9916 	ADDQ $0x03, AX
   9917 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
   9918 
   9919 repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short:
        	// 2-byte op: (R11 << 2) | 1 stored as a 16-bit value. The
        	// instructions after the JMP are unreachable (no label).
   9920 	SHLL $0x02, R11
   9921 	ORL  $0x01, R11
   9922 	MOVW R11, (AX)
   9923 	ADDQ $0x02, AX
   9924 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
   9925 	XORQ BX, BX
   9926 	LEAL 1(BX)(R11*4), R11
   9927 	MOVB DI, 1(AX)
   9928 	SARL $0x08, DI
   9929 	SHLL $0x05, DI
   9930 	ORL  DI, R11
   9931 	MOVB R11, (AX)
   9932 	ADDQ $0x02, AX
   9933 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
   9934 
   9935 two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B:
        	// Length <= 64. Below 12: 2-byte op whose first byte is
        	// (length << 2) - 15, i.e. ((length - 4) << 2) | 1, OR-ed with
        	// (offset >> 8) << 5, followed by the low offset byte.
   9936 	MOVL R11, BX
   9937 	SHLL $0x02, BX
   9938 	CMPL R11, $0x0c
   9939 	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm8B
   9940 	LEAL -15(BX), BX
   9941 	MOVB DI, 1(AX)
   9942 	SHRL $0x08, DI
   9943 	SHLL $0x05, DI
   9944 	ORL  DI, BX
   9945 	MOVB BL, (AX)
   9946 	ADDQ $0x02, AX
   9947 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
   9948 
   9949 emit_copy_three_match_nolit_encodeBetterBlockAsm8B:
        	// Length 12..64: 3-byte op whose first byte is (length << 2) - 2,
        	// i.e. ((length - 1) << 2) | 2, followed by the 16-bit offset.
   9950 	LEAL -2(BX), BX
   9951 	MOVB BL, (AX)
   9952 	MOVW DI, 1(AX)
   9953 	ADDQ $0x03, AX
   9954 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
   9955 
   9956 match_is_repeat_encodeBetterBlockAsm8B:
        	// The match offset equals the stored previous offset. Emit
        	// src[12(SP):SI] as literals unless that range is empty.
        	// R8 = source pointer, DI = length, BX = length - 1; 12(SP) is
        	// updated to SI. R11 (the counted match length) is left untouched.
   9957 	MOVL 12(SP), BX
   9958 	CMPL BX, SI
   9959 	JEQ  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B
   9960 	MOVL SI, DI
   9961 	MOVL SI, 12(SP)
   9962 	LEAQ (DX)(BX*1), R8
   9963 	SUBL BX, DI
   9964 	LEAL -1(DI), BX
        	// Choose the header size from length - 1: below 60 one byte, below
        	// 256 two bytes, otherwise three bytes. The second JB tests the same
        	// flags as the first, so it is never taken and execution falls
        	// through into the three-byte case.
   9965 	CMPL BX, $0x3c
   9966 	JB   one_byte_match_emit_repeat_encodeBetterBlockAsm8B
   9967 	CMPL BX, $0x00000100
   9968 	JB   two_bytes_match_emit_repeat_encodeBetterBlockAsm8B
   9969 	JB   three_bytes_match_emit_repeat_encodeBetterBlockAsm8B
   9970 
   9971 three_bytes_match_emit_repeat_encodeBetterBlockAsm8B:
        	// Header: 0xf4 followed by length - 1 as a 16-bit value.
   9972 	MOVB $0xf4, (AX)
   9973 	MOVW BX, 1(AX)
   9974 	ADDQ $0x03, AX
   9975 	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm8B
   9976 
   9977 two_bytes_match_emit_repeat_encodeBetterBlockAsm8B:
        	// Header: 0xf0 followed by length - 1 as one byte. Lengths up to 64
        	// use the short copy, longer ones the bulk copy.
   9978 	MOVB $0xf0, (AX)
   9979 	MOVB BL, 1(AX)
   9980 	ADDQ $0x02, AX
   9981 	CMPL BX, $0x40
   9982 	JB   memmove_match_emit_repeat_encodeBetterBlockAsm8B
   9983 	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm8B
   9984 
   9985 one_byte_match_emit_repeat_encodeBetterBlockAsm8B:
        	// Header: the single byte (length - 1) << 2.
   9986 	SHLB $0x02, BL
   9987 	MOVB BL, (AX)
   9988 	ADDQ $0x01, AX
   9989 
   9990 memmove_match_emit_repeat_encodeBetterBlockAsm8B:
        	// BX = dst pointer just past the literal bytes; AX is set to it once
        	// the copy is done, so bytes written beyond BX do not count as output.
   9991 	LEAQ (AX)(DI*1), BX
   9992 
   9993 	// genMemMoveShort
        	// Dispatch on the length DI (at most 64 on this path).
   9994 	CMPQ DI, $0x04
   9995 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4
   9996 	CMPQ DI, $0x08
   9997 	JB   emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7
   9998 	CMPQ DI, $0x10
   9999 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16
  10000 	CMPQ DI, $0x20
  10001 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32
  10002 	JMP  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64
  10003 
  10004 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4:
        	// Length <= 4: one 4-byte load and store, even when DI is smaller.
  10005 	MOVL (R8), R9
  10006 	MOVL R9, (AX)
  10007 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B
  10008 
  10009 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7:
        	// First 4 and last 4 bytes; the two moves may overlap.
  10010 	MOVL (R8), R9
  10011 	MOVL -4(R8)(DI*1), R8
  10012 	MOVL R9, (AX)
  10013 	MOVL R8, -4(AX)(DI*1)
  10014 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B
  10015 
  10016 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16:
        	// First 8 and last 8 bytes; the two moves may overlap.
  10017 	MOVQ (R8), R9
  10018 	MOVQ -8(R8)(DI*1), R8
  10019 	MOVQ R9, (AX)
  10020 	MOVQ R8, -8(AX)(DI*1)
  10021 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B
  10022 
  10023 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32:
        	// First 16 and last 16 bytes; the two moves may overlap.
  10024 	MOVOU (R8), X0
  10025 	MOVOU -16(R8)(DI*1), X1
  10026 	MOVOU X0, (AX)
  10027 	MOVOU X1, -16(AX)(DI*1)
  10028 	JMP   memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B
  10029 
  10030 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64:
        	// First 32 and last 32 bytes; the two halves may overlap.
  10031 	MOVOU (R8), X0
  10032 	MOVOU 16(R8), X1
  10033 	MOVOU -32(R8)(DI*1), X2
  10034 	MOVOU -16(R8)(DI*1), X3
  10035 	MOVOU X0, (AX)
  10036 	MOVOU X1, 16(AX)
  10037 	MOVOU X2, -32(AX)(DI*1)
  10038 	MOVOU X3, -16(AX)(DI*1)
  10039 
  10040 memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B:
  10041 	MOVQ BX, AX
  10042 	JMP  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B
  10043 
  10044 memmove_long_match_emit_repeat_encodeBetterBlockAsm8B:
        	// Bulk copy of DI bytes from (R8) to (AX). BX = dst pointer past the
        	// copy.
  10045 	LEAQ (AX)(DI*1), BX
  10046 
  10047 	// genMemMoveLong
        	// The first and last 32 source bytes are captured in X0-X3 now and
        	// stored last with unaligned moves, covering head and tail.
  10048 	MOVOU (R8), X0
  10049 	MOVOU 16(R8), X1
  10050 	MOVOU -32(R8)(DI*1), X2
  10051 	MOVOU -16(R8)(DI*1), X3
        	// R10 = DI / 32. R12 = 64 - (AX & 31), so AX + R12 - 32 is 32-byte
        	// aligned; the MOVOA stores below depend on that alignment.
  10052 	MOVQ  DI, R10
  10053 	SHRQ  $0x05, R10
  10054 	MOVQ  AX, R9
  10055 	ANDL  $0x0000001f, R9
  10056 	MOVQ  $0x00000040, R12
  10057 	SUBQ  R9, R12
  10058 	DECQ  R10
  10059 	JA    emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
  10060 	LEAQ  -32(R8)(R12*1), R9
  10061 	LEAQ  -32(AX)(R12*1), R13
  10062 
  10063 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back:
        	// Copies one 32-byte chunk from (R9) to the aligned address (R13)
        	// and advances R9, R13 and the running offset R12 by 32.
  10064 	MOVOU (R9), X4
  10065 	MOVOU 16(R9), X5
  10066 	MOVOA X4, (R13)
  10067 	MOVOA X5, 16(R13)
  10068 	ADDQ  $0x20, R13
  10069 	ADDQ  $0x20, R9
  10070 	ADDQ  $0x20, R12
  10071 	DECQ  R10
  10072 	JNA   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back
  10073 
  10074 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32:
        	// Copies 32-byte chunks addressed by the offset R12 for as long as
        	// DI >= R12 after the increment.
  10075 	MOVOU -32(R8)(R12*1), X4
  10076 	MOVOU -16(R8)(R12*1), X5
  10077 	MOVOA X4, -32(AX)(R12*1)
  10078 	MOVOA X5, -16(AX)(R12*1)
  10079 	ADDQ  $0x20, R12
  10080 	CMPQ  DI, R12
  10081 	JAE   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
        	// Write the saved head and tail, then move AX past the literals.
  10082 	MOVOU X0, (AX)
  10083 	MOVOU X1, 16(AX)
  10084 	MOVOU X2, -32(AX)(DI*1)
  10085 	MOVOU X3, -16(AX)(DI*1)
  10086 	MOVQ  BX, AX
  10087 
  10088 emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B:
        	// CX moves to the end of the match, R11 becomes the full match length
        	// (counted bytes plus the 4 skipped earlier), and 12(SP) = CX marks
        	// everything before the new position as emitted.
  10089 	ADDL R11, CX
  10090 	ADDL $0x04, R11
  10091 	MOVL CX, 12(SP)
  10092 
  10093 	// emitRepeat
        	// BX = length, R11 = length - 4. Lengths up to 8 use the 2-byte
        	// form. The JAE leads to the label that immediately follows it, so
        	// both outcomes of the second compare continue there.
  10094 	MOVL R11, BX
  10095 	LEAL -4(R11), R11
  10096 	CMPL BX, $0x08
  10097 	JBE  repeat_two_match_nolit_repeat_encodeBetterBlockAsm8B
  10098 	CMPL BX, $0x0c
  10099 	JAE  cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm8B
  10100 
  10101 cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm8B:
        	// At 0x104 or above: 4-byte op made of the bytes 0x19, 0x00 and
        	// R11 - 256 as a 16-bit value.
  10102 	CMPL R11, $0x00000104
  10103 	JB   repeat_three_match_nolit_repeat_encodeBetterBlockAsm8B
  10104 	LEAL -256(R11), R11
  10105 	MOVW $0x0019, (AX)
  10106 	MOVW R11, 2(AX)
  10107 	ADDQ $0x04, AX
  10108 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
  10109 
  10110 repeat_three_match_nolit_repeat_encodeBetterBlockAsm8B:
        	// 3-byte op: the bytes 0x15, 0x00 and R11 - 4 as one byte.
  10111 	LEAL -4(R11), R11
  10112 	MOVW $0x0015, (AX)
  10113 	MOVB R11, 2(AX)
  10114 	ADDQ $0x03, AX
  10115 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
  10116 
  10117 repeat_two_match_nolit_repeat_encodeBetterBlockAsm8B:
        	// 2-byte op: (R11 << 2) | 1 stored as a 16-bit value. The
        	// instructions after the JMP are unreachable: they follow an
        	// unconditional jump and carry no label.
  10118 	SHLL $0x02, R11
  10119 	ORL  $0x01, R11
  10120 	MOVW R11, (AX)
  10121 	ADDQ $0x02, AX
  10122 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
  10123 	XORQ BX, BX
  10124 	LEAL 1(BX)(R11*4), R11
  10125 	MOVB DI, 1(AX)
  10126 	SARL $0x08, DI
  10127 	SHLL $0x05, DI
  10128 	ORL  DI, R11
  10129 	MOVB R11, (AX)
  10130 	ADDQ $0x02, AX
  10131 
  10132 match_nolit_emitcopy_end_encodeBetterBlockAsm8B:
        	// After a match has been encoded: finish with the remainder if CX has
        	// reached the search limit in 8(SP); return 0 if the dst pointer AX
        	// has reached the limit in (SP).
  10133 	CMPL CX, 8(SP)
  10134 	JAE  emit_remainder_encodeBetterBlockAsm8B
  10135 	CMPQ AX, (SP)
  10136 	JB   match_nolit_dst_ok_encodeBetterBlockAsm8B
  10137 	MOVQ $0x00000000, ret+48(FP)
  10138 	RET
  10139 
  10140 match_nolit_dst_ok_encodeBetterBlockAsm8B:
        	// Add table entries for positions inside the match just encoded.
        	// SI (the match start saved at match_dst_size_check) is incremented
        	// to give the low cursor; R8 = CX - 2 is the high cursor.
  10141 	MOVQ  $0x0000cf1bbcdcbf9b, BX
  10142 	MOVQ  $0x9e3779b1, DI
  10143 	LEAQ  1(SI), SI
  10144 	LEAQ  -2(CX), R8
        	// Load 8 bytes at SI, SI+1, R8 and R8+1.
  10145 	MOVQ  (DX)(SI*1), R9
  10146 	MOVQ  1(DX)(SI*1), R10
  10147 	MOVQ  (DX)(R8*1), R11
  10148 	MOVQ  1(DX)(R8*1), R12
        	// R9 / R11: 10-bit hash of 6 bytes (<<16, * BX, >>54).
        	// R10 / R12: 8-bit hash of 4 bytes (<<32, * DI, >>56).
  10149 	SHLQ  $0x10, R9
  10150 	IMULQ BX, R9
  10151 	SHRQ  $0x36, R9
  10152 	SHLQ  $0x20, R10
  10153 	IMULQ DI, R10
  10154 	SHRQ  $0x38, R10
  10155 	SHLQ  $0x10, R11
  10156 	IMULQ BX, R11
  10157 	SHRQ  $0x36, R11
  10158 	SHLQ  $0x20, R12
  10159 	IMULQ DI, R12
  10160 	SHRQ  $0x38, R12
        	// The 24(SP) table receives SI and R8; the 4120(SP) table receives
        	// SI+1 and R8+1. DI is reused here, its multiplier no longer needed.
  10161 	LEAQ  1(SI), DI
  10162 	LEAQ  1(R8), R13
  10163 	MOVL  SI, 24(SP)(R9*4)
  10164 	MOVL  R8, 24(SP)(R11*4)
  10165 	MOVL  DI, 4120(SP)(R10*4)
  10166 	MOVL  R13, 4120(SP)(R12*4)
  10167 	ADDQ  $0x01, SI
  10168 	SUBQ  $0x01, R8
  10169 
  10170 index_loop_encodeBetterBlockAsm8B:
        	// Walk the two cursors towards each other in steps of 2, storing each
        	// position in the 24(SP) table under its 10-bit hash, and return to
        	// the search loop once SI >= R8.
  10171 	CMPQ  SI, R8
  10172 	JAE   search_loop_encodeBetterBlockAsm8B
  10173 	MOVQ  (DX)(SI*1), DI
  10174 	MOVQ  (DX)(R8*1), R9
  10175 	SHLQ  $0x10, DI
  10176 	IMULQ BX, DI
  10177 	SHRQ  $0x36, DI
  10178 	SHLQ  $0x10, R9
  10179 	IMULQ BX, R9
  10180 	SHRQ  $0x36, R9
  10181 	MOVL  SI, 24(SP)(DI*4)
  10182 	MOVL  R8, 24(SP)(R9*4)
  10183 	ADDQ  $0x02, SI
  10184 	SUBQ  $0x02, R8
  10185 	JMP   index_loop_encodeBetterBlockAsm8B
  10186 
  10187 emit_remainder_encodeBetterBlockAsm8B:
        	// Output-size check for the trailing literals: continue only if
        	// AX + (len(src) - 12(SP)) + 3 is below the dst limit in (SP);
        	// otherwise return 0.
  10188 	MOVQ src_len+32(FP), CX
  10189 	SUBL 12(SP), CX
  10190 	LEAQ 3(AX)(CX*1), CX
  10191 	CMPQ CX, (SP)
  10192 	JB   emit_remainder_ok_encodeBetterBlockAsm8B
  10193 	MOVQ $0x00000000, ret+48(FP)
  10194 	RET
  10195 
  10196 emit_remainder_ok_encodeBetterBlockAsm8B:
        	// Emit src[12(SP):len(src)] as literals unless that range is empty.
        	// CX = source pointer, SI = length, DX = length - 1. DX stops being
        	// the src base from here on.
  10197 	MOVQ src_len+32(FP), CX
  10198 	MOVL 12(SP), BX
  10199 	CMPL BX, CX
  10200 	JEQ  emit_literal_done_emit_remainder_encodeBetterBlockAsm8B
  10201 	MOVL CX, SI
  10202 	MOVL CX, 12(SP)
  10203 	LEAQ (DX)(BX*1), CX
  10204 	SUBL BX, SI
  10205 	LEAL -1(SI), DX
        	// Choose the header size from length - 1: below 60 one byte, below
        	// 256 two bytes, otherwise three bytes. The second JB tests the same
        	// flags as the first, so it is never taken and execution falls
        	// through into the three-byte case.
  10206 	CMPL DX, $0x3c
  10207 	JB   one_byte_emit_remainder_encodeBetterBlockAsm8B
  10208 	CMPL DX, $0x00000100
  10209 	JB   two_bytes_emit_remainder_encodeBetterBlockAsm8B
  10210 	JB   three_bytes_emit_remainder_encodeBetterBlockAsm8B
  10211 
  10212 three_bytes_emit_remainder_encodeBetterBlockAsm8B:
        	// Header: 0xf4 followed by length - 1 as a 16-bit value.
  10213 	MOVB $0xf4, (AX)
  10214 	MOVW DX, 1(AX)
  10215 	ADDQ $0x03, AX
  10216 	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm8B
  10217 
  10218 two_bytes_emit_remainder_encodeBetterBlockAsm8B:
        	// Header: 0xf0 followed by length - 1 as one byte. Lengths up to 64
        	// use the short copy, longer ones the bulk copy.
  10219 	MOVB $0xf0, (AX)
  10220 	MOVB DL, 1(AX)
  10221 	ADDQ $0x02, AX
  10222 	CMPL DX, $0x40
  10223 	JB   memmove_emit_remainder_encodeBetterBlockAsm8B
  10224 	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm8B
  10225 
  10226 one_byte_emit_remainder_encodeBetterBlockAsm8B:
        	// Header: the single byte (length - 1) << 2.
  10227 	SHLB $0x02, DL
  10228 	MOVB DL, (AX)
  10229 	ADDQ $0x01, AX
  10230 
  10231 memmove_emit_remainder_encodeBetterBlockAsm8B:
        	// DX = dst pointer just past the literal bytes, BX = length.
  10232 	LEAQ (AX)(SI*1), DX
  10233 	MOVL SI, BX
  10234 
  10235 	// genMemMoveShort
        	// Dispatch on the length BX (at most 64 on this path). Every case
        	// below loads and stores only within the BX-byte range.
  10236 	CMPQ BX, $0x03
  10237 	JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_1or2
  10238 	JE   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_3
  10239 	CMPQ BX, $0x08
  10240 	JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_4through7
  10241 	CMPQ BX, $0x10
  10242 	JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_8through16
  10243 	CMPQ BX, $0x20
  10244 	JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_17through32
  10245 	JMP  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_33through64
  10246 
  10247 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_1or2:
        	// First byte and last byte (the same byte when BX is 1).
  10248 	MOVB (CX), SI
  10249 	MOVB -1(CX)(BX*1), CL
  10250 	MOVB SI, (AX)
  10251 	MOVB CL, -1(AX)(BX*1)
  10252 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B
  10253 
  10254 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_3:
        	// Exactly 3 bytes: a 2-byte move plus a 1-byte move.
  10255 	MOVW (CX), SI
  10256 	MOVB 2(CX), CL
  10257 	MOVW SI, (AX)
  10258 	MOVB CL, 2(AX)
  10259 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B
  10260 
  10261 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_4through7:
        	// First 4 and last 4 bytes; the two moves may overlap.
  10262 	MOVL (CX), SI
  10263 	MOVL -4(CX)(BX*1), CX
  10264 	MOVL SI, (AX)
  10265 	MOVL CX, -4(AX)(BX*1)
  10266 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B
  10267 
  10268 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_8through16:
        	// First 8 and last 8 bytes; the two moves may overlap.
  10269 	MOVQ (CX), SI
  10270 	MOVQ -8(CX)(BX*1), CX
  10271 	MOVQ SI, (AX)
  10272 	MOVQ CX, -8(AX)(BX*1)
  10273 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B
  10274 
  10275 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_17through32:
        	// First 16 and last 16 bytes; the two moves may overlap.
  10276 	MOVOU (CX), X0
  10277 	MOVOU -16(CX)(BX*1), X1
  10278 	MOVOU X0, (AX)
  10279 	MOVOU X1, -16(AX)(BX*1)
  10280 	JMP   memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B
  10281 
  10282 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_33through64:
        	// First 32 and last 32 bytes; the two halves may overlap.
  10283 	MOVOU (CX), X0
  10284 	MOVOU 16(CX), X1
  10285 	MOVOU -32(CX)(BX*1), X2
  10286 	MOVOU -16(CX)(BX*1), X3
  10287 	MOVOU X0, (AX)
  10288 	MOVOU X1, 16(AX)
  10289 	MOVOU X2, -32(AX)(BX*1)
  10290 	MOVOU X3, -16(AX)(BX*1)
  10291 
  10292 memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B:
  10293 	MOVQ DX, AX
  10294 	JMP  emit_literal_done_emit_remainder_encodeBetterBlockAsm8B
  10295 
  10296 memmove_long_emit_remainder_encodeBetterBlockAsm8B:
        	// Bulk copy of SI bytes from (CX) to (AX). DX = dst pointer past the
        	// copy, BX = length.
  10297 	LEAQ (AX)(SI*1), DX
  10298 	MOVL SI, BX
  10299 
  10300 	// genMemMoveLong
        	// The first and last 32 source bytes are captured in X0-X3 now and
        	// stored last with unaligned moves, covering head and tail.
  10301 	MOVOU (CX), X0
  10302 	MOVOU 16(CX), X1
  10303 	MOVOU -32(CX)(BX*1), X2
  10304 	MOVOU -16(CX)(BX*1), X3
        	// DI = BX / 32. R8 = 64 - (AX & 31), so AX + R8 - 32 is 32-byte
        	// aligned; the MOVOA stores below depend on that alignment.
  10305 	MOVQ  BX, DI
  10306 	SHRQ  $0x05, DI
  10307 	MOVQ  AX, SI
  10308 	ANDL  $0x0000001f, SI
  10309 	MOVQ  $0x00000040, R8
  10310 	SUBQ  SI, R8
  10311 	DECQ  DI
  10312 	JA    emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
  10313 	LEAQ  -32(CX)(R8*1), SI
  10314 	LEAQ  -32(AX)(R8*1), R9
  10315 
  10316 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back:
        	// Copies one 32-byte chunk from (SI) to the aligned address (R9)
        	// and advances SI, R9 and the running offset R8 by 32.
  10317 	MOVOU (SI), X4
  10318 	MOVOU 16(SI), X5
  10319 	MOVOA X4, (R9)
  10320 	MOVOA X5, 16(R9)
  10321 	ADDQ  $0x20, R9
  10322 	ADDQ  $0x20, SI
  10323 	ADDQ  $0x20, R8
  10324 	DECQ  DI
  10325 	JNA   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back
  10326 
  10327 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32:
        	// Copies 32-byte chunks addressed by the offset R8 for as long as
        	// BX >= R8 after the increment.
  10328 	MOVOU -32(CX)(R8*1), X4
  10329 	MOVOU -16(CX)(R8*1), X5
  10330 	MOVOA X4, -32(AX)(R8*1)
  10331 	MOVOA X5, -16(AX)(R8*1)
  10332 	ADDQ  $0x20, R8
  10333 	CMPQ  BX, R8
  10334 	JAE   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
        	// Write the saved head and tail, then move AX past the literals.
  10335 	MOVOU X0, (AX)
  10336 	MOVOU X1, 16(AX)
  10337 	MOVOU X2, -32(AX)(BX*1)
  10338 	MOVOU X3, -16(AX)(BX*1)
  10339 	MOVQ  DX, AX
  10340 
  10341 emit_literal_done_emit_remainder_encodeBetterBlockAsm8B:
        	// Return the number of bytes written: AX - dst_base.
  10342 	MOVQ dst_base+0(FP), CX
  10343 	SUBQ CX, AX
  10344 	MOVQ AX, ret+48(FP)
  10345 	RET
  10346 
  10347 // func encodeSnappyBlockAsm(dst []byte, src []byte) int
  10348 // Requires: BMI, SSE2
  10349 TEXT ·encodeSnappyBlockAsm(SB), $65560-56
  10350 	MOVQ dst_base+0(FP), AX
  10351 	MOVQ $0x00000200, CX
  10352 	LEAQ 24(SP), DX
  10353 	PXOR X0, X0
  10354 
  10355 zero_loop_encodeSnappyBlockAsm:
  10356 	MOVOU X0, (DX)
  10357 	MOVOU X0, 16(DX)
  10358 	MOVOU X0, 32(DX)
  10359 	MOVOU X0, 48(DX)
  10360 	MOVOU X0, 64(DX)
  10361 	MOVOU X0, 80(DX)
  10362 	MOVOU X0, 96(DX)
  10363 	MOVOU X0, 112(DX)
  10364 	ADDQ  $0x80, DX
  10365 	DECQ  CX
  10366 	JNZ   zero_loop_encodeSnappyBlockAsm
  10367 	MOVL  $0x00000000, 12(SP)
  10368 	MOVQ  src_len+32(FP), CX
  10369 	LEAQ  -9(CX), DX
  10370 	LEAQ  -8(CX), BX
  10371 	MOVL  BX, 8(SP)
  10372 	SHRQ  $0x05, CX
  10373 	SUBL  CX, DX
  10374 	LEAQ  (AX)(DX*1), DX
  10375 	MOVQ  DX, (SP)
  10376 	MOVL  $0x00000001, CX
  10377 	MOVL  CX, 16(SP)
  10378 	MOVQ  src_base+24(FP), DX
  10379 
  10380 search_loop_encodeSnappyBlockAsm:
  10381 	MOVL  CX, BX
  10382 	SUBL  12(SP), BX
  10383 	SHRL  $0x06, BX
  10384 	LEAL  4(CX)(BX*1), BX
  10385 	CMPL  BX, 8(SP)
  10386 	JAE   emit_remainder_encodeSnappyBlockAsm
  10387 	MOVQ  (DX)(CX*1), SI
  10388 	MOVL  BX, 20(SP)
  10389 	MOVQ  $0x0000cf1bbcdcbf9b, R8
  10390 	MOVQ  SI, R9
  10391 	MOVQ  SI, R10
  10392 	SHRQ  $0x08, R10
  10393 	SHLQ  $0x10, R9
  10394 	IMULQ R8, R9
  10395 	SHRQ  $0x32, R9
  10396 	SHLQ  $0x10, R10
  10397 	IMULQ R8, R10
  10398 	SHRQ  $0x32, R10
  10399 	MOVL  24(SP)(R9*4), BX
  10400 	MOVL  24(SP)(R10*4), DI
  10401 	MOVL  CX, 24(SP)(R9*4)
  10402 	LEAL  1(CX), R9
  10403 	MOVL  R9, 24(SP)(R10*4)
  10404 	MOVQ  SI, R9
  10405 	SHRQ  $0x10, R9
  10406 	SHLQ  $0x10, R9
  10407 	IMULQ R8, R9
  10408 	SHRQ  $0x32, R9
  10409 	MOVL  CX, R8
  10410 	SUBL  16(SP), R8
  10411 	MOVL  1(DX)(R8*1), R10
  10412 	MOVQ  SI, R8
  10413 	SHRQ  $0x08, R8
  10414 	CMPL  R8, R10
  10415 	JNE   no_repeat_found_encodeSnappyBlockAsm
  10416 	LEAL  1(CX), SI
  10417 	MOVL  12(SP), BX
  10418 	MOVL  SI, DI
  10419 	SUBL  16(SP), DI
  10420 	JZ    repeat_extend_back_end_encodeSnappyBlockAsm
  10421 
  10422 repeat_extend_back_loop_encodeSnappyBlockAsm:
  10423 	CMPL SI, BX
  10424 	JBE  repeat_extend_back_end_encodeSnappyBlockAsm
  10425 	MOVB -1(DX)(DI*1), R8
  10426 	MOVB -1(DX)(SI*1), R9
  10427 	CMPB R8, R9
  10428 	JNE  repeat_extend_back_end_encodeSnappyBlockAsm
  10429 	LEAL -1(SI), SI
  10430 	DECL DI
  10431 	JNZ  repeat_extend_back_loop_encodeSnappyBlockAsm
  10432 
  10433 repeat_extend_back_end_encodeSnappyBlockAsm:
  10434 	MOVL 12(SP), BX
  10435 	CMPL BX, SI
  10436 	JEQ  emit_literal_done_repeat_emit_encodeSnappyBlockAsm
  10437 	MOVL SI, DI
  10438 	MOVL SI, 12(SP)
  10439 	LEAQ (DX)(BX*1), R8
  10440 	SUBL BX, DI
  10441 	LEAL -1(DI), BX
  10442 	CMPL BX, $0x3c
  10443 	JB   one_byte_repeat_emit_encodeSnappyBlockAsm
  10444 	CMPL BX, $0x00000100
  10445 	JB   two_bytes_repeat_emit_encodeSnappyBlockAsm
  10446 	CMPL BX, $0x00010000
  10447 	JB   three_bytes_repeat_emit_encodeSnappyBlockAsm
  10448 	CMPL BX, $0x01000000
  10449 	JB   four_bytes_repeat_emit_encodeSnappyBlockAsm
  10450 	MOVB $0xfc, (AX)
  10451 	MOVL BX, 1(AX)
  10452 	ADDQ $0x05, AX
  10453 	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm
  10454 
  10455 four_bytes_repeat_emit_encodeSnappyBlockAsm:
  10456 	MOVL BX, R9
  10457 	SHRL $0x10, R9
  10458 	MOVB $0xf8, (AX)
  10459 	MOVW BX, 1(AX)
  10460 	MOVB R9, 3(AX)
  10461 	ADDQ $0x04, AX
  10462 	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm
  10463 
  10464 three_bytes_repeat_emit_encodeSnappyBlockAsm:
  10465 	MOVB $0xf4, (AX)
  10466 	MOVW BX, 1(AX)
  10467 	ADDQ $0x03, AX
  10468 	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm
  10469 
  10470 two_bytes_repeat_emit_encodeSnappyBlockAsm:
  10471 	MOVB $0xf0, (AX)
  10472 	MOVB BL, 1(AX)
  10473 	ADDQ $0x02, AX
  10474 	CMPL BX, $0x40
  10475 	JB   memmove_repeat_emit_encodeSnappyBlockAsm
  10476 	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm
  10477 
  10478 one_byte_repeat_emit_encodeSnappyBlockAsm:
  10479 	SHLB $0x02, BL
  10480 	MOVB BL, (AX)
  10481 	ADDQ $0x01, AX
  10482 
  10483 memmove_repeat_emit_encodeSnappyBlockAsm:
  10484 	LEAQ (AX)(DI*1), BX
  10485 
  10486 	// genMemMoveShort
  10487 	CMPQ DI, $0x08
  10488 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8
  10489 	CMPQ DI, $0x10
  10490 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16
  10491 	CMPQ DI, $0x20
  10492 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32
  10493 	JMP  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64
  10494 
  10495 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8:
  10496 	MOVQ (R8), R9
  10497 	MOVQ R9, (AX)
  10498 	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm
  10499 
  10500 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16:
  10501 	MOVQ (R8), R9
  10502 	MOVQ -8(R8)(DI*1), R8
  10503 	MOVQ R9, (AX)
  10504 	MOVQ R8, -8(AX)(DI*1)
  10505 	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm
  10506 
  10507 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32:
  10508 	MOVOU (R8), X0
  10509 	MOVOU -16(R8)(DI*1), X1
  10510 	MOVOU X0, (AX)
  10511 	MOVOU X1, -16(AX)(DI*1)
  10512 	JMP   memmove_end_copy_repeat_emit_encodeSnappyBlockAsm
  10513 
  10514 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64:
  10515 	MOVOU (R8), X0
  10516 	MOVOU 16(R8), X1
  10517 	MOVOU -32(R8)(DI*1), X2
  10518 	MOVOU -16(R8)(DI*1), X3
  10519 	MOVOU X0, (AX)
  10520 	MOVOU X1, 16(AX)
  10521 	MOVOU X2, -32(AX)(DI*1)
  10522 	MOVOU X3, -16(AX)(DI*1)
  10523 
  10524 memmove_end_copy_repeat_emit_encodeSnappyBlockAsm:
  10525 	MOVQ BX, AX
  10526 	JMP  emit_literal_done_repeat_emit_encodeSnappyBlockAsm
  10527 
  10528 memmove_long_repeat_emit_encodeSnappyBlockAsm:
  10529 	LEAQ (AX)(DI*1), BX
  10530 
  10531 	// genMemMoveLong
  10532 	MOVOU (R8), X0
  10533 	MOVOU 16(R8), X1
  10534 	MOVOU -32(R8)(DI*1), X2
  10535 	MOVOU -16(R8)(DI*1), X3
  10536 	MOVQ  DI, R10
  10537 	SHRQ  $0x05, R10
  10538 	MOVQ  AX, R9
  10539 	ANDL  $0x0000001f, R9
  10540 	MOVQ  $0x00000040, R11
  10541 	SUBQ  R9, R11
  10542 	DECQ  R10
  10543 	JA    emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
  10544 	LEAQ  -32(R8)(R11*1), R9
  10545 	LEAQ  -32(AX)(R11*1), R12
  10546 
  10547 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back:
  10548 	MOVOU (R9), X4
  10549 	MOVOU 16(R9), X5
  10550 	MOVOA X4, (R12)
  10551 	MOVOA X5, 16(R12)
  10552 	ADDQ  $0x20, R12
  10553 	ADDQ  $0x20, R9
  10554 	ADDQ  $0x20, R11
  10555 	DECQ  R10
  10556 	JNA   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back
  10557 
  10558 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32:
  10559 	MOVOU -32(R8)(R11*1), X4
  10560 	MOVOU -16(R8)(R11*1), X5
  10561 	MOVOA X4, -32(AX)(R11*1)
  10562 	MOVOA X5, -16(AX)(R11*1)
  10563 	ADDQ  $0x20, R11
  10564 	CMPQ  DI, R11
  10565 	JAE   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
  10566 	MOVOU X0, (AX)
  10567 	MOVOU X1, 16(AX)
  10568 	MOVOU X2, -32(AX)(DI*1)
  10569 	MOVOU X3, -16(AX)(DI*1)
  10570 	MOVQ  BX, AX
  10571 
  10572 emit_literal_done_repeat_emit_encodeSnappyBlockAsm:
  10573 	ADDL $0x05, CX
  10574 	MOVL CX, BX
  10575 	SUBL 16(SP), BX
  10576 	MOVQ src_len+32(FP), DI
  10577 	SUBL CX, DI
  10578 	LEAQ (DX)(CX*1), R8
  10579 	LEAQ (DX)(BX*1), BX
  10580 
  10581 	// matchLen
  10582 	XORL R10, R10
  10583 	CMPL DI, $0x08
  10584 	JB   matchlen_match4_repeat_extend_encodeSnappyBlockAsm
  10585 
  10586 matchlen_loopback_repeat_extend_encodeSnappyBlockAsm:
  10587 	MOVQ  (R8)(R10*1), R9
  10588 	XORQ  (BX)(R10*1), R9
  10589 	TESTQ R9, R9
  10590 	JZ    matchlen_loop_repeat_extend_encodeSnappyBlockAsm
  10591 
  10592 #ifdef GOAMD64_v3
  10593 	TZCNTQ R9, R9
  10594 
  10595 #else
  10596 	BSFQ R9, R9
  10597 
  10598 #endif
  10599 	SARQ $0x03, R9
  10600 	LEAL (R10)(R9*1), R10
  10601 	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm
  10602 
  10603 matchlen_loop_repeat_extend_encodeSnappyBlockAsm:
  10604 	LEAL -8(DI), DI
  10605 	LEAL 8(R10), R10
  10606 	CMPL DI, $0x08
  10607 	JAE  matchlen_loopback_repeat_extend_encodeSnappyBlockAsm
  10608 	JZ   repeat_extend_forward_end_encodeSnappyBlockAsm
  10609 
  10610 matchlen_match4_repeat_extend_encodeSnappyBlockAsm:
  10611 	CMPL DI, $0x04
  10612 	JB   matchlen_match2_repeat_extend_encodeSnappyBlockAsm
  10613 	MOVL (R8)(R10*1), R9
  10614 	CMPL (BX)(R10*1), R9
  10615 	JNE  matchlen_match2_repeat_extend_encodeSnappyBlockAsm
  10616 	SUBL $0x04, DI
  10617 	LEAL 4(R10), R10
  10618 
  10619 matchlen_match2_repeat_extend_encodeSnappyBlockAsm:
  10620 	CMPL DI, $0x02
  10621 	JB   matchlen_match1_repeat_extend_encodeSnappyBlockAsm
  10622 	MOVW (R8)(R10*1), R9
  10623 	CMPW (BX)(R10*1), R9
  10624 	JNE  matchlen_match1_repeat_extend_encodeSnappyBlockAsm
  10625 	SUBL $0x02, DI
  10626 	LEAL 2(R10), R10
  10627 
  10628 matchlen_match1_repeat_extend_encodeSnappyBlockAsm:
  10629 	CMPL DI, $0x01
  10630 	JB   repeat_extend_forward_end_encodeSnappyBlockAsm
  10631 	MOVB (R8)(R10*1), R9
  10632 	CMPB (BX)(R10*1), R9
  10633 	JNE  repeat_extend_forward_end_encodeSnappyBlockAsm
  10634 	LEAL 1(R10), R10
  10635 
  10636 repeat_extend_forward_end_encodeSnappyBlockAsm:
  10637 	ADDL R10, CX
  10638 	MOVL CX, BX
  10639 	SUBL SI, BX
  10640 	MOVL 16(SP), SI
  10641 
  10642 	// emitCopy
  10643 	CMPL SI, $0x00010000
  10644 	JB   two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm
  10645 
  10646 four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm:
  10647 	CMPL BX, $0x40
  10648 	JBE  four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm
  10649 	MOVB $0xff, (AX)
  10650 	MOVL SI, 1(AX)
  10651 	LEAL -64(BX), BX
  10652 	ADDQ $0x05, AX
  10653 	CMPL BX, $0x04
  10654 	JB   four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm
  10655 	JMP  four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm
  10656 
  10657 four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm:
  10658 	TESTL BX, BX
  10659 	JZ    repeat_end_emit_encodeSnappyBlockAsm
  10660 	XORL  DI, DI
  10661 	LEAL  -1(DI)(BX*4), BX
  10662 	MOVB  BL, (AX)
  10663 	MOVL  SI, 1(AX)
  10664 	ADDQ  $0x05, AX
  10665 	JMP   repeat_end_emit_encodeSnappyBlockAsm
  10666 
  10667 two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm:
  10668 	CMPL BX, $0x40
  10669 	JBE  two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm
  10670 	MOVB $0xee, (AX)
  10671 	MOVW SI, 1(AX)
  10672 	LEAL -60(BX), BX
  10673 	ADDQ $0x03, AX
  10674 	JMP  two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm
  10675 
  10676 two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm:
  10677 	MOVL BX, DI
  10678 	SHLL $0x02, DI
  10679 	CMPL BX, $0x0c
  10680 	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm
  10681 	CMPL SI, $0x00000800
  10682 	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm
  10683 	LEAL -15(DI), DI
  10684 	MOVB SI, 1(AX)
  10685 	SHRL $0x08, SI
  10686 	SHLL $0x05, SI
  10687 	ORL  SI, DI
  10688 	MOVB DI, (AX)
  10689 	ADDQ $0x02, AX
  10690 	JMP  repeat_end_emit_encodeSnappyBlockAsm
  10691 
  10692 emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm:
  10693 	LEAL -2(DI), DI
  10694 	MOVB DI, (AX)
  10695 	MOVW SI, 1(AX)
  10696 	ADDQ $0x03, AX
  10697 
  10698 repeat_end_emit_encodeSnappyBlockAsm:
  10699 	MOVL CX, 12(SP)
  10700 	JMP  search_loop_encodeSnappyBlockAsm
  10701 
  10702 no_repeat_found_encodeSnappyBlockAsm:
  10703 	CMPL (DX)(BX*1), SI
  10704 	JEQ  candidate_match_encodeSnappyBlockAsm
  10705 	SHRQ $0x08, SI
  10706 	MOVL 24(SP)(R9*4), BX
  10707 	LEAL 2(CX), R8
  10708 	CMPL (DX)(DI*1), SI
  10709 	JEQ  candidate2_match_encodeSnappyBlockAsm
  10710 	MOVL R8, 24(SP)(R9*4)
  10711 	SHRQ $0x08, SI
  10712 	CMPL (DX)(BX*1), SI
  10713 	JEQ  candidate3_match_encodeSnappyBlockAsm
  10714 	MOVL 20(SP), CX
  10715 	JMP  search_loop_encodeSnappyBlockAsm
  10716 
  10717 candidate3_match_encodeSnappyBlockAsm:
  10718 	ADDL $0x02, CX
  10719 	JMP  candidate_match_encodeSnappyBlockAsm
  10720 
  10721 candidate2_match_encodeSnappyBlockAsm:
  10722 	MOVL R8, 24(SP)(R9*4)
  10723 	INCL CX
  10724 	MOVL DI, BX
  10725 
  10726 candidate_match_encodeSnappyBlockAsm:
  10727 	MOVL  12(SP), SI
  10728 	TESTL BX, BX
  10729 	JZ    match_extend_back_end_encodeSnappyBlockAsm
  10730 
  10731 match_extend_back_loop_encodeSnappyBlockAsm:
  10732 	CMPL CX, SI
  10733 	JBE  match_extend_back_end_encodeSnappyBlockAsm
  10734 	MOVB -1(DX)(BX*1), DI
  10735 	MOVB -1(DX)(CX*1), R8
  10736 	CMPB DI, R8
  10737 	JNE  match_extend_back_end_encodeSnappyBlockAsm
  10738 	LEAL -1(CX), CX
  10739 	DECL BX
  10740 	JZ   match_extend_back_end_encodeSnappyBlockAsm
  10741 	JMP  match_extend_back_loop_encodeSnappyBlockAsm
  10742 
  10743 match_extend_back_end_encodeSnappyBlockAsm:
  10744 	MOVL CX, SI
  10745 	SUBL 12(SP), SI
  10746 	LEAQ 5(AX)(SI*1), SI
  10747 	CMPQ SI, (SP)
  10748 	JB   match_dst_size_check_encodeSnappyBlockAsm
  10749 	MOVQ $0x00000000, ret+48(FP)
  10750 	RET
  10751 
  10752 match_dst_size_check_encodeSnappyBlockAsm:
  10753 	MOVL CX, SI
  10754 	MOVL 12(SP), DI
  10755 	CMPL DI, SI
  10756 	JEQ  emit_literal_done_match_emit_encodeSnappyBlockAsm
  10757 	MOVL SI, R8
  10758 	MOVL SI, 12(SP)
  10759 	LEAQ (DX)(DI*1), SI
  10760 	SUBL DI, R8
  10761 	LEAL -1(R8), DI
  10762 	CMPL DI, $0x3c
  10763 	JB   one_byte_match_emit_encodeSnappyBlockAsm
  10764 	CMPL DI, $0x00000100
  10765 	JB   two_bytes_match_emit_encodeSnappyBlockAsm
  10766 	CMPL DI, $0x00010000
  10767 	JB   three_bytes_match_emit_encodeSnappyBlockAsm
  10768 	CMPL DI, $0x01000000
  10769 	JB   four_bytes_match_emit_encodeSnappyBlockAsm
  10770 	MOVB $0xfc, (AX)
  10771 	MOVL DI, 1(AX)
  10772 	ADDQ $0x05, AX
  10773 	JMP  memmove_long_match_emit_encodeSnappyBlockAsm
  10774 
  10775 four_bytes_match_emit_encodeSnappyBlockAsm:
  10776 	MOVL DI, R9
  10777 	SHRL $0x10, R9
  10778 	MOVB $0xf8, (AX)
  10779 	MOVW DI, 1(AX)
  10780 	MOVB R9, 3(AX)
  10781 	ADDQ $0x04, AX
  10782 	JMP  memmove_long_match_emit_encodeSnappyBlockAsm
  10783 
  10784 three_bytes_match_emit_encodeSnappyBlockAsm:
  10785 	MOVB $0xf4, (AX)
  10786 	MOVW DI, 1(AX)
  10787 	ADDQ $0x03, AX
  10788 	JMP  memmove_long_match_emit_encodeSnappyBlockAsm
  10789 
  10790 two_bytes_match_emit_encodeSnappyBlockAsm:
  10791 	MOVB $0xf0, (AX)
  10792 	MOVB DI, 1(AX)
  10793 	ADDQ $0x02, AX
  10794 	CMPL DI, $0x40
  10795 	JB   memmove_match_emit_encodeSnappyBlockAsm
  10796 	JMP  memmove_long_match_emit_encodeSnappyBlockAsm
  10797 
  10798 one_byte_match_emit_encodeSnappyBlockAsm:
  10799 	SHLB $0x02, DI
  10800 	MOVB DI, (AX)
  10801 	ADDQ $0x01, AX
  10802 
  10803 memmove_match_emit_encodeSnappyBlockAsm:
  10804 	LEAQ (AX)(R8*1), DI
  10805 
  10806 	// genMemMoveShort
  10807 	CMPQ R8, $0x08
  10808 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8
  10809 	CMPQ R8, $0x10
  10810 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16
  10811 	CMPQ R8, $0x20
  10812 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32
  10813 	JMP  emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64
  10814 
  10815 emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8:
  10816 	MOVQ (SI), R9
  10817 	MOVQ R9, (AX)
  10818 	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm
  10819 
  10820 emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16:
  10821 	MOVQ (SI), R9
  10822 	MOVQ -8(SI)(R8*1), SI
  10823 	MOVQ R9, (AX)
  10824 	MOVQ SI, -8(AX)(R8*1)
  10825 	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm
  10826 
  10827 emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32:
  10828 	MOVOU (SI), X0
  10829 	MOVOU -16(SI)(R8*1), X1
  10830 	MOVOU X0, (AX)
  10831 	MOVOU X1, -16(AX)(R8*1)
  10832 	JMP   memmove_end_copy_match_emit_encodeSnappyBlockAsm
  10833 
  10834 emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64:
  10835 	MOVOU (SI), X0
  10836 	MOVOU 16(SI), X1
  10837 	MOVOU -32(SI)(R8*1), X2
  10838 	MOVOU -16(SI)(R8*1), X3
  10839 	MOVOU X0, (AX)
  10840 	MOVOU X1, 16(AX)
  10841 	MOVOU X2, -32(AX)(R8*1)
  10842 	MOVOU X3, -16(AX)(R8*1)
  10843 
  10844 memmove_end_copy_match_emit_encodeSnappyBlockAsm:
  10845 	MOVQ DI, AX
  10846 	JMP  emit_literal_done_match_emit_encodeSnappyBlockAsm
  10847 
  10848 memmove_long_match_emit_encodeSnappyBlockAsm:
  10849 	LEAQ (AX)(R8*1), DI
  10850 
  10851 	// genMemMoveLong
  10852 	MOVOU (SI), X0
  10853 	MOVOU 16(SI), X1
  10854 	MOVOU -32(SI)(R8*1), X2
  10855 	MOVOU -16(SI)(R8*1), X3
  10856 	MOVQ  R8, R10
  10857 	SHRQ  $0x05, R10
  10858 	MOVQ  AX, R9
  10859 	ANDL  $0x0000001f, R9
  10860 	MOVQ  $0x00000040, R11
  10861 	SUBQ  R9, R11
  10862 	DECQ  R10
  10863 	JA    emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
  10864 	LEAQ  -32(SI)(R11*1), R9
  10865 	LEAQ  -32(AX)(R11*1), R12
  10866 
  10867 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back:
  10868 	MOVOU (R9), X4
  10869 	MOVOU 16(R9), X5
  10870 	MOVOA X4, (R12)
  10871 	MOVOA X5, 16(R12)
  10872 	ADDQ  $0x20, R12
  10873 	ADDQ  $0x20, R9
  10874 	ADDQ  $0x20, R11
  10875 	DECQ  R10
  10876 	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back
  10877 
  10878 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32:
  10879 	MOVOU -32(SI)(R11*1), X4
  10880 	MOVOU -16(SI)(R11*1), X5
  10881 	MOVOA X4, -32(AX)(R11*1)
  10882 	MOVOA X5, -16(AX)(R11*1)
  10883 	ADDQ  $0x20, R11
  10884 	CMPQ  R8, R11
  10885 	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
  10886 	MOVOU X0, (AX)
  10887 	MOVOU X1, 16(AX)
  10888 	MOVOU X2, -32(AX)(R8*1)
  10889 	MOVOU X3, -16(AX)(R8*1)
  10890 	MOVQ  DI, AX
  10891 
  10892 emit_literal_done_match_emit_encodeSnappyBlockAsm:
  10893 match_nolit_loop_encodeSnappyBlockAsm:
  10894 	MOVL CX, SI
  10895 	SUBL BX, SI
  10896 	MOVL SI, 16(SP)
  10897 	ADDL $0x04, CX
  10898 	ADDL $0x04, BX
  10899 	MOVQ src_len+32(FP), SI
  10900 	SUBL CX, SI
  10901 	LEAQ (DX)(CX*1), DI
  10902 	LEAQ (DX)(BX*1), BX
  10903 
  10904 	// matchLen
  10905 	XORL R9, R9
  10906 	CMPL SI, $0x08
  10907 	JB   matchlen_match4_match_nolit_encodeSnappyBlockAsm
  10908 
  10909 matchlen_loopback_match_nolit_encodeSnappyBlockAsm:
  10910 	MOVQ  (DI)(R9*1), R8
  10911 	XORQ  (BX)(R9*1), R8
  10912 	TESTQ R8, R8
  10913 	JZ    matchlen_loop_match_nolit_encodeSnappyBlockAsm
  10914 
  10915 #ifdef GOAMD64_v3
  10916 	TZCNTQ R8, R8
  10917 
  10918 #else
  10919 	BSFQ R8, R8
  10920 
  10921 #endif
  10922 	SARQ $0x03, R8
  10923 	LEAL (R9)(R8*1), R9
  10924 	JMP  match_nolit_end_encodeSnappyBlockAsm
  10925 
  10926 matchlen_loop_match_nolit_encodeSnappyBlockAsm:
  10927 	LEAL -8(SI), SI
  10928 	LEAL 8(R9), R9
  10929 	CMPL SI, $0x08
  10930 	JAE  matchlen_loopback_match_nolit_encodeSnappyBlockAsm
  10931 	JZ   match_nolit_end_encodeSnappyBlockAsm
  10932 
  10933 matchlen_match4_match_nolit_encodeSnappyBlockAsm:
  10934 	CMPL SI, $0x04
  10935 	JB   matchlen_match2_match_nolit_encodeSnappyBlockAsm
  10936 	MOVL (DI)(R9*1), R8
  10937 	CMPL (BX)(R9*1), R8
  10938 	JNE  matchlen_match2_match_nolit_encodeSnappyBlockAsm
  10939 	SUBL $0x04, SI
  10940 	LEAL 4(R9), R9
  10941 
  10942 matchlen_match2_match_nolit_encodeSnappyBlockAsm:
  10943 	CMPL SI, $0x02
  10944 	JB   matchlen_match1_match_nolit_encodeSnappyBlockAsm
  10945 	MOVW (DI)(R9*1), R8
  10946 	CMPW (BX)(R9*1), R8
  10947 	JNE  matchlen_match1_match_nolit_encodeSnappyBlockAsm
  10948 	SUBL $0x02, SI
  10949 	LEAL 2(R9), R9
  10950 
  10951 matchlen_match1_match_nolit_encodeSnappyBlockAsm:
  10952 	CMPL SI, $0x01
  10953 	JB   match_nolit_end_encodeSnappyBlockAsm
  10954 	MOVB (DI)(R9*1), R8
  10955 	CMPB (BX)(R9*1), R8
  10956 	JNE  match_nolit_end_encodeSnappyBlockAsm
  10957 	LEAL 1(R9), R9
  10958 
  10959 match_nolit_end_encodeSnappyBlockAsm:
  10960 	ADDL R9, CX
  10961 	MOVL 16(SP), BX
  10962 	ADDL $0x04, R9
  10963 	MOVL CX, 12(SP)
  10964 
  10965 	// emitCopy
  10966 	CMPL BX, $0x00010000
  10967 	JB   two_byte_offset_match_nolit_encodeSnappyBlockAsm
  10968 
  10969 four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm:
  10970 	CMPL R9, $0x40
  10971 	JBE  four_bytes_remain_match_nolit_encodeSnappyBlockAsm
  10972 	MOVB $0xff, (AX)
  10973 	MOVL BX, 1(AX)
  10974 	LEAL -64(R9), R9
  10975 	ADDQ $0x05, AX
  10976 	CMPL R9, $0x04
  10977 	JB   four_bytes_remain_match_nolit_encodeSnappyBlockAsm
  10978 	JMP  four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm
  10979 
  10980 four_bytes_remain_match_nolit_encodeSnappyBlockAsm:
  10981 	TESTL R9, R9
  10982 	JZ    match_nolit_emitcopy_end_encodeSnappyBlockAsm
  10983 	XORL  SI, SI
  10984 	LEAL  -1(SI)(R9*4), R9
  10985 	MOVB  R9, (AX)
  10986 	MOVL  BX, 1(AX)
  10987 	ADDQ  $0x05, AX
  10988 	JMP   match_nolit_emitcopy_end_encodeSnappyBlockAsm
  10989 
  10990 two_byte_offset_match_nolit_encodeSnappyBlockAsm:
  10991 	CMPL R9, $0x40
  10992 	JBE  two_byte_offset_short_match_nolit_encodeSnappyBlockAsm
  10993 	MOVB $0xee, (AX)
  10994 	MOVW BX, 1(AX)
  10995 	LEAL -60(R9), R9
  10996 	ADDQ $0x03, AX
  10997 	JMP  two_byte_offset_match_nolit_encodeSnappyBlockAsm
  10998 
  10999 two_byte_offset_short_match_nolit_encodeSnappyBlockAsm:
  11000 	MOVL R9, SI
  11001 	SHLL $0x02, SI
  11002 	CMPL R9, $0x0c
  11003 	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm
  11004 	CMPL BX, $0x00000800
  11005 	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm
  11006 	LEAL -15(SI), SI
  11007 	MOVB BL, 1(AX)
  11008 	SHRL $0x08, BX
  11009 	SHLL $0x05, BX
  11010 	ORL  BX, SI
  11011 	MOVB SI, (AX)
  11012 	ADDQ $0x02, AX
  11013 	JMP  match_nolit_emitcopy_end_encodeSnappyBlockAsm
  11014 
  11015 emit_copy_three_match_nolit_encodeSnappyBlockAsm:
  11016 	LEAL -2(SI), SI
  11017 	MOVB SI, (AX)
  11018 	MOVW BX, 1(AX)
  11019 	ADDQ $0x03, AX
  11020 
  11021 match_nolit_emitcopy_end_encodeSnappyBlockAsm:
  11022 	CMPL CX, 8(SP)
  11023 	JAE  emit_remainder_encodeSnappyBlockAsm
  11024 	MOVQ -2(DX)(CX*1), SI
  11025 	CMPQ AX, (SP)
  11026 	JB   match_nolit_dst_ok_encodeSnappyBlockAsm
  11027 	MOVQ $0x00000000, ret+48(FP)
  11028 	RET
  11029 
  11030 match_nolit_dst_ok_encodeSnappyBlockAsm:
  11031 	MOVQ  $0x0000cf1bbcdcbf9b, R8
  11032 	MOVQ  SI, DI
  11033 	SHRQ  $0x10, SI
  11034 	MOVQ  SI, BX
  11035 	SHLQ  $0x10, DI
  11036 	IMULQ R8, DI
  11037 	SHRQ  $0x32, DI
  11038 	SHLQ  $0x10, BX
  11039 	IMULQ R8, BX
  11040 	SHRQ  $0x32, BX
  11041 	LEAL  -2(CX), R8
  11042 	LEAQ  24(SP)(BX*4), R9
  11043 	MOVL  (R9), BX
  11044 	MOVL  R8, 24(SP)(DI*4)
  11045 	MOVL  CX, (R9)
  11046 	CMPL  (DX)(BX*1), SI
  11047 	JEQ   match_nolit_loop_encodeSnappyBlockAsm
  11048 	INCL  CX
  11049 	JMP   search_loop_encodeSnappyBlockAsm
  11050 
  11051 emit_remainder_encodeSnappyBlockAsm:
  11052 	MOVQ src_len+32(FP), CX
  11053 	SUBL 12(SP), CX
  11054 	LEAQ 5(AX)(CX*1), CX
  11055 	CMPQ CX, (SP)
  11056 	JB   emit_remainder_ok_encodeSnappyBlockAsm
  11057 	MOVQ $0x00000000, ret+48(FP)
  11058 	RET
  11059 
  11060 emit_remainder_ok_encodeSnappyBlockAsm:
  11061 	MOVQ src_len+32(FP), CX
  11062 	MOVL 12(SP), BX
  11063 	CMPL BX, CX
  11064 	JEQ  emit_literal_done_emit_remainder_encodeSnappyBlockAsm
  11065 	MOVL CX, SI
  11066 	MOVL CX, 12(SP)
  11067 	LEAQ (DX)(BX*1), CX
  11068 	SUBL BX, SI
  11069 	LEAL -1(SI), DX
  11070 	CMPL DX, $0x3c
  11071 	JB   one_byte_emit_remainder_encodeSnappyBlockAsm
  11072 	CMPL DX, $0x00000100
  11073 	JB   two_bytes_emit_remainder_encodeSnappyBlockAsm
  11074 	CMPL DX, $0x00010000
  11075 	JB   three_bytes_emit_remainder_encodeSnappyBlockAsm
  11076 	CMPL DX, $0x01000000
  11077 	JB   four_bytes_emit_remainder_encodeSnappyBlockAsm
  11078 	MOVB $0xfc, (AX)
  11079 	MOVL DX, 1(AX)
  11080 	ADDQ $0x05, AX
  11081 	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm
  11082 
  11083 four_bytes_emit_remainder_encodeSnappyBlockAsm:
  11084 	MOVL DX, BX
  11085 	SHRL $0x10, BX
  11086 	MOVB $0xf8, (AX)
  11087 	MOVW DX, 1(AX)
  11088 	MOVB BL, 3(AX)
  11089 	ADDQ $0x04, AX
  11090 	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm
  11091 
  11092 three_bytes_emit_remainder_encodeSnappyBlockAsm:
  11093 	MOVB $0xf4, (AX)
  11094 	MOVW DX, 1(AX)
  11095 	ADDQ $0x03, AX
  11096 	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm
  11097 
  11098 two_bytes_emit_remainder_encodeSnappyBlockAsm:
  11099 	MOVB $0xf0, (AX)
  11100 	MOVB DL, 1(AX)
  11101 	ADDQ $0x02, AX
  11102 	CMPL DX, $0x40
  11103 	JB   memmove_emit_remainder_encodeSnappyBlockAsm
  11104 	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm
  11105 
  11106 one_byte_emit_remainder_encodeSnappyBlockAsm:
  11107 	SHLB $0x02, DL
  11108 	MOVB DL, (AX)
  11109 	ADDQ $0x01, AX
  11110 
  11111 memmove_emit_remainder_encodeSnappyBlockAsm:
  11112 	LEAQ (AX)(SI*1), DX
  11113 	MOVL SI, BX
  11114 
  11115 	// genMemMoveShort
  11116 	CMPQ BX, $0x03
  11117 	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_1or2
  11118 	JE   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_3
  11119 	CMPQ BX, $0x08
  11120 	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_4through7
  11121 	CMPQ BX, $0x10
  11122 	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8through16
  11123 	CMPQ BX, $0x20
  11124 	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_17through32
  11125 	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64
  11126 
  11127 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_1or2:
  11128 	MOVB (CX), SI
  11129 	MOVB -1(CX)(BX*1), CL
  11130 	MOVB SI, (AX)
  11131 	MOVB CL, -1(AX)(BX*1)
  11132 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
  11133 
  11134 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_3:
  11135 	MOVW (CX), SI
  11136 	MOVB 2(CX), CL
  11137 	MOVW SI, (AX)
  11138 	MOVB CL, 2(AX)
  11139 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
  11140 
  11141 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_4through7:
  11142 	MOVL (CX), SI
  11143 	MOVL -4(CX)(BX*1), CX
  11144 	MOVL SI, (AX)
  11145 	MOVL CX, -4(AX)(BX*1)
  11146 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
  11147 
  11148 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8through16:
  11149 	MOVQ (CX), SI
  11150 	MOVQ -8(CX)(BX*1), CX
  11151 	MOVQ SI, (AX)
  11152 	MOVQ CX, -8(AX)(BX*1)
  11153 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
  11154 
  11155 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_17through32:
  11156 	MOVOU (CX), X0
  11157 	MOVOU -16(CX)(BX*1), X1
  11158 	MOVOU X0, (AX)
  11159 	MOVOU X1, -16(AX)(BX*1)
  11160 	JMP   memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
  11161 
  11162 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64:
  11163 	MOVOU (CX), X0
  11164 	MOVOU 16(CX), X1
  11165 	MOVOU -32(CX)(BX*1), X2
  11166 	MOVOU -16(CX)(BX*1), X3
  11167 	MOVOU X0, (AX)
  11168 	MOVOU X1, 16(AX)
  11169 	MOVOU X2, -32(AX)(BX*1)
  11170 	MOVOU X3, -16(AX)(BX*1)
  11171 
  11172 memmove_end_copy_emit_remainder_encodeSnappyBlockAsm:
  11173 	MOVQ DX, AX
  11174 	JMP  emit_literal_done_emit_remainder_encodeSnappyBlockAsm
  11175 
  11176 memmove_long_emit_remainder_encodeSnappyBlockAsm:
  11177 	LEAQ (AX)(SI*1), DX
  11178 	MOVL SI, BX
  11179 
  11180 	// genMemMoveLong
  11181 	MOVOU (CX), X0
  11182 	MOVOU 16(CX), X1
  11183 	MOVOU -32(CX)(BX*1), X2
  11184 	MOVOU -16(CX)(BX*1), X3
  11185 	MOVQ  BX, DI
  11186 	SHRQ  $0x05, DI
  11187 	MOVQ  AX, SI
  11188 	ANDL  $0x0000001f, SI
  11189 	MOVQ  $0x00000040, R8
  11190 	SUBQ  SI, R8
  11191 	DECQ  DI
  11192 	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32
  11193 	LEAQ  -32(CX)(R8*1), SI
  11194 	LEAQ  -32(AX)(R8*1), R9
  11195 
  11196 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back:
  11197 	MOVOU (SI), X4
  11198 	MOVOU 16(SI), X5
  11199 	MOVOA X4, (R9)
  11200 	MOVOA X5, 16(R9)
  11201 	ADDQ  $0x20, R9
  11202 	ADDQ  $0x20, SI
  11203 	ADDQ  $0x20, R8
  11204 	DECQ  DI
  11205 	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back
  11206 
  11207 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32:
  11208 	MOVOU -32(CX)(R8*1), X4
  11209 	MOVOU -16(CX)(R8*1), X5
  11210 	MOVOA X4, -32(AX)(R8*1)
  11211 	MOVOA X5, -16(AX)(R8*1)
  11212 	ADDQ  $0x20, R8
  11213 	CMPQ  BX, R8
  11214 	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32
  11215 	MOVOU X0, (AX)
  11216 	MOVOU X1, 16(AX)
  11217 	MOVOU X2, -32(AX)(BX*1)
  11218 	MOVOU X3, -16(AX)(BX*1)
  11219 	MOVQ  DX, AX
  11220 
  11221 emit_literal_done_emit_remainder_encodeSnappyBlockAsm:
  11222 	MOVQ dst_base+0(FP), CX
  11223 	SUBQ CX, AX
  11224 	MOVQ AX, ret+48(FP)
  11225 	RET
  11226 
  11227 // func encodeSnappyBlockAsm64K(dst []byte, src []byte) int
  11228 // Requires: BMI, SSE2
  11229 TEXT ·encodeSnappyBlockAsm64K(SB), $65560-56
  11230 	MOVQ dst_base+0(FP), AX
  11231 	MOVQ $0x00000200, CX
  11232 	LEAQ 24(SP), DX
  11233 	PXOR X0, X0
  11234 
  11235 zero_loop_encodeSnappyBlockAsm64K:
  11236 	MOVOU X0, (DX)
  11237 	MOVOU X0, 16(DX)
  11238 	MOVOU X0, 32(DX)
  11239 	MOVOU X0, 48(DX)
  11240 	MOVOU X0, 64(DX)
  11241 	MOVOU X0, 80(DX)
  11242 	MOVOU X0, 96(DX)
  11243 	MOVOU X0, 112(DX)
  11244 	ADDQ  $0x80, DX
  11245 	DECQ  CX
  11246 	JNZ   zero_loop_encodeSnappyBlockAsm64K
  11247 	MOVL  $0x00000000, 12(SP)
  11248 	MOVQ  src_len+32(FP), CX
  11249 	LEAQ  -9(CX), DX
  11250 	LEAQ  -8(CX), BX
  11251 	MOVL  BX, 8(SP)
  11252 	SHRQ  $0x05, CX
  11253 	SUBL  CX, DX
  11254 	LEAQ  (AX)(DX*1), DX
  11255 	MOVQ  DX, (SP)
  11256 	MOVL  $0x00000001, CX
  11257 	MOVL  CX, 16(SP)
  11258 	MOVQ  src_base+24(FP), DX
  11259 
  11260 search_loop_encodeSnappyBlockAsm64K:
  11261 	MOVL  CX, BX
  11262 	SUBL  12(SP), BX
  11263 	SHRL  $0x06, BX
  11264 	LEAL  4(CX)(BX*1), BX
  11265 	CMPL  BX, 8(SP)
  11266 	JAE   emit_remainder_encodeSnappyBlockAsm64K
  11267 	MOVQ  (DX)(CX*1), SI
  11268 	MOVL  BX, 20(SP)
  11269 	MOVQ  $0x0000cf1bbcdcbf9b, R8
  11270 	MOVQ  SI, R9
  11271 	MOVQ  SI, R10
  11272 	SHRQ  $0x08, R10
  11273 	SHLQ  $0x10, R9
  11274 	IMULQ R8, R9
  11275 	SHRQ  $0x32, R9
  11276 	SHLQ  $0x10, R10
  11277 	IMULQ R8, R10
  11278 	SHRQ  $0x32, R10
  11279 	MOVL  24(SP)(R9*4), BX
  11280 	MOVL  24(SP)(R10*4), DI
  11281 	MOVL  CX, 24(SP)(R9*4)
  11282 	LEAL  1(CX), R9
  11283 	MOVL  R9, 24(SP)(R10*4)
  11284 	MOVQ  SI, R9
  11285 	SHRQ  $0x10, R9
  11286 	SHLQ  $0x10, R9
  11287 	IMULQ R8, R9
  11288 	SHRQ  $0x32, R9
  11289 	MOVL  CX, R8
  11290 	SUBL  16(SP), R8
  11291 	MOVL  1(DX)(R8*1), R10
  11292 	MOVQ  SI, R8
  11293 	SHRQ  $0x08, R8
  11294 	CMPL  R8, R10
  11295 	JNE   no_repeat_found_encodeSnappyBlockAsm64K
  11296 	LEAL  1(CX), SI
  11297 	MOVL  12(SP), BX
  11298 	MOVL  SI, DI
  11299 	SUBL  16(SP), DI
  11300 	JZ    repeat_extend_back_end_encodeSnappyBlockAsm64K
  11301 
  11302 repeat_extend_back_loop_encodeSnappyBlockAsm64K:
  11303 	CMPL SI, BX
  11304 	JBE  repeat_extend_back_end_encodeSnappyBlockAsm64K
  11305 	MOVB -1(DX)(DI*1), R8
  11306 	MOVB -1(DX)(SI*1), R9
  11307 	CMPB R8, R9
  11308 	JNE  repeat_extend_back_end_encodeSnappyBlockAsm64K
  11309 	LEAL -1(SI), SI
  11310 	DECL DI
  11311 	JNZ  repeat_extend_back_loop_encodeSnappyBlockAsm64K
  11312 
  11313 repeat_extend_back_end_encodeSnappyBlockAsm64K:
  11314 	MOVL 12(SP), BX
  11315 	CMPL BX, SI
  11316 	JEQ  emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K
  11317 	MOVL SI, DI
  11318 	MOVL SI, 12(SP)
  11319 	LEAQ (DX)(BX*1), R8
  11320 	SUBL BX, DI
  11321 	LEAL -1(DI), BX
  11322 	CMPL BX, $0x3c
  11323 	JB   one_byte_repeat_emit_encodeSnappyBlockAsm64K
  11324 	CMPL BX, $0x00000100
  11325 	JB   two_bytes_repeat_emit_encodeSnappyBlockAsm64K
  11326 	JB   three_bytes_repeat_emit_encodeSnappyBlockAsm64K
  11327 
  11328 three_bytes_repeat_emit_encodeSnappyBlockAsm64K:
  11329 	MOVB $0xf4, (AX)
  11330 	MOVW BX, 1(AX)
  11331 	ADDQ $0x03, AX
  11332 	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm64K
  11333 
  11334 two_bytes_repeat_emit_encodeSnappyBlockAsm64K:
  11335 	MOVB $0xf0, (AX)
  11336 	MOVB BL, 1(AX)
  11337 	ADDQ $0x02, AX
  11338 	CMPL BX, $0x40
  11339 	JB   memmove_repeat_emit_encodeSnappyBlockAsm64K
  11340 	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm64K
  11341 
  11342 one_byte_repeat_emit_encodeSnappyBlockAsm64K:
  11343 	SHLB $0x02, BL
  11344 	MOVB BL, (AX)
  11345 	ADDQ $0x01, AX
  11346 
  11347 memmove_repeat_emit_encodeSnappyBlockAsm64K:
  11348 	LEAQ (AX)(DI*1), BX
  11349 
  11350 	// genMemMoveShort
  11351 	CMPQ DI, $0x08
  11352 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8
  11353 	CMPQ DI, $0x10
  11354 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8through16
  11355 	CMPQ DI, $0x20
  11356 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_17through32
  11357 	JMP  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_33through64
  11358 
  11359 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8:
  11360 	MOVQ (R8), R9
  11361 	MOVQ R9, (AX)
  11362 	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K
  11363 
  11364 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8through16:
  11365 	MOVQ (R8), R9
  11366 	MOVQ -8(R8)(DI*1), R8
  11367 	MOVQ R9, (AX)
  11368 	MOVQ R8, -8(AX)(DI*1)
  11369 	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K
  11370 
  11371 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_17through32:
  11372 	MOVOU (R8), X0
  11373 	MOVOU -16(R8)(DI*1), X1
  11374 	MOVOU X0, (AX)
  11375 	MOVOU X1, -16(AX)(DI*1)
  11376 	JMP   memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K
  11377 
  11378 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_33through64:
  11379 	MOVOU (R8), X0
  11380 	MOVOU 16(R8), X1
  11381 	MOVOU -32(R8)(DI*1), X2
  11382 	MOVOU -16(R8)(DI*1), X3
  11383 	MOVOU X0, (AX)
  11384 	MOVOU X1, 16(AX)
  11385 	MOVOU X2, -32(AX)(DI*1)
  11386 	MOVOU X3, -16(AX)(DI*1)
  11387 
  11388 memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K:
  11389 	MOVQ BX, AX
  11390 	JMP  emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K
  11391 
  11392 memmove_long_repeat_emit_encodeSnappyBlockAsm64K:
  11393 	LEAQ (AX)(DI*1), BX
  11394 
  11395 	// genMemMoveLong
  11396 	MOVOU (R8), X0
  11397 	MOVOU 16(R8), X1
  11398 	MOVOU -32(R8)(DI*1), X2
  11399 	MOVOU -16(R8)(DI*1), X3
  11400 	MOVQ  DI, R10
  11401 	SHRQ  $0x05, R10
  11402 	MOVQ  AX, R9
  11403 	ANDL  $0x0000001f, R9
  11404 	MOVQ  $0x00000040, R11
  11405 	SUBQ  R9, R11
  11406 	DECQ  R10
  11407 	JA    emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
  11408 	LEAQ  -32(R8)(R11*1), R9
  11409 	LEAQ  -32(AX)(R11*1), R12
  11410 
  11411 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_big_loop_back:
  11412 	MOVOU (R9), X4
  11413 	MOVOU 16(R9), X5
  11414 	MOVOA X4, (R12)
  11415 	MOVOA X5, 16(R12)
  11416 	ADDQ  $0x20, R12
  11417 	ADDQ  $0x20, R9
  11418 	ADDQ  $0x20, R11
  11419 	DECQ  R10
  11420 	JNA   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_big_loop_back
  11421 
  11422 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32:
  11423 	MOVOU -32(R8)(R11*1), X4
  11424 	MOVOU -16(R8)(R11*1), X5
  11425 	MOVOA X4, -32(AX)(R11*1)
  11426 	MOVOA X5, -16(AX)(R11*1)
  11427 	ADDQ  $0x20, R11
  11428 	CMPQ  DI, R11
  11429 	JAE   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
  11430 	MOVOU X0, (AX)
  11431 	MOVOU X1, 16(AX)
  11432 	MOVOU X2, -32(AX)(DI*1)
  11433 	MOVOU X3, -16(AX)(DI*1)
  11434 	MOVQ  BX, AX
  11435 
  11436 emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K:
  11437 	ADDL $0x05, CX
  11438 	MOVL CX, BX
  11439 	SUBL 16(SP), BX
  11440 	MOVQ src_len+32(FP), DI
  11441 	SUBL CX, DI
  11442 	LEAQ (DX)(CX*1), R8
  11443 	LEAQ (DX)(BX*1), BX
  11444 
  11445 	// matchLen
  11446 	XORL R10, R10
  11447 	CMPL DI, $0x08
  11448 	JB   matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K
  11449 
  11450 matchlen_loopback_repeat_extend_encodeSnappyBlockAsm64K:
  11451 	MOVQ  (R8)(R10*1), R9
  11452 	XORQ  (BX)(R10*1), R9
  11453 	TESTQ R9, R9
  11454 	JZ    matchlen_loop_repeat_extend_encodeSnappyBlockAsm64K
  11455 
  11456 #ifdef GOAMD64_v3
  11457 	TZCNTQ R9, R9
  11458 
  11459 #else
  11460 	BSFQ R9, R9
  11461 
  11462 #endif
  11463 	SARQ $0x03, R9
  11464 	LEAL (R10)(R9*1), R10
  11465 	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm64K
  11466 
  11467 matchlen_loop_repeat_extend_encodeSnappyBlockAsm64K:
  11468 	LEAL -8(DI), DI
  11469 	LEAL 8(R10), R10
  11470 	CMPL DI, $0x08
  11471 	JAE  matchlen_loopback_repeat_extend_encodeSnappyBlockAsm64K
  11472 	JZ   repeat_extend_forward_end_encodeSnappyBlockAsm64K
  11473 
  11474 matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K:
  11475 	CMPL DI, $0x04
  11476 	JB   matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K
  11477 	MOVL (R8)(R10*1), R9
  11478 	CMPL (BX)(R10*1), R9
  11479 	JNE  matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K
  11480 	SUBL $0x04, DI
  11481 	LEAL 4(R10), R10
  11482 
  11483 matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K:
  11484 	CMPL DI, $0x02
  11485 	JB   matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K
  11486 	MOVW (R8)(R10*1), R9
  11487 	CMPW (BX)(R10*1), R9
  11488 	JNE  matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K
  11489 	SUBL $0x02, DI
  11490 	LEAL 2(R10), R10
  11491 
  11492 matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K:
  11493 	CMPL DI, $0x01
  11494 	JB   repeat_extend_forward_end_encodeSnappyBlockAsm64K
  11495 	MOVB (R8)(R10*1), R9
  11496 	CMPB (BX)(R10*1), R9
  11497 	JNE  repeat_extend_forward_end_encodeSnappyBlockAsm64K
  11498 	LEAL 1(R10), R10
  11499 
  11500 repeat_extend_forward_end_encodeSnappyBlockAsm64K:
  11501 	ADDL R10, CX
  11502 	MOVL CX, BX
  11503 	SUBL SI, BX
  11504 	MOVL 16(SP), SI
  11505 
  11506 	// emitCopy
  11507 two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm64K:
  11508 	CMPL BX, $0x40
  11509 	JBE  two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm64K
  11510 	MOVB $0xee, (AX)
  11511 	MOVW SI, 1(AX)
  11512 	LEAL -60(BX), BX
  11513 	ADDQ $0x03, AX
  11514 	JMP  two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm64K
  11515 
  11516 two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm64K:
  11517 	MOVL BX, DI
  11518 	SHLL $0x02, DI
  11519 	CMPL BX, $0x0c
  11520 	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K
  11521 	CMPL SI, $0x00000800
  11522 	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K
  11523 	LEAL -15(DI), DI
  11524 	MOVB SI, 1(AX)
  11525 	SHRL $0x08, SI
  11526 	SHLL $0x05, SI
  11527 	ORL  SI, DI
  11528 	MOVB DI, (AX)
  11529 	ADDQ $0x02, AX
  11530 	JMP  repeat_end_emit_encodeSnappyBlockAsm64K
  11531 
  11532 emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K:
  11533 	LEAL -2(DI), DI
  11534 	MOVB DI, (AX)
  11535 	MOVW SI, 1(AX)
  11536 	ADDQ $0x03, AX
  11537 
  11538 repeat_end_emit_encodeSnappyBlockAsm64K:
  11539 	MOVL CX, 12(SP)
  11540 	JMP  search_loop_encodeSnappyBlockAsm64K
  11541 
  11542 no_repeat_found_encodeSnappyBlockAsm64K:
  11543 	CMPL (DX)(BX*1), SI
  11544 	JEQ  candidate_match_encodeSnappyBlockAsm64K
  11545 	SHRQ $0x08, SI
  11546 	MOVL 24(SP)(R9*4), BX
  11547 	LEAL 2(CX), R8
  11548 	CMPL (DX)(DI*1), SI
  11549 	JEQ  candidate2_match_encodeSnappyBlockAsm64K
  11550 	MOVL R8, 24(SP)(R9*4)
  11551 	SHRQ $0x08, SI
  11552 	CMPL (DX)(BX*1), SI
  11553 	JEQ  candidate3_match_encodeSnappyBlockAsm64K
  11554 	MOVL 20(SP), CX
  11555 	JMP  search_loop_encodeSnappyBlockAsm64K
  11556 
  11557 candidate3_match_encodeSnappyBlockAsm64K:
  11558 	ADDL $0x02, CX
  11559 	JMP  candidate_match_encodeSnappyBlockAsm64K
  11560 
  11561 candidate2_match_encodeSnappyBlockAsm64K:
  11562 	MOVL R8, 24(SP)(R9*4)
  11563 	INCL CX
  11564 	MOVL DI, BX
  11565 
  11566 candidate_match_encodeSnappyBlockAsm64K:
  11567 	MOVL  12(SP), SI
  11568 	TESTL BX, BX
  11569 	JZ    match_extend_back_end_encodeSnappyBlockAsm64K
  11570 
  11571 match_extend_back_loop_encodeSnappyBlockAsm64K:
  11572 	CMPL CX, SI
  11573 	JBE  match_extend_back_end_encodeSnappyBlockAsm64K
  11574 	MOVB -1(DX)(BX*1), DI
  11575 	MOVB -1(DX)(CX*1), R8
  11576 	CMPB DI, R8
  11577 	JNE  match_extend_back_end_encodeSnappyBlockAsm64K
  11578 	LEAL -1(CX), CX
  11579 	DECL BX
  11580 	JZ   match_extend_back_end_encodeSnappyBlockAsm64K
  11581 	JMP  match_extend_back_loop_encodeSnappyBlockAsm64K
  11582 
  11583 match_extend_back_end_encodeSnappyBlockAsm64K:
  11584 	MOVL CX, SI
  11585 	SUBL 12(SP), SI
  11586 	LEAQ 3(AX)(SI*1), SI
  11587 	CMPQ SI, (SP)
  11588 	JB   match_dst_size_check_encodeSnappyBlockAsm64K
  11589 	MOVQ $0x00000000, ret+48(FP)
  11590 	RET
  11591 
  11592 match_dst_size_check_encodeSnappyBlockAsm64K:
  11593 	MOVL CX, SI
  11594 	MOVL 12(SP), DI
  11595 	CMPL DI, SI
  11596 	JEQ  emit_literal_done_match_emit_encodeSnappyBlockAsm64K
  11597 	MOVL SI, R8
  11598 	MOVL SI, 12(SP)
  11599 	LEAQ (DX)(DI*1), SI
  11600 	SUBL DI, R8
  11601 	LEAL -1(R8), DI
  11602 	CMPL DI, $0x3c
  11603 	JB   one_byte_match_emit_encodeSnappyBlockAsm64K
  11604 	CMPL DI, $0x00000100
  11605 	JB   two_bytes_match_emit_encodeSnappyBlockAsm64K
  11606 	JB   three_bytes_match_emit_encodeSnappyBlockAsm64K
  11607 
  11608 three_bytes_match_emit_encodeSnappyBlockAsm64K:
  11609 	MOVB $0xf4, (AX)
  11610 	MOVW DI, 1(AX)
  11611 	ADDQ $0x03, AX
  11612 	JMP  memmove_long_match_emit_encodeSnappyBlockAsm64K
  11613 
  11614 two_bytes_match_emit_encodeSnappyBlockAsm64K:
  11615 	MOVB $0xf0, (AX)
  11616 	MOVB DI, 1(AX)
  11617 	ADDQ $0x02, AX
  11618 	CMPL DI, $0x40
  11619 	JB   memmove_match_emit_encodeSnappyBlockAsm64K
  11620 	JMP  memmove_long_match_emit_encodeSnappyBlockAsm64K
  11621 
  11622 one_byte_match_emit_encodeSnappyBlockAsm64K:
  11623 	SHLB $0x02, DI
  11624 	MOVB DI, (AX)
  11625 	ADDQ $0x01, AX
  11626 
  11627 memmove_match_emit_encodeSnappyBlockAsm64K:
  11628 	LEAQ (AX)(R8*1), DI
  11629 
  11630 	// genMemMoveShort
  11631 	CMPQ R8, $0x08
  11632 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8
  11633 	CMPQ R8, $0x10
  11634 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8through16
  11635 	CMPQ R8, $0x20
  11636 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_17through32
  11637 	JMP  emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_33through64
  11638 
  11639 emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8:
  11640 	MOVQ (SI), R9
  11641 	MOVQ R9, (AX)
  11642 	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm64K
  11643 
  11644 emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8through16:
  11645 	MOVQ (SI), R9
  11646 	MOVQ -8(SI)(R8*1), SI
  11647 	MOVQ R9, (AX)
  11648 	MOVQ SI, -8(AX)(R8*1)
  11649 	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm64K
  11650 
  11651 emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_17through32:
  11652 	MOVOU (SI), X0
  11653 	MOVOU -16(SI)(R8*1), X1
  11654 	MOVOU X0, (AX)
  11655 	MOVOU X1, -16(AX)(R8*1)
  11656 	JMP   memmove_end_copy_match_emit_encodeSnappyBlockAsm64K
  11657 
  11658 emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_33through64:
  11659 	MOVOU (SI), X0
  11660 	MOVOU 16(SI), X1
  11661 	MOVOU -32(SI)(R8*1), X2
  11662 	MOVOU -16(SI)(R8*1), X3
  11663 	MOVOU X0, (AX)
  11664 	MOVOU X1, 16(AX)
  11665 	MOVOU X2, -32(AX)(R8*1)
  11666 	MOVOU X3, -16(AX)(R8*1)
  11667 
  11668 memmove_end_copy_match_emit_encodeSnappyBlockAsm64K:
  11669 	MOVQ DI, AX
  11670 	JMP  emit_literal_done_match_emit_encodeSnappyBlockAsm64K
  11671 
  11672 memmove_long_match_emit_encodeSnappyBlockAsm64K:
  11673 	LEAQ (AX)(R8*1), DI
  11674 
  11675 	// genMemMoveLong
  11676 	MOVOU (SI), X0
  11677 	MOVOU 16(SI), X1
  11678 	MOVOU -32(SI)(R8*1), X2
  11679 	MOVOU -16(SI)(R8*1), X3
  11680 	MOVQ  R8, R10
  11681 	SHRQ  $0x05, R10
  11682 	MOVQ  AX, R9
  11683 	ANDL  $0x0000001f, R9
  11684 	MOVQ  $0x00000040, R11
  11685 	SUBQ  R9, R11
  11686 	DECQ  R10
  11687 	JA    emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
  11688 	LEAQ  -32(SI)(R11*1), R9
  11689 	LEAQ  -32(AX)(R11*1), R12
  11690 
  11691 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_big_loop_back:
  11692 	MOVOU (R9), X4
  11693 	MOVOU 16(R9), X5
  11694 	MOVOA X4, (R12)
  11695 	MOVOA X5, 16(R12)
  11696 	ADDQ  $0x20, R12
  11697 	ADDQ  $0x20, R9
  11698 	ADDQ  $0x20, R11
  11699 	DECQ  R10
  11700 	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_big_loop_back
  11701 
  11702 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32:
  11703 	MOVOU -32(SI)(R11*1), X4
  11704 	MOVOU -16(SI)(R11*1), X5
  11705 	MOVOA X4, -32(AX)(R11*1)
  11706 	MOVOA X5, -16(AX)(R11*1)
  11707 	ADDQ  $0x20, R11
  11708 	CMPQ  R8, R11
  11709 	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
  11710 	MOVOU X0, (AX)
  11711 	MOVOU X1, 16(AX)
  11712 	MOVOU X2, -32(AX)(R8*1)
  11713 	MOVOU X3, -16(AX)(R8*1)
  11714 	MOVQ  DI, AX
  11715 
  11716 emit_literal_done_match_emit_encodeSnappyBlockAsm64K:
  11717 match_nolit_loop_encodeSnappyBlockAsm64K:
  11718 	MOVL CX, SI
  11719 	SUBL BX, SI
  11720 	MOVL SI, 16(SP)
  11721 	ADDL $0x04, CX
  11722 	ADDL $0x04, BX
  11723 	MOVQ src_len+32(FP), SI
  11724 	SUBL CX, SI
  11725 	LEAQ (DX)(CX*1), DI
  11726 	LEAQ (DX)(BX*1), BX
  11727 
  11728 	// matchLen
  11729 	XORL R9, R9
  11730 	CMPL SI, $0x08
  11731 	JB   matchlen_match4_match_nolit_encodeSnappyBlockAsm64K
  11732 
  11733 matchlen_loopback_match_nolit_encodeSnappyBlockAsm64K:
  11734 	MOVQ  (DI)(R9*1), R8
  11735 	XORQ  (BX)(R9*1), R8
  11736 	TESTQ R8, R8
  11737 	JZ    matchlen_loop_match_nolit_encodeSnappyBlockAsm64K
  11738 
  11739 #ifdef GOAMD64_v3
  11740 	TZCNTQ R8, R8
  11741 
  11742 #else
  11743 	BSFQ R8, R8
  11744 
  11745 #endif
  11746 	SARQ $0x03, R8
  11747 	LEAL (R9)(R8*1), R9
  11748 	JMP  match_nolit_end_encodeSnappyBlockAsm64K
  11749 
  11750 matchlen_loop_match_nolit_encodeSnappyBlockAsm64K:
  11751 	LEAL -8(SI), SI
  11752 	LEAL 8(R9), R9
  11753 	CMPL SI, $0x08
  11754 	JAE  matchlen_loopback_match_nolit_encodeSnappyBlockAsm64K
  11755 	JZ   match_nolit_end_encodeSnappyBlockAsm64K
  11756 
  11757 matchlen_match4_match_nolit_encodeSnappyBlockAsm64K:
  11758 	CMPL SI, $0x04
  11759 	JB   matchlen_match2_match_nolit_encodeSnappyBlockAsm64K
  11760 	MOVL (DI)(R9*1), R8
  11761 	CMPL (BX)(R9*1), R8
  11762 	JNE  matchlen_match2_match_nolit_encodeSnappyBlockAsm64K
  11763 	SUBL $0x04, SI
  11764 	LEAL 4(R9), R9
  11765 
  11766 matchlen_match2_match_nolit_encodeSnappyBlockAsm64K:
  11767 	CMPL SI, $0x02
  11768 	JB   matchlen_match1_match_nolit_encodeSnappyBlockAsm64K
  11769 	MOVW (DI)(R9*1), R8
  11770 	CMPW (BX)(R9*1), R8
  11771 	JNE  matchlen_match1_match_nolit_encodeSnappyBlockAsm64K
  11772 	SUBL $0x02, SI
  11773 	LEAL 2(R9), R9
  11774 
  11775 matchlen_match1_match_nolit_encodeSnappyBlockAsm64K:
  11776 	CMPL SI, $0x01
  11777 	JB   match_nolit_end_encodeSnappyBlockAsm64K
  11778 	MOVB (DI)(R9*1), R8
  11779 	CMPB (BX)(R9*1), R8
  11780 	JNE  match_nolit_end_encodeSnappyBlockAsm64K
  11781 	LEAL 1(R9), R9
  11782 
  11783 match_nolit_end_encodeSnappyBlockAsm64K:
  11784 	ADDL R9, CX
  11785 	MOVL 16(SP), BX
  11786 	ADDL $0x04, R9
  11787 	MOVL CX, 12(SP)
  11788 
  11789 	// emitCopy
  11790 two_byte_offset_match_nolit_encodeSnappyBlockAsm64K:
  11791 	CMPL R9, $0x40
  11792 	JBE  two_byte_offset_short_match_nolit_encodeSnappyBlockAsm64K
  11793 	MOVB $0xee, (AX)
  11794 	MOVW BX, 1(AX)
  11795 	LEAL -60(R9), R9
  11796 	ADDQ $0x03, AX
  11797 	JMP  two_byte_offset_match_nolit_encodeSnappyBlockAsm64K
  11798 
  11799 two_byte_offset_short_match_nolit_encodeSnappyBlockAsm64K:
  11800 	MOVL R9, SI
  11801 	SHLL $0x02, SI
  11802 	CMPL R9, $0x0c
  11803 	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm64K
  11804 	CMPL BX, $0x00000800
  11805 	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm64K
  11806 	LEAL -15(SI), SI
  11807 	MOVB BL, 1(AX)
  11808 	SHRL $0x08, BX
  11809 	SHLL $0x05, BX
  11810 	ORL  BX, SI
  11811 	MOVB SI, (AX)
  11812 	ADDQ $0x02, AX
  11813 	JMP  match_nolit_emitcopy_end_encodeSnappyBlockAsm64K
  11814 
  11815 emit_copy_three_match_nolit_encodeSnappyBlockAsm64K:
  11816 	LEAL -2(SI), SI
  11817 	MOVB SI, (AX)
  11818 	MOVW BX, 1(AX)
  11819 	ADDQ $0x03, AX
  11820 
  11821 match_nolit_emitcopy_end_encodeSnappyBlockAsm64K:
  11822 	CMPL CX, 8(SP)
  11823 	JAE  emit_remainder_encodeSnappyBlockAsm64K
  11824 	MOVQ -2(DX)(CX*1), SI
  11825 	CMPQ AX, (SP)
  11826 	JB   match_nolit_dst_ok_encodeSnappyBlockAsm64K
  11827 	MOVQ $0x00000000, ret+48(FP)
  11828 	RET
  11829 
  11830 match_nolit_dst_ok_encodeSnappyBlockAsm64K:
  11831 	MOVQ  $0x0000cf1bbcdcbf9b, R8
  11832 	MOVQ  SI, DI
  11833 	SHRQ  $0x10, SI
  11834 	MOVQ  SI, BX
  11835 	SHLQ  $0x10, DI
  11836 	IMULQ R8, DI
  11837 	SHRQ  $0x32, DI
  11838 	SHLQ  $0x10, BX
  11839 	IMULQ R8, BX
  11840 	SHRQ  $0x32, BX
  11841 	LEAL  -2(CX), R8
  11842 	LEAQ  24(SP)(BX*4), R9
  11843 	MOVL  (R9), BX
  11844 	MOVL  R8, 24(SP)(DI*4)
  11845 	MOVL  CX, (R9)
  11846 	CMPL  (DX)(BX*1), SI
  11847 	JEQ   match_nolit_loop_encodeSnappyBlockAsm64K
  11848 	INCL  CX
  11849 	JMP   search_loop_encodeSnappyBlockAsm64K
  11850 
  11851 emit_remainder_encodeSnappyBlockAsm64K:
  11852 	MOVQ src_len+32(FP), CX
  11853 	SUBL 12(SP), CX
  11854 	LEAQ 3(AX)(CX*1), CX
  11855 	CMPQ CX, (SP)
  11856 	JB   emit_remainder_ok_encodeSnappyBlockAsm64K
  11857 	MOVQ $0x00000000, ret+48(FP)
  11858 	RET
  11859 
  11860 emit_remainder_ok_encodeSnappyBlockAsm64K:
  11861 	MOVQ src_len+32(FP), CX
  11862 	MOVL 12(SP), BX
  11863 	CMPL BX, CX
  11864 	JEQ  emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K
  11865 	MOVL CX, SI
  11866 	MOVL CX, 12(SP)
  11867 	LEAQ (DX)(BX*1), CX
  11868 	SUBL BX, SI
  11869 	LEAL -1(SI), DX
  11870 	CMPL DX, $0x3c
  11871 	JB   one_byte_emit_remainder_encodeSnappyBlockAsm64K
  11872 	CMPL DX, $0x00000100
  11873 	JB   two_bytes_emit_remainder_encodeSnappyBlockAsm64K
  11874 	JB   three_bytes_emit_remainder_encodeSnappyBlockAsm64K
  11875 
  11876 three_bytes_emit_remainder_encodeSnappyBlockAsm64K:
  11877 	MOVB $0xf4, (AX)
  11878 	MOVW DX, 1(AX)
  11879 	ADDQ $0x03, AX
  11880 	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm64K
  11881 
  11882 two_bytes_emit_remainder_encodeSnappyBlockAsm64K:
  11883 	MOVB $0xf0, (AX)
  11884 	MOVB DL, 1(AX)
  11885 	ADDQ $0x02, AX
  11886 	CMPL DX, $0x40
  11887 	JB   memmove_emit_remainder_encodeSnappyBlockAsm64K
  11888 	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm64K
  11889 
  11890 one_byte_emit_remainder_encodeSnappyBlockAsm64K:
  11891 	SHLB $0x02, DL
  11892 	MOVB DL, (AX)
  11893 	ADDQ $0x01, AX
  11894 
  11895 memmove_emit_remainder_encodeSnappyBlockAsm64K:
  11896 	LEAQ (AX)(SI*1), DX
  11897 	MOVL SI, BX
  11898 
  11899 	// genMemMoveShort
  11900 	CMPQ BX, $0x03
  11901 	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_1or2
  11902 	JE   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_3
  11903 	CMPQ BX, $0x08
  11904 	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_4through7
  11905 	CMPQ BX, $0x10
  11906 	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_8through16
  11907 	CMPQ BX, $0x20
  11908 	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_17through32
  11909 	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_33through64
  11910 
  11911 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_1or2:
  11912 	MOVB (CX), SI
  11913 	MOVB -1(CX)(BX*1), CL
  11914 	MOVB SI, (AX)
  11915 	MOVB CL, -1(AX)(BX*1)
  11916 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K
  11917 
  11918 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_3:
  11919 	MOVW (CX), SI
  11920 	MOVB 2(CX), CL
  11921 	MOVW SI, (AX)
  11922 	MOVB CL, 2(AX)
  11923 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K
  11924 
  11925 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_4through7:
  11926 	MOVL (CX), SI
  11927 	MOVL -4(CX)(BX*1), CX
  11928 	MOVL SI, (AX)
  11929 	MOVL CX, -4(AX)(BX*1)
  11930 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K
  11931 
  11932 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_8through16:
  11933 	MOVQ (CX), SI
  11934 	MOVQ -8(CX)(BX*1), CX
  11935 	MOVQ SI, (AX)
  11936 	MOVQ CX, -8(AX)(BX*1)
  11937 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K
  11938 
  11939 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_17through32:
  11940 	MOVOU (CX), X0
  11941 	MOVOU -16(CX)(BX*1), X1
  11942 	MOVOU X0, (AX)
  11943 	MOVOU X1, -16(AX)(BX*1)
  11944 	JMP   memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K
  11945 
  11946 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_33through64:
  11947 	MOVOU (CX), X0
  11948 	MOVOU 16(CX), X1
  11949 	MOVOU -32(CX)(BX*1), X2
  11950 	MOVOU -16(CX)(BX*1), X3
  11951 	MOVOU X0, (AX)
  11952 	MOVOU X1, 16(AX)
  11953 	MOVOU X2, -32(AX)(BX*1)
  11954 	MOVOU X3, -16(AX)(BX*1)
  11955 
  11956 memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K:
  11957 	MOVQ DX, AX
  11958 	JMP  emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K
  11959 
  11960 memmove_long_emit_remainder_encodeSnappyBlockAsm64K:
  11961 	LEAQ (AX)(SI*1), DX
  11962 	MOVL SI, BX
  11963 
  11964 	// genMemMoveLong
  11965 	MOVOU (CX), X0
  11966 	MOVOU 16(CX), X1
  11967 	MOVOU -32(CX)(BX*1), X2
  11968 	MOVOU -16(CX)(BX*1), X3
  11969 	MOVQ  BX, DI
  11970 	SHRQ  $0x05, DI
  11971 	MOVQ  AX, SI
  11972 	ANDL  $0x0000001f, SI
  11973 	MOVQ  $0x00000040, R8
  11974 	SUBQ  SI, R8
  11975 	DECQ  DI
  11976 	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
  11977 	LEAQ  -32(CX)(R8*1), SI
  11978 	LEAQ  -32(AX)(R8*1), R9
  11979 
  11980 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back:
  11981 	MOVOU (SI), X4
  11982 	MOVOU 16(SI), X5
  11983 	MOVOA X4, (R9)
  11984 	MOVOA X5, 16(R9)
  11985 	ADDQ  $0x20, R9
  11986 	ADDQ  $0x20, SI
  11987 	ADDQ  $0x20, R8
  11988 	DECQ  DI
  11989 	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back
  11990 
  11991 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32:
  11992 	MOVOU -32(CX)(R8*1), X4
  11993 	MOVOU -16(CX)(R8*1), X5
  11994 	MOVOA X4, -32(AX)(R8*1)
  11995 	MOVOA X5, -16(AX)(R8*1)
  11996 	ADDQ  $0x20, R8
  11997 	CMPQ  BX, R8
  11998 	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
  11999 	MOVOU X0, (AX)
  12000 	MOVOU X1, 16(AX)
  12001 	MOVOU X2, -32(AX)(BX*1)
  12002 	MOVOU X3, -16(AX)(BX*1)
  12003 	MOVQ  DX, AX
  12004 
  12005 emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K:
  12006 	MOVQ dst_base+0(FP), CX
  12007 	SUBQ CX, AX
  12008 	MOVQ AX, ret+48(FP)
  12009 	RET
  12010 
  12011 // func encodeSnappyBlockAsm12B(dst []byte, src []byte) int
  12012 // Requires: BMI, SSE2
  12013 TEXT ·encodeSnappyBlockAsm12B(SB), $16408-56
  12014 	MOVQ dst_base+0(FP), AX
  12015 	MOVQ $0x00000080, CX
  12016 	LEAQ 24(SP), DX
  12017 	PXOR X0, X0
  12018 
  12019 zero_loop_encodeSnappyBlockAsm12B:
  12020 	MOVOU X0, (DX)
  12021 	MOVOU X0, 16(DX)
  12022 	MOVOU X0, 32(DX)
  12023 	MOVOU X0, 48(DX)
  12024 	MOVOU X0, 64(DX)
  12025 	MOVOU X0, 80(DX)
  12026 	MOVOU X0, 96(DX)
  12027 	MOVOU X0, 112(DX)
  12028 	ADDQ  $0x80, DX
  12029 	DECQ  CX
  12030 	JNZ   zero_loop_encodeSnappyBlockAsm12B
  12031 	MOVL  $0x00000000, 12(SP)
  12032 	MOVQ  src_len+32(FP), CX
  12033 	LEAQ  -9(CX), DX
  12034 	LEAQ  -8(CX), BX
  12035 	MOVL  BX, 8(SP)
  12036 	SHRQ  $0x05, CX
  12037 	SUBL  CX, DX
  12038 	LEAQ  (AX)(DX*1), DX
  12039 	MOVQ  DX, (SP)
  12040 	MOVL  $0x00000001, CX
  12041 	MOVL  CX, 16(SP)
  12042 	MOVQ  src_base+24(FP), DX
  12043 
  12044 search_loop_encodeSnappyBlockAsm12B:
  12045 	MOVL  CX, BX
  12046 	SUBL  12(SP), BX
  12047 	SHRL  $0x05, BX
  12048 	LEAL  4(CX)(BX*1), BX
  12049 	CMPL  BX, 8(SP)
  12050 	JAE   emit_remainder_encodeSnappyBlockAsm12B
  12051 	MOVQ  (DX)(CX*1), SI
  12052 	MOVL  BX, 20(SP)
  12053 	MOVQ  $0x000000cf1bbcdcbb, R8
  12054 	MOVQ  SI, R9
  12055 	MOVQ  SI, R10
  12056 	SHRQ  $0x08, R10
  12057 	SHLQ  $0x18, R9
  12058 	IMULQ R8, R9
  12059 	SHRQ  $0x34, R9
  12060 	SHLQ  $0x18, R10
  12061 	IMULQ R8, R10
  12062 	SHRQ  $0x34, R10
  12063 	MOVL  24(SP)(R9*4), BX
  12064 	MOVL  24(SP)(R10*4), DI
  12065 	MOVL  CX, 24(SP)(R9*4)
  12066 	LEAL  1(CX), R9
  12067 	MOVL  R9, 24(SP)(R10*4)
  12068 	MOVQ  SI, R9
  12069 	SHRQ  $0x10, R9
  12070 	SHLQ  $0x18, R9
  12071 	IMULQ R8, R9
  12072 	SHRQ  $0x34, R9
  12073 	MOVL  CX, R8
  12074 	SUBL  16(SP), R8
  12075 	MOVL  1(DX)(R8*1), R10
  12076 	MOVQ  SI, R8
  12077 	SHRQ  $0x08, R8
  12078 	CMPL  R8, R10
  12079 	JNE   no_repeat_found_encodeSnappyBlockAsm12B
  12080 	LEAL  1(CX), SI
  12081 	MOVL  12(SP), BX
  12082 	MOVL  SI, DI
  12083 	SUBL  16(SP), DI
  12084 	JZ    repeat_extend_back_end_encodeSnappyBlockAsm12B
  12085 
  12086 repeat_extend_back_loop_encodeSnappyBlockAsm12B:
  12087 	CMPL SI, BX
  12088 	JBE  repeat_extend_back_end_encodeSnappyBlockAsm12B
  12089 	MOVB -1(DX)(DI*1), R8
  12090 	MOVB -1(DX)(SI*1), R9
  12091 	CMPB R8, R9
  12092 	JNE  repeat_extend_back_end_encodeSnappyBlockAsm12B
  12093 	LEAL -1(SI), SI
  12094 	DECL DI
  12095 	JNZ  repeat_extend_back_loop_encodeSnappyBlockAsm12B
  12096 
  12097 repeat_extend_back_end_encodeSnappyBlockAsm12B:
  12098 	MOVL 12(SP), BX
  12099 	CMPL BX, SI
  12100 	JEQ  emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B
  12101 	MOVL SI, DI
  12102 	MOVL SI, 12(SP)
  12103 	LEAQ (DX)(BX*1), R8
  12104 	SUBL BX, DI
  12105 	LEAL -1(DI), BX
  12106 	CMPL BX, $0x3c
  12107 	JB   one_byte_repeat_emit_encodeSnappyBlockAsm12B
  12108 	CMPL BX, $0x00000100
  12109 	JB   two_bytes_repeat_emit_encodeSnappyBlockAsm12B
  12110 	JB   three_bytes_repeat_emit_encodeSnappyBlockAsm12B
  12111 
  12112 three_bytes_repeat_emit_encodeSnappyBlockAsm12B:
  12113 	MOVB $0xf4, (AX)
  12114 	MOVW BX, 1(AX)
  12115 	ADDQ $0x03, AX
  12116 	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm12B
  12117 
  12118 two_bytes_repeat_emit_encodeSnappyBlockAsm12B:
  12119 	MOVB $0xf0, (AX)
  12120 	MOVB BL, 1(AX)
  12121 	ADDQ $0x02, AX
  12122 	CMPL BX, $0x40
  12123 	JB   memmove_repeat_emit_encodeSnappyBlockAsm12B
  12124 	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm12B
  12125 
  12126 one_byte_repeat_emit_encodeSnappyBlockAsm12B:
  12127 	SHLB $0x02, BL
  12128 	MOVB BL, (AX)
  12129 	ADDQ $0x01, AX
  12130 
  12131 memmove_repeat_emit_encodeSnappyBlockAsm12B:
  12132 	LEAQ (AX)(DI*1), BX
  12133 
  12134 	// genMemMoveShort
  12135 	CMPQ DI, $0x08
  12136 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8
  12137 	CMPQ DI, $0x10
  12138 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16
  12139 	CMPQ DI, $0x20
  12140 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32
  12141 	JMP  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64
  12142 
  12143 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8:
  12144 	MOVQ (R8), R9
  12145 	MOVQ R9, (AX)
  12146 	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B
  12147 
  12148 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16:
  12149 	MOVQ (R8), R9
  12150 	MOVQ -8(R8)(DI*1), R8
  12151 	MOVQ R9, (AX)
  12152 	MOVQ R8, -8(AX)(DI*1)
  12153 	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B
  12154 
  12155 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32:
  12156 	MOVOU (R8), X0
  12157 	MOVOU -16(R8)(DI*1), X1
  12158 	MOVOU X0, (AX)
  12159 	MOVOU X1, -16(AX)(DI*1)
  12160 	JMP   memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B
  12161 
  12162 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64:
  12163 	MOVOU (R8), X0
  12164 	MOVOU 16(R8), X1
  12165 	MOVOU -32(R8)(DI*1), X2
  12166 	MOVOU -16(R8)(DI*1), X3
  12167 	MOVOU X0, (AX)
  12168 	MOVOU X1, 16(AX)
  12169 	MOVOU X2, -32(AX)(DI*1)
  12170 	MOVOU X3, -16(AX)(DI*1)
  12171 
  12172 memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B:
  12173 	MOVQ BX, AX
  12174 	JMP  emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B
  12175 
  12176 memmove_long_repeat_emit_encodeSnappyBlockAsm12B:
  12177 	LEAQ (AX)(DI*1), BX
  12178 
  12179 	// genMemMoveLong
  12180 	MOVOU (R8), X0
  12181 	MOVOU 16(R8), X1
  12182 	MOVOU -32(R8)(DI*1), X2
  12183 	MOVOU -16(R8)(DI*1), X3
  12184 	MOVQ  DI, R10
  12185 	SHRQ  $0x05, R10
  12186 	MOVQ  AX, R9
  12187 	ANDL  $0x0000001f, R9
  12188 	MOVQ  $0x00000040, R11
  12189 	SUBQ  R9, R11
  12190 	DECQ  R10
  12191 	JA    emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
  12192 	LEAQ  -32(R8)(R11*1), R9
  12193 	LEAQ  -32(AX)(R11*1), R12
  12194 
  12195 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back:
  12196 	MOVOU (R9), X4
  12197 	MOVOU 16(R9), X5
  12198 	MOVOA X4, (R12)
  12199 	MOVOA X5, 16(R12)
  12200 	ADDQ  $0x20, R12
  12201 	ADDQ  $0x20, R9
  12202 	ADDQ  $0x20, R11
  12203 	DECQ  R10
  12204 	JNA   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back
  12205 
  12206 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32:
  12207 	MOVOU -32(R8)(R11*1), X4
  12208 	MOVOU -16(R8)(R11*1), X5
  12209 	MOVOA X4, -32(AX)(R11*1)
  12210 	MOVOA X5, -16(AX)(R11*1)
  12211 	ADDQ  $0x20, R11
  12212 	CMPQ  DI, R11
  12213 	JAE   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
  12214 	MOVOU X0, (AX)
  12215 	MOVOU X1, 16(AX)
  12216 	MOVOU X2, -32(AX)(DI*1)
  12217 	MOVOU X3, -16(AX)(DI*1)
  12218 	MOVQ  BX, AX
  12219 
  12220 emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B:
  12221 	ADDL $0x05, CX
  12222 	MOVL CX, BX
  12223 	SUBL 16(SP), BX
  12224 	MOVQ src_len+32(FP), DI
  12225 	SUBL CX, DI
  12226 	LEAQ (DX)(CX*1), R8
  12227 	LEAQ (DX)(BX*1), BX
  12228 
  12229 	// matchLen
  12230 	XORL R10, R10
  12231 	CMPL DI, $0x08
  12232 	JB   matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B
  12233 
  12234 matchlen_loopback_repeat_extend_encodeSnappyBlockAsm12B:
  12235 	MOVQ  (R8)(R10*1), R9
  12236 	XORQ  (BX)(R10*1), R9
  12237 	TESTQ R9, R9
  12238 	JZ    matchlen_loop_repeat_extend_encodeSnappyBlockAsm12B
  12239 
  12240 #ifdef GOAMD64_v3
  12241 	TZCNTQ R9, R9
  12242 
  12243 #else
  12244 	BSFQ R9, R9
  12245 
  12246 #endif
  12247 	SARQ $0x03, R9
  12248 	LEAL (R10)(R9*1), R10
  12249 	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm12B
  12250 
  12251 matchlen_loop_repeat_extend_encodeSnappyBlockAsm12B:
  12252 	LEAL -8(DI), DI
  12253 	LEAL 8(R10), R10
  12254 	CMPL DI, $0x08
  12255 	JAE  matchlen_loopback_repeat_extend_encodeSnappyBlockAsm12B
  12256 	JZ   repeat_extend_forward_end_encodeSnappyBlockAsm12B
  12257 
  12258 matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B:
  12259 	CMPL DI, $0x04
  12260 	JB   matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B
  12261 	MOVL (R8)(R10*1), R9
  12262 	CMPL (BX)(R10*1), R9
  12263 	JNE  matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B
  12264 	SUBL $0x04, DI
  12265 	LEAL 4(R10), R10
  12266 
  12267 matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B:
  12268 	CMPL DI, $0x02
  12269 	JB   matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B
  12270 	MOVW (R8)(R10*1), R9
  12271 	CMPW (BX)(R10*1), R9
  12272 	JNE  matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B
  12273 	SUBL $0x02, DI
  12274 	LEAL 2(R10), R10
  12275 
  12276 matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B:
  12277 	CMPL DI, $0x01
  12278 	JB   repeat_extend_forward_end_encodeSnappyBlockAsm12B
  12279 	MOVB (R8)(R10*1), R9
  12280 	CMPB (BX)(R10*1), R9
  12281 	JNE  repeat_extend_forward_end_encodeSnappyBlockAsm12B
  12282 	LEAL 1(R10), R10
  12283 
  12284 repeat_extend_forward_end_encodeSnappyBlockAsm12B:
  12285 	ADDL R10, CX
  12286 	MOVL CX, BX
  12287 	SUBL SI, BX
  12288 	MOVL 16(SP), SI
  12289 
  12290 	// emitCopy
  12291 two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B:
  12292 	CMPL BX, $0x40
  12293 	JBE  two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B
  12294 	MOVB $0xee, (AX)
  12295 	MOVW SI, 1(AX)
  12296 	LEAL -60(BX), BX
  12297 	ADDQ $0x03, AX
  12298 	JMP  two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B
  12299 
  12300 two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B:
  12301 	MOVL BX, DI
  12302 	SHLL $0x02, DI
  12303 	CMPL BX, $0x0c
  12304 	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B
  12305 	CMPL SI, $0x00000800
  12306 	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B
  12307 	LEAL -15(DI), DI
  12308 	MOVB SI, 1(AX)
  12309 	SHRL $0x08, SI
  12310 	SHLL $0x05, SI
  12311 	ORL  SI, DI
  12312 	MOVB DI, (AX)
  12313 	ADDQ $0x02, AX
  12314 	JMP  repeat_end_emit_encodeSnappyBlockAsm12B
  12315 
  12316 emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B:
  12317 	LEAL -2(DI), DI
  12318 	MOVB DI, (AX)
  12319 	MOVW SI, 1(AX)
  12320 	ADDQ $0x03, AX
  12321 
  12322 repeat_end_emit_encodeSnappyBlockAsm12B:
  12323 	MOVL CX, 12(SP)
  12324 	JMP  search_loop_encodeSnappyBlockAsm12B
  12325 
  12326 no_repeat_found_encodeSnappyBlockAsm12B:
  12327 	CMPL (DX)(BX*1), SI
  12328 	JEQ  candidate_match_encodeSnappyBlockAsm12B
  12329 	SHRQ $0x08, SI
  12330 	MOVL 24(SP)(R9*4), BX
  12331 	LEAL 2(CX), R8
  12332 	CMPL (DX)(DI*1), SI
  12333 	JEQ  candidate2_match_encodeSnappyBlockAsm12B
  12334 	MOVL R8, 24(SP)(R9*4)
  12335 	SHRQ $0x08, SI
  12336 	CMPL (DX)(BX*1), SI
  12337 	JEQ  candidate3_match_encodeSnappyBlockAsm12B
  12338 	MOVL 20(SP), CX
  12339 	JMP  search_loop_encodeSnappyBlockAsm12B
  12340 
  12341 candidate3_match_encodeSnappyBlockAsm12B:
  12342 	ADDL $0x02, CX
  12343 	JMP  candidate_match_encodeSnappyBlockAsm12B
  12344 
  12345 candidate2_match_encodeSnappyBlockAsm12B:
  12346 	MOVL R8, 24(SP)(R9*4)
  12347 	INCL CX
  12348 	MOVL DI, BX
  12349 
  12350 candidate_match_encodeSnappyBlockAsm12B:
  12351 	MOVL  12(SP), SI
  12352 	TESTL BX, BX
  12353 	JZ    match_extend_back_end_encodeSnappyBlockAsm12B
  12354 
  12355 match_extend_back_loop_encodeSnappyBlockAsm12B:
  12356 	CMPL CX, SI
  12357 	JBE  match_extend_back_end_encodeSnappyBlockAsm12B
  12358 	MOVB -1(DX)(BX*1), DI
  12359 	MOVB -1(DX)(CX*1), R8
  12360 	CMPB DI, R8
  12361 	JNE  match_extend_back_end_encodeSnappyBlockAsm12B
  12362 	LEAL -1(CX), CX
  12363 	DECL BX
  12364 	JZ   match_extend_back_end_encodeSnappyBlockAsm12B
  12365 	JMP  match_extend_back_loop_encodeSnappyBlockAsm12B
  12366 
  12367 match_extend_back_end_encodeSnappyBlockAsm12B:
  12368 	MOVL CX, SI
  12369 	SUBL 12(SP), SI
  12370 	LEAQ 3(AX)(SI*1), SI
  12371 	CMPQ SI, (SP)
  12372 	JB   match_dst_size_check_encodeSnappyBlockAsm12B
  12373 	MOVQ $0x00000000, ret+48(FP)
  12374 	RET
  12375 
  12376 match_dst_size_check_encodeSnappyBlockAsm12B:
  12377 	MOVL CX, SI
  12378 	MOVL 12(SP), DI
  12379 	CMPL DI, SI
  12380 	JEQ  emit_literal_done_match_emit_encodeSnappyBlockAsm12B
  12381 	MOVL SI, R8
  12382 	MOVL SI, 12(SP)
  12383 	LEAQ (DX)(DI*1), SI
  12384 	SUBL DI, R8
  12385 	LEAL -1(R8), DI
  12386 	CMPL DI, $0x3c
  12387 	JB   one_byte_match_emit_encodeSnappyBlockAsm12B
  12388 	CMPL DI, $0x00000100
  12389 	JB   two_bytes_match_emit_encodeSnappyBlockAsm12B
  12390 	JB   three_bytes_match_emit_encodeSnappyBlockAsm12B
  12391 
  12392 three_bytes_match_emit_encodeSnappyBlockAsm12B:
  12393 	MOVB $0xf4, (AX)
  12394 	MOVW DI, 1(AX)
  12395 	ADDQ $0x03, AX
  12396 	JMP  memmove_long_match_emit_encodeSnappyBlockAsm12B
  12397 
  12398 two_bytes_match_emit_encodeSnappyBlockAsm12B:
  12399 	MOVB $0xf0, (AX)
  12400 	MOVB DI, 1(AX)
  12401 	ADDQ $0x02, AX
  12402 	CMPL DI, $0x40
  12403 	JB   memmove_match_emit_encodeSnappyBlockAsm12B
  12404 	JMP  memmove_long_match_emit_encodeSnappyBlockAsm12B
  12405 
  12406 one_byte_match_emit_encodeSnappyBlockAsm12B:
  12407 	SHLB $0x02, DI
  12408 	MOVB DI, (AX)
  12409 	ADDQ $0x01, AX
  12410 
  12411 memmove_match_emit_encodeSnappyBlockAsm12B:
  12412 	LEAQ (AX)(R8*1), DI
  12413 
  12414 	// genMemMoveShort
  12415 	CMPQ R8, $0x08
  12416 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8
  12417 	CMPQ R8, $0x10
  12418 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16
  12419 	CMPQ R8, $0x20
  12420 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32
  12421 	JMP  emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64
  12422 
  12423 emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8:
  12424 	MOVQ (SI), R9
  12425 	MOVQ R9, (AX)
  12426 	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm12B
  12427 
  12428 emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16:
  12429 	MOVQ (SI), R9
  12430 	MOVQ -8(SI)(R8*1), SI
  12431 	MOVQ R9, (AX)
  12432 	MOVQ SI, -8(AX)(R8*1)
  12433 	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm12B
  12434 
  12435 emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32:
  12436 	MOVOU (SI), X0
  12437 	MOVOU -16(SI)(R8*1), X1
  12438 	MOVOU X0, (AX)
  12439 	MOVOU X1, -16(AX)(R8*1)
  12440 	JMP   memmove_end_copy_match_emit_encodeSnappyBlockAsm12B
  12441 
  12442 emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64:
  12443 	MOVOU (SI), X0
  12444 	MOVOU 16(SI), X1
  12445 	MOVOU -32(SI)(R8*1), X2
  12446 	MOVOU -16(SI)(R8*1), X3
  12447 	MOVOU X0, (AX)
  12448 	MOVOU X1, 16(AX)
  12449 	MOVOU X2, -32(AX)(R8*1)
  12450 	MOVOU X3, -16(AX)(R8*1)
  12451 
  12452 memmove_end_copy_match_emit_encodeSnappyBlockAsm12B:
  12453 	MOVQ DI, AX
  12454 	JMP  emit_literal_done_match_emit_encodeSnappyBlockAsm12B
  12455 
  12456 memmove_long_match_emit_encodeSnappyBlockAsm12B:
  12457 	LEAQ (AX)(R8*1), DI
  12458 
  12459 	// genMemMoveLong
  12460 	MOVOU (SI), X0
  12461 	MOVOU 16(SI), X1
  12462 	MOVOU -32(SI)(R8*1), X2
  12463 	MOVOU -16(SI)(R8*1), X3
  12464 	MOVQ  R8, R10
  12465 	SHRQ  $0x05, R10
  12466 	MOVQ  AX, R9
  12467 	ANDL  $0x0000001f, R9
  12468 	MOVQ  $0x00000040, R11
  12469 	SUBQ  R9, R11
  12470 	DECQ  R10
  12471 	JA    emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
  12472 	LEAQ  -32(SI)(R11*1), R9
  12473 	LEAQ  -32(AX)(R11*1), R12
  12474 
  12475 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back:
  12476 	MOVOU (R9), X4
  12477 	MOVOU 16(R9), X5
  12478 	MOVOA X4, (R12)
  12479 	MOVOA X5, 16(R12)
  12480 	ADDQ  $0x20, R12
  12481 	ADDQ  $0x20, R9
  12482 	ADDQ  $0x20, R11
  12483 	DECQ  R10
  12484 	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back
  12485 
  12486 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32:
  12487 	MOVOU -32(SI)(R11*1), X4
  12488 	MOVOU -16(SI)(R11*1), X5
  12489 	MOVOA X4, -32(AX)(R11*1)
  12490 	MOVOA X5, -16(AX)(R11*1)
  12491 	ADDQ  $0x20, R11
  12492 	CMPQ  R8, R11
  12493 	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
  12494 	MOVOU X0, (AX)
  12495 	MOVOU X1, 16(AX)
  12496 	MOVOU X2, -32(AX)(R8*1)
  12497 	MOVOU X3, -16(AX)(R8*1)
  12498 	MOVQ  DI, AX
  12499 
  12500 emit_literal_done_match_emit_encodeSnappyBlockAsm12B:
  12501 match_nolit_loop_encodeSnappyBlockAsm12B:
  12502 	MOVL CX, SI
  12503 	SUBL BX, SI
  12504 	MOVL SI, 16(SP)
  12505 	ADDL $0x04, CX
  12506 	ADDL $0x04, BX
  12507 	MOVQ src_len+32(FP), SI
  12508 	SUBL CX, SI
  12509 	LEAQ (DX)(CX*1), DI
  12510 	LEAQ (DX)(BX*1), BX
  12511 
  12512 	// matchLen
  12513 	XORL R9, R9
  12514 	CMPL SI, $0x08
  12515 	JB   matchlen_match4_match_nolit_encodeSnappyBlockAsm12B
  12516 
  12517 matchlen_loopback_match_nolit_encodeSnappyBlockAsm12B:
  12518 	MOVQ  (DI)(R9*1), R8
  12519 	XORQ  (BX)(R9*1), R8
  12520 	TESTQ R8, R8
  12521 	JZ    matchlen_loop_match_nolit_encodeSnappyBlockAsm12B
  12522 
  12523 #ifdef GOAMD64_v3
  12524 	TZCNTQ R8, R8
  12525 
  12526 #else
  12527 	BSFQ R8, R8
  12528 
  12529 #endif
  12530 	SARQ $0x03, R8
  12531 	LEAL (R9)(R8*1), R9
  12532 	JMP  match_nolit_end_encodeSnappyBlockAsm12B
  12533 
  12534 matchlen_loop_match_nolit_encodeSnappyBlockAsm12B:
  12535 	LEAL -8(SI), SI
  12536 	LEAL 8(R9), R9
  12537 	CMPL SI, $0x08
  12538 	JAE  matchlen_loopback_match_nolit_encodeSnappyBlockAsm12B
  12539 	JZ   match_nolit_end_encodeSnappyBlockAsm12B
  12540 
  12541 matchlen_match4_match_nolit_encodeSnappyBlockAsm12B:
  12542 	CMPL SI, $0x04
  12543 	JB   matchlen_match2_match_nolit_encodeSnappyBlockAsm12B
  12544 	MOVL (DI)(R9*1), R8
  12545 	CMPL (BX)(R9*1), R8
  12546 	JNE  matchlen_match2_match_nolit_encodeSnappyBlockAsm12B
  12547 	SUBL $0x04, SI
  12548 	LEAL 4(R9), R9
  12549 
  12550 matchlen_match2_match_nolit_encodeSnappyBlockAsm12B:
  12551 	CMPL SI, $0x02
  12552 	JB   matchlen_match1_match_nolit_encodeSnappyBlockAsm12B
  12553 	MOVW (DI)(R9*1), R8
  12554 	CMPW (BX)(R9*1), R8
  12555 	JNE  matchlen_match1_match_nolit_encodeSnappyBlockAsm12B
  12556 	SUBL $0x02, SI
  12557 	LEAL 2(R9), R9
  12558 
  12559 matchlen_match1_match_nolit_encodeSnappyBlockAsm12B:
  12560 	CMPL SI, $0x01
  12561 	JB   match_nolit_end_encodeSnappyBlockAsm12B
  12562 	MOVB (DI)(R9*1), R8
  12563 	CMPB (BX)(R9*1), R8
  12564 	JNE  match_nolit_end_encodeSnappyBlockAsm12B
  12565 	LEAL 1(R9), R9
  12566 
  12567 match_nolit_end_encodeSnappyBlockAsm12B:
  12568 	ADDL R9, CX
  12569 	MOVL 16(SP), BX
  12570 	ADDL $0x04, R9
  12571 	MOVL CX, 12(SP)
  12572 
  12573 	// emitCopy
  12574 two_byte_offset_match_nolit_encodeSnappyBlockAsm12B:
  12575 	CMPL R9, $0x40
  12576 	JBE  two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B
  12577 	MOVB $0xee, (AX)
  12578 	MOVW BX, 1(AX)
  12579 	LEAL -60(R9), R9
  12580 	ADDQ $0x03, AX
  12581 	JMP  two_byte_offset_match_nolit_encodeSnappyBlockAsm12B
  12582 
  12583 two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B:
  12584 	MOVL R9, SI
  12585 	SHLL $0x02, SI
  12586 	CMPL R9, $0x0c
  12587 	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm12B
  12588 	CMPL BX, $0x00000800
  12589 	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm12B
  12590 	LEAL -15(SI), SI
  12591 	MOVB BL, 1(AX)
  12592 	SHRL $0x08, BX
  12593 	SHLL $0x05, BX
  12594 	ORL  BX, SI
  12595 	MOVB SI, (AX)
  12596 	ADDQ $0x02, AX
  12597 	JMP  match_nolit_emitcopy_end_encodeSnappyBlockAsm12B
  12598 
  12599 emit_copy_three_match_nolit_encodeSnappyBlockAsm12B:
  12600 	LEAL -2(SI), SI
  12601 	MOVB SI, (AX)
  12602 	MOVW BX, 1(AX)
  12603 	ADDQ $0x03, AX
  12604 
  12605 match_nolit_emitcopy_end_encodeSnappyBlockAsm12B:
  12606 	CMPL CX, 8(SP)
  12607 	JAE  emit_remainder_encodeSnappyBlockAsm12B
  12608 	MOVQ -2(DX)(CX*1), SI
  12609 	CMPQ AX, (SP)
  12610 	JB   match_nolit_dst_ok_encodeSnappyBlockAsm12B
  12611 	MOVQ $0x00000000, ret+48(FP)
  12612 	RET
  12613 
  12614 match_nolit_dst_ok_encodeSnappyBlockAsm12B:
  12615 	MOVQ  $0x000000cf1bbcdcbb, R8
  12616 	MOVQ  SI, DI
  12617 	SHRQ  $0x10, SI
  12618 	MOVQ  SI, BX
  12619 	SHLQ  $0x18, DI
  12620 	IMULQ R8, DI
  12621 	SHRQ  $0x34, DI
  12622 	SHLQ  $0x18, BX
  12623 	IMULQ R8, BX
  12624 	SHRQ  $0x34, BX
  12625 	LEAL  -2(CX), R8
  12626 	LEAQ  24(SP)(BX*4), R9
  12627 	MOVL  (R9), BX
  12628 	MOVL  R8, 24(SP)(DI*4)
  12629 	MOVL  CX, (R9)
  12630 	CMPL  (DX)(BX*1), SI
  12631 	JEQ   match_nolit_loop_encodeSnappyBlockAsm12B
  12632 	INCL  CX
  12633 	JMP   search_loop_encodeSnappyBlockAsm12B
  12634 
  12635 emit_remainder_encodeSnappyBlockAsm12B:
  12636 	MOVQ src_len+32(FP), CX
  12637 	SUBL 12(SP), CX
  12638 	LEAQ 3(AX)(CX*1), CX
  12639 	CMPQ CX, (SP)
  12640 	JB   emit_remainder_ok_encodeSnappyBlockAsm12B
  12641 	MOVQ $0x00000000, ret+48(FP)
  12642 	RET
  12643 
  12644 emit_remainder_ok_encodeSnappyBlockAsm12B:
  12645 	MOVQ src_len+32(FP), CX
  12646 	MOVL 12(SP), BX
  12647 	CMPL BX, CX
  12648 	JEQ  emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B
  12649 	MOVL CX, SI
  12650 	MOVL CX, 12(SP)
  12651 	LEAQ (DX)(BX*1), CX
  12652 	SUBL BX, SI
  12653 	LEAL -1(SI), DX
  12654 	CMPL DX, $0x3c
  12655 	JB   one_byte_emit_remainder_encodeSnappyBlockAsm12B
  12656 	CMPL DX, $0x00000100
  12657 	JB   two_bytes_emit_remainder_encodeSnappyBlockAsm12B
  12658 	JB   three_bytes_emit_remainder_encodeSnappyBlockAsm12B
  12659 
  12660 three_bytes_emit_remainder_encodeSnappyBlockAsm12B:
  12661 	MOVB $0xf4, (AX)
  12662 	MOVW DX, 1(AX)
  12663 	ADDQ $0x03, AX
  12664 	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm12B
  12665 
  12666 two_bytes_emit_remainder_encodeSnappyBlockAsm12B:
  12667 	MOVB $0xf0, (AX)
  12668 	MOVB DL, 1(AX)
  12669 	ADDQ $0x02, AX
  12670 	CMPL DX, $0x40
  12671 	JB   memmove_emit_remainder_encodeSnappyBlockAsm12B
  12672 	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm12B
  12673 
  12674 one_byte_emit_remainder_encodeSnappyBlockAsm12B:
  12675 	SHLB $0x02, DL
  12676 	MOVB DL, (AX)
  12677 	ADDQ $0x01, AX
  12678 
  12679 memmove_emit_remainder_encodeSnappyBlockAsm12B:
  12680 	LEAQ (AX)(SI*1), DX
  12681 	MOVL SI, BX
  12682 
  12683 	// genMemMoveShort
  12684 	CMPQ BX, $0x03
  12685 	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_1or2
  12686 	JE   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_3
  12687 	CMPQ BX, $0x08
  12688 	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_4through7
  12689 	CMPQ BX, $0x10
  12690 	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8through16
  12691 	CMPQ BX, $0x20
  12692 	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_17through32
  12693 	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64
  12694 
  12695 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_1or2:
  12696 	MOVB (CX), SI
  12697 	MOVB -1(CX)(BX*1), CL
  12698 	MOVB SI, (AX)
  12699 	MOVB CL, -1(AX)(BX*1)
  12700 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
  12701 
  12702 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_3:
  12703 	MOVW (CX), SI
  12704 	MOVB 2(CX), CL
  12705 	MOVW SI, (AX)
  12706 	MOVB CL, 2(AX)
  12707 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
  12708 
  12709 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_4through7:
  12710 	MOVL (CX), SI
  12711 	MOVL -4(CX)(BX*1), CX
  12712 	MOVL SI, (AX)
  12713 	MOVL CX, -4(AX)(BX*1)
  12714 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
  12715 
  12716 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8through16:
  12717 	MOVQ (CX), SI
  12718 	MOVQ -8(CX)(BX*1), CX
  12719 	MOVQ SI, (AX)
  12720 	MOVQ CX, -8(AX)(BX*1)
  12721 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
  12722 
  12723 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_17through32:
  12724 	MOVOU (CX), X0
  12725 	MOVOU -16(CX)(BX*1), X1
  12726 	MOVOU X0, (AX)
  12727 	MOVOU X1, -16(AX)(BX*1)
  12728 	JMP   memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
  12729 
  12730 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64:
  12731 	MOVOU (CX), X0
  12732 	MOVOU 16(CX), X1
  12733 	MOVOU -32(CX)(BX*1), X2
  12734 	MOVOU -16(CX)(BX*1), X3
  12735 	MOVOU X0, (AX)
  12736 	MOVOU X1, 16(AX)
  12737 	MOVOU X2, -32(AX)(BX*1)
  12738 	MOVOU X3, -16(AX)(BX*1)
  12739 
  12740 memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B:
  12741 	MOVQ DX, AX
  12742 	JMP  emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B
  12743 
  12744 memmove_long_emit_remainder_encodeSnappyBlockAsm12B:
  12745 	LEAQ (AX)(SI*1), DX
  12746 	MOVL SI, BX
  12747 
  12748 	// genMemMoveLong
  12749 	MOVOU (CX), X0
  12750 	MOVOU 16(CX), X1
  12751 	MOVOU -32(CX)(BX*1), X2
  12752 	MOVOU -16(CX)(BX*1), X3
  12753 	MOVQ  BX, DI
  12754 	SHRQ  $0x05, DI
  12755 	MOVQ  AX, SI
  12756 	ANDL  $0x0000001f, SI
  12757 	MOVQ  $0x00000040, R8
  12758 	SUBQ  SI, R8
  12759 	DECQ  DI
  12760 	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
  12761 	LEAQ  -32(CX)(R8*1), SI
  12762 	LEAQ  -32(AX)(R8*1), R9
  12763 
  12764 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back:
  12765 	MOVOU (SI), X4
  12766 	MOVOU 16(SI), X5
  12767 	MOVOA X4, (R9)
  12768 	MOVOA X5, 16(R9)
  12769 	ADDQ  $0x20, R9
  12770 	ADDQ  $0x20, SI
  12771 	ADDQ  $0x20, R8
  12772 	DECQ  DI
  12773 	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back
  12774 
  12775 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32:
  12776 	MOVOU -32(CX)(R8*1), X4
  12777 	MOVOU -16(CX)(R8*1), X5
  12778 	MOVOA X4, -32(AX)(R8*1)
  12779 	MOVOA X5, -16(AX)(R8*1)
  12780 	ADDQ  $0x20, R8
  12781 	CMPQ  BX, R8
  12782 	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
  12783 	MOVOU X0, (AX)
  12784 	MOVOU X1, 16(AX)
  12785 	MOVOU X2, -32(AX)(BX*1)
  12786 	MOVOU X3, -16(AX)(BX*1)
  12787 	MOVQ  DX, AX
  12788 
  12789 emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B:
  12790 	MOVQ dst_base+0(FP), CX
  12791 	SUBQ CX, AX
  12792 	MOVQ AX, ret+48(FP)
  12793 	RET
  12794 
  12795 // func encodeSnappyBlockAsm10B(dst []byte, src []byte) int
  12796 // Requires: BMI, SSE2
  12797 TEXT ·encodeSnappyBlockAsm10B(SB), $4120-56
  12798 	MOVQ dst_base+0(FP), AX
  12799 	MOVQ $0x00000020, CX
  12800 	LEAQ 24(SP), DX
  12801 	PXOR X0, X0
  12802 
  12803 zero_loop_encodeSnappyBlockAsm10B:
  12804 	MOVOU X0, (DX)
  12805 	MOVOU X0, 16(DX)
  12806 	MOVOU X0, 32(DX)
  12807 	MOVOU X0, 48(DX)
  12808 	MOVOU X0, 64(DX)
  12809 	MOVOU X0, 80(DX)
  12810 	MOVOU X0, 96(DX)
  12811 	MOVOU X0, 112(DX)
  12812 	ADDQ  $0x80, DX
  12813 	DECQ  CX
  12814 	JNZ   zero_loop_encodeSnappyBlockAsm10B
  12815 	MOVL  $0x00000000, 12(SP)
  12816 	MOVQ  src_len+32(FP), CX
  12817 	LEAQ  -9(CX), DX
  12818 	LEAQ  -8(CX), BX
  12819 	MOVL  BX, 8(SP)
  12820 	SHRQ  $0x05, CX
  12821 	SUBL  CX, DX
  12822 	LEAQ  (AX)(DX*1), DX
  12823 	MOVQ  DX, (SP)
  12824 	MOVL  $0x00000001, CX
  12825 	MOVL  CX, 16(SP)
  12826 	MOVQ  src_base+24(FP), DX
  12827 
  12828 search_loop_encodeSnappyBlockAsm10B:
  12829 	MOVL  CX, BX
  12830 	SUBL  12(SP), BX
  12831 	SHRL  $0x05, BX
  12832 	LEAL  4(CX)(BX*1), BX
  12833 	CMPL  BX, 8(SP)
  12834 	JAE   emit_remainder_encodeSnappyBlockAsm10B
  12835 	MOVQ  (DX)(CX*1), SI
  12836 	MOVL  BX, 20(SP)
  12837 	MOVQ  $0x9e3779b1, R8
  12838 	MOVQ  SI, R9
  12839 	MOVQ  SI, R10
  12840 	SHRQ  $0x08, R10
  12841 	SHLQ  $0x20, R9
  12842 	IMULQ R8, R9
  12843 	SHRQ  $0x36, R9
  12844 	SHLQ  $0x20, R10
  12845 	IMULQ R8, R10
  12846 	SHRQ  $0x36, R10
  12847 	MOVL  24(SP)(R9*4), BX
  12848 	MOVL  24(SP)(R10*4), DI
  12849 	MOVL  CX, 24(SP)(R9*4)
  12850 	LEAL  1(CX), R9
  12851 	MOVL  R9, 24(SP)(R10*4)
  12852 	MOVQ  SI, R9
  12853 	SHRQ  $0x10, R9
  12854 	SHLQ  $0x20, R9
  12855 	IMULQ R8, R9
  12856 	SHRQ  $0x36, R9
  12857 	MOVL  CX, R8
  12858 	SUBL  16(SP), R8
  12859 	MOVL  1(DX)(R8*1), R10
  12860 	MOVQ  SI, R8
  12861 	SHRQ  $0x08, R8
  12862 	CMPL  R8, R10
  12863 	JNE   no_repeat_found_encodeSnappyBlockAsm10B
  12864 	LEAL  1(CX), SI
  12865 	MOVL  12(SP), BX
  12866 	MOVL  SI, DI
  12867 	SUBL  16(SP), DI
  12868 	JZ    repeat_extend_back_end_encodeSnappyBlockAsm10B
  12869 
  12870 repeat_extend_back_loop_encodeSnappyBlockAsm10B:
  12871 	CMPL SI, BX
  12872 	JBE  repeat_extend_back_end_encodeSnappyBlockAsm10B
  12873 	MOVB -1(DX)(DI*1), R8
  12874 	MOVB -1(DX)(SI*1), R9
  12875 	CMPB R8, R9
  12876 	JNE  repeat_extend_back_end_encodeSnappyBlockAsm10B
  12877 	LEAL -1(SI), SI
  12878 	DECL DI
  12879 	JNZ  repeat_extend_back_loop_encodeSnappyBlockAsm10B
  12880 
  12881 repeat_extend_back_end_encodeSnappyBlockAsm10B:
  12882 	MOVL 12(SP), BX
  12883 	CMPL BX, SI
  12884 	JEQ  emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B
  12885 	MOVL SI, DI
  12886 	MOVL SI, 12(SP)
  12887 	LEAQ (DX)(BX*1), R8
  12888 	SUBL BX, DI
  12889 	LEAL -1(DI), BX
  12890 	CMPL BX, $0x3c
  12891 	JB   one_byte_repeat_emit_encodeSnappyBlockAsm10B
  12892 	CMPL BX, $0x00000100
  12893 	JB   two_bytes_repeat_emit_encodeSnappyBlockAsm10B
  12894 	JB   three_bytes_repeat_emit_encodeSnappyBlockAsm10B
  12895 
  12896 three_bytes_repeat_emit_encodeSnappyBlockAsm10B:
  12897 	MOVB $0xf4, (AX)
  12898 	MOVW BX, 1(AX)
  12899 	ADDQ $0x03, AX
  12900 	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm10B
  12901 
  12902 two_bytes_repeat_emit_encodeSnappyBlockAsm10B:
  12903 	MOVB $0xf0, (AX)
  12904 	MOVB BL, 1(AX)
  12905 	ADDQ $0x02, AX
  12906 	CMPL BX, $0x40
  12907 	JB   memmove_repeat_emit_encodeSnappyBlockAsm10B
  12908 	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm10B
  12909 
  12910 one_byte_repeat_emit_encodeSnappyBlockAsm10B:
  12911 	SHLB $0x02, BL
  12912 	MOVB BL, (AX)
  12913 	ADDQ $0x01, AX
  12914 
  12915 memmove_repeat_emit_encodeSnappyBlockAsm10B:
  12916 	LEAQ (AX)(DI*1), BX
  12917 
  12918 	// genMemMoveShort
  12919 	CMPQ DI, $0x08
  12920 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8
  12921 	CMPQ DI, $0x10
  12922 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16
  12923 	CMPQ DI, $0x20
  12924 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32
  12925 	JMP  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64
  12926 
  12927 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8:
  12928 	MOVQ (R8), R9
  12929 	MOVQ R9, (AX)
  12930 	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B
  12931 
  12932 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16:
  12933 	MOVQ (R8), R9
  12934 	MOVQ -8(R8)(DI*1), R8
  12935 	MOVQ R9, (AX)
  12936 	MOVQ R8, -8(AX)(DI*1)
  12937 	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B
  12938 
  12939 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32:
  12940 	MOVOU (R8), X0
  12941 	MOVOU -16(R8)(DI*1), X1
  12942 	MOVOU X0, (AX)
  12943 	MOVOU X1, -16(AX)(DI*1)
  12944 	JMP   memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B
  12945 
  12946 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64:
  12947 	MOVOU (R8), X0
  12948 	MOVOU 16(R8), X1
  12949 	MOVOU -32(R8)(DI*1), X2
  12950 	MOVOU -16(R8)(DI*1), X3
  12951 	MOVOU X0, (AX)
  12952 	MOVOU X1, 16(AX)
  12953 	MOVOU X2, -32(AX)(DI*1)
  12954 	MOVOU X3, -16(AX)(DI*1)
  12955 
  12956 memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B:
  12957 	MOVQ BX, AX
  12958 	JMP  emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B
  12959 
  12960 memmove_long_repeat_emit_encodeSnappyBlockAsm10B:
  12961 	LEAQ (AX)(DI*1), BX
  12962 
  12963 	// genMemMoveLong
  12964 	MOVOU (R8), X0
  12965 	MOVOU 16(R8), X1
  12966 	MOVOU -32(R8)(DI*1), X2
  12967 	MOVOU -16(R8)(DI*1), X3
  12968 	MOVQ  DI, R10
  12969 	SHRQ  $0x05, R10
  12970 	MOVQ  AX, R9
  12971 	ANDL  $0x0000001f, R9
  12972 	MOVQ  $0x00000040, R11
  12973 	SUBQ  R9, R11
  12974 	DECQ  R10
  12975 	JA    emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
  12976 	LEAQ  -32(R8)(R11*1), R9
  12977 	LEAQ  -32(AX)(R11*1), R12
  12978 
  12979 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back:
  12980 	MOVOU (R9), X4
  12981 	MOVOU 16(R9), X5
  12982 	MOVOA X4, (R12)
  12983 	MOVOA X5, 16(R12)
  12984 	ADDQ  $0x20, R12
  12985 	ADDQ  $0x20, R9
  12986 	ADDQ  $0x20, R11
  12987 	DECQ  R10
  12988 	JNA   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back
  12989 
  12990 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32:
  12991 	MOVOU -32(R8)(R11*1), X4
  12992 	MOVOU -16(R8)(R11*1), X5
  12993 	MOVOA X4, -32(AX)(R11*1)
  12994 	MOVOA X5, -16(AX)(R11*1)
  12995 	ADDQ  $0x20, R11
  12996 	CMPQ  DI, R11
  12997 	JAE   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
  12998 	MOVOU X0, (AX)
  12999 	MOVOU X1, 16(AX)
  13000 	MOVOU X2, -32(AX)(DI*1)
  13001 	MOVOU X3, -16(AX)(DI*1)
  13002 	MOVQ  BX, AX
  13003 
  13004 emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B:
  13005 	ADDL $0x05, CX
  13006 	MOVL CX, BX
  13007 	SUBL 16(SP), BX
  13008 	MOVQ src_len+32(FP), DI
  13009 	SUBL CX, DI
  13010 	LEAQ (DX)(CX*1), R8
  13011 	LEAQ (DX)(BX*1), BX
  13012 
  13013 	// matchLen
  13014 	XORL R10, R10
  13015 	CMPL DI, $0x08
  13016 	JB   matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B
  13017 
  13018 matchlen_loopback_repeat_extend_encodeSnappyBlockAsm10B:
  13019 	MOVQ  (R8)(R10*1), R9
  13020 	XORQ  (BX)(R10*1), R9
  13021 	TESTQ R9, R9
  13022 	JZ    matchlen_loop_repeat_extend_encodeSnappyBlockAsm10B
  13023 
  13024 #ifdef GOAMD64_v3
  13025 	TZCNTQ R9, R9
  13026 
  13027 #else
  13028 	BSFQ R9, R9
  13029 
  13030 #endif
  13031 	SARQ $0x03, R9
  13032 	LEAL (R10)(R9*1), R10
  13033 	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm10B
  13034 
  13035 matchlen_loop_repeat_extend_encodeSnappyBlockAsm10B:
  13036 	LEAL -8(DI), DI
  13037 	LEAL 8(R10), R10
  13038 	CMPL DI, $0x08
  13039 	JAE  matchlen_loopback_repeat_extend_encodeSnappyBlockAsm10B
  13040 	JZ   repeat_extend_forward_end_encodeSnappyBlockAsm10B
  13041 
  13042 matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B:
  13043 	CMPL DI, $0x04
  13044 	JB   matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B
  13045 	MOVL (R8)(R10*1), R9
  13046 	CMPL (BX)(R10*1), R9
  13047 	JNE  matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B
  13048 	SUBL $0x04, DI
  13049 	LEAL 4(R10), R10
  13050 
  13051 matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B:
  13052 	CMPL DI, $0x02
  13053 	JB   matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B
  13054 	MOVW (R8)(R10*1), R9
  13055 	CMPW (BX)(R10*1), R9
  13056 	JNE  matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B
  13057 	SUBL $0x02, DI
  13058 	LEAL 2(R10), R10
  13059 
  13060 matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B:
  13061 	CMPL DI, $0x01
  13062 	JB   repeat_extend_forward_end_encodeSnappyBlockAsm10B
  13063 	MOVB (R8)(R10*1), R9
  13064 	CMPB (BX)(R10*1), R9
  13065 	JNE  repeat_extend_forward_end_encodeSnappyBlockAsm10B
  13066 	LEAL 1(R10), R10
  13067 
  13068 repeat_extend_forward_end_encodeSnappyBlockAsm10B:
  13069 	ADDL R10, CX
  13070 	MOVL CX, BX
  13071 	SUBL SI, BX
  13072 	MOVL 16(SP), SI
  13073 
  13074 	// emitCopy
  13075 two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B:
  13076 	CMPL BX, $0x40
  13077 	JBE  two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B
  13078 	MOVB $0xee, (AX)
  13079 	MOVW SI, 1(AX)
  13080 	LEAL -60(BX), BX
  13081 	ADDQ $0x03, AX
  13082 	JMP  two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B
  13083 
  13084 two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B:
  13085 	MOVL BX, DI
  13086 	SHLL $0x02, DI
  13087 	CMPL BX, $0x0c
  13088 	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B
  13089 	CMPL SI, $0x00000800
  13090 	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B
  13091 	LEAL -15(DI), DI
  13092 	MOVB SI, 1(AX)
  13093 	SHRL $0x08, SI
  13094 	SHLL $0x05, SI
  13095 	ORL  SI, DI
  13096 	MOVB DI, (AX)
  13097 	ADDQ $0x02, AX
  13098 	JMP  repeat_end_emit_encodeSnappyBlockAsm10B
  13099 
  13100 emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B:
  13101 	LEAL -2(DI), DI
  13102 	MOVB DI, (AX)
  13103 	MOVW SI, 1(AX)
  13104 	ADDQ $0x03, AX
  13105 
  13106 repeat_end_emit_encodeSnappyBlockAsm10B:
  13107 	MOVL CX, 12(SP)
  13108 	JMP  search_loop_encodeSnappyBlockAsm10B
  13109 
  13110 no_repeat_found_encodeSnappyBlockAsm10B:
  13111 	CMPL (DX)(BX*1), SI
  13112 	JEQ  candidate_match_encodeSnappyBlockAsm10B
  13113 	SHRQ $0x08, SI
  13114 	MOVL 24(SP)(R9*4), BX
  13115 	LEAL 2(CX), R8
  13116 	CMPL (DX)(DI*1), SI
  13117 	JEQ  candidate2_match_encodeSnappyBlockAsm10B
  13118 	MOVL R8, 24(SP)(R9*4)
  13119 	SHRQ $0x08, SI
  13120 	CMPL (DX)(BX*1), SI
  13121 	JEQ  candidate3_match_encodeSnappyBlockAsm10B
  13122 	MOVL 20(SP), CX
  13123 	JMP  search_loop_encodeSnappyBlockAsm10B
  13124 
  13125 candidate3_match_encodeSnappyBlockAsm10B:
  13126 	ADDL $0x02, CX
  13127 	JMP  candidate_match_encodeSnappyBlockAsm10B
  13128 
  13129 candidate2_match_encodeSnappyBlockAsm10B:
  13130 	MOVL R8, 24(SP)(R9*4)
  13131 	INCL CX
  13132 	MOVL DI, BX
  13133 
  13134 candidate_match_encodeSnappyBlockAsm10B:
  13135 	MOVL  12(SP), SI
  13136 	TESTL BX, BX
  13137 	JZ    match_extend_back_end_encodeSnappyBlockAsm10B
  13138 
  13139 match_extend_back_loop_encodeSnappyBlockAsm10B:
  13140 	CMPL CX, SI
  13141 	JBE  match_extend_back_end_encodeSnappyBlockAsm10B
  13142 	MOVB -1(DX)(BX*1), DI
  13143 	MOVB -1(DX)(CX*1), R8
  13144 	CMPB DI, R8
  13145 	JNE  match_extend_back_end_encodeSnappyBlockAsm10B
  13146 	LEAL -1(CX), CX
  13147 	DECL BX
  13148 	JZ   match_extend_back_end_encodeSnappyBlockAsm10B
  13149 	JMP  match_extend_back_loop_encodeSnappyBlockAsm10B
  13150 
  13151 match_extend_back_end_encodeSnappyBlockAsm10B:
  13152 	MOVL CX, SI
  13153 	SUBL 12(SP), SI
  13154 	LEAQ 3(AX)(SI*1), SI
  13155 	CMPQ SI, (SP)
  13156 	JB   match_dst_size_check_encodeSnappyBlockAsm10B
  13157 	MOVQ $0x00000000, ret+48(FP)
  13158 	RET
  13159 
  13160 match_dst_size_check_encodeSnappyBlockAsm10B:
  13161 	MOVL CX, SI
  13162 	MOVL 12(SP), DI
  13163 	CMPL DI, SI
  13164 	JEQ  emit_literal_done_match_emit_encodeSnappyBlockAsm10B
  13165 	MOVL SI, R8
  13166 	MOVL SI, 12(SP)
  13167 	LEAQ (DX)(DI*1), SI
  13168 	SUBL DI, R8
  13169 	LEAL -1(R8), DI
  13170 	CMPL DI, $0x3c
  13171 	JB   one_byte_match_emit_encodeSnappyBlockAsm10B
  13172 	CMPL DI, $0x00000100
  13173 	JB   two_bytes_match_emit_encodeSnappyBlockAsm10B
  13174 	JB   three_bytes_match_emit_encodeSnappyBlockAsm10B
  13175 
  13176 three_bytes_match_emit_encodeSnappyBlockAsm10B:
  13177 	MOVB $0xf4, (AX)
  13178 	MOVW DI, 1(AX)
  13179 	ADDQ $0x03, AX
  13180 	JMP  memmove_long_match_emit_encodeSnappyBlockAsm10B
  13181 
  13182 two_bytes_match_emit_encodeSnappyBlockAsm10B:
  13183 	MOVB $0xf0, (AX)
  13184 	MOVB DI, 1(AX)
  13185 	ADDQ $0x02, AX
  13186 	CMPL DI, $0x40
  13187 	JB   memmove_match_emit_encodeSnappyBlockAsm10B
  13188 	JMP  memmove_long_match_emit_encodeSnappyBlockAsm10B
  13189 
  13190 one_byte_match_emit_encodeSnappyBlockAsm10B:
  13191 	SHLB $0x02, DI
  13192 	MOVB DI, (AX)
  13193 	ADDQ $0x01, AX
  13194 
  13195 memmove_match_emit_encodeSnappyBlockAsm10B:
  13196 	LEAQ (AX)(R8*1), DI
  13197 
  13198 	// genMemMoveShort
  13199 	CMPQ R8, $0x08
  13200 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8
  13201 	CMPQ R8, $0x10
  13202 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16
  13203 	CMPQ R8, $0x20
  13204 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32
  13205 	JMP  emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64
  13206 
  13207 emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8:
  13208 	MOVQ (SI), R9
  13209 	MOVQ R9, (AX)
  13210 	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm10B
  13211 
  13212 emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16:
  13213 	MOVQ (SI), R9
  13214 	MOVQ -8(SI)(R8*1), SI
  13215 	MOVQ R9, (AX)
  13216 	MOVQ SI, -8(AX)(R8*1)
  13217 	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm10B
  13218 
  13219 emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32:
  13220 	MOVOU (SI), X0
  13221 	MOVOU -16(SI)(R8*1), X1
  13222 	MOVOU X0, (AX)
  13223 	MOVOU X1, -16(AX)(R8*1)
  13224 	JMP   memmove_end_copy_match_emit_encodeSnappyBlockAsm10B
  13225 
  13226 emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64:
  13227 	MOVOU (SI), X0
  13228 	MOVOU 16(SI), X1
  13229 	MOVOU -32(SI)(R8*1), X2
  13230 	MOVOU -16(SI)(R8*1), X3
  13231 	MOVOU X0, (AX)
  13232 	MOVOU X1, 16(AX)
  13233 	MOVOU X2, -32(AX)(R8*1)
  13234 	MOVOU X3, -16(AX)(R8*1)
  13235 
  13236 memmove_end_copy_match_emit_encodeSnappyBlockAsm10B:
  13237 	MOVQ DI, AX
  13238 	JMP  emit_literal_done_match_emit_encodeSnappyBlockAsm10B
  13239 
  13240 memmove_long_match_emit_encodeSnappyBlockAsm10B:
  13241 	LEAQ (AX)(R8*1), DI
  13242 
  13243 	// genMemMoveLong
  13244 	MOVOU (SI), X0
  13245 	MOVOU 16(SI), X1
  13246 	MOVOU -32(SI)(R8*1), X2
  13247 	MOVOU -16(SI)(R8*1), X3
  13248 	MOVQ  R8, R10
  13249 	SHRQ  $0x05, R10
  13250 	MOVQ  AX, R9
  13251 	ANDL  $0x0000001f, R9
  13252 	MOVQ  $0x00000040, R11
  13253 	SUBQ  R9, R11
  13254 	DECQ  R10
  13255 	JA    emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
  13256 	LEAQ  -32(SI)(R11*1), R9
  13257 	LEAQ  -32(AX)(R11*1), R12
  13258 
  13259 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back:
  13260 	MOVOU (R9), X4
  13261 	MOVOU 16(R9), X5
  13262 	MOVOA X4, (R12)
  13263 	MOVOA X5, 16(R12)
  13264 	ADDQ  $0x20, R12
  13265 	ADDQ  $0x20, R9
  13266 	ADDQ  $0x20, R11
  13267 	DECQ  R10
  13268 	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back
  13269 
  13270 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32:
  13271 	MOVOU -32(SI)(R11*1), X4
  13272 	MOVOU -16(SI)(R11*1), X5
  13273 	MOVOA X4, -32(AX)(R11*1)
  13274 	MOVOA X5, -16(AX)(R11*1)
  13275 	ADDQ  $0x20, R11
  13276 	CMPQ  R8, R11
  13277 	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
  13278 	MOVOU X0, (AX)
  13279 	MOVOU X1, 16(AX)
  13280 	MOVOU X2, -32(AX)(R8*1)
  13281 	MOVOU X3, -16(AX)(R8*1)
  13282 	MOVQ  DI, AX
  13283 
  13284 emit_literal_done_match_emit_encodeSnappyBlockAsm10B:
  13285 match_nolit_loop_encodeSnappyBlockAsm10B:
  13286 	MOVL CX, SI
  13287 	SUBL BX, SI
  13288 	MOVL SI, 16(SP)
  13289 	ADDL $0x04, CX
  13290 	ADDL $0x04, BX
  13291 	MOVQ src_len+32(FP), SI
  13292 	SUBL CX, SI
  13293 	LEAQ (DX)(CX*1), DI
  13294 	LEAQ (DX)(BX*1), BX
  13295 
  13296 	// matchLen
  13297 	XORL R9, R9
  13298 	CMPL SI, $0x08
  13299 	JB   matchlen_match4_match_nolit_encodeSnappyBlockAsm10B
  13300 
  13301 matchlen_loopback_match_nolit_encodeSnappyBlockAsm10B:
  13302 	MOVQ  (DI)(R9*1), R8
  13303 	XORQ  (BX)(R9*1), R8
  13304 	TESTQ R8, R8
  13305 	JZ    matchlen_loop_match_nolit_encodeSnappyBlockAsm10B
  13306 
  13307 #ifdef GOAMD64_v3
  13308 	TZCNTQ R8, R8
  13309 
  13310 #else
  13311 	BSFQ R8, R8
  13312 
  13313 #endif
  13314 	SARQ $0x03, R8
  13315 	LEAL (R9)(R8*1), R9
  13316 	JMP  match_nolit_end_encodeSnappyBlockAsm10B
  13317 
  13318 matchlen_loop_match_nolit_encodeSnappyBlockAsm10B:
  13319 	LEAL -8(SI), SI
  13320 	LEAL 8(R9), R9
  13321 	CMPL SI, $0x08
  13322 	JAE  matchlen_loopback_match_nolit_encodeSnappyBlockAsm10B
  13323 	JZ   match_nolit_end_encodeSnappyBlockAsm10B
  13324 
  13325 matchlen_match4_match_nolit_encodeSnappyBlockAsm10B:
  13326 	CMPL SI, $0x04
  13327 	JB   matchlen_match2_match_nolit_encodeSnappyBlockAsm10B
  13328 	MOVL (DI)(R9*1), R8
  13329 	CMPL (BX)(R9*1), R8
  13330 	JNE  matchlen_match2_match_nolit_encodeSnappyBlockAsm10B
  13331 	SUBL $0x04, SI
  13332 	LEAL 4(R9), R9
  13333 
  13334 matchlen_match2_match_nolit_encodeSnappyBlockAsm10B:
  13335 	CMPL SI, $0x02
  13336 	JB   matchlen_match1_match_nolit_encodeSnappyBlockAsm10B
  13337 	MOVW (DI)(R9*1), R8
  13338 	CMPW (BX)(R9*1), R8
  13339 	JNE  matchlen_match1_match_nolit_encodeSnappyBlockAsm10B
  13340 	SUBL $0x02, SI
  13341 	LEAL 2(R9), R9
  13342 
  13343 matchlen_match1_match_nolit_encodeSnappyBlockAsm10B:
  13344 	CMPL SI, $0x01
  13345 	JB   match_nolit_end_encodeSnappyBlockAsm10B
  13346 	MOVB (DI)(R9*1), R8
  13347 	CMPB (BX)(R9*1), R8
  13348 	JNE  match_nolit_end_encodeSnappyBlockAsm10B
  13349 	LEAL 1(R9), R9
  13350 
  13351 match_nolit_end_encodeSnappyBlockAsm10B:
  13352 	ADDL R9, CX
  13353 	MOVL 16(SP), BX
  13354 	ADDL $0x04, R9
  13355 	MOVL CX, 12(SP)
  13356 
  13357 	// emitCopy
  13358 two_byte_offset_match_nolit_encodeSnappyBlockAsm10B:
  13359 	CMPL R9, $0x40
  13360 	JBE  two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B
  13361 	MOVB $0xee, (AX)
  13362 	MOVW BX, 1(AX)
  13363 	LEAL -60(R9), R9
  13364 	ADDQ $0x03, AX
  13365 	JMP  two_byte_offset_match_nolit_encodeSnappyBlockAsm10B
  13366 
  13367 two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B:
  13368 	MOVL R9, SI
  13369 	SHLL $0x02, SI
  13370 	CMPL R9, $0x0c
  13371 	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm10B
  13372 	CMPL BX, $0x00000800
  13373 	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm10B
  13374 	LEAL -15(SI), SI
  13375 	MOVB BL, 1(AX)
  13376 	SHRL $0x08, BX
  13377 	SHLL $0x05, BX
  13378 	ORL  BX, SI
  13379 	MOVB SI, (AX)
  13380 	ADDQ $0x02, AX
  13381 	JMP  match_nolit_emitcopy_end_encodeSnappyBlockAsm10B
  13382 
  13383 emit_copy_three_match_nolit_encodeSnappyBlockAsm10B:
  13384 	LEAL -2(SI), SI
  13385 	MOVB SI, (AX)
  13386 	MOVW BX, 1(AX)
  13387 	ADDQ $0x03, AX
  13388 
  13389 match_nolit_emitcopy_end_encodeSnappyBlockAsm10B:
  13390 	CMPL CX, 8(SP)
  13391 	JAE  emit_remainder_encodeSnappyBlockAsm10B
  13392 	MOVQ -2(DX)(CX*1), SI
  13393 	CMPQ AX, (SP)
  13394 	JB   match_nolit_dst_ok_encodeSnappyBlockAsm10B
  13395 	MOVQ $0x00000000, ret+48(FP)
  13396 	RET
  13397 
  13398 match_nolit_dst_ok_encodeSnappyBlockAsm10B:
  13399 	MOVQ  $0x9e3779b1, R8
  13400 	MOVQ  SI, DI
  13401 	SHRQ  $0x10, SI
  13402 	MOVQ  SI, BX
  13403 	SHLQ  $0x20, DI
  13404 	IMULQ R8, DI
  13405 	SHRQ  $0x36, DI
  13406 	SHLQ  $0x20, BX
  13407 	IMULQ R8, BX
  13408 	SHRQ  $0x36, BX
  13409 	LEAL  -2(CX), R8
  13410 	LEAQ  24(SP)(BX*4), R9
  13411 	MOVL  (R9), BX
  13412 	MOVL  R8, 24(SP)(DI*4)
  13413 	MOVL  CX, (R9)
  13414 	CMPL  (DX)(BX*1), SI
  13415 	JEQ   match_nolit_loop_encodeSnappyBlockAsm10B
  13416 	INCL  CX
  13417 	JMP   search_loop_encodeSnappyBlockAsm10B
  13418 
  13419 emit_remainder_encodeSnappyBlockAsm10B:
  13420 	MOVQ src_len+32(FP), CX
  13421 	SUBL 12(SP), CX
  13422 	LEAQ 3(AX)(CX*1), CX
  13423 	CMPQ CX, (SP)
  13424 	JB   emit_remainder_ok_encodeSnappyBlockAsm10B
  13425 	MOVQ $0x00000000, ret+48(FP)
  13426 	RET
  13427 
  13428 emit_remainder_ok_encodeSnappyBlockAsm10B:
  13429 	MOVQ src_len+32(FP), CX
  13430 	MOVL 12(SP), BX
  13431 	CMPL BX, CX
  13432 	JEQ  emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B
  13433 	MOVL CX, SI
  13434 	MOVL CX, 12(SP)
  13435 	LEAQ (DX)(BX*1), CX
  13436 	SUBL BX, SI
  13437 	LEAL -1(SI), DX
  13438 	CMPL DX, $0x3c
  13439 	JB   one_byte_emit_remainder_encodeSnappyBlockAsm10B
  13440 	CMPL DX, $0x00000100
  13441 	JB   two_bytes_emit_remainder_encodeSnappyBlockAsm10B
  13442 	JB   three_bytes_emit_remainder_encodeSnappyBlockAsm10B
  13443 
  13444 three_bytes_emit_remainder_encodeSnappyBlockAsm10B:
  13445 	MOVB $0xf4, (AX)
  13446 	MOVW DX, 1(AX)
  13447 	ADDQ $0x03, AX
  13448 	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm10B
  13449 
  13450 two_bytes_emit_remainder_encodeSnappyBlockAsm10B:
  13451 	MOVB $0xf0, (AX)
  13452 	MOVB DL, 1(AX)
  13453 	ADDQ $0x02, AX
  13454 	CMPL DX, $0x40
  13455 	JB   memmove_emit_remainder_encodeSnappyBlockAsm10B
  13456 	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm10B
  13457 
  13458 one_byte_emit_remainder_encodeSnappyBlockAsm10B:
  13459 	SHLB $0x02, DL
  13460 	MOVB DL, (AX)
  13461 	ADDQ $0x01, AX
  13462 
  13463 memmove_emit_remainder_encodeSnappyBlockAsm10B:
  13464 	LEAQ (AX)(SI*1), DX
  13465 	MOVL SI, BX
  13466 
  13467 	// genMemMoveShort
  13468 	CMPQ BX, $0x03
  13469 	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_1or2
  13470 	JE   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_3
  13471 	CMPQ BX, $0x08
  13472 	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_4through7
  13473 	CMPQ BX, $0x10
  13474 	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8through16
  13475 	CMPQ BX, $0x20
  13476 	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_17through32
  13477 	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64
  13478 
  13479 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_1or2:
  13480 	MOVB (CX), SI
  13481 	MOVB -1(CX)(BX*1), CL
  13482 	MOVB SI, (AX)
  13483 	MOVB CL, -1(AX)(BX*1)
  13484 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
  13485 
  13486 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_3:
  13487 	MOVW (CX), SI
  13488 	MOVB 2(CX), CL
  13489 	MOVW SI, (AX)
  13490 	MOVB CL, 2(AX)
  13491 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
  13492 
  13493 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_4through7:
  13494 	MOVL (CX), SI
  13495 	MOVL -4(CX)(BX*1), CX
  13496 	MOVL SI, (AX)
  13497 	MOVL CX, -4(AX)(BX*1)
  13498 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
  13499 
  13500 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8through16:
  13501 	MOVQ (CX), SI
  13502 	MOVQ -8(CX)(BX*1), CX
  13503 	MOVQ SI, (AX)
  13504 	MOVQ CX, -8(AX)(BX*1)
  13505 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
  13506 
  13507 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_17through32:
  13508 	MOVOU (CX), X0
  13509 	MOVOU -16(CX)(BX*1), X1
  13510 	MOVOU X0, (AX)
  13511 	MOVOU X1, -16(AX)(BX*1)
  13512 	JMP   memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
  13513 
  13514 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64:
  13515 	MOVOU (CX), X0
  13516 	MOVOU 16(CX), X1
  13517 	MOVOU -32(CX)(BX*1), X2
  13518 	MOVOU -16(CX)(BX*1), X3
  13519 	MOVOU X0, (AX)
  13520 	MOVOU X1, 16(AX)
  13521 	MOVOU X2, -32(AX)(BX*1)
  13522 	MOVOU X3, -16(AX)(BX*1)
  13523 
  13524 memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B:
  13525 	MOVQ DX, AX
  13526 	JMP  emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B
  13527 
  13528 memmove_long_emit_remainder_encodeSnappyBlockAsm10B:
  13529 	LEAQ (AX)(SI*1), DX
  13530 	MOVL SI, BX
  13531 
  13532 	// genMemMoveLong
  13533 	MOVOU (CX), X0
  13534 	MOVOU 16(CX), X1
  13535 	MOVOU -32(CX)(BX*1), X2
  13536 	MOVOU -16(CX)(BX*1), X3
  13537 	MOVQ  BX, DI
  13538 	SHRQ  $0x05, DI
  13539 	MOVQ  AX, SI
  13540 	ANDL  $0x0000001f, SI
  13541 	MOVQ  $0x00000040, R8
  13542 	SUBQ  SI, R8
  13543 	DECQ  DI
  13544 	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
  13545 	LEAQ  -32(CX)(R8*1), SI
  13546 	LEAQ  -32(AX)(R8*1), R9
  13547 
  13548 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back:
  13549 	MOVOU (SI), X4
  13550 	MOVOU 16(SI), X5
  13551 	MOVOA X4, (R9)
  13552 	MOVOA X5, 16(R9)
  13553 	ADDQ  $0x20, R9
  13554 	ADDQ  $0x20, SI
  13555 	ADDQ  $0x20, R8
  13556 	DECQ  DI
  13557 	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back
  13558 
  13559 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32:
  13560 	MOVOU -32(CX)(R8*1), X4
  13561 	MOVOU -16(CX)(R8*1), X5
  13562 	MOVOA X4, -32(AX)(R8*1)
  13563 	MOVOA X5, -16(AX)(R8*1)
  13564 	ADDQ  $0x20, R8
  13565 	CMPQ  BX, R8
  13566 	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
  13567 	MOVOU X0, (AX)
  13568 	MOVOU X1, 16(AX)
  13569 	MOVOU X2, -32(AX)(BX*1)
  13570 	MOVOU X3, -16(AX)(BX*1)
  13571 	MOVQ  DX, AX
  13572 
  13573 emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B:
  13574 	MOVQ dst_base+0(FP), CX
  13575 	SUBQ CX, AX
  13576 	MOVQ AX, ret+48(FP)
  13577 	RET
  13578 
  13579 // func encodeSnappyBlockAsm8B(dst []byte, src []byte) int
  13580 // Requires: BMI, SSE2
  13581 TEXT ·encodeSnappyBlockAsm8B(SB), $1048-56
  13582 	MOVQ dst_base+0(FP), AX
  13583 	MOVQ $0x00000008, CX
  13584 	LEAQ 24(SP), DX
  13585 	PXOR X0, X0
  13586 
  13587 zero_loop_encodeSnappyBlockAsm8B:
  13588 	MOVOU X0, (DX)
  13589 	MOVOU X0, 16(DX)
  13590 	MOVOU X0, 32(DX)
  13591 	MOVOU X0, 48(DX)
  13592 	MOVOU X0, 64(DX)
  13593 	MOVOU X0, 80(DX)
  13594 	MOVOU X0, 96(DX)
  13595 	MOVOU X0, 112(DX)
  13596 	ADDQ  $0x80, DX
  13597 	DECQ  CX
  13598 	JNZ   zero_loop_encodeSnappyBlockAsm8B
  13599 	MOVL  $0x00000000, 12(SP)
  13600 	MOVQ  src_len+32(FP), CX
  13601 	LEAQ  -9(CX), DX
  13602 	LEAQ  -8(CX), BX
  13603 	MOVL  BX, 8(SP)
  13604 	SHRQ  $0x05, CX
  13605 	SUBL  CX, DX
  13606 	LEAQ  (AX)(DX*1), DX
  13607 	MOVQ  DX, (SP)
  13608 	MOVL  $0x00000001, CX
  13609 	MOVL  CX, 16(SP)
  13610 	MOVQ  src_base+24(FP), DX
  13611 
  13612 search_loop_encodeSnappyBlockAsm8B:
  13613 	MOVL  CX, BX
  13614 	SUBL  12(SP), BX
  13615 	SHRL  $0x04, BX
  13616 	LEAL  4(CX)(BX*1), BX
  13617 	CMPL  BX, 8(SP)
  13618 	JAE   emit_remainder_encodeSnappyBlockAsm8B
  13619 	MOVQ  (DX)(CX*1), SI
  13620 	MOVL  BX, 20(SP)
  13621 	MOVQ  $0x9e3779b1, R8
  13622 	MOVQ  SI, R9
  13623 	MOVQ  SI, R10
  13624 	SHRQ  $0x08, R10
  13625 	SHLQ  $0x20, R9
  13626 	IMULQ R8, R9
  13627 	SHRQ  $0x38, R9
  13628 	SHLQ  $0x20, R10
  13629 	IMULQ R8, R10
  13630 	SHRQ  $0x38, R10
  13631 	MOVL  24(SP)(R9*4), BX
  13632 	MOVL  24(SP)(R10*4), DI
  13633 	MOVL  CX, 24(SP)(R9*4)
  13634 	LEAL  1(CX), R9
  13635 	MOVL  R9, 24(SP)(R10*4)
  13636 	MOVQ  SI, R9
  13637 	SHRQ  $0x10, R9
  13638 	SHLQ  $0x20, R9
  13639 	IMULQ R8, R9
  13640 	SHRQ  $0x38, R9
  13641 	MOVL  CX, R8
  13642 	SUBL  16(SP), R8
  13643 	MOVL  1(DX)(R8*1), R10
  13644 	MOVQ  SI, R8
  13645 	SHRQ  $0x08, R8
  13646 	CMPL  R8, R10
  13647 	JNE   no_repeat_found_encodeSnappyBlockAsm8B
  13648 	LEAL  1(CX), SI
  13649 	MOVL  12(SP), BX
  13650 	MOVL  SI, DI
  13651 	SUBL  16(SP), DI
  13652 	JZ    repeat_extend_back_end_encodeSnappyBlockAsm8B
  13653 
  13654 repeat_extend_back_loop_encodeSnappyBlockAsm8B:
  13655 	CMPL SI, BX
  13656 	JBE  repeat_extend_back_end_encodeSnappyBlockAsm8B
  13657 	MOVB -1(DX)(DI*1), R8
  13658 	MOVB -1(DX)(SI*1), R9
  13659 	CMPB R8, R9
  13660 	JNE  repeat_extend_back_end_encodeSnappyBlockAsm8B
  13661 	LEAL -1(SI), SI
  13662 	DECL DI
  13663 	JNZ  repeat_extend_back_loop_encodeSnappyBlockAsm8B
  13664 
  13665 repeat_extend_back_end_encodeSnappyBlockAsm8B:
  13666 	MOVL 12(SP), BX
  13667 	CMPL BX, SI
  13668 	JEQ  emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B
  13669 	MOVL SI, DI
  13670 	MOVL SI, 12(SP)
  13671 	LEAQ (DX)(BX*1), R8
  13672 	SUBL BX, DI
  13673 	LEAL -1(DI), BX
  13674 	CMPL BX, $0x3c
  13675 	JB   one_byte_repeat_emit_encodeSnappyBlockAsm8B
  13676 	CMPL BX, $0x00000100
  13677 	JB   two_bytes_repeat_emit_encodeSnappyBlockAsm8B
  13678 	JB   three_bytes_repeat_emit_encodeSnappyBlockAsm8B
  13679 
  13680 three_bytes_repeat_emit_encodeSnappyBlockAsm8B:
  13681 	MOVB $0xf4, (AX)
  13682 	MOVW BX, 1(AX)
  13683 	ADDQ $0x03, AX
  13684 	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm8B
  13685 
  13686 two_bytes_repeat_emit_encodeSnappyBlockAsm8B:
  13687 	MOVB $0xf0, (AX)
  13688 	MOVB BL, 1(AX)
  13689 	ADDQ $0x02, AX
  13690 	CMPL BX, $0x40
  13691 	JB   memmove_repeat_emit_encodeSnappyBlockAsm8B
  13692 	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm8B
  13693 
  13694 one_byte_repeat_emit_encodeSnappyBlockAsm8B:
  13695 	SHLB $0x02, BL
  13696 	MOVB BL, (AX)
  13697 	ADDQ $0x01, AX
  13698 
  13699 memmove_repeat_emit_encodeSnappyBlockAsm8B:
  13700 	LEAQ (AX)(DI*1), BX
  13701 
  13702 	// genMemMoveShort
  13703 	CMPQ DI, $0x08
  13704 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8
  13705 	CMPQ DI, $0x10
  13706 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16
  13707 	CMPQ DI, $0x20
  13708 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32
  13709 	JMP  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64
  13710 
  13711 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8:
  13712 	MOVQ (R8), R9
  13713 	MOVQ R9, (AX)
  13714 	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B
  13715 
  13716 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16:
  13717 	MOVQ (R8), R9
  13718 	MOVQ -8(R8)(DI*1), R8
  13719 	MOVQ R9, (AX)
  13720 	MOVQ R8, -8(AX)(DI*1)
  13721 	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B
  13722 
  13723 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32:
  13724 	MOVOU (R8), X0
  13725 	MOVOU -16(R8)(DI*1), X1
  13726 	MOVOU X0, (AX)
  13727 	MOVOU X1, -16(AX)(DI*1)
  13728 	JMP   memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B
  13729 
  13730 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64:
  13731 	MOVOU (R8), X0
  13732 	MOVOU 16(R8), X1
  13733 	MOVOU -32(R8)(DI*1), X2
  13734 	MOVOU -16(R8)(DI*1), X3
  13735 	MOVOU X0, (AX)
  13736 	MOVOU X1, 16(AX)
  13737 	MOVOU X2, -32(AX)(DI*1)
  13738 	MOVOU X3, -16(AX)(DI*1)
  13739 
  13740 memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B:
  13741 	MOVQ BX, AX
  13742 	JMP  emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B
  13743 
  13744 memmove_long_repeat_emit_encodeSnappyBlockAsm8B:
  13745 	LEAQ (AX)(DI*1), BX
  13746 
  13747 	// genMemMoveLong
  13748 	MOVOU (R8), X0
  13749 	MOVOU 16(R8), X1
  13750 	MOVOU -32(R8)(DI*1), X2
  13751 	MOVOU -16(R8)(DI*1), X3
  13752 	MOVQ  DI, R10
  13753 	SHRQ  $0x05, R10
  13754 	MOVQ  AX, R9
  13755 	ANDL  $0x0000001f, R9
  13756 	MOVQ  $0x00000040, R11
  13757 	SUBQ  R9, R11
  13758 	DECQ  R10
  13759 	JA    emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
  13760 	LEAQ  -32(R8)(R11*1), R9
  13761 	LEAQ  -32(AX)(R11*1), R12
  13762 
  13763 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back:
  13764 	MOVOU (R9), X4
  13765 	MOVOU 16(R9), X5
  13766 	MOVOA X4, (R12)
  13767 	MOVOA X5, 16(R12)
  13768 	ADDQ  $0x20, R12
  13769 	ADDQ  $0x20, R9
  13770 	ADDQ  $0x20, R11
  13771 	DECQ  R10
  13772 	JNA   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back
  13773 
  13774 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32:
  13775 	MOVOU -32(R8)(R11*1), X4
  13776 	MOVOU -16(R8)(R11*1), X5
  13777 	MOVOA X4, -32(AX)(R11*1)
  13778 	MOVOA X5, -16(AX)(R11*1)
  13779 	ADDQ  $0x20, R11
  13780 	CMPQ  DI, R11
  13781 	JAE   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
  13782 	MOVOU X0, (AX)
  13783 	MOVOU X1, 16(AX)
  13784 	MOVOU X2, -32(AX)(DI*1)
  13785 	MOVOU X3, -16(AX)(DI*1)
  13786 	MOVQ  BX, AX
  13787 
  13788 emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B:
  13789 	ADDL $0x05, CX
  13790 	MOVL CX, BX
  13791 	SUBL 16(SP), BX
  13792 	MOVQ src_len+32(FP), DI
  13793 	SUBL CX, DI
  13794 	LEAQ (DX)(CX*1), R8
  13795 	LEAQ (DX)(BX*1), BX
  13796 
  13797 	// matchLen
  13798 	XORL R10, R10
  13799 	CMPL DI, $0x08
  13800 	JB   matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B
  13801 
  13802 matchlen_loopback_repeat_extend_encodeSnappyBlockAsm8B:
  13803 	MOVQ  (R8)(R10*1), R9
  13804 	XORQ  (BX)(R10*1), R9
  13805 	TESTQ R9, R9
  13806 	JZ    matchlen_loop_repeat_extend_encodeSnappyBlockAsm8B
  13807 
  13808 #ifdef GOAMD64_v3
  13809 	TZCNTQ R9, R9
  13810 
  13811 #else
  13812 	BSFQ R9, R9
  13813 
  13814 #endif
  13815 	SARQ $0x03, R9
  13816 	LEAL (R10)(R9*1), R10
  13817 	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm8B
  13818 
  13819 matchlen_loop_repeat_extend_encodeSnappyBlockAsm8B:
  13820 	LEAL -8(DI), DI
  13821 	LEAL 8(R10), R10
  13822 	CMPL DI, $0x08
  13823 	JAE  matchlen_loopback_repeat_extend_encodeSnappyBlockAsm8B
  13824 	JZ   repeat_extend_forward_end_encodeSnappyBlockAsm8B
  13825 
  13826 matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B:
  13827 	CMPL DI, $0x04
  13828 	JB   matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B
  13829 	MOVL (R8)(R10*1), R9
  13830 	CMPL (BX)(R10*1), R9
  13831 	JNE  matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B
  13832 	SUBL $0x04, DI
  13833 	LEAL 4(R10), R10
  13834 
  13835 matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B:
  13836 	CMPL DI, $0x02
  13837 	JB   matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B
  13838 	MOVW (R8)(R10*1), R9
  13839 	CMPW (BX)(R10*1), R9
  13840 	JNE  matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B
  13841 	SUBL $0x02, DI
  13842 	LEAL 2(R10), R10
  13843 
  13844 matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B:
  13845 	CMPL DI, $0x01
  13846 	JB   repeat_extend_forward_end_encodeSnappyBlockAsm8B
  13847 	MOVB (R8)(R10*1), R9
  13848 	CMPB (BX)(R10*1), R9
  13849 	JNE  repeat_extend_forward_end_encodeSnappyBlockAsm8B
  13850 	LEAL 1(R10), R10
  13851 
  13852 repeat_extend_forward_end_encodeSnappyBlockAsm8B:
  13853 	ADDL R10, CX
  13854 	MOVL CX, BX
  13855 	SUBL SI, BX
  13856 	MOVL 16(SP), SI
  13857 
  13858 	// emitCopy
  13859 two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B:
  13860 	CMPL BX, $0x40
  13861 	JBE  two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B
  13862 	MOVB $0xee, (AX)
  13863 	MOVW SI, 1(AX)
  13864 	LEAL -60(BX), BX
  13865 	ADDQ $0x03, AX
  13866 	JMP  two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B
  13867 
  13868 two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B:
  13869 	MOVL BX, DI
  13870 	SHLL $0x02, DI
  13871 	CMPL BX, $0x0c
  13872 	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B
  13873 	LEAL -15(DI), DI
  13874 	MOVB SI, 1(AX)
  13875 	SHRL $0x08, SI
  13876 	SHLL $0x05, SI
  13877 	ORL  SI, DI
  13878 	MOVB DI, (AX)
  13879 	ADDQ $0x02, AX
  13880 	JMP  repeat_end_emit_encodeSnappyBlockAsm8B
  13881 
  13882 emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B:
  13883 	LEAL -2(DI), DI
  13884 	MOVB DI, (AX)
  13885 	MOVW SI, 1(AX)
  13886 	ADDQ $0x03, AX
  13887 
  13888 repeat_end_emit_encodeSnappyBlockAsm8B:
  13889 	MOVL CX, 12(SP)
  13890 	JMP  search_loop_encodeSnappyBlockAsm8B
  13891 
  13892 no_repeat_found_encodeSnappyBlockAsm8B:
  13893 	CMPL (DX)(BX*1), SI
  13894 	JEQ  candidate_match_encodeSnappyBlockAsm8B
  13895 	SHRQ $0x08, SI
  13896 	MOVL 24(SP)(R9*4), BX
  13897 	LEAL 2(CX), R8
  13898 	CMPL (DX)(DI*1), SI
  13899 	JEQ  candidate2_match_encodeSnappyBlockAsm8B
  13900 	MOVL R8, 24(SP)(R9*4)
  13901 	SHRQ $0x08, SI
  13902 	CMPL (DX)(BX*1), SI
  13903 	JEQ  candidate3_match_encodeSnappyBlockAsm8B
  13904 	MOVL 20(SP), CX
  13905 	JMP  search_loop_encodeSnappyBlockAsm8B
  13906 
  13907 candidate3_match_encodeSnappyBlockAsm8B:
  13908 	ADDL $0x02, CX
  13909 	JMP  candidate_match_encodeSnappyBlockAsm8B
  13910 
  13911 candidate2_match_encodeSnappyBlockAsm8B:
  13912 	MOVL R8, 24(SP)(R9*4)
  13913 	INCL CX
  13914 	MOVL DI, BX
  13915 
  13916 candidate_match_encodeSnappyBlockAsm8B:
  13917 	MOVL  12(SP), SI
  13918 	TESTL BX, BX
  13919 	JZ    match_extend_back_end_encodeSnappyBlockAsm8B
  13920 
  13921 match_extend_back_loop_encodeSnappyBlockAsm8B:
  13922 	CMPL CX, SI
  13923 	JBE  match_extend_back_end_encodeSnappyBlockAsm8B
  13924 	MOVB -1(DX)(BX*1), DI
  13925 	MOVB -1(DX)(CX*1), R8
  13926 	CMPB DI, R8
  13927 	JNE  match_extend_back_end_encodeSnappyBlockAsm8B
  13928 	LEAL -1(CX), CX
  13929 	DECL BX
  13930 	JZ   match_extend_back_end_encodeSnappyBlockAsm8B
  13931 	JMP  match_extend_back_loop_encodeSnappyBlockAsm8B
  13932 
  13933 match_extend_back_end_encodeSnappyBlockAsm8B:
  13934 	MOVL CX, SI
  13935 	SUBL 12(SP), SI
  13936 	LEAQ 3(AX)(SI*1), SI
  13937 	CMPQ SI, (SP)
  13938 	JB   match_dst_size_check_encodeSnappyBlockAsm8B
  13939 	MOVQ $0x00000000, ret+48(FP)
  13940 	RET
  13941 
  13942 match_dst_size_check_encodeSnappyBlockAsm8B:
  13943 	MOVL CX, SI
  13944 	MOVL 12(SP), DI
  13945 	CMPL DI, SI
  13946 	JEQ  emit_literal_done_match_emit_encodeSnappyBlockAsm8B
  13947 	MOVL SI, R8
  13948 	MOVL SI, 12(SP)
  13949 	LEAQ (DX)(DI*1), SI
  13950 	SUBL DI, R8
  13951 	LEAL -1(R8), DI
  13952 	CMPL DI, $0x3c
  13953 	JB   one_byte_match_emit_encodeSnappyBlockAsm8B
  13954 	CMPL DI, $0x00000100
  13955 	JB   two_bytes_match_emit_encodeSnappyBlockAsm8B
  13956 	JB   three_bytes_match_emit_encodeSnappyBlockAsm8B
  13957 
  13958 three_bytes_match_emit_encodeSnappyBlockAsm8B:
  13959 	MOVB $0xf4, (AX)
  13960 	MOVW DI, 1(AX)
  13961 	ADDQ $0x03, AX
  13962 	JMP  memmove_long_match_emit_encodeSnappyBlockAsm8B
  13963 
  13964 two_bytes_match_emit_encodeSnappyBlockAsm8B:
  13965 	MOVB $0xf0, (AX)
  13966 	MOVB DI, 1(AX)
  13967 	ADDQ $0x02, AX
  13968 	CMPL DI, $0x40
  13969 	JB   memmove_match_emit_encodeSnappyBlockAsm8B
  13970 	JMP  memmove_long_match_emit_encodeSnappyBlockAsm8B
  13971 
  13972 one_byte_match_emit_encodeSnappyBlockAsm8B:
  13973 	SHLB $0x02, DI
  13974 	MOVB DI, (AX)
  13975 	ADDQ $0x01, AX
  13976 
  13977 memmove_match_emit_encodeSnappyBlockAsm8B:
  13978 	LEAQ (AX)(R8*1), DI
  13979 
  13980 	// genMemMoveShort
  13981 	CMPQ R8, $0x08
  13982 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8
  13983 	CMPQ R8, $0x10
  13984 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16
  13985 	CMPQ R8, $0x20
  13986 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32
  13987 	JMP  emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64
  13988 
  13989 emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8:
  13990 	MOVQ (SI), R9
  13991 	MOVQ R9, (AX)
  13992 	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm8B
  13993 
  13994 emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16:
  13995 	MOVQ (SI), R9
  13996 	MOVQ -8(SI)(R8*1), SI
  13997 	MOVQ R9, (AX)
  13998 	MOVQ SI, -8(AX)(R8*1)
  13999 	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm8B
  14000 
  14001 emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32:
  14002 	MOVOU (SI), X0
  14003 	MOVOU -16(SI)(R8*1), X1
  14004 	MOVOU X0, (AX)
  14005 	MOVOU X1, -16(AX)(R8*1)
  14006 	JMP   memmove_end_copy_match_emit_encodeSnappyBlockAsm8B
  14007 
  14008 emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64:
  14009 	MOVOU (SI), X0
  14010 	MOVOU 16(SI), X1
  14011 	MOVOU -32(SI)(R8*1), X2
  14012 	MOVOU -16(SI)(R8*1), X3
  14013 	MOVOU X0, (AX)
  14014 	MOVOU X1, 16(AX)
  14015 	MOVOU X2, -32(AX)(R8*1)
  14016 	MOVOU X3, -16(AX)(R8*1)
  14017 
  14018 memmove_end_copy_match_emit_encodeSnappyBlockAsm8B:
  14019 	MOVQ DI, AX
  14020 	JMP  emit_literal_done_match_emit_encodeSnappyBlockAsm8B
  14021 
  14022 memmove_long_match_emit_encodeSnappyBlockAsm8B:
  14023 	LEAQ (AX)(R8*1), DI
  14024 
  14025 	// genMemMoveLong
  14026 	MOVOU (SI), X0
  14027 	MOVOU 16(SI), X1
  14028 	MOVOU -32(SI)(R8*1), X2
  14029 	MOVOU -16(SI)(R8*1), X3
  14030 	MOVQ  R8, R10
  14031 	SHRQ  $0x05, R10
  14032 	MOVQ  AX, R9
  14033 	ANDL  $0x0000001f, R9
  14034 	MOVQ  $0x00000040, R11
  14035 	SUBQ  R9, R11
  14036 	DECQ  R10
  14037 	JA    emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
  14038 	LEAQ  -32(SI)(R11*1), R9
  14039 	LEAQ  -32(AX)(R11*1), R12
  14040 
  14041 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back:
  14042 	MOVOU (R9), X4
  14043 	MOVOU 16(R9), X5
  14044 	MOVOA X4, (R12)
  14045 	MOVOA X5, 16(R12)
  14046 	ADDQ  $0x20, R12
  14047 	ADDQ  $0x20, R9
  14048 	ADDQ  $0x20, R11
  14049 	DECQ  R10
  14050 	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back
  14051 
  14052 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32:
  14053 	MOVOU -32(SI)(R11*1), X4
  14054 	MOVOU -16(SI)(R11*1), X5
  14055 	MOVOA X4, -32(AX)(R11*1)
  14056 	MOVOA X5, -16(AX)(R11*1)
  14057 	ADDQ  $0x20, R11
  14058 	CMPQ  R8, R11
  14059 	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
  14060 	MOVOU X0, (AX)
  14061 	MOVOU X1, 16(AX)
  14062 	MOVOU X2, -32(AX)(R8*1)
  14063 	MOVOU X3, -16(AX)(R8*1)
  14064 	MOVQ  DI, AX
  14065 
  14066 emit_literal_done_match_emit_encodeSnappyBlockAsm8B:
  14067 match_nolit_loop_encodeSnappyBlockAsm8B:
  14068 	MOVL CX, SI
  14069 	SUBL BX, SI
  14070 	MOVL SI, 16(SP)
  14071 	ADDL $0x04, CX
  14072 	ADDL $0x04, BX
  14073 	MOVQ src_len+32(FP), SI
  14074 	SUBL CX, SI
  14075 	LEAQ (DX)(CX*1), DI
  14076 	LEAQ (DX)(BX*1), BX
  14077 
  14078 	// matchLen
  14079 	XORL R9, R9
  14080 	CMPL SI, $0x08
  14081 	JB   matchlen_match4_match_nolit_encodeSnappyBlockAsm8B
  14082 
  14083 matchlen_loopback_match_nolit_encodeSnappyBlockAsm8B:
  14084 	MOVQ  (DI)(R9*1), R8
  14085 	XORQ  (BX)(R9*1), R8
  14086 	TESTQ R8, R8
  14087 	JZ    matchlen_loop_match_nolit_encodeSnappyBlockAsm8B
  14088 
  14089 #ifdef GOAMD64_v3
  14090 	TZCNTQ R8, R8
  14091 
  14092 #else
  14093 	BSFQ R8, R8
  14094 
  14095 #endif
  14096 	SARQ $0x03, R8
  14097 	LEAL (R9)(R8*1), R9
  14098 	JMP  match_nolit_end_encodeSnappyBlockAsm8B
  14099 
  14100 matchlen_loop_match_nolit_encodeSnappyBlockAsm8B:
  14101 	LEAL -8(SI), SI
  14102 	LEAL 8(R9), R9
  14103 	CMPL SI, $0x08
  14104 	JAE  matchlen_loopback_match_nolit_encodeSnappyBlockAsm8B
  14105 	JZ   match_nolit_end_encodeSnappyBlockAsm8B
  14106 
  14107 matchlen_match4_match_nolit_encodeSnappyBlockAsm8B:
  14108 	CMPL SI, $0x04
  14109 	JB   matchlen_match2_match_nolit_encodeSnappyBlockAsm8B
  14110 	MOVL (DI)(R9*1), R8
  14111 	CMPL (BX)(R9*1), R8
  14112 	JNE  matchlen_match2_match_nolit_encodeSnappyBlockAsm8B
  14113 	SUBL $0x04, SI
  14114 	LEAL 4(R9), R9
  14115 
  14116 matchlen_match2_match_nolit_encodeSnappyBlockAsm8B:
  14117 	CMPL SI, $0x02
  14118 	JB   matchlen_match1_match_nolit_encodeSnappyBlockAsm8B
  14119 	MOVW (DI)(R9*1), R8
  14120 	CMPW (BX)(R9*1), R8
  14121 	JNE  matchlen_match1_match_nolit_encodeSnappyBlockAsm8B
  14122 	SUBL $0x02, SI
  14123 	LEAL 2(R9), R9
  14124 
  14125 matchlen_match1_match_nolit_encodeSnappyBlockAsm8B:
  14126 	CMPL SI, $0x01
  14127 	JB   match_nolit_end_encodeSnappyBlockAsm8B
  14128 	MOVB (DI)(R9*1), R8
  14129 	CMPB (BX)(R9*1), R8
  14130 	JNE  match_nolit_end_encodeSnappyBlockAsm8B
  14131 	LEAL 1(R9), R9
  14132 
  14133 match_nolit_end_encodeSnappyBlockAsm8B:
  14134 	ADDL R9, CX
  14135 	MOVL 16(SP), BX
  14136 	ADDL $0x04, R9
  14137 	MOVL CX, 12(SP)
  14138 
  14139 	// emitCopy
  14140 two_byte_offset_match_nolit_encodeSnappyBlockAsm8B:
  14141 	CMPL R9, $0x40
  14142 	JBE  two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B
  14143 	MOVB $0xee, (AX)
  14144 	MOVW BX, 1(AX)
  14145 	LEAL -60(R9), R9
  14146 	ADDQ $0x03, AX
  14147 	JMP  two_byte_offset_match_nolit_encodeSnappyBlockAsm8B
  14148 
  14149 two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B:
  14150 	MOVL R9, SI
  14151 	SHLL $0x02, SI
  14152 	CMPL R9, $0x0c
  14153 	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm8B
  14154 	LEAL -15(SI), SI
  14155 	MOVB BL, 1(AX)
  14156 	SHRL $0x08, BX
  14157 	SHLL $0x05, BX
  14158 	ORL  BX, SI
  14159 	MOVB SI, (AX)
  14160 	ADDQ $0x02, AX
  14161 	JMP  match_nolit_emitcopy_end_encodeSnappyBlockAsm8B
  14162 
  14163 emit_copy_three_match_nolit_encodeSnappyBlockAsm8B:
  14164 	LEAL -2(SI), SI
  14165 	MOVB SI, (AX)
  14166 	MOVW BX, 1(AX)
  14167 	ADDQ $0x03, AX
  14168 
  14169 match_nolit_emitcopy_end_encodeSnappyBlockAsm8B:
  14170 	CMPL CX, 8(SP)
  14171 	JAE  emit_remainder_encodeSnappyBlockAsm8B
  14172 	MOVQ -2(DX)(CX*1), SI
  14173 	CMPQ AX, (SP)
  14174 	JB   match_nolit_dst_ok_encodeSnappyBlockAsm8B
  14175 	MOVQ $0x00000000, ret+48(FP)
  14176 	RET
  14177 
  14178 match_nolit_dst_ok_encodeSnappyBlockAsm8B:
  14179 	MOVQ  $0x9e3779b1, R8
  14180 	MOVQ  SI, DI
  14181 	SHRQ  $0x10, SI
  14182 	MOVQ  SI, BX
  14183 	SHLQ  $0x20, DI
  14184 	IMULQ R8, DI
  14185 	SHRQ  $0x38, DI
  14186 	SHLQ  $0x20, BX
  14187 	IMULQ R8, BX
  14188 	SHRQ  $0x38, BX
  14189 	LEAL  -2(CX), R8
  14190 	LEAQ  24(SP)(BX*4), R9
  14191 	MOVL  (R9), BX
  14192 	MOVL  R8, 24(SP)(DI*4)
  14193 	MOVL  CX, (R9)
  14194 	CMPL  (DX)(BX*1), SI
  14195 	JEQ   match_nolit_loop_encodeSnappyBlockAsm8B
  14196 	INCL  CX
  14197 	JMP   search_loop_encodeSnappyBlockAsm8B
  14198 
  14199 emit_remainder_encodeSnappyBlockAsm8B:
  14200 	MOVQ src_len+32(FP), CX
  14201 	SUBL 12(SP), CX
  14202 	LEAQ 3(AX)(CX*1), CX
  14203 	CMPQ CX, (SP)
  14204 	JB   emit_remainder_ok_encodeSnappyBlockAsm8B
  14205 	MOVQ $0x00000000, ret+48(FP)
  14206 	RET
  14207 
  14208 emit_remainder_ok_encodeSnappyBlockAsm8B:
  14209 	MOVQ src_len+32(FP), CX
  14210 	MOVL 12(SP), BX
  14211 	CMPL BX, CX
  14212 	JEQ  emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B
  14213 	MOVL CX, SI
  14214 	MOVL CX, 12(SP)
  14215 	LEAQ (DX)(BX*1), CX
  14216 	SUBL BX, SI
  14217 	LEAL -1(SI), DX
  14218 	CMPL DX, $0x3c
  14219 	JB   one_byte_emit_remainder_encodeSnappyBlockAsm8B
  14220 	CMPL DX, $0x00000100
  14221 	JB   two_bytes_emit_remainder_encodeSnappyBlockAsm8B
  14222 	JB   three_bytes_emit_remainder_encodeSnappyBlockAsm8B
  14223 
  14224 three_bytes_emit_remainder_encodeSnappyBlockAsm8B:
  14225 	MOVB $0xf4, (AX)
  14226 	MOVW DX, 1(AX)
  14227 	ADDQ $0x03, AX
  14228 	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm8B
  14229 
  14230 two_bytes_emit_remainder_encodeSnappyBlockAsm8B:
  14231 	MOVB $0xf0, (AX)
  14232 	MOVB DL, 1(AX)
  14233 	ADDQ $0x02, AX
  14234 	CMPL DX, $0x40
  14235 	JB   memmove_emit_remainder_encodeSnappyBlockAsm8B
  14236 	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm8B
  14237 
  14238 one_byte_emit_remainder_encodeSnappyBlockAsm8B:
  14239 	SHLB $0x02, DL
  14240 	MOVB DL, (AX)
  14241 	ADDQ $0x01, AX
  14242 
  14243 memmove_emit_remainder_encodeSnappyBlockAsm8B:
  14244 	LEAQ (AX)(SI*1), DX
  14245 	MOVL SI, BX
  14246 
  14247 	// genMemMoveShort
  14248 	CMPQ BX, $0x03
  14249 	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_1or2
  14250 	JE   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_3
  14251 	CMPQ BX, $0x08
  14252 	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_4through7
  14253 	CMPQ BX, $0x10
  14254 	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8through16
  14255 	CMPQ BX, $0x20
  14256 	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_17through32
  14257 	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64
  14258 
  14259 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_1or2:
  14260 	MOVB (CX), SI
  14261 	MOVB -1(CX)(BX*1), CL
  14262 	MOVB SI, (AX)
  14263 	MOVB CL, -1(AX)(BX*1)
  14264 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
  14265 
  14266 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_3:
  14267 	MOVW (CX), SI
  14268 	MOVB 2(CX), CL
  14269 	MOVW SI, (AX)
  14270 	MOVB CL, 2(AX)
  14271 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
  14272 
  14273 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_4through7:
  14274 	MOVL (CX), SI
  14275 	MOVL -4(CX)(BX*1), CX
  14276 	MOVL SI, (AX)
  14277 	MOVL CX, -4(AX)(BX*1)
  14278 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
  14279 
  14280 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8through16:
  14281 	MOVQ (CX), SI
  14282 	MOVQ -8(CX)(BX*1), CX
  14283 	MOVQ SI, (AX)
  14284 	MOVQ CX, -8(AX)(BX*1)
  14285 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
  14286 
  14287 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_17through32:
  14288 	MOVOU (CX), X0
  14289 	MOVOU -16(CX)(BX*1), X1
  14290 	MOVOU X0, (AX)
  14291 	MOVOU X1, -16(AX)(BX*1)
  14292 	JMP   memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
  14293 
  14294 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64:
  14295 	MOVOU (CX), X0
  14296 	MOVOU 16(CX), X1
  14297 	MOVOU -32(CX)(BX*1), X2
  14298 	MOVOU -16(CX)(BX*1), X3
  14299 	MOVOU X0, (AX)
  14300 	MOVOU X1, 16(AX)
  14301 	MOVOU X2, -32(AX)(BX*1)
  14302 	MOVOU X3, -16(AX)(BX*1)
  14303 
  14304 memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B:
  14305 	MOVQ DX, AX
  14306 	JMP  emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B
  14307 
  14308 memmove_long_emit_remainder_encodeSnappyBlockAsm8B:
  14309 	LEAQ (AX)(SI*1), DX
  14310 	MOVL SI, BX
  14311 
  14312 	// genMemMoveLong
  14313 	MOVOU (CX), X0
  14314 	MOVOU 16(CX), X1
  14315 	MOVOU -32(CX)(BX*1), X2
  14316 	MOVOU -16(CX)(BX*1), X3
  14317 	MOVQ  BX, DI
  14318 	SHRQ  $0x05, DI
  14319 	MOVQ  AX, SI
  14320 	ANDL  $0x0000001f, SI
  14321 	MOVQ  $0x00000040, R8
  14322 	SUBQ  SI, R8
  14323 	DECQ  DI
  14324 	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
  14325 	LEAQ  -32(CX)(R8*1), SI
  14326 	LEAQ  -32(AX)(R8*1), R9
  14327 
  14328 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back:
  14329 	MOVOU (SI), X4
  14330 	MOVOU 16(SI), X5
  14331 	MOVOA X4, (R9)
  14332 	MOVOA X5, 16(R9)
  14333 	ADDQ  $0x20, R9
  14334 	ADDQ  $0x20, SI
  14335 	ADDQ  $0x20, R8
  14336 	DECQ  DI
  14337 	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back
  14338 
  14339 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32:
  14340 	MOVOU -32(CX)(R8*1), X4
  14341 	MOVOU -16(CX)(R8*1), X5
  14342 	MOVOA X4, -32(AX)(R8*1)
  14343 	MOVOA X5, -16(AX)(R8*1)
  14344 	ADDQ  $0x20, R8
  14345 	CMPQ  BX, R8
  14346 	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
  14347 	MOVOU X0, (AX)
  14348 	MOVOU X1, 16(AX)
  14349 	MOVOU X2, -32(AX)(BX*1)
  14350 	MOVOU X3, -16(AX)(BX*1)
  14351 	MOVQ  DX, AX
  14352 
  14353 emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B:
  14354 	MOVQ dst_base+0(FP), CX
  14355 	SUBQ CX, AX
  14356 	MOVQ AX, ret+48(FP)
  14357 	RET
  14358 
  14359 // func encodeSnappyBetterBlockAsm(dst []byte, src []byte) int
  14360 // Requires: BMI, SSE2
  14361 TEXT ·encodeSnappyBetterBlockAsm(SB), $589848-56 // frame = 24 bytes of scalars at 0..23(SP) + 589824 bytes of hash tables; result is bytes written to dst, or 0 when an output-space check fails
  14362 	MOVQ dst_base+0(FP), AX // AX = dst write pointer for the whole function
  14363 	MOVQ $0x00001200, CX // 0x1200 iterations x 128 bytes = 589824 bytes to clear
  14364 	LEAQ 24(SP), DX // tables start at 24(SP)
  14365 	PXOR X0, X0
  14366 
  14367 zero_loop_encodeSnappyBetterBlockAsm: // clear both hash tables, 128 bytes per pass
  14368 	MOVOU X0, (DX)
  14369 	MOVOU X0, 16(DX)
  14370 	MOVOU X0, 32(DX)
  14371 	MOVOU X0, 48(DX)
  14372 	MOVOU X0, 64(DX)
  14373 	MOVOU X0, 80(DX)
  14374 	MOVOU X0, 96(DX)
  14375 	MOVOU X0, 112(DX)
  14376 	ADDQ  $0x80, DX
  14377 	DECQ  CX
  14378 	JNZ   zero_loop_encodeSnappyBetterBlockAsm
  14379 	MOVL  $0x00000000, 12(SP) // 12(SP) = src offset of the first byte not yet emitted (nextEmit)
  14380 	MOVQ  src_len+32(FP), CX
  14381 	LEAQ  -9(CX), DX
  14382 	LEAQ  -8(CX), BX
  14383 	MOVL  BX, 8(SP) // 8(SP) = src_len - 8: searching stops once a position reaches it
  14384 	SHRQ  $0x05, CX
  14385 	SUBL  CX, DX // DX = src_len - 9 - src_len/32
  14386 	LEAQ  (AX)(DX*1), DX
  14387 	MOVQ  DX, (SP) // (SP) = dst limit pointer used by the output-space checks below
  14388 	MOVL  $0x00000001, CX // CX = current src offset s, starting at 1
  14389 	MOVL  $0x00000000, 16(SP) // 16(SP) = last match offset (only written, never read, in this function)
  14390 	MOVQ  src_base+24(FP), DX // DX = src base pointer from here on
  14391 
  14392 search_loop_encodeSnappyBetterBlockAsm: // look for a match candidate at s = CX
  14393 	MOVL CX, BX
  14394 	SUBL 12(SP), BX
  14395 	SHRL $0x07, BX // BX = (s - nextEmit) >> 7: the step grows with the amount of pending literals
  14396 	CMPL BX, $0x63
  14397 	JBE  check_maxskip_ok_encodeSnappyBetterBlockAsm
  14398 	LEAL 100(CX), BX // step capped: next position = s + 100
  14399 	JMP  check_maxskip_cont_encodeSnappyBetterBlockAsm
  14400 
  14401 check_maxskip_ok_encodeSnappyBetterBlockAsm:
  14402 	LEAL 1(CX)(BX*1), BX // next position = s + 1 + step
  14403 
  14404 check_maxskip_cont_encodeSnappyBetterBlockAsm:
  14405 	CMPL  BX, 8(SP)
  14406 	JAE   emit_remainder_encodeSnappyBetterBlockAsm // next position at or past the search limit: flush the tail
  14407 	MOVQ  (DX)(CX*1), SI // SI = 8 bytes at src[s]
  14408 	MOVL  BX, 20(SP) // 20(SP) = next search position
  14409 	MOVQ  $0x00cf1bbcdcbfa563, R8 // multiplier for the long hash
  14410 	MOVQ  $0x9e3779b1, BX // multiplier for the short hash
  14411 	MOVQ  SI, R9
  14412 	MOVQ  SI, R10
  14413 	SHLQ  $0x08, R9 // drops the top byte, so the long hash covers the low 7 bytes
  14414 	IMULQ R8, R9
  14415 	SHRQ  $0x2f, R9 // keep the top 17 bits -> index into the table at 24(SP)
  14416 	SHLQ  $0x20, R10 // short hash covers the low 4 bytes
  14417 	IMULQ BX, R10
  14418 	SHRQ  $0x32, R10 // keep the top 14 bits -> index into the table at 524312(SP)
  14419 	MOVL  24(SP)(R9*4), BX // BX = candidate from the long table
  14420 	MOVL  524312(SP)(R10*4), DI // DI = candidate from the short table
  14421 	MOVL  CX, 24(SP)(R9*4) // record s in both tables
  14422 	MOVL  CX, 524312(SP)(R10*4)
  14423 	MOVQ  (DX)(BX*1), R9
  14424 	MOVQ  (DX)(DI*1), R10
  14425 	CMPQ  R9, SI // full 8-byte match at the long candidate?
  14426 	JEQ   candidate_match_encodeSnappyBetterBlockAsm
  14427 	CMPQ  R10, SI // full 8-byte match at the short candidate?
  14428 	JNE   no_short_found_encodeSnappyBetterBlockAsm
  14429 	MOVL  DI, BX // use the short candidate
  14430 	JMP   candidate_match_encodeSnappyBetterBlockAsm
  14431 
  14432 no_short_found_encodeSnappyBetterBlockAsm:
  14433 	CMPL R9, SI // 4-byte match at the long candidate?
  14434 	JEQ  candidate_match_encodeSnappyBetterBlockAsm
  14435 	CMPL R10, SI // 4-byte match at the short candidate?
  14436 	JEQ  candidateS_match_encodeSnappyBetterBlockAsm
  14437 	MOVL 20(SP), CX // nothing found: move to the precomputed next position
  14438 	JMP  search_loop_encodeSnappyBetterBlockAsm
  14439 
  14440 candidateS_match_encodeSnappyBetterBlockAsm: // short candidate matched 4 bytes; first try the long table at s+1
  14441 	SHRQ  $0x08, SI // SI now holds the bytes starting at s+1
  14442 	MOVQ  SI, R9
  14443 	SHLQ  $0x08, R9
  14444 	IMULQ R8, R9
  14445 	SHRQ  $0x2f, R9
  14446 	MOVL  24(SP)(R9*4), BX
  14447 	INCL  CX // tentatively s = s + 1
  14448 	MOVL  CX, 24(SP)(R9*4)
  14449 	CMPL  (DX)(BX*1), SI // 4-byte match at the long candidate for s+1?
  14450 	JEQ   candidate_match_encodeSnappyBetterBlockAsm
  14451 	DECL  CX // no: restore s
  14452 	MOVL  DI, BX // and fall back to the short candidate
  14453 
  14454 candidate_match_encodeSnappyBetterBlockAsm: // BX = candidate offset, CX = s
  14455 	MOVL  12(SP), SI // SI = nextEmit, the lower bound for backward extension
  14456 	TESTL BX, BX
  14457 	JZ    match_extend_back_end_encodeSnappyBetterBlockAsm // candidate at offset 0 cannot move back
  14458 
  14459 match_extend_back_loop_encodeSnappyBetterBlockAsm: // step both positions back while the preceding bytes agree
  14460 	CMPL CX, SI
  14461 	JBE  match_extend_back_end_encodeSnappyBetterBlockAsm
  14462 	MOVB -1(DX)(BX*1), DI
  14463 	MOVB -1(DX)(CX*1), R8
  14464 	CMPB DI, R8
  14465 	JNE  match_extend_back_end_encodeSnappyBetterBlockAsm
  14466 	LEAL -1(CX), CX
  14467 	DECL BX
  14468 	JZ   match_extend_back_end_encodeSnappyBetterBlockAsm
  14469 	JMP  match_extend_back_loop_encodeSnappyBetterBlockAsm
  14470 
  14471 match_extend_back_end_encodeSnappyBetterBlockAsm:
  14472 	MOVL CX, SI
  14473 	SUBL 12(SP), SI // SI = number of pending literal bytes
  14474 	LEAQ 5(AX)(SI*1), SI // dst pointer after those literals plus 5 bytes
  14475 	CMPQ SI, (SP)
  14476 	JB   match_dst_size_check_encodeSnappyBetterBlockAsm
  14477 	MOVQ $0x00000000, ret+48(FP) // would reach the dst limit: return 0
  14478 	RET
  14479 
  14480 match_dst_size_check_encodeSnappyBetterBlockAsm:
  14481 	MOVL CX, SI // SI = match start in src (kept until the index update below)
  14482 	ADDL $0x04, CX // skip 4 bytes on both sides before measuring the match
  14483 	ADDL $0x04, BX
  14484 	MOVQ src_len+32(FP), DI
  14485 	SUBL CX, DI // DI = bytes left in src after s+4
  14486 	LEAQ (DX)(CX*1), R8
  14487 	LEAQ (DX)(BX*1), R9
  14488 
  14489 	// matchLen
  14490 	XORL R11, R11 // R11 = count of matching bytes beyond the first 4
  14491 	CMPL DI, $0x08
  14492 	JB   matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm
  14493 
  14494 matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm: // compare 8 bytes per pass
  14495 	MOVQ  (R8)(R11*1), R10
  14496 	XORQ  (R9)(R11*1), R10
  14497 	TESTQ R10, R10
  14498 	JZ    matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm
  14499 
  14500 #ifdef GOAMD64_v3
  14501 	TZCNTQ R10, R10
  14502 
  14503 #else
  14504 	BSFQ R10, R10
  14505 
  14506 #endif
  14507 	SARQ $0x03, R10 // lowest differing bit index / 8 = matching bytes within this word
  14508 	LEAL (R11)(R10*1), R11
  14509 	JMP  match_nolit_end_encodeSnappyBetterBlockAsm
  14510 
  14511 matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm:
  14512 	LEAL -8(DI), DI
  14513 	LEAL 8(R11), R11
  14514 	CMPL DI, $0x08
  14515 	JAE  matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm
  14516 	JZ   match_nolit_end_encodeSnappyBetterBlockAsm
  14517 
  14518 matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm: // fewer than 8 bytes left: try 4, then 2, then 1
  14519 	CMPL DI, $0x04
  14520 	JB   matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm
  14521 	MOVL (R8)(R11*1), R10
  14522 	CMPL (R9)(R11*1), R10
  14523 	JNE  matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm
  14524 	SUBL $0x04, DI
  14525 	LEAL 4(R11), R11
  14526 
  14527 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm:
  14528 	CMPL DI, $0x02
  14529 	JB   matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm
  14530 	MOVW (R8)(R11*1), R10
  14531 	CMPW (R9)(R11*1), R10
  14532 	JNE  matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm
  14533 	SUBL $0x02, DI
  14534 	LEAL 2(R11), R11
  14535 
  14536 matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm:
  14537 	CMPL DI, $0x01
  14538 	JB   match_nolit_end_encodeSnappyBetterBlockAsm
  14539 	MOVB (R8)(R11*1), R10
  14540 	CMPB (R9)(R11*1), R10
  14541 	JNE  match_nolit_end_encodeSnappyBetterBlockAsm
  14542 	LEAL 1(R11), R11
  14543 
  14544 match_nolit_end_encodeSnappyBetterBlockAsm:
  14545 	MOVL CX, DI
  14546 	SUBL BX, DI // DI = match offset (distance back to the candidate)
  14547 
  14548 	// Check if repeat
  14549 	CMPL R11, $0x01
  14550 	JA   match_length_ok_encodeSnappyBetterBlockAsm
  14551 	CMPL DI, $0x0000ffff
  14552 	JBE  match_length_ok_encodeSnappyBetterBlockAsm
  14553 	MOVL 20(SP), CX // at most 1 extra byte matched and offset > 65535: drop the match
  14554 	INCL CX
  14555 	JMP  search_loop_encodeSnappyBetterBlockAsm
  14556 
  14557 match_length_ok_encodeSnappyBetterBlockAsm:
  14558 	MOVL DI, 16(SP)
  14559 	MOVL 12(SP), BX
  14560 	CMPL BX, SI
  14561 	JEQ  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm // no pending literals before the match
  14562 	MOVL SI, R8
  14563 	MOVL SI, 12(SP)
  14564 	LEAQ (DX)(BX*1), R9 // R9 = pointer to the pending literal bytes
  14565 	SUBL BX, R8 // R8 = literal length
  14566 	LEAL -1(R8), BX // BX = length - 1, the value stored in the literal header
  14567 	CMPL BX, $0x3c
  14568 	JB   one_byte_match_emit_encodeSnappyBetterBlockAsm
  14569 	CMPL BX, $0x00000100
  14570 	JB   two_bytes_match_emit_encodeSnappyBetterBlockAsm
  14571 	CMPL BX, $0x00010000
  14572 	JB   three_bytes_match_emit_encodeSnappyBetterBlockAsm
  14573 	CMPL BX, $0x01000000
  14574 	JB   four_bytes_match_emit_encodeSnappyBetterBlockAsm
  14575 	MOVB $0xfc, (AX) // 5-byte header: tag 0xfc + 32-bit (length - 1)
  14576 	MOVL BX, 1(AX)
  14577 	ADDQ $0x05, AX
  14578 	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm
  14579 
  14580 four_bytes_match_emit_encodeSnappyBetterBlockAsm: // 4-byte header: tag 0xf8 + 24-bit (length - 1)
  14581 	MOVL BX, R10
  14582 	SHRL $0x10, R10
  14583 	MOVB $0xf8, (AX)
  14584 	MOVW BX, 1(AX)
  14585 	MOVB R10, 3(AX)
  14586 	ADDQ $0x04, AX
  14587 	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm
  14588 
  14589 three_bytes_match_emit_encodeSnappyBetterBlockAsm: // 3-byte header: tag 0xf4 + 16-bit (length - 1)
  14590 	MOVB $0xf4, (AX)
  14591 	MOVW BX, 1(AX)
  14592 	ADDQ $0x03, AX
  14593 	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm
  14594 
  14595 two_bytes_match_emit_encodeSnappyBetterBlockAsm: // 2-byte header: tag 0xf0 + 8-bit (length - 1)
  14596 	MOVB $0xf0, (AX)
  14597 	MOVB BL, 1(AX)
  14598 	ADDQ $0x02, AX
  14599 	CMPL BX, $0x40
  14600 	JB   memmove_match_emit_encodeSnappyBetterBlockAsm // length <= 64 goes through the short copy
  14601 	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm
  14602 
  14603 one_byte_match_emit_encodeSnappyBetterBlockAsm: // 1-byte header: (length - 1) << 2
  14604 	SHLB $0x02, BL
  14605 	MOVB BL, (AX)
  14606 	ADDQ $0x01, AX
  14607 
  14608 memmove_match_emit_encodeSnappyBetterBlockAsm:
  14609 	LEAQ (AX)(R8*1), BX // BX = dst pointer after the literal bytes
  14610 
  14611 	// genMemMoveShort
  14612 	CMPQ R8, $0x08
  14613 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8
  14614 	CMPQ R8, $0x10
  14615 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8through16
  14616 	CMPQ R8, $0x20
  14617 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_17through32
  14618 	JMP  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_33through64
  14619 
  14620 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8: // copies a full 8 bytes for any length <= 8; AX is then set from BX, so extra bytes are overwritten later
  14621 	MOVQ (R9), R10
  14622 	MOVQ R10, (AX)
  14623 	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm
  14624 
  14625 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8through16: // first 8 + last 8 bytes (the two stores may overlap)
  14626 	MOVQ (R9), R10
  14627 	MOVQ -8(R9)(R8*1), R9
  14628 	MOVQ R10, (AX)
  14629 	MOVQ R9, -8(AX)(R8*1)
  14630 	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm
  14631 
  14632 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_17through32: // first 16 + last 16 bytes
  14633 	MOVOU (R9), X0
  14634 	MOVOU -16(R9)(R8*1), X1
  14635 	MOVOU X0, (AX)
  14636 	MOVOU X1, -16(AX)(R8*1)
  14637 	JMP   memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm
  14638 
  14639 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_33through64: // first 32 + last 32 bytes
  14640 	MOVOU (R9), X0
  14641 	MOVOU 16(R9), X1
  14642 	MOVOU -32(R9)(R8*1), X2
  14643 	MOVOU -16(R9)(R8*1), X3
  14644 	MOVOU X0, (AX)
  14645 	MOVOU X1, 16(AX)
  14646 	MOVOU X2, -32(AX)(R8*1)
  14647 	MOVOU X3, -16(AX)(R8*1)
  14648 
  14649 memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm:
  14650 	MOVQ BX, AX
  14651 	JMP  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm
  14652 
  14653 memmove_long_match_emit_encodeSnappyBetterBlockAsm:
  14654 	LEAQ (AX)(R8*1), BX // BX = dst pointer after the literal bytes
  14655 
  14656 	// genMemMoveLong
  14657 	MOVOU (R9), X0 // save the first and last 32 source bytes; they are stored after the loop
  14658 	MOVOU 16(R9), X1
  14659 	MOVOU -32(R9)(R8*1), X2
  14660 	MOVOU -16(R9)(R8*1), X3
  14661 	MOVQ  R8, R12
  14662 	SHRQ  $0x05, R12 // R12 = length / 32
  14663 	MOVQ  AX, R10
  14664 	ANDL  $0x0000001f, R10
  14665 	MOVQ  $0x00000040, R13
  14666 	SUBQ  R10, R13 // R13 = 64 - (dst & 31), so dst + R13 - 32 is 32-byte aligned
  14667 	DECQ  R12
  14668 	JA    emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32
  14669 	LEAQ  -32(R9)(R13*1), R10
  14670 	LEAQ  -32(AX)(R13*1), R14
  14671 
  14672 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_big_loop_back: // 32 bytes per pass through R10 (src) and R14 (dst)
  14673 	MOVOU (R10), X4
  14674 	MOVOU 16(R10), X5
  14675 	MOVOA X4, (R14)
  14676 	MOVOA X5, 16(R14)
  14677 	ADDQ  $0x20, R14
  14678 	ADDQ  $0x20, R10
  14679 	ADDQ  $0x20, R13
  14680 	DECQ  R12
  14681 	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_big_loop_back
  14682 
  14683 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32: // aligned 32-byte stores at offset R13 - 32 while length >= R13
  14684 	MOVOU -32(R9)(R13*1), X4
  14685 	MOVOU -16(R9)(R13*1), X5
  14686 	MOVOA X4, -32(AX)(R13*1)
  14687 	MOVOA X5, -16(AX)(R13*1)
  14688 	ADDQ  $0x20, R13
  14689 	CMPQ  R8, R13
  14690 	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32
  14691 	MOVOU X0, (AX) // write the saved head and tail to cover the unaligned ends
  14692 	MOVOU X1, 16(AX)
  14693 	MOVOU X2, -32(AX)(R8*1)
  14694 	MOVOU X3, -16(AX)(R8*1)
  14695 	MOVQ  BX, AX
  14696 
  14697 emit_literal_done_match_emit_encodeSnappyBetterBlockAsm:
  14698 	ADDL R11, CX // CX = src offset just past the match
  14699 	ADDL $0x04, R11 // R11 = full match length (4 skipped bytes + extension)
  14700 	MOVL CX, 12(SP) // nextEmit = end of match
  14701 
  14702 	// emitCopy
  14703 	CMPL DI, $0x00010000
  14704 	JB   two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm // offsets below 65536 use the 16-bit (or shorter) forms
  14705 
  14706 four_bytes_loop_back_match_nolit_encodeSnappyBetterBlockAsm: // 32-bit offset: emit 64-byte copies while length > 64
  14707 	CMPL R11, $0x40
  14708 	JBE  four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm
  14709 	MOVB $0xff, (AX) // tag 0xff = ((64 - 1) << 2) | 3
  14710 	MOVL DI, 1(AX)
  14711 	LEAL -64(R11), R11
  14712 	ADDQ $0x05, AX
  14713 	CMPL R11, $0x04
  14714 	JB   four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm
  14715 	JMP  four_bytes_loop_back_match_nolit_encodeSnappyBetterBlockAsm
  14716 
  14717 four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm:
  14718 	TESTL R11, R11
  14719 	JZ    match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm
  14720 	XORL  BX, BX
  14721 	LEAL  -1(BX)(R11*4), R11 // tag = length*4 - 1 = ((length - 1) << 2) | 3
  14722 	MOVB  R11, (AX)
  14723 	MOVL  DI, 1(AX)
  14724 	ADDQ  $0x05, AX
  14725 	JMP   match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm
  14726 
  14727 two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm: // 16-bit offset: emit 60-byte copies while length > 64
  14728 	CMPL R11, $0x40
  14729 	JBE  two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm
  14730 	MOVB $0xee, (AX) // tag 0xee = ((60 - 1) << 2) | 2
  14731 	MOVW DI, 1(AX)
  14732 	LEAL -60(R11), R11
  14733 	ADDQ $0x03, AX
  14734 	JMP  two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm
  14735 
  14736 two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm:
  14737 	MOVL R11, BX
  14738 	SHLL $0x02, BX // BX = length << 2
  14739 	CMPL R11, $0x0c
  14740 	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm // length >= 12 needs the 3-byte form
  14741 	CMPL DI, $0x00000800
  14742 	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm // offset >= 2048 needs the 3-byte form
  14743 	LEAL -15(BX), BX // tag = length*4 - 15 = ((length - 4) << 2) | 1
  14744 	MOVB DI, 1(AX) // low 8 bits of the offset
  14745 	SHRL $0x08, DI
  14746 	SHLL $0x05, DI
  14747 	ORL  DI, BX // offset bits 8 and up go into tag bits 5..7
  14748 	MOVB BL, (AX)
  14749 	ADDQ $0x02, AX
  14750 	JMP  match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm
  14751 
  14752 emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm:
  14753 	LEAL -2(BX), BX // tag = length*4 - 2 = ((length - 1) << 2) | 2
  14754 	MOVB BL, (AX)
  14755 	MOVW DI, 1(AX)
  14756 	ADDQ $0x03, AX
  14757 
  14758 match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm:
  14759 	CMPL CX, 8(SP)
  14760 	JAE  emit_remainder_encodeSnappyBetterBlockAsm // past the search limit: flush the tail
  14761 	CMPQ AX, (SP)
  14762 	JB   match_nolit_dst_ok_encodeSnappyBetterBlockAsm
  14763 	MOVQ $0x00000000, ret+48(FP) // dst limit reached: return 0
  14764 	RET
  14765 
  14766 match_nolit_dst_ok_encodeSnappyBetterBlockAsm: // index positions inside the match just emitted (SI = match start, CX = match end)
  14767 	MOVQ  $0x00cf1bbcdcbfa563, BX
  14768 	MOVQ  $0x9e3779b1, DI
  14769 	LEAQ  1(SI), SI // SI = match start + 1
  14770 	LEAQ  -2(CX), R8 // R8 = match end - 2
  14771 	MOVQ  (DX)(SI*1), R9
  14772 	MOVQ  1(DX)(SI*1), R10
  14773 	MOVQ  (DX)(R8*1), R11
  14774 	MOVQ  1(DX)(R8*1), R12
  14775 	SHLQ  $0x08, R9 // long hashes for SI and R8, short hashes for SI+1 and R8+1
  14776 	IMULQ BX, R9
  14777 	SHRQ  $0x2f, R9
  14778 	SHLQ  $0x20, R10
  14779 	IMULQ DI, R10
  14780 	SHRQ  $0x32, R10
  14781 	SHLQ  $0x08, R11
  14782 	IMULQ BX, R11
  14783 	SHRQ  $0x2f, R11
  14784 	SHLQ  $0x20, R12
  14785 	IMULQ DI, R12
  14786 	SHRQ  $0x32, R12
  14787 	LEAQ  1(SI), DI
  14788 	LEAQ  1(R8), R13
  14789 	MOVL  SI, 24(SP)(R9*4)
  14790 	MOVL  R8, 24(SP)(R11*4)
  14791 	MOVL  DI, 524312(SP)(R10*4)
  14792 	MOVL  R13, 524312(SP)(R12*4)
  14793 	ADDQ  $0x01, SI
  14794 	SUBQ  $0x01, R8
  14795 
  14796 index_loop_encodeSnappyBetterBlockAsm: // fill the long table from both ends, stepping by 2, until SI and R8 meet
  14797 	CMPQ  SI, R8
  14798 	JAE   search_loop_encodeSnappyBetterBlockAsm
  14799 	MOVQ  (DX)(SI*1), DI
  14800 	MOVQ  (DX)(R8*1), R9
  14801 	SHLQ  $0x08, DI
  14802 	IMULQ BX, DI
  14803 	SHRQ  $0x2f, DI
  14804 	SHLQ  $0x08, R9
  14805 	IMULQ BX, R9
  14806 	SHRQ  $0x2f, R9
  14807 	MOVL  SI, 24(SP)(DI*4)
  14808 	MOVL  R8, 24(SP)(R9*4)
  14809 	ADDQ  $0x02, SI
  14810 	SUBQ  $0x02, R8
  14811 	JMP   index_loop_encodeSnappyBetterBlockAsm
  14812 
  14813 emit_remainder_encodeSnappyBetterBlockAsm: // emit everything from nextEmit to the end of src as one literal
  14814 	MOVQ src_len+32(FP), CX
  14815 	SUBL 12(SP), CX
  14816 	LEAQ 5(AX)(CX*1), CX // dst pointer after the remaining bytes plus 5
  14817 	CMPQ CX, (SP)
  14818 	JB   emit_remainder_ok_encodeSnappyBetterBlockAsm
  14819 	MOVQ $0x00000000, ret+48(FP) // would reach the dst limit: return 0
  14820 	RET
  14821 
  14822 emit_remainder_ok_encodeSnappyBetterBlockAsm:
  14823 	MOVQ src_len+32(FP), CX
  14824 	MOVL 12(SP), BX
  14825 	CMPL BX, CX
  14826 	JEQ  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm // nothing left to emit
  14827 	MOVL CX, SI
  14828 	MOVL CX, 12(SP)
  14829 	LEAQ (DX)(BX*1), CX // CX = pointer to the remaining literal bytes (DX is reused below)
  14830 	SUBL BX, SI // SI = literal length
  14831 	LEAL -1(SI), DX // DX = length - 1 for the header
  14832 	CMPL DX, $0x3c
  14833 	JB   one_byte_emit_remainder_encodeSnappyBetterBlockAsm
  14834 	CMPL DX, $0x00000100
  14835 	JB   two_bytes_emit_remainder_encodeSnappyBetterBlockAsm
  14836 	CMPL DX, $0x00010000
  14837 	JB   three_bytes_emit_remainder_encodeSnappyBetterBlockAsm
  14838 	CMPL DX, $0x01000000
  14839 	JB   four_bytes_emit_remainder_encodeSnappyBetterBlockAsm
  14840 	MOVB $0xfc, (AX) // 5-byte header: tag 0xfc + 32-bit (length - 1)
  14841 	MOVL DX, 1(AX)
  14842 	ADDQ $0x05, AX
  14843 	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm
  14844 
  14845 four_bytes_emit_remainder_encodeSnappyBetterBlockAsm: // 4-byte header: tag 0xf8 + 24-bit (length - 1)
  14846 	MOVL DX, BX
  14847 	SHRL $0x10, BX
  14848 	MOVB $0xf8, (AX)
  14849 	MOVW DX, 1(AX)
  14850 	MOVB BL, 3(AX)
  14851 	ADDQ $0x04, AX
  14852 	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm
  14853 
  14854 three_bytes_emit_remainder_encodeSnappyBetterBlockAsm: // 3-byte header: tag 0xf4 + 16-bit (length - 1)
  14855 	MOVB $0xf4, (AX)
  14856 	MOVW DX, 1(AX)
  14857 	ADDQ $0x03, AX
  14858 	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm
  14859 
  14860 two_bytes_emit_remainder_encodeSnappyBetterBlockAsm: // 2-byte header: tag 0xf0 + 8-bit (length - 1)
  14861 	MOVB $0xf0, (AX)
  14862 	MOVB DL, 1(AX)
  14863 	ADDQ $0x02, AX
  14864 	CMPL DX, $0x40
  14865 	JB   memmove_emit_remainder_encodeSnappyBetterBlockAsm // length <= 64 goes through the short copy
  14866 	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm
  14867 
  14868 one_byte_emit_remainder_encodeSnappyBetterBlockAsm: // 1-byte header: (length - 1) << 2
  14869 	SHLB $0x02, DL
  14870 	MOVB DL, (AX)
  14871 	ADDQ $0x01, AX
  14872 
  14873 memmove_emit_remainder_encodeSnappyBetterBlockAsm:
  14874 	LEAQ (AX)(SI*1), DX // DX = dst pointer after the literal bytes
  14875 	MOVL SI, BX // BX = length
  14876 
  14877 	// genMemMoveShort
  14878 	CMPQ BX, $0x03 // unlike the match path above, this copy is exact for every length
  14879 	JB   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_1or2
  14880 	JE   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_3
  14881 	CMPQ BX, $0x08
  14882 	JB   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_4through7
  14883 	CMPQ BX, $0x10
  14884 	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_8through16
  14885 	CMPQ BX, $0x20
  14886 	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_17through32
  14887 	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_33through64
  14888 
  14889 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_1or2: // first byte + last byte (the same byte when length is 1)
  14890 	MOVB (CX), SI
  14891 	MOVB -1(CX)(BX*1), CL
  14892 	MOVB SI, (AX)
  14893 	MOVB CL, -1(AX)(BX*1)
  14894 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
  14895 
  14896 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_3: // 2 bytes + 1 byte
  14897 	MOVW (CX), SI
  14898 	MOVB 2(CX), CL
  14899 	MOVW SI, (AX)
  14900 	MOVB CL, 2(AX)
  14901 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
  14902 
  14903 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_4through7: // first 4 + last 4 bytes
  14904 	MOVL (CX), SI
  14905 	MOVL -4(CX)(BX*1), CX
  14906 	MOVL SI, (AX)
  14907 	MOVL CX, -4(AX)(BX*1)
  14908 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
  14909 
  14910 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_8through16: // first 8 + last 8 bytes
  14911 	MOVQ (CX), SI
  14912 	MOVQ -8(CX)(BX*1), CX
  14913 	MOVQ SI, (AX)
  14914 	MOVQ CX, -8(AX)(BX*1)
  14915 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
  14916 
  14917 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_17through32: // first 16 + last 16 bytes
  14918 	MOVOU (CX), X0
  14919 	MOVOU -16(CX)(BX*1), X1
  14920 	MOVOU X0, (AX)
  14921 	MOVOU X1, -16(AX)(BX*1)
  14922 	JMP   memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
  14923 
  14924 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_33through64: // first 32 + last 32 bytes
  14925 	MOVOU (CX), X0
  14926 	MOVOU 16(CX), X1
  14927 	MOVOU -32(CX)(BX*1), X2
  14928 	MOVOU -16(CX)(BX*1), X3
  14929 	MOVOU X0, (AX)
  14930 	MOVOU X1, 16(AX)
  14931 	MOVOU X2, -32(AX)(BX*1)
  14932 	MOVOU X3, -16(AX)(BX*1)
  14933 
  14934 memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm:
  14935 	MOVQ DX, AX
  14936 	JMP  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm
  14937 
  14938 memmove_long_emit_remainder_encodeSnappyBetterBlockAsm:
  14939 	LEAQ (AX)(SI*1), DX // DX = dst pointer after the literal bytes
  14940 	MOVL SI, BX // BX = length
  14941 
  14942 	// genMemMoveLong
  14943 	MOVOU (CX), X0 // save the first and last 32 source bytes; they are stored after the loop
  14944 	MOVOU 16(CX), X1
  14945 	MOVOU -32(CX)(BX*1), X2
  14946 	MOVOU -16(CX)(BX*1), X3
  14947 	MOVQ  BX, DI
  14948 	SHRQ  $0x05, DI // DI = length / 32
  14949 	MOVQ  AX, SI
  14950 	ANDL  $0x0000001f, SI
  14951 	MOVQ  $0x00000040, R8
  14952 	SUBQ  SI, R8 // R8 = 64 - (dst & 31), so dst + R8 - 32 is 32-byte aligned
  14953 	DECQ  DI
  14954 	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32
  14955 	LEAQ  -32(CX)(R8*1), SI
  14956 	LEAQ  -32(AX)(R8*1), R9
  14957 
  14958 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_back: // 32 bytes per pass through SI (src) and R9 (dst)
  14959 	MOVOU (SI), X4
  14960 	MOVOU 16(SI), X5
  14961 	MOVOA X4, (R9)
  14962 	MOVOA X5, 16(R9)
  14963 	ADDQ  $0x20, R9
  14964 	ADDQ  $0x20, SI
  14965 	ADDQ  $0x20, R8
  14966 	DECQ  DI
  14967 	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_back
  14968 
  14969 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32: // aligned 32-byte stores at offset R8 - 32 while length >= R8
  14970 	MOVOU -32(CX)(R8*1), X4
  14971 	MOVOU -16(CX)(R8*1), X5
  14972 	MOVOA X4, -32(AX)(R8*1)
  14973 	MOVOA X5, -16(AX)(R8*1)
  14974 	ADDQ  $0x20, R8
  14975 	CMPQ  BX, R8
  14976 	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32
  14977 	MOVOU X0, (AX) // write the saved head and tail to cover the unaligned ends
  14978 	MOVOU X1, 16(AX)
  14979 	MOVOU X2, -32(AX)(BX*1)
  14980 	MOVOU X3, -16(AX)(BX*1)
  14981 	MOVQ  DX, AX
  14982 
  14983 emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm:
  14984 	MOVQ dst_base+0(FP), CX
  14985 	SUBQ CX, AX // result = dst write pointer - dst base = bytes written
  14986 	MOVQ AX, ret+48(FP)
  14987 	RET
  14988 
  14989 // func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte) int
  14990 // Requires: BMI, SSE2
  14991 TEXT ·encodeSnappyBetterBlockAsm64K(SB), $327704-56
  14992 	MOVQ dst_base+0(FP), AX
  14993 	MOVQ $0x00000a00, CX
  14994 	LEAQ 24(SP), DX
  14995 	PXOR X0, X0
  14996 
  14997 zero_loop_encodeSnappyBetterBlockAsm64K:
  14998 	MOVOU X0, (DX)
  14999 	MOVOU X0, 16(DX)
  15000 	MOVOU X0, 32(DX)
  15001 	MOVOU X0, 48(DX)
  15002 	MOVOU X0, 64(DX)
  15003 	MOVOU X0, 80(DX)
  15004 	MOVOU X0, 96(DX)
  15005 	MOVOU X0, 112(DX)
  15006 	ADDQ  $0x80, DX
  15007 	DECQ  CX
  15008 	JNZ   zero_loop_encodeSnappyBetterBlockAsm64K
  15009 	MOVL  $0x00000000, 12(SP)
  15010 	MOVQ  src_len+32(FP), CX
  15011 	LEAQ  -9(CX), DX
  15012 	LEAQ  -8(CX), BX
  15013 	MOVL  BX, 8(SP)
  15014 	SHRQ  $0x05, CX
  15015 	SUBL  CX, DX
  15016 	LEAQ  (AX)(DX*1), DX
  15017 	MOVQ  DX, (SP)
  15018 	MOVL  $0x00000001, CX
  15019 	MOVL  $0x00000000, 16(SP)
  15020 	MOVQ  src_base+24(FP), DX
  15021 
  15022 search_loop_encodeSnappyBetterBlockAsm64K:
  15023 	MOVL  CX, BX
  15024 	SUBL  12(SP), BX
  15025 	SHRL  $0x07, BX
  15026 	LEAL  1(CX)(BX*1), BX
  15027 	CMPL  BX, 8(SP)
  15028 	JAE   emit_remainder_encodeSnappyBetterBlockAsm64K
  15029 	MOVQ  (DX)(CX*1), SI
  15030 	MOVL  BX, 20(SP)
  15031 	MOVQ  $0x00cf1bbcdcbfa563, R8
  15032 	MOVQ  $0x9e3779b1, BX
  15033 	MOVQ  SI, R9
  15034 	MOVQ  SI, R10
  15035 	SHLQ  $0x08, R9
  15036 	IMULQ R8, R9
  15037 	SHRQ  $0x30, R9
  15038 	SHLQ  $0x20, R10
  15039 	IMULQ BX, R10
  15040 	SHRQ  $0x32, R10
  15041 	MOVL  24(SP)(R9*4), BX
  15042 	MOVL  262168(SP)(R10*4), DI
  15043 	MOVL  CX, 24(SP)(R9*4)
  15044 	MOVL  CX, 262168(SP)(R10*4)
  15045 	MOVQ  (DX)(BX*1), R9
  15046 	MOVQ  (DX)(DI*1), R10
  15047 	CMPQ  R9, SI
  15048 	JEQ   candidate_match_encodeSnappyBetterBlockAsm64K
  15049 	CMPQ  R10, SI
  15050 	JNE   no_short_found_encodeSnappyBetterBlockAsm64K
  15051 	MOVL  DI, BX
  15052 	JMP   candidate_match_encodeSnappyBetterBlockAsm64K
  15053 
  15054 no_short_found_encodeSnappyBetterBlockAsm64K:
  15055 	CMPL R9, SI
  15056 	JEQ  candidate_match_encodeSnappyBetterBlockAsm64K
  15057 	CMPL R10, SI
  15058 	JEQ  candidateS_match_encodeSnappyBetterBlockAsm64K
  15059 	MOVL 20(SP), CX
  15060 	JMP  search_loop_encodeSnappyBetterBlockAsm64K
  15061 
  15062 candidateS_match_encodeSnappyBetterBlockAsm64K:
  15063 	SHRQ  $0x08, SI
  15064 	MOVQ  SI, R9
  15065 	SHLQ  $0x08, R9
  15066 	IMULQ R8, R9
  15067 	SHRQ  $0x30, R9
  15068 	MOVL  24(SP)(R9*4), BX
  15069 	INCL  CX
  15070 	MOVL  CX, 24(SP)(R9*4)
  15071 	CMPL  (DX)(BX*1), SI
  15072 	JEQ   candidate_match_encodeSnappyBetterBlockAsm64K
  15073 	DECL  CX
  15074 	MOVL  DI, BX
  15075 
  15076 candidate_match_encodeSnappyBetterBlockAsm64K:
  15077 	MOVL  12(SP), SI
  15078 	TESTL BX, BX
  15079 	JZ    match_extend_back_end_encodeSnappyBetterBlockAsm64K
  15080 
  15081 match_extend_back_loop_encodeSnappyBetterBlockAsm64K:
  15082 	CMPL CX, SI
  15083 	JBE  match_extend_back_end_encodeSnappyBetterBlockAsm64K
  15084 	MOVB -1(DX)(BX*1), DI
  15085 	MOVB -1(DX)(CX*1), R8
  15086 	CMPB DI, R8
  15087 	JNE  match_extend_back_end_encodeSnappyBetterBlockAsm64K
  15088 	LEAL -1(CX), CX
  15089 	DECL BX
  15090 	JZ   match_extend_back_end_encodeSnappyBetterBlockAsm64K
  15091 	JMP  match_extend_back_loop_encodeSnappyBetterBlockAsm64K
  15092 
  15093 match_extend_back_end_encodeSnappyBetterBlockAsm64K:
  15094 	MOVL CX, SI
  15095 	SUBL 12(SP), SI
  15096 	LEAQ 3(AX)(SI*1), SI
  15097 	CMPQ SI, (SP)
  15098 	JB   match_dst_size_check_encodeSnappyBetterBlockAsm64K
  15099 	MOVQ $0x00000000, ret+48(FP)
  15100 	RET
  15101 
  15102 match_dst_size_check_encodeSnappyBetterBlockAsm64K:
  15103 	MOVL CX, SI
  15104 	ADDL $0x04, CX
  15105 	ADDL $0x04, BX
  15106 	MOVQ src_len+32(FP), DI
  15107 	SUBL CX, DI
  15108 	LEAQ (DX)(CX*1), R8
  15109 	LEAQ (DX)(BX*1), R9
  15110 
  15111 	// matchLen
  15112 	XORL R11, R11
  15113 	CMPL DI, $0x08
  15114 	JB   matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K
  15115 
  15116 matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm64K:
  15117 	MOVQ  (R8)(R11*1), R10
  15118 	XORQ  (R9)(R11*1), R10
  15119 	TESTQ R10, R10
  15120 	JZ    matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm64K
  15121 
  15122 #ifdef GOAMD64_v3
  15123 	TZCNTQ R10, R10
  15124 
  15125 #else
  15126 	BSFQ R10, R10
  15127 
  15128 #endif
  15129 	SARQ $0x03, R10
  15130 	LEAL (R11)(R10*1), R11
  15131 	JMP  match_nolit_end_encodeSnappyBetterBlockAsm64K
  15132 
  15133 matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm64K:
  15134 	LEAL -8(DI), DI
  15135 	LEAL 8(R11), R11
  15136 	CMPL DI, $0x08
  15137 	JAE  matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm64K
  15138 	JZ   match_nolit_end_encodeSnappyBetterBlockAsm64K
  15139 
  15140 matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K:
  15141 	CMPL DI, $0x04
  15142 	JB   matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K
  15143 	MOVL (R8)(R11*1), R10
  15144 	CMPL (R9)(R11*1), R10
  15145 	JNE  matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K
  15146 	SUBL $0x04, DI
  15147 	LEAL 4(R11), R11
  15148 
  15149 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K:
  15150 	CMPL DI, $0x02
  15151 	JB   matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K
  15152 	MOVW (R8)(R11*1), R10
  15153 	CMPW (R9)(R11*1), R10
  15154 	JNE  matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K
  15155 	SUBL $0x02, DI
  15156 	LEAL 2(R11), R11
  15157 
  15158 matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K:
  15159 	CMPL DI, $0x01
  15160 	JB   match_nolit_end_encodeSnappyBetterBlockAsm64K
  15161 	MOVB (R8)(R11*1), R10
  15162 	CMPB (R9)(R11*1), R10
  15163 	JNE  match_nolit_end_encodeSnappyBetterBlockAsm64K
  15164 	LEAL 1(R11), R11
  15165 
  15166 match_nolit_end_encodeSnappyBetterBlockAsm64K:
  15167 	MOVL CX, DI
  15168 	SUBL BX, DI
  15169 
  15170 	// Check if repeat
  15171 	MOVL DI, 16(SP)
  15172 	MOVL 12(SP), BX
  15173 	CMPL BX, SI
  15174 	JEQ  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K
  15175 	MOVL SI, R8
  15176 	MOVL SI, 12(SP)
  15177 	LEAQ (DX)(BX*1), R9
  15178 	SUBL BX, R8
  15179 	LEAL -1(R8), BX
  15180 	CMPL BX, $0x3c
  15181 	JB   one_byte_match_emit_encodeSnappyBetterBlockAsm64K
  15182 	CMPL BX, $0x00000100
  15183 	JB   two_bytes_match_emit_encodeSnappyBetterBlockAsm64K
  15184 	JB   three_bytes_match_emit_encodeSnappyBetterBlockAsm64K
  15185 
  15186 three_bytes_match_emit_encodeSnappyBetterBlockAsm64K:
  15187 	MOVB $0xf4, (AX)
  15188 	MOVW BX, 1(AX)
  15189 	ADDQ $0x03, AX
  15190 	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm64K
  15191 
  15192 two_bytes_match_emit_encodeSnappyBetterBlockAsm64K:
  15193 	MOVB $0xf0, (AX)
  15194 	MOVB BL, 1(AX)
  15195 	ADDQ $0x02, AX
  15196 	CMPL BX, $0x40
  15197 	JB   memmove_match_emit_encodeSnappyBetterBlockAsm64K
  15198 	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm64K
  15199 
  15200 one_byte_match_emit_encodeSnappyBetterBlockAsm64K:
  15201 	SHLB $0x02, BL
  15202 	MOVB BL, (AX)
  15203 	ADDQ $0x01, AX
  15204 
  15205 memmove_match_emit_encodeSnappyBetterBlockAsm64K:
  15206 	LEAQ (AX)(R8*1), BX
  15207 
  15208 	// genMemMoveShort
  15209 	CMPQ R8, $0x08
  15210 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8
  15211 	CMPQ R8, $0x10
  15212 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8through16
  15213 	CMPQ R8, $0x20
  15214 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_17through32
  15215 	JMP  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_33through64
  15216 
  15217 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8:
  15218 	MOVQ (R9), R10
  15219 	MOVQ R10, (AX)
  15220 	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K
  15221 
  15222 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8through16:
  15223 	MOVQ (R9), R10
  15224 	MOVQ -8(R9)(R8*1), R9
  15225 	MOVQ R10, (AX)
  15226 	MOVQ R9, -8(AX)(R8*1)
  15227 	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K
  15228 
  15229 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_17through32:
  15230 	MOVOU (R9), X0
  15231 	MOVOU -16(R9)(R8*1), X1
  15232 	MOVOU X0, (AX)
  15233 	MOVOU X1, -16(AX)(R8*1)
  15234 	JMP   memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K
  15235 
  15236 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_33through64:
  15237 	MOVOU (R9), X0
  15238 	MOVOU 16(R9), X1
  15239 	MOVOU -32(R9)(R8*1), X2
  15240 	MOVOU -16(R9)(R8*1), X3
  15241 	MOVOU X0, (AX)
  15242 	MOVOU X1, 16(AX)
  15243 	MOVOU X2, -32(AX)(R8*1)
  15244 	MOVOU X3, -16(AX)(R8*1)
  15245 
  15246 memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K:
  15247 	MOVQ BX, AX
  15248 	JMP  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K
  15249 
  15250 memmove_long_match_emit_encodeSnappyBetterBlockAsm64K:
  15251 	LEAQ (AX)(R8*1), BX
  15252 
  15253 	// genMemMoveLong
  15254 	MOVOU (R9), X0
  15255 	MOVOU 16(R9), X1
  15256 	MOVOU -32(R9)(R8*1), X2
  15257 	MOVOU -16(R9)(R8*1), X3
  15258 	MOVQ  R8, R12
  15259 	SHRQ  $0x05, R12
  15260 	MOVQ  AX, R10
  15261 	ANDL  $0x0000001f, R10
  15262 	MOVQ  $0x00000040, R13
  15263 	SUBQ  R10, R13
  15264 	DECQ  R12
  15265 	JA    emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32
  15266 	LEAQ  -32(R9)(R13*1), R10
  15267 	LEAQ  -32(AX)(R13*1), R14
  15268 
  15269 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_big_loop_back:
  15270 	MOVOU (R10), X4
  15271 	MOVOU 16(R10), X5
  15272 	MOVOA X4, (R14)
  15273 	MOVOA X5, 16(R14)
  15274 	ADDQ  $0x20, R14
  15275 	ADDQ  $0x20, R10
  15276 	ADDQ  $0x20, R13
  15277 	DECQ  R12
  15278 	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_big_loop_back
  15279 
  15280 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32:
  15281 	MOVOU -32(R9)(R13*1), X4
  15282 	MOVOU -16(R9)(R13*1), X5
  15283 	MOVOA X4, -32(AX)(R13*1)
  15284 	MOVOA X5, -16(AX)(R13*1)
  15285 	ADDQ  $0x20, R13
  15286 	CMPQ  R8, R13
  15287 	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32
  15288 	MOVOU X0, (AX)
  15289 	MOVOU X1, 16(AX)
  15290 	MOVOU X2, -32(AX)(R8*1)
  15291 	MOVOU X3, -16(AX)(R8*1)
  15292 	MOVQ  BX, AX
  15293 
  15294 emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K:
  15295 	ADDL R11, CX
  15296 	ADDL $0x04, R11
  15297 	MOVL CX, 12(SP)
  15298 
  15299 	// emitCopy
  15300 two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm64K:
  15301 	CMPL R11, $0x40
  15302 	JBE  two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm64K
  15303 	MOVB $0xee, (AX)
  15304 	MOVW DI, 1(AX)
  15305 	LEAL -60(R11), R11
  15306 	ADDQ $0x03, AX
  15307 	JMP  two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm64K
  15308 
  15309 two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm64K:
  15310 	MOVL R11, BX
  15311 	SHLL $0x02, BX
  15312 	CMPL R11, $0x0c
  15313 	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K
  15314 	CMPL DI, $0x00000800
  15315 	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K
  15316 	LEAL -15(BX), BX
  15317 	MOVB DI, 1(AX)
  15318 	SHRL $0x08, DI
  15319 	SHLL $0x05, DI
  15320 	ORL  DI, BX
  15321 	MOVB BL, (AX)
  15322 	ADDQ $0x02, AX
  15323 	JMP  match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm64K
  15324 
  15325 emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K:
  15326 	LEAL -2(BX), BX
  15327 	MOVB BL, (AX)
  15328 	MOVW DI, 1(AX)
  15329 	ADDQ $0x03, AX
  15330 
  15331 match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm64K:
  15332 	CMPL CX, 8(SP)
  15333 	JAE  emit_remainder_encodeSnappyBetterBlockAsm64K
  15334 	CMPQ AX, (SP)
  15335 	JB   match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K
  15336 	MOVQ $0x00000000, ret+48(FP)
  15337 	RET
  15338 
  15339 match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K:
  15340 	MOVQ  $0x00cf1bbcdcbfa563, BX
  15341 	MOVQ  $0x9e3779b1, DI
  15342 	LEAQ  1(SI), SI
  15343 	LEAQ  -2(CX), R8
  15344 	MOVQ  (DX)(SI*1), R9
  15345 	MOVQ  1(DX)(SI*1), R10
  15346 	MOVQ  (DX)(R8*1), R11
  15347 	MOVQ  1(DX)(R8*1), R12
  15348 	SHLQ  $0x08, R9
  15349 	IMULQ BX, R9
  15350 	SHRQ  $0x30, R9
  15351 	SHLQ  $0x20, R10
  15352 	IMULQ DI, R10
  15353 	SHRQ  $0x32, R10
  15354 	SHLQ  $0x08, R11
  15355 	IMULQ BX, R11
  15356 	SHRQ  $0x30, R11
  15357 	SHLQ  $0x20, R12
  15358 	IMULQ DI, R12
  15359 	SHRQ  $0x32, R12
  15360 	LEAQ  1(SI), DI
  15361 	LEAQ  1(R8), R13
  15362 	MOVL  SI, 24(SP)(R9*4)
  15363 	MOVL  R8, 24(SP)(R11*4)
  15364 	MOVL  DI, 262168(SP)(R10*4)
  15365 	MOVL  R13, 262168(SP)(R12*4)
  15366 	ADDQ  $0x01, SI
  15367 	SUBQ  $0x01, R8
  15368 
  15369 index_loop_encodeSnappyBetterBlockAsm64K:
  15370 	CMPQ  SI, R8
  15371 	JAE   search_loop_encodeSnappyBetterBlockAsm64K
  15372 	MOVQ  (DX)(SI*1), DI
  15373 	MOVQ  (DX)(R8*1), R9
  15374 	SHLQ  $0x08, DI
  15375 	IMULQ BX, DI
  15376 	SHRQ  $0x30, DI
  15377 	SHLQ  $0x08, R9
  15378 	IMULQ BX, R9
  15379 	SHRQ  $0x30, R9
  15380 	MOVL  SI, 24(SP)(DI*4)
  15381 	MOVL  R8, 24(SP)(R9*4)
  15382 	ADDQ  $0x02, SI
  15383 	SUBQ  $0x02, R8
  15384 	JMP   index_loop_encodeSnappyBetterBlockAsm64K
  15385 
  15386 emit_remainder_encodeSnappyBetterBlockAsm64K:
  15387 	MOVQ src_len+32(FP), CX
  15388 	SUBL 12(SP), CX
  15389 	LEAQ 3(AX)(CX*1), CX
  15390 	CMPQ CX, (SP)
  15391 	JB   emit_remainder_ok_encodeSnappyBetterBlockAsm64K
  15392 	MOVQ $0x00000000, ret+48(FP)
  15393 	RET
  15394 
  15395 emit_remainder_ok_encodeSnappyBetterBlockAsm64K:
  15396 	MOVQ src_len+32(FP), CX
  15397 	MOVL 12(SP), BX
  15398 	CMPL BX, CX
  15399 	JEQ  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K
  15400 	MOVL CX, SI
  15401 	MOVL CX, 12(SP)
  15402 	LEAQ (DX)(BX*1), CX
  15403 	SUBL BX, SI
  15404 	LEAL -1(SI), DX
  15405 	CMPL DX, $0x3c
  15406 	JB   one_byte_emit_remainder_encodeSnappyBetterBlockAsm64K
  15407 	CMPL DX, $0x00000100
  15408 	JB   two_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K
  15409 	JB   three_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K
  15410 
  15411 three_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K:
  15412 	MOVB $0xf4, (AX)
  15413 	MOVW DX, 1(AX)
  15414 	ADDQ $0x03, AX
  15415 	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K
  15416 
  15417 two_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K:
  15418 	MOVB $0xf0, (AX)
  15419 	MOVB DL, 1(AX)
  15420 	ADDQ $0x02, AX
  15421 	CMPL DX, $0x40
  15422 	JB   memmove_emit_remainder_encodeSnappyBetterBlockAsm64K
  15423 	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K
  15424 
  15425 one_byte_emit_remainder_encodeSnappyBetterBlockAsm64K:
  15426 	SHLB $0x02, DL
  15427 	MOVB DL, (AX)
  15428 	ADDQ $0x01, AX
  15429 
  15430 memmove_emit_remainder_encodeSnappyBetterBlockAsm64K:
  15431 	LEAQ (AX)(SI*1), DX
  15432 	MOVL SI, BX
  15433 
  15434 	// genMemMoveShort
  15435 	CMPQ BX, $0x03
  15436 	JB   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_1or2
  15437 	JE   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_3
  15438 	CMPQ BX, $0x08
  15439 	JB   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_4through7
  15440 	CMPQ BX, $0x10
  15441 	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_8through16
  15442 	CMPQ BX, $0x20
  15443 	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_17through32
  15444 	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_33through64
  15445 
  15446 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_1or2:
  15447 	MOVB (CX), SI
  15448 	MOVB -1(CX)(BX*1), CL
  15449 	MOVB SI, (AX)
  15450 	MOVB CL, -1(AX)(BX*1)
  15451 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
  15452 
  15453 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_3:
  15454 	MOVW (CX), SI
  15455 	MOVB 2(CX), CL
  15456 	MOVW SI, (AX)
  15457 	MOVB CL, 2(AX)
  15458 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
  15459 
  15460 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_4through7:
  15461 	MOVL (CX), SI
  15462 	MOVL -4(CX)(BX*1), CX
  15463 	MOVL SI, (AX)
  15464 	MOVL CX, -4(AX)(BX*1)
  15465 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
  15466 
  15467 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_8through16:
  15468 	MOVQ (CX), SI
  15469 	MOVQ -8(CX)(BX*1), CX
  15470 	MOVQ SI, (AX)
  15471 	MOVQ CX, -8(AX)(BX*1)
  15472 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
  15473 
  15474 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_17through32:
  15475 	MOVOU (CX), X0
  15476 	MOVOU -16(CX)(BX*1), X1
  15477 	MOVOU X0, (AX)
  15478 	MOVOU X1, -16(AX)(BX*1)
  15479 	JMP   memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
  15480 
  15481 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_33through64:
  15482 	MOVOU (CX), X0
  15483 	MOVOU 16(CX), X1
  15484 	MOVOU -32(CX)(BX*1), X2
  15485 	MOVOU -16(CX)(BX*1), X3
  15486 	MOVOU X0, (AX)
  15487 	MOVOU X1, 16(AX)
  15488 	MOVOU X2, -32(AX)(BX*1)
  15489 	MOVOU X3, -16(AX)(BX*1)
  15490 
  15491 memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K:
  15492 	MOVQ DX, AX
  15493 	JMP  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K
  15494 
  15495 memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K:
  15496 	LEAQ (AX)(SI*1), DX
  15497 	MOVL SI, BX
  15498 
  15499 	// genMemMoveLong
  15500 	MOVOU (CX), X0
  15501 	MOVOU 16(CX), X1
  15502 	MOVOU -32(CX)(BX*1), X2
  15503 	MOVOU -16(CX)(BX*1), X3
  15504 	MOVQ  BX, DI
  15505 	SHRQ  $0x05, DI
  15506 	MOVQ  AX, SI
  15507 	ANDL  $0x0000001f, SI
  15508 	MOVQ  $0x00000040, R8
  15509 	SUBQ  SI, R8
  15510 	DECQ  DI
  15511 	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32
  15512 	LEAQ  -32(CX)(R8*1), SI
  15513 	LEAQ  -32(AX)(R8*1), R9
  15514 
  15515 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop_back:
  15516 	MOVOU (SI), X4
  15517 	MOVOU 16(SI), X5
  15518 	MOVOA X4, (R9)
  15519 	MOVOA X5, 16(R9)
  15520 	ADDQ  $0x20, R9
  15521 	ADDQ  $0x20, SI
  15522 	ADDQ  $0x20, R8
  15523 	DECQ  DI
  15524 	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop_back
  15525 
  15526 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32:
  15527 	MOVOU -32(CX)(R8*1), X4
  15528 	MOVOU -16(CX)(R8*1), X5
  15529 	MOVOA X4, -32(AX)(R8*1)
  15530 	MOVOA X5, -16(AX)(R8*1)
  15531 	ADDQ  $0x20, R8
  15532 	CMPQ  BX, R8
  15533 	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32
  15534 	MOVOU X0, (AX)
  15535 	MOVOU X1, 16(AX)
  15536 	MOVOU X2, -32(AX)(BX*1)
  15537 	MOVOU X3, -16(AX)(BX*1)
  15538 	MOVQ  DX, AX
  15539 
  15540 emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K:
  15541 	MOVQ dst_base+0(FP), CX
  15542 	SUBQ CX, AX
  15543 	MOVQ AX, ret+48(FP)
  15544 	RET
  15545 
  15546 // func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte) int
  15547 // Requires: BMI, SSE2
  15548 TEXT ·encodeSnappyBetterBlockAsm12B(SB), $81944-56 // frame = 24 bytes of scratch slots + 81920 bytes of hash tables; result is bytes written to dst, or 0 when the dst limit is hit
  15549 	MOVQ dst_base+0(FP), AX // AX = output write pointer, starts at dst
  15550 	MOVQ $0x00000280, CX // 640 iterations x 128 bytes = 81920 bytes to clear
  15551 	LEAQ 24(SP), DX // the tables begin at 24(SP)
  15552 	PXOR X0, X0 // X0 = 0
  15553 
  15554 zero_loop_encodeSnappyBetterBlockAsm12B: // zero both hash tables, 128 bytes per iteration
  15555 	MOVOU X0, (DX)
  15556 	MOVOU X0, 16(DX)
  15557 	MOVOU X0, 32(DX)
  15558 	MOVOU X0, 48(DX)
  15559 	MOVOU X0, 64(DX)
  15560 	MOVOU X0, 80(DX)
  15561 	MOVOU X0, 96(DX)
  15562 	MOVOU X0, 112(DX)
  15563 	ADDQ  $0x80, DX
  15564 	DECQ  CX
  15565 	JNZ   zero_loop_encodeSnappyBetterBlockAsm12B
  15566 	MOVL  $0x00000000, 12(SP) // 12(SP) = offset of the first input byte not yet emitted
  15567 	MOVQ  src_len+32(FP), CX
  15568 	LEAQ  -9(CX), DX
  15569 	LEAQ  -8(CX), BX
  15570 	MOVL  BX, 8(SP) // 8(SP) = src_len-8: the search position must stay below this
  15571 	SHRQ  $0x05, CX
  15572 	SUBL  CX, DX // DX = src_len - 9 - src_len/32
  15573 	LEAQ  (AX)(DX*1), DX
  15574 	MOVQ  DX, (SP) // (SP) = dst limit pointer; reaching it makes the function return 0
  15575 	MOVL  $0x00000001, CX // CX = current input position
  15576 	MOVL  $0x00000000, 16(SP) // 16(SP) = slot that receives the match offset at match_nolit_end
  15577 	MOVQ  src_base+24(FP), DX // DX = src base pointer
  15578 
  15579 search_loop_encodeSnappyBetterBlockAsm12B: // probe both hash tables for a match at position CX
  15580 	MOVL  CX, BX
  15581 	SUBL  12(SP), BX
  15582 	SHRL  $0x06, BX // skip grows by 1 for every 64 bytes since the last emit
  15583 	LEAL  1(CX)(BX*1), BX // BX = next position to try if nothing matches here
  15584 	CMPL  BX, 8(SP)
  15585 	JAE   emit_remainder_encodeSnappyBetterBlockAsm12B // too close to the end: flush the rest as a literal
  15586 	MOVQ  (DX)(CX*1), SI // SI = 8 input bytes at the current position
  15587 	MOVL  BX, 20(SP) // 20(SP) = saved next position
  15588 	MOVQ  $0x0000cf1bbcdcbf9b, R8 // multiplier for the 6-byte hash
  15589 	MOVQ  $0x9e3779b1, BX // multiplier for the 4-byte hash
  15590 	MOVQ  SI, R9
  15591 	MOVQ  SI, R10
  15592 	SHLQ  $0x10, R9 // drop the top 2 bytes: hash covers the low 6 bytes
  15593 	IMULQ R8, R9
  15594 	SHRQ  $0x32, R9 // keep the top 14 bits: index into the table at 24(SP)
  15595 	SHLQ  $0x20, R10 // hash covers the low 4 bytes
  15596 	IMULQ BX, R10
  15597 	SHRQ  $0x34, R10 // keep the top 12 bits: index into the table at 65560(SP)
  15598 	MOVL  24(SP)(R9*4), BX // BX = candidate from the 6-byte-hash table
  15599 	MOVL  65560(SP)(R10*4), DI // DI = candidate from the 4-byte-hash table
  15600 	MOVL  CX, 24(SP)(R9*4) // record the current position in both tables
  15601 	MOVL  CX, 65560(SP)(R10*4)
  15602 	MOVQ  (DX)(BX*1), R9 // R9 = 8 bytes at the first candidate
  15603 	MOVQ  (DX)(DI*1), R10 // R10 = 8 bytes at the second candidate
  15604 	CMPQ  R9, SI
  15605 	JEQ   candidate_match_encodeSnappyBetterBlockAsm12B // all 8 bytes equal at candidate BX
  15606 	CMPQ  R10, SI
  15607 	JNE   no_short_found_encodeSnappyBetterBlockAsm12B
  15608 	MOVL  DI, BX // all 8 bytes equal at candidate DI: use it
  15609 	JMP   candidate_match_encodeSnappyBetterBlockAsm12B
  15610 
  15611 no_short_found_encodeSnappyBetterBlockAsm12B: // no 8-byte match; retry comparing only the low 4 bytes
  15612 	CMPL R9, SI
  15613 	JEQ  candidate_match_encodeSnappyBetterBlockAsm12B
  15614 	CMPL R10, SI
  15615 	JEQ  candidateS_match_encodeSnappyBetterBlockAsm12B
  15616 	MOVL 20(SP), CX // no match: advance to the saved next position
  15617 	JMP  search_loop_encodeSnappyBetterBlockAsm12B
  15618 
  15619 candidateS_match_encodeSnappyBetterBlockAsm12B: // 4-byte match at DI; first check whether position CX+1 has a 6-byte-hash candidate
  15620 	SHRQ  $0x08, SI // SI = input bytes starting at CX+1
  15621 	MOVQ  SI, R9
  15622 	SHLQ  $0x10, R9
  15623 	IMULQ R8, R9
  15624 	SHRQ  $0x32, R9
  15625 	MOVL  24(SP)(R9*4), BX
  15626 	INCL  CX
  15627 	MOVL  CX, 24(SP)(R9*4) // record CX+1 in the 6-byte-hash table
  15628 	CMPL  (DX)(BX*1), SI
  15629 	JEQ   candidate_match_encodeSnappyBetterBlockAsm12B // 4 bytes match at CX+1: use that candidate
  15630 	DECL  CX // otherwise go back to CX and use the 4-byte candidate
  15631 	MOVL  DI, BX
  15632 
  15633 candidate_match_encodeSnappyBetterBlockAsm12B: // BX = candidate position, CX = current position
  15634 	MOVL  12(SP), SI
  15635 	TESTL BX, BX
  15636 	JZ    match_extend_back_end_encodeSnappyBetterBlockAsm12B // candidate at offset 0 cannot be extended backwards
  15637 
  15638 match_extend_back_loop_encodeSnappyBetterBlockAsm12B: // grow the match backwards while the preceding bytes are equal
  15639 	CMPL CX, SI
  15640 	JBE  match_extend_back_end_encodeSnappyBetterBlockAsm12B // do not go back past the first unemitted byte
  15641 	MOVB -1(DX)(BX*1), DI
  15642 	MOVB -1(DX)(CX*1), R8
  15643 	CMPB DI, R8
  15644 	JNE  match_extend_back_end_encodeSnappyBetterBlockAsm12B
  15645 	LEAL -1(CX), CX
  15646 	DECL BX
  15647 	JZ   match_extend_back_end_encodeSnappyBetterBlockAsm12B
  15648 	JMP  match_extend_back_loop_encodeSnappyBetterBlockAsm12B
  15649 
  15650 match_extend_back_end_encodeSnappyBetterBlockAsm12B: // check that the pending literal (plus up to 3 header bytes) stays below the dst limit
  15651 	MOVL CX, SI
  15652 	SUBL 12(SP), SI // SI = number of pending literal bytes
  15653 	LEAQ 3(AX)(SI*1), SI
  15654 	CMPQ SI, (SP)
  15655 	JB   match_dst_size_check_encodeSnappyBetterBlockAsm12B
  15656 	MOVQ $0x00000000, ret+48(FP) // limit reached: return 0
  15657 	RET
  15658 
  15659 match_dst_size_check_encodeSnappyBetterBlockAsm12B:
  15660 	MOVL CX, SI // SI = match start position
  15661 	ADDL $0x04, CX // step both sides past the first 4 bytes before measuring the rest
  15662 	ADDL $0x04, BX
  15663 	MOVQ src_len+32(FP), DI
  15664 	SUBL CX, DI // DI = input bytes remaining after CX
  15665 	LEAQ (DX)(CX*1), R8 // R8 = pointer to current data
  15666 	LEAQ (DX)(BX*1), R9 // R9 = pointer to candidate data
  15667 
  15668 	// matchLen
  15669 	XORL R11, R11 // R11 = count of additional matching bytes
  15670 	CMPL DI, $0x08
  15671 	JB   matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B
  15672 
  15673 matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm12B: // compare 8 bytes at a time
  15674 	MOVQ  (R8)(R11*1), R10
  15675 	XORQ  (R9)(R11*1), R10 // non-zero bits mark differing bytes
  15676 	TESTQ R10, R10
  15677 	JZ    matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm12B
  15678 
  15679 #ifdef GOAMD64_v3
  15680 	TZCNTQ R10, R10 // index of the lowest differing bit
  15681 
  15682 #else
  15683 	BSFQ R10, R10 // index of the lowest differing bit
  15684 
  15685 #endif
  15686 	SARQ $0x03, R10 // bit index / 8 = equal bytes in this word
  15687 	LEAL (R11)(R10*1), R11
  15688 	JMP  match_nolit_end_encodeSnappyBetterBlockAsm12B
  15689 
  15690 matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm12B: // all 8 bytes were equal: advance
  15691 	LEAL -8(DI), DI
  15692 	LEAL 8(R11), R11
  15693 	CMPL DI, $0x08
  15694 	JAE  matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm12B
  15695 	JZ   match_nolit_end_encodeSnappyBetterBlockAsm12B // NOTE(review): flags still come from the CMPL above, so ZF looks always clear here and this branch appears never taken; the fall-through handles DI == 0 as well
  15696 
  15697 matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B: // fewer than 8 bytes left: try 4, then 2, then 1
  15698 	CMPL DI, $0x04
  15699 	JB   matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B
  15700 	MOVL (R8)(R11*1), R10
  15701 	CMPL (R9)(R11*1), R10
  15702 	JNE  matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B
  15703 	SUBL $0x04, DI
  15704 	LEAL 4(R11), R11
  15705 
  15706 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B:
  15707 	CMPL DI, $0x02
  15708 	JB   matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B
  15709 	MOVW (R8)(R11*1), R10
  15710 	CMPW (R9)(R11*1), R10
  15711 	JNE  matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B
  15712 	SUBL $0x02, DI
  15713 	LEAL 2(R11), R11
  15714 
  15715 matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B:
  15716 	CMPL DI, $0x01
  15717 	JB   match_nolit_end_encodeSnappyBetterBlockAsm12B
  15718 	MOVB (R8)(R11*1), R10
  15719 	CMPB (R9)(R11*1), R10
  15720 	JNE  match_nolit_end_encodeSnappyBetterBlockAsm12B
  15721 	LEAL 1(R11), R11
  15722 
  15723 match_nolit_end_encodeSnappyBetterBlockAsm12B: // R11 = matching bytes beyond the first 4
  15724 	MOVL CX, DI
  15725 	SUBL BX, DI // DI = match offset (current position - candidate position)
  15726 
  15727 	// Check if repeat
  15728 	MOVL DI, 16(SP) // store the offset; no branch in this function depends on it
  15729 	MOVL 12(SP), BX
  15730 	CMPL BX, SI
  15731 	JEQ  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B // no pending literal bytes before the match
  15732 	MOVL SI, R8
  15733 	MOVL SI, 12(SP)
  15734 	LEAQ (DX)(BX*1), R9 // R9 = pointer to the literal bytes
  15735 	SUBL BX, R8 // R8 = literal length
  15736 	LEAL -1(R8), BX // BX = length-1, the value written into the header
  15737 	CMPL BX, $0x3c
  15738 	JB   one_byte_match_emit_encodeSnappyBetterBlockAsm12B // length-1 < 60: header is a single byte
  15739 	CMPL BX, $0x00000100
  15740 	JB   two_bytes_match_emit_encodeSnappyBetterBlockAsm12B // length-1 < 256: tag + 1 length byte
  15741 	JB   three_bytes_match_emit_encodeSnappyBetterBlockAsm12B // redundant: never taken after the JB above; execution falls through to the same label
  15742 
  15743 three_bytes_match_emit_encodeSnappyBetterBlockAsm12B: // tag 0xf4 followed by a 16-bit length-1
  15744 	MOVB $0xf4, (AX)
  15745 	MOVW BX, 1(AX)
  15746 	ADDQ $0x03, AX
  15747 	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm12B
  15748 
  15749 two_bytes_match_emit_encodeSnappyBetterBlockAsm12B: // tag 0xf0 followed by an 8-bit length-1
  15750 	MOVB $0xf0, (AX)
  15751 	MOVB BL, 1(AX)
  15752 	ADDQ $0x02, AX
  15753 	CMPL BX, $0x40
  15754 	JB   memmove_match_emit_encodeSnappyBetterBlockAsm12B // at most 64 bytes: use the short copy
  15755 	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm12B
  15756 
  15757 one_byte_match_emit_encodeSnappyBetterBlockAsm12B: // single tag byte: (length-1) << 2
  15758 	SHLB $0x02, BL
  15759 	MOVB BL, (AX)
  15760 	ADDQ $0x01, AX
  15761 
  15762 memmove_match_emit_encodeSnappyBetterBlockAsm12B:
  15763 	LEAQ (AX)(R8*1), BX // BX = output pointer after the literal
  15764 
  15765 	// genMemMoveShort
  15766 	CMPQ R8, $0x08
  15767 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8
  15768 	CMPQ R8, $0x10
  15769 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8through16
  15770 	CMPQ R8, $0x20
  15771 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_17through32
  15772 	JMP  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_33through64
  15773 
  15774 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8: // always moves a full 8 bytes, even when R8 < 8; AX still advances by R8 only (via BX)
  15775 	MOVQ (R9), R10
  15776 	MOVQ R10, (AX)
  15777 	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B
  15778 
  15779 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8through16: // first 8 + last 8 bytes (the two moves may overlap)
  15780 	MOVQ (R9), R10
  15781 	MOVQ -8(R9)(R8*1), R9
  15782 	MOVQ R10, (AX)
  15783 	MOVQ R9, -8(AX)(R8*1)
  15784 	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B
  15785 
  15786 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_17through32: // first 16 + last 16 bytes
  15787 	MOVOU (R9), X0
  15788 	MOVOU -16(R9)(R8*1), X1
  15789 	MOVOU X0, (AX)
  15790 	MOVOU X1, -16(AX)(R8*1)
  15791 	JMP   memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B
  15792 
  15793 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_33through64: // first 32 + last 32 bytes
  15794 	MOVOU (R9), X0
  15795 	MOVOU 16(R9), X1
  15796 	MOVOU -32(R9)(R8*1), X2
  15797 	MOVOU -16(R9)(R8*1), X3
  15798 	MOVOU X0, (AX)
  15799 	MOVOU X1, 16(AX)
  15800 	MOVOU X2, -32(AX)(R8*1)
  15801 	MOVOU X3, -16(AX)(R8*1)
  15802 
  15803 memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B:
  15804 	MOVQ BX, AX // advance the output pointer past the literal
  15805 	JMP  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B
  15806 
  15807 memmove_long_match_emit_encodeSnappyBetterBlockAsm12B:
  15808 	LEAQ (AX)(R8*1), BX // BX = output pointer after the literal
  15809 
  15810 	// genMemMoveLong
  15811 	MOVOU (R9), X0 // preload the first 32 and last 32 source bytes; they are stored after the loop
  15812 	MOVOU 16(R9), X1
  15813 	MOVOU -32(R9)(R8*1), X2
  15814 	MOVOU -16(R9)(R8*1), X3
  15815 	MOVQ  R8, R12
  15816 	SHRQ  $0x05, R12 // R12 = length / 32
  15817 	MOVQ  AX, R10
  15818 	ANDL  $0x0000001f, R10 // R10 = dst misalignment within 32 bytes
  15819 	MOVQ  $0x00000040, R13
  15820 	SUBQ  R10, R13 // R13 = 64 - misalignment, so AX+R13-32 is 32-byte aligned
  15821 	DECQ  R12
  15822 	JA    emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32
  15823 	LEAQ  -32(R9)(R13*1), R10
  15824 	LEAQ  -32(AX)(R13*1), R14
  15825 
  15826 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_big_loop_back: // 32 bytes per pass: unaligned loads, aligned stores
  15827 	MOVOU (R10), X4
  15828 	MOVOU 16(R10), X5
  15829 	MOVOA X4, (R14)
  15830 	MOVOA X5, 16(R14)
  15831 	ADDQ  $0x20, R14
  15832 	ADDQ  $0x20, R10
  15833 	ADDQ  $0x20, R13
  15834 	DECQ  R12
  15835 	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_big_loop_back
  15836 
  15837 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32: // copy the 32-byte chunk at offset R13-32 while R8 >= R13
  15838 	MOVOU -32(R9)(R13*1), X4
  15839 	MOVOU -16(R9)(R13*1), X5
  15840 	MOVOA X4, -32(AX)(R13*1)
  15841 	MOVOA X5, -16(AX)(R13*1)
  15842 	ADDQ  $0x20, R13
  15843 	CMPQ  R8, R13
  15844 	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32
  15845 	MOVOU X0, (AX) // write the preloaded head and tail with unaligned stores
  15846 	MOVOU X1, 16(AX)
  15847 	MOVOU X2, -32(AX)(R8*1)
  15848 	MOVOU X3, -16(AX)(R8*1)
  15849 	MOVQ  BX, AX // advance the output pointer past the literal
  15850 
  15851 emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B:
  15852 	ADDL R11, CX // CX = position just after the match
  15853 	ADDL $0x04, R11 // R11 = full match length, including the first 4 bytes
  15854 	MOVL CX, 12(SP) // everything before CX is now emitted
  15855 
  15856 	// emitCopy
  15857 two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm12B: // R11 = remaining copy length, DI = offset
  15858 	CMPL R11, $0x40
  15859 	JBE  two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm12B
  15860 	MOVB $0xee, (AX) // more than 64 left: emit a 60-byte copy (0xee = 60*4 - 2) and loop
  15861 	MOVW DI, 1(AX)
  15862 	LEAL -60(R11), R11
  15863 	ADDQ $0x03, AX
  15864 	JMP  two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm12B
  15865 
  15866 two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm12B:
  15867 	MOVL R11, BX
  15868 	SHLL $0x02, BX // BX = length * 4
  15869 	CMPL R11, $0x0c
  15870 	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B // length >= 12 needs the 3-byte form
  15871 	CMPL DI, $0x00000800
  15872 	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B // offset >= 2048 needs the 3-byte form
  15873 	LEAL -15(BX), BX // 2-byte form: tag = (length-4)<<2 | 1
  15874 	MOVB DI, 1(AX) // low 8 bits of the offset
  15875 	SHRL $0x08, DI
  15876 	SHLL $0x05, DI // remaining offset bits go into tag bits 5 and up
  15877 	ORL  DI, BX
  15878 	MOVB BL, (AX)
  15879 	ADDQ $0x02, AX
  15880 	JMP  match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm12B
  15881 
  15882 emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B: // 3-byte form: tag = (length-1)<<2 | 2, then a 16-bit offset
  15883 	LEAL -2(BX), BX
  15884 	MOVB BL, (AX)
  15885 	MOVW DI, 1(AX)
  15886 	ADDQ $0x03, AX
  15887 
  15888 match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm12B:
  15889 	CMPL CX, 8(SP)
  15890 	JAE  emit_remainder_encodeSnappyBetterBlockAsm12B // too close to the end of src to keep searching
  15891 	CMPQ AX, (SP)
  15892 	JB   match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B
  15893 	MOVQ $0x00000000, ret+48(FP) // dst limit reached: return 0
  15894 	RET
  15895 
  15896 match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B: // add positions inside the match to the hash tables
  15897 	MOVQ  $0x0000cf1bbcdcbf9b, BX // multiplier for the 6-byte hash
  15898 	MOVQ  $0x9e3779b1, DI // multiplier for the 4-byte hash
  15899 	LEAQ  1(SI), SI // SI = match start + 1
  15900 	LEAQ  -2(CX), R8 // R8 = match end - 2
  15901 	MOVQ  (DX)(SI*1), R9
  15902 	MOVQ  1(DX)(SI*1), R10
  15903 	MOVQ  (DX)(R8*1), R11
  15904 	MOVQ  1(DX)(R8*1), R12
  15905 	SHLQ  $0x10, R9
  15906 	IMULQ BX, R9
  15907 	SHRQ  $0x32, R9 // R9 = 6-byte hash at SI
  15908 	SHLQ  $0x20, R10
  15909 	IMULQ DI, R10
  15910 	SHRQ  $0x34, R10 // R10 = 4-byte hash at SI+1
  15911 	SHLQ  $0x10, R11
  15912 	IMULQ BX, R11
  15913 	SHRQ  $0x32, R11 // R11 = 6-byte hash at R8
  15914 	SHLQ  $0x20, R12
  15915 	IMULQ DI, R12
  15916 	SHRQ  $0x34, R12 // R12 = 4-byte hash at R8+1
  15917 	LEAQ  1(SI), DI
  15918 	LEAQ  1(R8), R13
  15919 	MOVL  SI, 24(SP)(R9*4)
  15920 	MOVL  R8, 24(SP)(R11*4)
  15921 	MOVL  DI, 65560(SP)(R10*4)
  15922 	MOVL  R13, 65560(SP)(R12*4)
  15923 	ADDQ  $0x01, SI
  15924 	SUBQ  $0x01, R8
  15925 
  15926 index_loop_encodeSnappyBetterBlockAsm12B: // walk SI forward and R8 backward in steps of 2, filling only the 6-byte-hash table
  15927 	CMPQ  SI, R8
  15928 	JAE   search_loop_encodeSnappyBetterBlockAsm12B // the two cursors met: resume searching at CX
  15929 	MOVQ  (DX)(SI*1), DI
  15930 	MOVQ  (DX)(R8*1), R9
  15931 	SHLQ  $0x10, DI
  15932 	IMULQ BX, DI
  15933 	SHRQ  $0x32, DI
  15934 	SHLQ  $0x10, R9
  15935 	IMULQ BX, R9
  15936 	SHRQ  $0x32, R9
  15937 	MOVL  SI, 24(SP)(DI*4)
  15938 	MOVL  R8, 24(SP)(R9*4)
  15939 	ADDQ  $0x02, SI
  15940 	SUBQ  $0x02, R8
  15941 	JMP   index_loop_encodeSnappyBetterBlockAsm12B
  15942 
  15943 emit_remainder_encodeSnappyBetterBlockAsm12B: // emit everything from 12(SP) to the end of src as one literal
  15944 	MOVQ src_len+32(FP), CX
  15945 	SUBL 12(SP), CX // CX = bytes still to emit
  15946 	LEAQ 3(AX)(CX*1), CX // plus up to 3 header bytes
  15947 	CMPQ CX, (SP)
  15948 	JB   emit_remainder_ok_encodeSnappyBetterBlockAsm12B
  15949 	MOVQ $0x00000000, ret+48(FP) // would reach the dst limit: return 0
  15950 	RET
  15951 
  15952 emit_remainder_ok_encodeSnappyBetterBlockAsm12B:
  15953 	MOVQ src_len+32(FP), CX
  15954 	MOVL 12(SP), BX
  15955 	CMPL BX, CX
  15956 	JEQ  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B // nothing left to emit
  15957 	MOVL CX, SI
  15958 	MOVL CX, 12(SP)
  15959 	LEAQ (DX)(BX*1), CX // CX = pointer to the remaining bytes (DX is reused below)
  15960 	SUBL BX, SI // SI = literal length
  15961 	LEAL -1(SI), DX // DX = length-1, the value written into the header
  15962 	CMPL DX, $0x3c
  15963 	JB   one_byte_emit_remainder_encodeSnappyBetterBlockAsm12B
  15964 	CMPL DX, $0x00000100
  15965 	JB   two_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B
  15966 	JB   three_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B // redundant: never taken after the JB above; execution falls through to the same label
  15967 
  15968 three_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B: // tag 0xf4 followed by a 16-bit length-1
  15969 	MOVB $0xf4, (AX)
  15970 	MOVW DX, 1(AX)
  15971 	ADDQ $0x03, AX
  15972 	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B
  15973 
  15974 two_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B: // tag 0xf0 followed by an 8-bit length-1
  15975 	MOVB $0xf0, (AX)
  15976 	MOVB DL, 1(AX)
  15977 	ADDQ $0x02, AX
  15978 	CMPL DX, $0x40
  15979 	JB   memmove_emit_remainder_encodeSnappyBetterBlockAsm12B // at most 64 bytes: use the short copy
  15980 	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B
  15981 
  15982 one_byte_emit_remainder_encodeSnappyBetterBlockAsm12B: // single tag byte: (length-1) << 2
  15983 	SHLB $0x02, DL
  15984 	MOVB DL, (AX)
  15985 	ADDQ $0x01, AX
  15986 
  15987 memmove_emit_remainder_encodeSnappyBetterBlockAsm12B:
  15988 	LEAQ (AX)(SI*1), DX // DX = output pointer after the literal
  15989 	MOVL SI, BX // BX = length
  15990 
  15991 	// genMemMoveShort
  15992 	CMPQ BX, $0x03
  15993 	JB   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_1or2
  15994 	JE   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_3
  15995 	CMPQ BX, $0x08
  15996 	JB   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_4through7
  15997 	CMPQ BX, $0x10
  15998 	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_8through16
  15999 	CMPQ BX, $0x20
  16000 	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_17through32
  16001 	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64
  16002 
  16003 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_1or2: // first byte + last byte (the same byte when length is 1)
  16004 	MOVB (CX), SI
  16005 	MOVB -1(CX)(BX*1), CL
  16006 	MOVB SI, (AX)
  16007 	MOVB CL, -1(AX)(BX*1)
  16008 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
  16009 
  16010 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_3: // 2 bytes + 1 byte
  16011 	MOVW (CX), SI
  16012 	MOVB 2(CX), CL
  16013 	MOVW SI, (AX)
  16014 	MOVB CL, 2(AX)
  16015 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
  16016 
  16017 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_4through7: // first 4 + last 4 bytes (the two moves may overlap)
  16018 	MOVL (CX), SI
  16019 	MOVL -4(CX)(BX*1), CX
  16020 	MOVL SI, (AX)
  16021 	MOVL CX, -4(AX)(BX*1)
  16022 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
  16023 
  16024 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_8through16: // first 8 + last 8 bytes
  16025 	MOVQ (CX), SI
  16026 	MOVQ -8(CX)(BX*1), CX
  16027 	MOVQ SI, (AX)
  16028 	MOVQ CX, -8(AX)(BX*1)
  16029 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
  16030 
  16031 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_17through32: // first 16 + last 16 bytes
  16032 	MOVOU (CX), X0
  16033 	MOVOU -16(CX)(BX*1), X1
  16034 	MOVOU X0, (AX)
  16035 	MOVOU X1, -16(AX)(BX*1)
  16036 	JMP   memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
  16037 
  16038 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64: // first 32 + last 32 bytes
  16039 	MOVOU (CX), X0
  16040 	MOVOU 16(CX), X1
  16041 	MOVOU -32(CX)(BX*1), X2
  16042 	MOVOU -16(CX)(BX*1), X3
  16043 	MOVOU X0, (AX)
  16044 	MOVOU X1, 16(AX)
  16045 	MOVOU X2, -32(AX)(BX*1)
  16046 	MOVOU X3, -16(AX)(BX*1)
  16047 
  16048 memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B:
  16049 	MOVQ DX, AX // advance the output pointer past the literal
  16050 	JMP  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B
  16051 
  16052 memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B:
  16053 	LEAQ (AX)(SI*1), DX // DX = output pointer after the literal
  16054 	MOVL SI, BX // BX = length
  16055 
  16056 	// genMemMoveLong
  16057 	MOVOU (CX), X0 // preload the first 32 and last 32 source bytes; they are stored after the loop
  16058 	MOVOU 16(CX), X1
  16059 	MOVOU -32(CX)(BX*1), X2
  16060 	MOVOU -16(CX)(BX*1), X3
  16061 	MOVQ  BX, DI
  16062 	SHRQ  $0x05, DI // DI = length / 32
  16063 	MOVQ  AX, SI
  16064 	ANDL  $0x0000001f, SI // SI = dst misalignment within 32 bytes
  16065 	MOVQ  $0x00000040, R8
  16066 	SUBQ  SI, R8 // R8 = 64 - misalignment, so AX+R8-32 is 32-byte aligned
  16067 	DECQ  DI
  16068 	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32
  16069 	LEAQ  -32(CX)(R8*1), SI
  16070 	LEAQ  -32(AX)(R8*1), R9
  16071 
  16072 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop_back: // 32 bytes per pass: unaligned loads, aligned stores
  16073 	MOVOU (SI), X4
  16074 	MOVOU 16(SI), X5
  16075 	MOVOA X4, (R9)
  16076 	MOVOA X5, 16(R9)
  16077 	ADDQ  $0x20, R9
  16078 	ADDQ  $0x20, SI
  16079 	ADDQ  $0x20, R8
  16080 	DECQ  DI
  16081 	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop_back
  16082 
  16083 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32: // copy the 32-byte chunk at offset R8-32 while BX >= R8
  16084 	MOVOU -32(CX)(R8*1), X4
  16085 	MOVOU -16(CX)(R8*1), X5
  16086 	MOVOA X4, -32(AX)(R8*1)
  16087 	MOVOA X5, -16(AX)(R8*1)
  16088 	ADDQ  $0x20, R8
  16089 	CMPQ  BX, R8
  16090 	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32
  16091 	MOVOU X0, (AX) // write the preloaded head and tail with unaligned stores
  16092 	MOVOU X1, 16(AX)
  16093 	MOVOU X2, -32(AX)(BX*1)
  16094 	MOVOU X3, -16(AX)(BX*1)
  16095 	MOVQ  DX, AX // advance the output pointer past the literal
  16096 
  16097 emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B:
  16098 	MOVQ dst_base+0(FP), CX
  16099 	SUBQ CX, AX // AX = number of bytes written to dst
  16100 	MOVQ AX, ret+48(FP)
  16101 	RET
  16102 
  16103 // func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte) int
  16104 // Requires: BMI, SSE2
//
// Encodes src into dst and returns the number of bytes written to dst.
// Returns 0 as soon as the output pointer would reach the dst limit kept in 0(SP).
//
// Stack frame, offsets from SP:
//   0(SP)      8 bytes   dst limit pointer = dst_base + (src_len - 9 - src_len/32)
//   8(SP)      4 bytes   search limit = src_len - 8
//   12(SP)     4 bytes   nextEmit, src offset of the first byte not yet written out
//   16(SP)     4 bytes   offset of the most recent match (written here, never read in this function)
//   20(SP)     4 bytes   next search position saved by the search loop
//   24(SP)     16384 B   4096 x 4-byte entries indexed by a 12-bit hash of 6 input bytes
//   16408(SP)  4096 B    1024 x 4-byte entries indexed by a 10-bit hash of 4 input bytes
//
// Register roles in the main loop:
//   AX = dst write pointer, CX = current src offset, DX = src base pointer.
// Table entries hold src offsets, and 0 doubles as the value of an empty slot.
  16105 TEXT ·encodeSnappyBetterBlockAsm10B(SB), $20504-56
  16106 	MOVQ dst_base+0(FP), AX // AX = dst write pointer
  16107 	MOVQ $0x000000a0, CX // 160 iterations x 128 bytes = 20480 bytes, the size of both tables together
  16108 	LEAQ 24(SP), DX // DX = start of the table area
  16109 	PXOR X0, X0 // X0 = 16 zero bytes
  16110 
  16111 zero_loop_encodeSnappyBetterBlockAsm10B: // clear both hash tables, 128 bytes per iteration
  16112 	MOVOU X0, (DX)
  16113 	MOVOU X0, 16(DX)
  16114 	MOVOU X0, 32(DX)
  16115 	MOVOU X0, 48(DX)
  16116 	MOVOU X0, 64(DX)
  16117 	MOVOU X0, 80(DX)
  16118 	MOVOU X0, 96(DX)
  16119 	MOVOU X0, 112(DX)
  16120 	ADDQ  $0x80, DX
  16121 	DECQ  CX
  16122 	JNZ   zero_loop_encodeSnappyBetterBlockAsm10B
  16123 	MOVL  $0x00000000, 12(SP) // nextEmit = 0
  16124 	MOVQ  src_len+32(FP), CX
  16125 	LEAQ  -9(CX), DX // DX = src_len - 9
  16126 	LEAQ  -8(CX), BX
  16127 	MOVL  BX, 8(SP) // search limit = src_len - 8
  16128 	SHRQ  $0x05, CX // CX = src_len / 32
  16129 	SUBL  CX, DX // DX = src_len - 9 - src_len/32 (32-bit arithmetic)
  16130 	LEAQ  (AX)(DX*1), DX
  16131 	MOVQ  DX, (SP) // dst limit pointer
  16132 	MOVL  $0x00000001, CX // start searching at src offset 1
  16133 	MOVL  $0x00000000, 16(SP) // last match offset = 0
  16134 	MOVQ  src_base+24(FP), DX // DX = src base pointer
  16135 
  16136 search_loop_encodeSnappyBetterBlockAsm10B: // look for a match candidate at src offset CX
  16137 	MOVL  CX, BX
  16138 	SUBL  12(SP), BX
  16139 	SHRL  $0x05, BX
  16140 	LEAL  1(CX)(BX*1), BX // BX = CX + 1 + (CX - nextEmit)/32, so the stride grows while nothing matches
  16141 	CMPL  BX, 8(SP)
  16142 	JAE   emit_remainder_encodeSnappyBetterBlockAsm10B // next position at or past the search limit
  16143 	MOVQ  (DX)(CX*1), SI // SI = 8 bytes of src at CX
  16144 	MOVL  BX, 20(SP) // remember the next search position
  16145 	MOVQ  $0x0000cf1bbcdcbf9b, R8 // multiplier for the 6-byte hash
  16146 	MOVQ  $0x9e3779b1, BX // multiplier for the 4-byte hash
  16147 	MOVQ  SI, R9
  16148 	MOVQ  SI, R10
  16149 	SHLQ  $0x10, R9 // keep the low 6 bytes
  16150 	IMULQ R8, R9
  16151 	SHRQ  $0x34, R9 // R9 = top 12 bits, index into the table at 24(SP)
  16152 	SHLQ  $0x20, R10 // keep the low 4 bytes
  16153 	IMULQ BX, R10
  16154 	SHRQ  $0x36, R10 // R10 = top 10 bits, index into the table at 16408(SP)
  16155 	MOVL  24(SP)(R9*4), BX // BX = candidate from the 6-byte table
  16156 	MOVL  16408(SP)(R10*4), DI // DI = candidate from the 4-byte table
  16157 	MOVL  CX, 24(SP)(R9*4) // record the current position in both tables
  16158 	MOVL  CX, 16408(SP)(R10*4)
  16159 	MOVQ  (DX)(BX*1), R9 // 8 bytes at each candidate
  16160 	MOVQ  (DX)(DI*1), R10
  16161 	CMPQ  R9, SI
  16162 	JEQ   candidate_match_encodeSnappyBetterBlockAsm10B // 6-byte-table candidate matches all 8 bytes
  16163 	CMPQ  R10, SI
  16164 	JNE   no_short_found_encodeSnappyBetterBlockAsm10B
  16165 	MOVL  DI, BX // 4-byte-table candidate matches all 8 bytes, use it
  16166 	JMP   candidate_match_encodeSnappyBetterBlockAsm10B
  16167 
  16168 no_short_found_encodeSnappyBetterBlockAsm10B: // no 8-byte match, fall back to 4-byte comparisons
  16169 	CMPL R9, SI
  16170 	JEQ  candidate_match_encodeSnappyBetterBlockAsm10B
  16171 	CMPL R10, SI
  16172 	JEQ  candidateS_match_encodeSnappyBetterBlockAsm10B
  16173 	MOVL 20(SP), CX // nothing matched, advance to the saved next position
  16174 	JMP  search_loop_encodeSnappyBetterBlockAsm10B
  16175 
  16176 candidateS_match_encodeSnappyBetterBlockAsm10B: // 4-byte-table hit only: first probe the 6-byte table at CX+1
  16177 	SHRQ  $0x08, SI // SI = src bytes starting at CX+1 (7 valid bytes)
  16178 	MOVQ  SI, R9
  16179 	SHLQ  $0x10, R9
  16180 	IMULQ R8, R9
  16181 	SHRQ  $0x34, R9
  16182 	MOVL  24(SP)(R9*4), BX
  16183 	INCL  CX
  16184 	MOVL  CX, 24(SP)(R9*4) // record CX+1 in the 6-byte table
  16185 	CMPL  (DX)(BX*1), SI
  16186 	JEQ   candidate_match_encodeSnappyBetterBlockAsm10B // candidate at CX+1 matches 4 bytes, keep CX+1
  16187 	DECL  CX // otherwise go back to CX and use the 4-byte-table candidate
  16188 	MOVL  DI, BX
  16189 
  16190 candidate_match_encodeSnappyBetterBlockAsm10B: // here CX = match position, BX = candidate position
  16191 	MOVL  12(SP), SI // SI = nextEmit, lower bound for backward extension
  16192 	TESTL BX, BX
  16193 	JZ    match_extend_back_end_encodeSnappyBetterBlockAsm10B // candidate at offset 0 cannot extend backwards
  16194 
  16195 match_extend_back_loop_encodeSnappyBetterBlockAsm10B: // extend the match backwards one byte at a time
  16196 	CMPL CX, SI
  16197 	JBE  match_extend_back_end_encodeSnappyBetterBlockAsm10B
  16198 	MOVB -1(DX)(BX*1), DI
  16199 	MOVB -1(DX)(CX*1), R8
  16200 	CMPB DI, R8
  16201 	JNE  match_extend_back_end_encodeSnappyBetterBlockAsm10B
  16202 	LEAL -1(CX), CX
  16203 	DECL BX
  16204 	JZ   match_extend_back_end_encodeSnappyBetterBlockAsm10B
  16205 	JMP  match_extend_back_loop_encodeSnappyBetterBlockAsm10B
  16206 
  16207 match_extend_back_end_encodeSnappyBetterBlockAsm10B: // check dst has room for the pending literals
  16208 	MOVL CX, SI
  16209 	SUBL 12(SP), SI // SI = number of pending literal bytes
  16210 	LEAQ 3(AX)(SI*1), SI // write pointer + literal bytes + 3
  16211 	CMPQ SI, (SP)
  16212 	JB   match_dst_size_check_encodeSnappyBetterBlockAsm10B
  16213 	MOVQ $0x00000000, ret+48(FP) // would reach the dst limit: return 0
  16214 	RET
  16215 
  16216 match_dst_size_check_encodeSnappyBetterBlockAsm10B:
  16217 	MOVL CX, SI // SI = start of the match in src
  16218 	ADDL $0x04, CX // skip the 4 bytes already compared
  16219 	ADDL $0x04, BX
  16220 	MOVQ src_len+32(FP), DI
  16221 	SUBL CX, DI // DI = bytes left in src after CX
  16222 	LEAQ (DX)(CX*1), R8 // R8 = current data, R9 = candidate data
  16223 	LEAQ (DX)(BX*1), R9
  16224 
  16225 	// matchLen
  16226 	XORL R11, R11 // R11 = count of further matching bytes
  16227 	CMPL DI, $0x08
  16228 	JB   matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B
  16229 
  16230 matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm10B: // compare 8 bytes per iteration
  16231 	MOVQ  (R8)(R11*1), R10
  16232 	XORQ  (R9)(R11*1), R10 // nonzero bits mark differing bytes
  16233 	TESTQ R10, R10
  16234 	JZ    matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm10B
  16235 
  16236 #ifdef GOAMD64_v3
  16237 	TZCNTQ R10, R10
  16238 
  16239 #else
  16240 	BSFQ R10, R10
  16241 
  16242 #endif
  16243 	SARQ $0x03, R10 // bit index of the first difference divided by 8 = matching bytes in this word
  16244 	LEAL (R11)(R10*1), R11
  16245 	JMP  match_nolit_end_encodeSnappyBetterBlockAsm10B
  16246 
  16247 matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm10B: // all 8 bytes matched
  16248 	LEAL -8(DI), DI
  16249 	LEAL 8(R11), R11
  16250 	CMPL DI, $0x08
  16251 	JAE  matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm10B
  16252 	JZ   match_nolit_end_encodeSnappyBetterBlockAsm10B // never taken: ZF here means DI == 8, which the JAE above already handled
  16253 
  16254 matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B: // fewer than 8 bytes left: try 4, then 2, then 1
  16255 	CMPL DI, $0x04
  16256 	JB   matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B
  16257 	MOVL (R8)(R11*1), R10
  16258 	CMPL (R9)(R11*1), R10
  16259 	JNE  matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B
  16260 	SUBL $0x04, DI
  16261 	LEAL 4(R11), R11
  16262 
  16263 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B:
  16264 	CMPL DI, $0x02
  16265 	JB   matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B
  16266 	MOVW (R8)(R11*1), R10
  16267 	CMPW (R9)(R11*1), R10
  16268 	JNE  matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B
  16269 	SUBL $0x02, DI
  16270 	LEAL 2(R11), R11
  16271 
  16272 matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B:
  16273 	CMPL DI, $0x01
  16274 	JB   match_nolit_end_encodeSnappyBetterBlockAsm10B
  16275 	MOVB (R8)(R11*1), R10
  16276 	CMPB (R9)(R11*1), R10
  16277 	JNE  match_nolit_end_encodeSnappyBetterBlockAsm10B
  16278 	LEAL 1(R11), R11
  16279 
  16280 match_nolit_end_encodeSnappyBetterBlockAsm10B:
  16281 	MOVL CX, DI
  16282 	SUBL BX, DI // DI = match offset (distance back to the candidate)
  16283 
  16284 	// Check if repeat
  16285 	MOVL DI, 16(SP) // remember the offset
  16286 	MOVL 12(SP), BX // BX = nextEmit
  16287 	CMPL BX, SI
  16288 	JEQ  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B // no pending literals before the match
  16289 	MOVL SI, R8
  16290 	MOVL SI, 12(SP)
  16291 	LEAQ (DX)(BX*1), R9 // R9 = address of the first pending literal byte
  16292 	SUBL BX, R8 // R8 = literal length
  16293 	LEAL -1(R8), BX // BX = literal length - 1, the value stored in the header
  16294 	CMPL BX, $0x3c
  16295 	JB   one_byte_match_emit_encodeSnappyBetterBlockAsm10B // below 60: length fits in the tag byte
  16296 	CMPL BX, $0x00000100
  16297 	JB   two_bytes_match_emit_encodeSnappyBetterBlockAsm10B // below 256: one extra length byte
  16298 	JB   three_bytes_match_emit_encodeSnappyBetterBlockAsm10B // never taken (same condition as the JB above), falls through to the same label
  16299 
  16300 three_bytes_match_emit_encodeSnappyBetterBlockAsm10B: // tag 0xf4 (61<<2) followed by a 16-bit length-1
  16301 	MOVB $0xf4, (AX)
  16302 	MOVW BX, 1(AX)
  16303 	ADDQ $0x03, AX
  16304 	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm10B
  16305 
  16306 two_bytes_match_emit_encodeSnappyBetterBlockAsm10B: // tag 0xf0 (60<<2) followed by an 8-bit length-1
  16307 	MOVB $0xf0, (AX)
  16308 	MOVB BL, 1(AX)
  16309 	ADDQ $0x02, AX
  16310 	CMPL BX, $0x40
  16311 	JB   memmove_match_emit_encodeSnappyBetterBlockAsm10B // length-1 below 64: short copy path
  16312 	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm10B
  16313 
  16314 one_byte_match_emit_encodeSnappyBetterBlockAsm10B: // single tag byte = (length-1) << 2
  16315 	SHLB $0x02, BL
  16316 	MOVB BL, (AX)
  16317 	ADDQ $0x01, AX
  16318 
  16319 memmove_match_emit_encodeSnappyBetterBlockAsm10B: // copy R8 literal bytes (at most 64) from R9 to AX
  16320 	LEAQ (AX)(R8*1), BX // BX = dst pointer after the literals
  16321 
  16322 	// genMemMoveShort
  16323 	CMPQ R8, $0x08
  16324 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8
  16325 	CMPQ R8, $0x10
  16326 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8through16
  16327 	CMPQ R8, $0x20
  16328 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_17through32
  16329 	JMP  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_33through64
  16330 
  16331 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8: // length 8 or less: always moves a full 8 bytes, the excess is overwritten by later output
  16332 	MOVQ (R9), R10
  16333 	MOVQ R10, (AX)
  16334 	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B
  16335 
  16336 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8through16: // two overlapping 8-byte moves: head and tail
  16337 	MOVQ (R9), R10
  16338 	MOVQ -8(R9)(R8*1), R9
  16339 	MOVQ R10, (AX)
  16340 	MOVQ R9, -8(AX)(R8*1)
  16341 	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B
  16342 
  16343 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_17through32: // two overlapping 16-byte moves
  16344 	MOVOU (R9), X0
  16345 	MOVOU -16(R9)(R8*1), X1
  16346 	MOVOU X0, (AX)
  16347 	MOVOU X1, -16(AX)(R8*1)
  16348 	JMP   memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B
  16349 
  16350 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_33through64: // first 32 and last 32 bytes, overlapping in the middle
  16351 	MOVOU (R9), X0
  16352 	MOVOU 16(R9), X1
  16353 	MOVOU -32(R9)(R8*1), X2
  16354 	MOVOU -16(R9)(R8*1), X3
  16355 	MOVOU X0, (AX)
  16356 	MOVOU X1, 16(AX)
  16357 	MOVOU X2, -32(AX)(R8*1)
  16358 	MOVOU X3, -16(AX)(R8*1)
  16359 
  16360 memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B:
  16361 	MOVQ BX, AX // advance the dst pointer past the literals
  16362 	JMP  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B
  16363 
  16364 memmove_long_match_emit_encodeSnappyBetterBlockAsm10B: // copy R8 literal bytes (more than 64) from R9 to AX
  16365 	LEAQ (AX)(R8*1), BX // BX = dst pointer after the literals
  16366 
  16367 	// genMemMoveLong
  16368 	MOVOU (R9), X0 // keep the first 32 and last 32 source bytes in X0..X3
  16369 	MOVOU 16(R9), X1
  16370 	MOVOU -32(R9)(R8*1), X2
  16371 	MOVOU -16(R9)(R8*1), X3
  16372 	MOVQ  R8, R12
  16373 	SHRQ  $0x05, R12 // R12 = length / 32
  16374 	MOVQ  AX, R10
  16375 	ANDL  $0x0000001f, R10 // R10 = dst address modulo 32
  16376 	MOVQ  $0x00000040, R13
  16377 	SUBQ  R10, R13 // R13 = 64 - (dst mod 32), which makes AX+R13-32 a multiple of 32
  16378 	DECQ  R12
  16379 	JA    emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32 // DECQ leaves CF alone, so this tests CF from the SUBQ and ZF from the DECQ
  16380 	LEAQ  -32(R9)(R13*1), R10
  16381 	LEAQ  -32(AX)(R13*1), R14
  16382 
  16383 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_big_loop_back: // 32 bytes per iteration through pointers R10 (src) and R14 (dst)
  16384 	MOVOU (R10), X4
  16385 	MOVOU 16(R10), X5
  16386 	MOVOA X4, (R14)
  16387 	MOVOA X5, 16(R14)
  16388 	ADDQ  $0x20, R14
  16389 	ADDQ  $0x20, R10
  16390 	ADDQ  $0x20, R13
  16391 	DECQ  R12
  16392 	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_big_loop_back
  16393 
  16394 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32: // 32 bytes per iteration at offset R13-32, stores use the aligned form
  16395 	MOVOU -32(R9)(R13*1), X4
  16396 	MOVOU -16(R9)(R13*1), X5
  16397 	MOVOA X4, -32(AX)(R13*1)
  16398 	MOVOA X5, -16(AX)(R13*1)
  16399 	ADDQ  $0x20, R13
  16400 	CMPQ  R8, R13
  16401 	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32 // continue while R13 is at most the length
  16402 	MOVOU X0, (AX) // finally write the saved head and tail with unaligned stores
  16403 	MOVOU X1, 16(AX)
  16404 	MOVOU X2, -32(AX)(R8*1)
  16405 	MOVOU X3, -16(AX)(R8*1)
  16406 	MOVQ  BX, AX // advance the dst pointer past the literals
  16407 
  16408 emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B:
  16409 	ADDL R11, CX // CX = src offset just past the match
  16410 	ADDL $0x04, R11 // R11 = total match length including the first 4 bytes
  16411 	MOVL CX, 12(SP) // nextEmit = end of the match
  16412 
  16413 	// emitCopy
  16414 two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm10B: // while more than 64 bytes remain, emit 60-byte copies
  16415 	CMPL R11, $0x40
  16416 	JBE  two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm10B
  16417 	MOVB $0xee, (AX) // 0xee = (59<<2)|2, followed by the 16-bit offset
  16418 	MOVW DI, 1(AX)
  16419 	LEAL -60(R11), R11
  16420 	ADDQ $0x03, AX
  16421 	JMP  two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm10B
  16422 
  16423 two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm10B: // 64 bytes or fewer remain
  16424 	MOVL R11, BX
  16425 	SHLL $0x02, BX // BX = length * 4
  16426 	CMPL R11, $0x0c
  16427 	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B // length 12 or more needs the 3-byte form
  16428 	CMPL DI, $0x00000800
  16429 	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B // offset 2048 or more needs the 3-byte form
  16430 	LEAL -15(BX), BX // length*4 - 15 = ((length-4)<<2) | 1
  16431 	MOVB DI, 1(AX) // second byte = low 8 bits of the offset
  16432 	SHRL $0x08, DI
  16433 	SHLL $0x05, DI // high offset bits go into bits 5..7 of the tag
  16434 	ORL  DI, BX
  16435 	MOVB BL, (AX)
  16436 	ADDQ $0x02, AX
  16437 	JMP  match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm10B
  16438 
  16439 emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B: // tag byte plus 16-bit offset
  16440 	LEAL -2(BX), BX // length*4 - 2 = ((length-1)<<2) | 2
  16441 	MOVB BL, (AX)
  16442 	MOVW DI, 1(AX)
  16443 	ADDQ $0x03, AX
  16444 
  16445 match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm10B:
  16446 	CMPL CX, 8(SP)
  16447 	JAE  emit_remainder_encodeSnappyBetterBlockAsm10B // reached the search limit, flush what is left
  16448 	CMPQ AX, (SP)
  16449 	JB   match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B
  16450 	MOVQ $0x00000000, ret+48(FP) // dst limit reached: return 0
  16451 	RET
  16452 
  16453 match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B: // add positions inside the match to the tables, working inwards from both ends
  16454 	MOVQ  $0x0000cf1bbcdcbf9b, BX // 6-byte hash multiplier
  16455 	MOVQ  $0x9e3779b1, DI // 4-byte hash multiplier
  16456 	LEAQ  1(SI), SI // SI = match start + 1
  16457 	LEAQ  -2(CX), R8 // R8 = match end - 2
  16458 	MOVQ  (DX)(SI*1), R9
  16459 	MOVQ  1(DX)(SI*1), R10
  16460 	MOVQ  (DX)(R8*1), R11
  16461 	MOVQ  1(DX)(R8*1), R12
  16462 	SHLQ  $0x10, R9
  16463 	IMULQ BX, R9
  16464 	SHRQ  $0x34, R9 // 6-byte hash at SI
  16465 	SHLQ  $0x20, R10
  16466 	IMULQ DI, R10
  16467 	SHRQ  $0x36, R10 // 4-byte hash at SI+1
  16468 	SHLQ  $0x10, R11
  16469 	IMULQ BX, R11
  16470 	SHRQ  $0x34, R11 // 6-byte hash at R8
  16471 	SHLQ  $0x20, R12
  16472 	IMULQ DI, R12
  16473 	SHRQ  $0x36, R12 // 4-byte hash at R8+1
  16474 	LEAQ  1(SI), DI // DI is reused from here on as the position SI+1
  16475 	LEAQ  1(R8), R13
  16476 	MOVL  SI, 24(SP)(R9*4)
  16477 	MOVL  R8, 24(SP)(R11*4)
  16478 	MOVL  DI, 16408(SP)(R10*4)
  16479 	MOVL  R13, 16408(SP)(R12*4)
  16480 	ADDQ  $0x01, SI
  16481 	SUBQ  $0x01, R8
  16482 
  16483 index_loop_encodeSnappyBetterBlockAsm10B: // fill the 6-byte table from both ends, stepping 2 positions each side per iteration
  16484 	CMPQ  SI, R8
  16485 	JAE   search_loop_encodeSnappyBetterBlockAsm10B // the two cursors met: resume searching at CX
  16486 	MOVQ  (DX)(SI*1), DI
  16487 	MOVQ  (DX)(R8*1), R9
  16488 	SHLQ  $0x10, DI
  16489 	IMULQ BX, DI
  16490 	SHRQ  $0x34, DI
  16491 	SHLQ  $0x10, R9
  16492 	IMULQ BX, R9
  16493 	SHRQ  $0x34, R9
  16494 	MOVL  SI, 24(SP)(DI*4)
  16495 	MOVL  R8, 24(SP)(R9*4)
  16496 	ADDQ  $0x02, SI
  16497 	SUBQ  $0x02, R8
  16498 	JMP   index_loop_encodeSnappyBetterBlockAsm10B
  16499 
  16500 emit_remainder_encodeSnappyBetterBlockAsm10B: // write everything from nextEmit to the end of src as literals
  16501 	MOVQ src_len+32(FP), CX
  16502 	SUBL 12(SP), CX // CX = number of remaining bytes
  16503 	LEAQ 3(AX)(CX*1), CX // write pointer + remaining bytes + 3
  16504 	CMPQ CX, (SP)
  16505 	JB   emit_remainder_ok_encodeSnappyBetterBlockAsm10B
  16506 	MOVQ $0x00000000, ret+48(FP) // would reach the dst limit: return 0
  16507 	RET
  16508 
  16509 emit_remainder_ok_encodeSnappyBetterBlockAsm10B:
  16510 	MOVQ src_len+32(FP), CX
  16511 	MOVL 12(SP), BX // BX = nextEmit
  16512 	CMPL BX, CX
  16513 	JEQ  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B // nothing left to emit
  16514 	MOVL CX, SI
  16515 	MOVL CX, 12(SP)
  16516 	LEAQ (DX)(BX*1), CX // CX = address of the first remaining byte
  16517 	SUBL BX, SI // SI = literal length
  16518 	LEAL -1(SI), DX // DX = literal length - 1 (the src base in DX is not used after this point)
  16519 	CMPL DX, $0x3c
  16520 	JB   one_byte_emit_remainder_encodeSnappyBetterBlockAsm10B // below 60: length fits in the tag byte
  16521 	CMPL DX, $0x00000100
  16522 	JB   two_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B // below 256: one extra length byte
  16523 	JB   three_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B // never taken (same condition as the JB above), falls through to the same label
  16524 
  16525 three_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B: // tag 0xf4 (61<<2) followed by a 16-bit length-1
  16526 	MOVB $0xf4, (AX)
  16527 	MOVW DX, 1(AX)
  16528 	ADDQ $0x03, AX
  16529 	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B
  16530 
  16531 two_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B: // tag 0xf0 (60<<2) followed by an 8-bit length-1
  16532 	MOVB $0xf0, (AX)
  16533 	MOVB DL, 1(AX)
  16534 	ADDQ $0x02, AX
  16535 	CMPL DX, $0x40
  16536 	JB   memmove_emit_remainder_encodeSnappyBetterBlockAsm10B // length-1 below 64: short copy path
  16537 	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B
  16538 
  16539 one_byte_emit_remainder_encodeSnappyBetterBlockAsm10B: // single tag byte = (length-1) << 2
  16540 	SHLB $0x02, DL
  16541 	MOVB DL, (AX)
  16542 	ADDQ $0x01, AX
  16543 
  16544 memmove_emit_remainder_encodeSnappyBetterBlockAsm10B: // copy BX bytes (at most 64) from CX to AX with exact-size moves
  16545 	LEAQ (AX)(SI*1), DX // DX = dst pointer after the literals
  16546 	MOVL SI, BX
  16547 
  16548 	// genMemMoveShort
  16549 	CMPQ BX, $0x03
  16550 	JB   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_1or2
  16551 	JE   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_3
  16552 	CMPQ BX, $0x08
  16553 	JB   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_4through7
  16554 	CMPQ BX, $0x10
  16555 	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_8through16
  16556 	CMPQ BX, $0x20
  16557 	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_17through32
  16558 	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_33through64
  16559 
  16560 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_1or2: // first byte and last byte (the same byte when the length is 1)
  16561 	MOVB (CX), SI
  16562 	MOVB -1(CX)(BX*1), CL
  16563 	MOVB SI, (AX)
  16564 	MOVB CL, -1(AX)(BX*1)
  16565 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B
  16566 
  16567 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_3: // one 2-byte move plus one 1-byte move
  16568 	MOVW (CX), SI
  16569 	MOVB 2(CX), CL
  16570 	MOVW SI, (AX)
  16571 	MOVB CL, 2(AX)
  16572 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B
  16573 
  16574 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_4through7: // two overlapping 4-byte moves
  16575 	MOVL (CX), SI
  16576 	MOVL -4(CX)(BX*1), CX
  16577 	MOVL SI, (AX)
  16578 	MOVL CX, -4(AX)(BX*1)
  16579 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B
  16580 
  16581 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_8through16: // two overlapping 8-byte moves
  16582 	MOVQ (CX), SI
  16583 	MOVQ -8(CX)(BX*1), CX
  16584 	MOVQ SI, (AX)
  16585 	MOVQ CX, -8(AX)(BX*1)
  16586 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B
  16587 
  16588 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_17through32: // two overlapping 16-byte moves
  16589 	MOVOU (CX), X0
  16590 	MOVOU -16(CX)(BX*1), X1
  16591 	MOVOU X0, (AX)
  16592 	MOVOU X1, -16(AX)(BX*1)
  16593 	JMP   memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B
  16594 
  16595 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_33through64: // first 32 and last 32 bytes, overlapping in the middle
  16596 	MOVOU (CX), X0
  16597 	MOVOU 16(CX), X1
  16598 	MOVOU -32(CX)(BX*1), X2
  16599 	MOVOU -16(CX)(BX*1), X3
  16600 	MOVOU X0, (AX)
  16601 	MOVOU X1, 16(AX)
  16602 	MOVOU X2, -32(AX)(BX*1)
  16603 	MOVOU X3, -16(AX)(BX*1)
  16604 
  16605 memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B:
  16606 	MOVQ DX, AX // advance the dst pointer past the literals
  16607 	JMP  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B
  16608 
  16609 memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B: // copy BX bytes (more than 64) from CX to AX
  16610 	LEAQ (AX)(SI*1), DX // DX = dst pointer after the literals
  16611 	MOVL SI, BX
  16612 
  16613 	// genMemMoveLong
  16614 	MOVOU (CX), X0 // keep the first 32 and last 32 source bytes in X0..X3
  16615 	MOVOU 16(CX), X1
  16616 	MOVOU -32(CX)(BX*1), X2
  16617 	MOVOU -16(CX)(BX*1), X3
  16618 	MOVQ  BX, DI
  16619 	SHRQ  $0x05, DI // DI = length / 32
  16620 	MOVQ  AX, SI
  16621 	ANDL  $0x0000001f, SI // SI = dst address modulo 32
  16622 	MOVQ  $0x00000040, R8
  16623 	SUBQ  SI, R8 // R8 = 64 - (dst mod 32), which makes AX+R8-32 a multiple of 32
  16624 	DECQ  DI
  16625 	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32 // DECQ leaves CF alone, so this tests CF from the SUBQ and ZF from the DECQ
  16626 	LEAQ  -32(CX)(R8*1), SI
  16627 	LEAQ  -32(AX)(R8*1), R9
  16628 
  16629 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop_back: // 32 bytes per iteration through pointers SI (src) and R9 (dst)
  16630 	MOVOU (SI), X4
  16631 	MOVOU 16(SI), X5
  16632 	MOVOA X4, (R9)
  16633 	MOVOA X5, 16(R9)
  16634 	ADDQ  $0x20, R9
  16635 	ADDQ  $0x20, SI
  16636 	ADDQ  $0x20, R8
  16637 	DECQ  DI
  16638 	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop_back
  16639 
  16640 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32: // 32 bytes per iteration at offset R8-32, stores use the aligned form
  16641 	MOVOU -32(CX)(R8*1), X4
  16642 	MOVOU -16(CX)(R8*1), X5
  16643 	MOVOA X4, -32(AX)(R8*1)
  16644 	MOVOA X5, -16(AX)(R8*1)
  16645 	ADDQ  $0x20, R8
  16646 	CMPQ  BX, R8
  16647 	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32 // continue while R8 is at most the length
  16648 	MOVOU X0, (AX) // finally write the saved head and tail with unaligned stores
  16649 	MOVOU X1, 16(AX)
  16650 	MOVOU X2, -32(AX)(BX*1)
  16651 	MOVOU X3, -16(AX)(BX*1)
  16652 	MOVQ  DX, AX // advance the dst pointer past the literals
  16653 
  16654 emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B: // return the number of bytes written
  16655 	MOVQ dst_base+0(FP), CX
  16656 	SUBQ CX, AX // AX = write pointer - dst base
  16657 	MOVQ AX, ret+48(FP)
  16658 	RET
  16659 
  16660 // func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte) int
  16661 // Requires: BMI, SSE2
  16662 TEXT ·encodeSnappyBetterBlockAsm8B(SB), $5144-56
  16663 	MOVQ dst_base+0(FP), AX
  16664 	MOVQ $0x00000028, CX
  16665 	LEAQ 24(SP), DX
  16666 	PXOR X0, X0
  16667 
  16668 zero_loop_encodeSnappyBetterBlockAsm8B:
  16669 	MOVOU X0, (DX)
  16670 	MOVOU X0, 16(DX)
  16671 	MOVOU X0, 32(DX)
  16672 	MOVOU X0, 48(DX)
  16673 	MOVOU X0, 64(DX)
  16674 	MOVOU X0, 80(DX)
  16675 	MOVOU X0, 96(DX)
  16676 	MOVOU X0, 112(DX)
  16677 	ADDQ  $0x80, DX
  16678 	DECQ  CX
  16679 	JNZ   zero_loop_encodeSnappyBetterBlockAsm8B
  16680 	MOVL  $0x00000000, 12(SP)
  16681 	MOVQ  src_len+32(FP), CX
  16682 	LEAQ  -9(CX), DX
  16683 	LEAQ  -8(CX), BX
  16684 	MOVL  BX, 8(SP)
  16685 	SHRQ  $0x05, CX
  16686 	SUBL  CX, DX
  16687 	LEAQ  (AX)(DX*1), DX
  16688 	MOVQ  DX, (SP)
  16689 	MOVL  $0x00000001, CX
  16690 	MOVL  $0x00000000, 16(SP)
  16691 	MOVQ  src_base+24(FP), DX
  16692 
  16693 search_loop_encodeSnappyBetterBlockAsm8B:
  16694 	MOVL  CX, BX
  16695 	SUBL  12(SP), BX
  16696 	SHRL  $0x04, BX
  16697 	LEAL  1(CX)(BX*1), BX
  16698 	CMPL  BX, 8(SP)
  16699 	JAE   emit_remainder_encodeSnappyBetterBlockAsm8B
  16700 	MOVQ  (DX)(CX*1), SI
  16701 	MOVL  BX, 20(SP)
  16702 	MOVQ  $0x0000cf1bbcdcbf9b, R8
  16703 	MOVQ  $0x9e3779b1, BX
  16704 	MOVQ  SI, R9
  16705 	MOVQ  SI, R10
  16706 	SHLQ  $0x10, R9
  16707 	IMULQ R8, R9
  16708 	SHRQ  $0x36, R9
  16709 	SHLQ  $0x20, R10
  16710 	IMULQ BX, R10
  16711 	SHRQ  $0x38, R10
  16712 	MOVL  24(SP)(R9*4), BX
  16713 	MOVL  4120(SP)(R10*4), DI
  16714 	MOVL  CX, 24(SP)(R9*4)
  16715 	MOVL  CX, 4120(SP)(R10*4)
  16716 	MOVQ  (DX)(BX*1), R9
  16717 	MOVQ  (DX)(DI*1), R10
  16718 	CMPQ  R9, SI
  16719 	JEQ   candidate_match_encodeSnappyBetterBlockAsm8B
  16720 	CMPQ  R10, SI
  16721 	JNE   no_short_found_encodeSnappyBetterBlockAsm8B
  16722 	MOVL  DI, BX
  16723 	JMP   candidate_match_encodeSnappyBetterBlockAsm8B
  16724 
  16725 no_short_found_encodeSnappyBetterBlockAsm8B:
  16726 	CMPL R9, SI
  16727 	JEQ  candidate_match_encodeSnappyBetterBlockAsm8B
  16728 	CMPL R10, SI
  16729 	JEQ  candidateS_match_encodeSnappyBetterBlockAsm8B
  16730 	MOVL 20(SP), CX
  16731 	JMP  search_loop_encodeSnappyBetterBlockAsm8B
  16732 
  16733 candidateS_match_encodeSnappyBetterBlockAsm8B:
  16734 	SHRQ  $0x08, SI
  16735 	MOVQ  SI, R9
  16736 	SHLQ  $0x10, R9
  16737 	IMULQ R8, R9
  16738 	SHRQ  $0x36, R9
  16739 	MOVL  24(SP)(R9*4), BX
  16740 	INCL  CX
  16741 	MOVL  CX, 24(SP)(R9*4)
  16742 	CMPL  (DX)(BX*1), SI
  16743 	JEQ   candidate_match_encodeSnappyBetterBlockAsm8B
  16744 	DECL  CX
  16745 	MOVL  DI, BX
  16746 
  16747 candidate_match_encodeSnappyBetterBlockAsm8B:
  16748 	MOVL  12(SP), SI
  16749 	TESTL BX, BX
  16750 	JZ    match_extend_back_end_encodeSnappyBetterBlockAsm8B
  16751 
  16752 match_extend_back_loop_encodeSnappyBetterBlockAsm8B:
  16753 	CMPL CX, SI
  16754 	JBE  match_extend_back_end_encodeSnappyBetterBlockAsm8B
  16755 	MOVB -1(DX)(BX*1), DI
  16756 	MOVB -1(DX)(CX*1), R8
  16757 	CMPB DI, R8
  16758 	JNE  match_extend_back_end_encodeSnappyBetterBlockAsm8B
  16759 	LEAL -1(CX), CX
  16760 	DECL BX
  16761 	JZ   match_extend_back_end_encodeSnappyBetterBlockAsm8B
  16762 	JMP  match_extend_back_loop_encodeSnappyBetterBlockAsm8B
  16763 
  16764 match_extend_back_end_encodeSnappyBetterBlockAsm8B:
  16765 	MOVL CX, SI
  16766 	SUBL 12(SP), SI
  16767 	LEAQ 3(AX)(SI*1), SI
  16768 	CMPQ SI, (SP)
  16769 	JB   match_dst_size_check_encodeSnappyBetterBlockAsm8B
  16770 	MOVQ $0x00000000, ret+48(FP)
  16771 	RET
  16772 
  16773 match_dst_size_check_encodeSnappyBetterBlockAsm8B:
  16774 	MOVL CX, SI
  16775 	ADDL $0x04, CX
  16776 	ADDL $0x04, BX
  16777 	MOVQ src_len+32(FP), DI
  16778 	SUBL CX, DI
  16779 	LEAQ (DX)(CX*1), R8
  16780 	LEAQ (DX)(BX*1), R9
  16781 
  16782 	// matchLen
  16783 	XORL R11, R11
  16784 	CMPL DI, $0x08
  16785 	JB   matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B
  16786 
  16787 matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm8B:
  16788 	MOVQ  (R8)(R11*1), R10
  16789 	XORQ  (R9)(R11*1), R10
  16790 	TESTQ R10, R10
  16791 	JZ    matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm8B
  16792 
  16793 #ifdef GOAMD64_v3
  16794 	TZCNTQ R10, R10
  16795 
  16796 #else
  16797 	BSFQ R10, R10
  16798 
  16799 #endif
  16800 	SARQ $0x03, R10
  16801 	LEAL (R11)(R10*1), R11
  16802 	JMP  match_nolit_end_encodeSnappyBetterBlockAsm8B
  16803 
  16804 matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm8B:
  16805 	LEAL -8(DI), DI
  16806 	LEAL 8(R11), R11
  16807 	CMPL DI, $0x08
  16808 	JAE  matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm8B
  16809 	JZ   match_nolit_end_encodeSnappyBetterBlockAsm8B
  16810 
  16811 matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B:
  16812 	CMPL DI, $0x04
  16813 	JB   matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B
  16814 	MOVL (R8)(R11*1), R10
  16815 	CMPL (R9)(R11*1), R10
  16816 	JNE  matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B
  16817 	SUBL $0x04, DI
  16818 	LEAL 4(R11), R11
  16819 
  16820 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B:
  16821 	CMPL DI, $0x02
  16822 	JB   matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B
  16823 	MOVW (R8)(R11*1), R10
  16824 	CMPW (R9)(R11*1), R10
  16825 	JNE  matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B
  16826 	SUBL $0x02, DI
  16827 	LEAL 2(R11), R11
  16828 
  16829 matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B:
  16830 	CMPL DI, $0x01
  16831 	JB   match_nolit_end_encodeSnappyBetterBlockAsm8B
  16832 	MOVB (R8)(R11*1), R10
  16833 	CMPB (R9)(R11*1), R10
  16834 	JNE  match_nolit_end_encodeSnappyBetterBlockAsm8B
  16835 	LEAL 1(R11), R11
  16836 
  16837 match_nolit_end_encodeSnappyBetterBlockAsm8B:
  16838 	MOVL CX, DI
  16839 	SUBL BX, DI
  16840 
  16841 	// Check if repeat
  16842 	MOVL DI, 16(SP)
  16843 	MOVL 12(SP), BX
  16844 	CMPL BX, SI
  16845 	JEQ  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B
  16846 	MOVL SI, R8
  16847 	MOVL SI, 12(SP)
  16848 	LEAQ (DX)(BX*1), R9
  16849 	SUBL BX, R8
  16850 	LEAL -1(R8), BX
  16851 	CMPL BX, $0x3c
  16852 	JB   one_byte_match_emit_encodeSnappyBetterBlockAsm8B
  16853 	CMPL BX, $0x00000100
  16854 	JB   two_bytes_match_emit_encodeSnappyBetterBlockAsm8B
  16855 	JB   three_bytes_match_emit_encodeSnappyBetterBlockAsm8B
  16856 
  16857 three_bytes_match_emit_encodeSnappyBetterBlockAsm8B:
  16858 	MOVB $0xf4, (AX)
  16859 	MOVW BX, 1(AX)
  16860 	ADDQ $0x03, AX
  16861 	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm8B
  16862 
  16863 two_bytes_match_emit_encodeSnappyBetterBlockAsm8B:
  16864 	MOVB $0xf0, (AX)
  16865 	MOVB BL, 1(AX)
  16866 	ADDQ $0x02, AX
  16867 	CMPL BX, $0x40
  16868 	JB   memmove_match_emit_encodeSnappyBetterBlockAsm8B
  16869 	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm8B
  16870 
  16871 one_byte_match_emit_encodeSnappyBetterBlockAsm8B:
  16872 	SHLB $0x02, BL
  16873 	MOVB BL, (AX)
  16874 	ADDQ $0x01, AX
  16875 
  16876 memmove_match_emit_encodeSnappyBetterBlockAsm8B:
  16877 	LEAQ (AX)(R8*1), BX
  16878 
  16879 	// genMemMoveShort
  16880 	CMPQ R8, $0x08
  16881 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8
  16882 	CMPQ R8, $0x10
  16883 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8through16
  16884 	CMPQ R8, $0x20
  16885 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_17through32
  16886 	JMP  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_33through64
  16887 
  16888 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8:
  16889 	MOVQ (R9), R10
  16890 	MOVQ R10, (AX)
  16891 	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B
  16892 
  16893 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8through16:
  16894 	MOVQ (R9), R10
  16895 	MOVQ -8(R9)(R8*1), R9
  16896 	MOVQ R10, (AX)
  16897 	MOVQ R9, -8(AX)(R8*1)
  16898 	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B
  16899 
  16900 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_17through32:
  16901 	MOVOU (R9), X0
  16902 	MOVOU -16(R9)(R8*1), X1
  16903 	MOVOU X0, (AX)
  16904 	MOVOU X1, -16(AX)(R8*1)
  16905 	JMP   memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B
  16906 
  16907 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_33through64:
  16908 	MOVOU (R9), X0
  16909 	MOVOU 16(R9), X1
  16910 	MOVOU -32(R9)(R8*1), X2
  16911 	MOVOU -16(R9)(R8*1), X3
  16912 	MOVOU X0, (AX)
  16913 	MOVOU X1, 16(AX)
  16914 	MOVOU X2, -32(AX)(R8*1)
  16915 	MOVOU X3, -16(AX)(R8*1)
  16916 
  16917 memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B:
  16918 	MOVQ BX, AX
  16919 	JMP  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B
  16920 
  16921 memmove_long_match_emit_encodeSnappyBetterBlockAsm8B:
  16922 	LEAQ (AX)(R8*1), BX
  16923 
  16924 	// genMemMoveLong
  16925 	MOVOU (R9), X0
  16926 	MOVOU 16(R9), X1
  16927 	MOVOU -32(R9)(R8*1), X2
  16928 	MOVOU -16(R9)(R8*1), X3
  16929 	MOVQ  R8, R12
  16930 	SHRQ  $0x05, R12
  16931 	MOVQ  AX, R10
  16932 	ANDL  $0x0000001f, R10
  16933 	MOVQ  $0x00000040, R13
  16934 	SUBQ  R10, R13
  16935 	DECQ  R12
  16936 	JA    emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32
  16937 	LEAQ  -32(R9)(R13*1), R10
  16938 	LEAQ  -32(AX)(R13*1), R14
  16939 
  16940 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_big_loop_back:
  16941 	MOVOU (R10), X4
  16942 	MOVOU 16(R10), X5
  16943 	MOVOA X4, (R14)
  16944 	MOVOA X5, 16(R14)
  16945 	ADDQ  $0x20, R14
  16946 	ADDQ  $0x20, R10
  16947 	ADDQ  $0x20, R13
  16948 	DECQ  R12
  16949 	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_big_loop_back
  16950 
  16951 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32:
  16952 	MOVOU -32(R9)(R13*1), X4
  16953 	MOVOU -16(R9)(R13*1), X5
  16954 	MOVOA X4, -32(AX)(R13*1)
  16955 	MOVOA X5, -16(AX)(R13*1)
  16956 	ADDQ  $0x20, R13
  16957 	CMPQ  R8, R13
  16958 	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32
  16959 	MOVOU X0, (AX)
  16960 	MOVOU X1, 16(AX)
  16961 	MOVOU X2, -32(AX)(R8*1)
  16962 	MOVOU X3, -16(AX)(R8*1)
  16963 	MOVQ  BX, AX
  16964 
  16965 emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B:
  16966 	ADDL R11, CX
  16967 	ADDL $0x04, R11
  16968 	MOVL CX, 12(SP)
  16969 
  16970 	// emitCopy
  16971 two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm8B:
  16972 	CMPL R11, $0x40
  16973 	JBE  two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm8B
  16974 	MOVB $0xee, (AX)
  16975 	MOVW DI, 1(AX)
  16976 	LEAL -60(R11), R11
  16977 	ADDQ $0x03, AX
  16978 	JMP  two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm8B
  16979 
  16980 two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm8B:
  16981 	MOVL R11, BX
  16982 	SHLL $0x02, BX
  16983 	CMPL R11, $0x0c
  16984 	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm8B
  16985 	LEAL -15(BX), BX
  16986 	MOVB DI, 1(AX)
  16987 	SHRL $0x08, DI
  16988 	SHLL $0x05, DI
  16989 	ORL  DI, BX
  16990 	MOVB BL, (AX)
  16991 	ADDQ $0x02, AX
  16992 	JMP  match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm8B
  16993 
  16994 emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm8B:
  16995 	LEAL -2(BX), BX
  16996 	MOVB BL, (AX)
  16997 	MOVW DI, 1(AX)
  16998 	ADDQ $0x03, AX
  16999 
  17000 match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm8B:
  17001 	CMPL CX, 8(SP)
  17002 	JAE  emit_remainder_encodeSnappyBetterBlockAsm8B
  17003 	CMPQ AX, (SP)
  17004 	JB   match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B
  17005 	MOVQ $0x00000000, ret+48(FP)
  17006 	RET
  17007 
  17008 match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B:
  17009 	MOVQ  $0x0000cf1bbcdcbf9b, BX
  17010 	MOVQ  $0x9e3779b1, DI
  17011 	LEAQ  1(SI), SI
  17012 	LEAQ  -2(CX), R8
  17013 	MOVQ  (DX)(SI*1), R9
  17014 	MOVQ  1(DX)(SI*1), R10
  17015 	MOVQ  (DX)(R8*1), R11
  17016 	MOVQ  1(DX)(R8*1), R12
  17017 	SHLQ  $0x10, R9
  17018 	IMULQ BX, R9
  17019 	SHRQ  $0x36, R9
  17020 	SHLQ  $0x20, R10
  17021 	IMULQ DI, R10
  17022 	SHRQ  $0x38, R10
  17023 	SHLQ  $0x10, R11
  17024 	IMULQ BX, R11
  17025 	SHRQ  $0x36, R11
  17026 	SHLQ  $0x20, R12
  17027 	IMULQ DI, R12
  17028 	SHRQ  $0x38, R12
  17029 	LEAQ  1(SI), DI
  17030 	LEAQ  1(R8), R13
  17031 	MOVL  SI, 24(SP)(R9*4)
  17032 	MOVL  R8, 24(SP)(R11*4)
  17033 	MOVL  DI, 4120(SP)(R10*4)
  17034 	MOVL  R13, 4120(SP)(R12*4)
  17035 	ADDQ  $0x01, SI
  17036 	SUBQ  $0x01, R8
  17037 
  17038 index_loop_encodeSnappyBetterBlockAsm8B:
  17039 	CMPQ  SI, R8
  17040 	JAE   search_loop_encodeSnappyBetterBlockAsm8B
  17041 	MOVQ  (DX)(SI*1), DI
  17042 	MOVQ  (DX)(R8*1), R9
  17043 	SHLQ  $0x10, DI
  17044 	IMULQ BX, DI
  17045 	SHRQ  $0x36, DI
  17046 	SHLQ  $0x10, R9
  17047 	IMULQ BX, R9
  17048 	SHRQ  $0x36, R9
  17049 	MOVL  SI, 24(SP)(DI*4)
  17050 	MOVL  R8, 24(SP)(R9*4)
  17051 	ADDQ  $0x02, SI
  17052 	SUBQ  $0x02, R8
  17053 	JMP   index_loop_encodeSnappyBetterBlockAsm8B
  17054 
  17055 emit_remainder_encodeSnappyBetterBlockAsm8B:
  17056 	MOVQ src_len+32(FP), CX
  17057 	SUBL 12(SP), CX
  17058 	LEAQ 3(AX)(CX*1), CX
  17059 	CMPQ CX, (SP)
  17060 	JB   emit_remainder_ok_encodeSnappyBetterBlockAsm8B
  17061 	MOVQ $0x00000000, ret+48(FP)
  17062 	RET
  17063 
  17064 emit_remainder_ok_encodeSnappyBetterBlockAsm8B:
  17065 	MOVQ src_len+32(FP), CX
  17066 	MOVL 12(SP), BX
  17067 	CMPL BX, CX
  17068 	JEQ  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B
  17069 	MOVL CX, SI
  17070 	MOVL CX, 12(SP)
  17071 	LEAQ (DX)(BX*1), CX
  17072 	SUBL BX, SI
  17073 	LEAL -1(SI), DX
  17074 	CMPL DX, $0x3c
  17075 	JB   one_byte_emit_remainder_encodeSnappyBetterBlockAsm8B
  17076 	CMPL DX, $0x00000100
  17077 	JB   two_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B
  17078 	JB   three_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B
  17079 
  17080 three_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B:
  17081 	MOVB $0xf4, (AX)
  17082 	MOVW DX, 1(AX)
  17083 	ADDQ $0x03, AX
  17084 	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B
  17085 
  17086 two_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B:
  17087 	MOVB $0xf0, (AX)
  17088 	MOVB DL, 1(AX)
  17089 	ADDQ $0x02, AX
  17090 	CMPL DX, $0x40
  17091 	JB   memmove_emit_remainder_encodeSnappyBetterBlockAsm8B
  17092 	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B
  17093 
  17094 one_byte_emit_remainder_encodeSnappyBetterBlockAsm8B:
  17095 	SHLB $0x02, DL
  17096 	MOVB DL, (AX)
  17097 	ADDQ $0x01, AX
  17098 
  17099 memmove_emit_remainder_encodeSnappyBetterBlockAsm8B:
  17100 	LEAQ (AX)(SI*1), DX
  17101 	MOVL SI, BX
  17102 
  17103 	// genMemMoveShort
  17104 	CMPQ BX, $0x03
  17105 	JB   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_1or2
  17106 	JE   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_3
  17107 	CMPQ BX, $0x08
  17108 	JB   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_4through7
  17109 	CMPQ BX, $0x10
  17110 	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_8through16
  17111 	CMPQ BX, $0x20
  17112 	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_17through32
  17113 	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_33through64
  17114 
  17115 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_1or2:
  17116 	MOVB (CX), SI
  17117 	MOVB -1(CX)(BX*1), CL
  17118 	MOVB SI, (AX)
  17119 	MOVB CL, -1(AX)(BX*1)
  17120 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B
  17121 
  17122 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_3:
  17123 	MOVW (CX), SI
  17124 	MOVB 2(CX), CL
  17125 	MOVW SI, (AX)
  17126 	MOVB CL, 2(AX)
  17127 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B
  17128 
  17129 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_4through7:
  17130 	MOVL (CX), SI
  17131 	MOVL -4(CX)(BX*1), CX
  17132 	MOVL SI, (AX)
  17133 	MOVL CX, -4(AX)(BX*1)
  17134 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B
  17135 
  17136 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_8through16:
  17137 	MOVQ (CX), SI
  17138 	MOVQ -8(CX)(BX*1), CX
  17139 	MOVQ SI, (AX)
  17140 	MOVQ CX, -8(AX)(BX*1)
  17141 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B
  17142 
  17143 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_17through32:
  17144 	MOVOU (CX), X0
  17145 	MOVOU -16(CX)(BX*1), X1
  17146 	MOVOU X0, (AX)
  17147 	MOVOU X1, -16(AX)(BX*1)
  17148 	JMP   memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B
  17149 
  17150 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_33through64:
  17151 	MOVOU (CX), X0
  17152 	MOVOU 16(CX), X1
  17153 	MOVOU -32(CX)(BX*1), X2
  17154 	MOVOU -16(CX)(BX*1), X3
  17155 	MOVOU X0, (AX)
  17156 	MOVOU X1, 16(AX)
  17157 	MOVOU X2, -32(AX)(BX*1)
  17158 	MOVOU X3, -16(AX)(BX*1)
  17159 
  17160 memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B:
  17161 	MOVQ DX, AX
  17162 	JMP  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B
  17163 
  17164 memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B:
  17165 	LEAQ (AX)(SI*1), DX
  17166 	MOVL SI, BX
  17167 
  17168 	// genMemMoveLong
  17169 	MOVOU (CX), X0
  17170 	MOVOU 16(CX), X1
  17171 	MOVOU -32(CX)(BX*1), X2
  17172 	MOVOU -16(CX)(BX*1), X3
  17173 	MOVQ  BX, DI
  17174 	SHRQ  $0x05, DI
  17175 	MOVQ  AX, SI
  17176 	ANDL  $0x0000001f, SI
  17177 	MOVQ  $0x00000040, R8
  17178 	SUBQ  SI, R8
  17179 	DECQ  DI
  17180 	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32
  17181 	LEAQ  -32(CX)(R8*1), SI
  17182 	LEAQ  -32(AX)(R8*1), R9
  17183 
  17184 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_back:
  17185 	MOVOU (SI), X4
  17186 	MOVOU 16(SI), X5
  17187 	MOVOA X4, (R9)
  17188 	MOVOA X5, 16(R9)
  17189 	ADDQ  $0x20, R9
  17190 	ADDQ  $0x20, SI
  17191 	ADDQ  $0x20, R8
  17192 	DECQ  DI
  17193 	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_back
  17194 
  17195 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32:
  17196 	MOVOU -32(CX)(R8*1), X4
  17197 	MOVOU -16(CX)(R8*1), X5
  17198 	MOVOA X4, -32(AX)(R8*1)
  17199 	MOVOA X5, -16(AX)(R8*1)
  17200 	ADDQ  $0x20, R8
  17201 	CMPQ  BX, R8
  17202 	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32
  17203 	MOVOU X0, (AX)
  17204 	MOVOU X1, 16(AX)
  17205 	MOVOU X2, -32(AX)(BX*1)
  17206 	MOVOU X3, -16(AX)(BX*1)
  17207 	MOVQ  DX, AX
  17208 
  17209 emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B:
  17210 	MOVQ dst_base+0(FP), CX
  17211 	SUBQ CX, AX
  17212 	MOVQ AX, ret+48(FP)
  17213 	RET
  17214 
  17215 // func calcBlockSize(src []byte) int
  17216 // Requires: BMI, SSE2
        //
        // Walks src with a hash-table match search and accumulates in AX the number
        // of bytes counted for literal runs and copies. No output buffer is written:
        // the labels mirror emit/memmove paths, but here they only add to AX.
        // Returns the count in ret, or stores 0 in ret when one of the checkpoints
        // finds the count at or above the limit kept at (SP).
        //
        // Registers (after setup):
        //   AX = running byte count, CX = current position in src, DX = src base.
        // Frame:
        //   (SP)   = size limit: src_len - 9 - (src_len >> 5)
        //   8(SP)  = search limit: src_len - 8
        //   12(SP) = start of the literal run that has not been counted yet
        //   16(SP) = most recent match offset (starts at 1)
        //   20(SP) = next search position to fall back to when nothing matches
        //   24(SP) = table of 8192 32-bit positions (32768 bytes), indexed by hash
  17217 TEXT ·calcBlockSize(SB), $32792-32
  17218 	XORQ AX, AX // AX = 0: running byte count
  17219 	MOVQ $0x00000100, CX // 256 iterations x 128 bytes = 32768 bytes to clear
  17220 	LEAQ 24(SP), DX // DX = start of the position table
  17221 	PXOR X0, X0 // X0 = 16 zero bytes
  17222 
  17223 zero_loop_calcBlockSize:
  17224 	MOVOU X0, (DX)
  17225 	MOVOU X0, 16(DX)
  17226 	MOVOU X0, 32(DX)
  17227 	MOVOU X0, 48(DX)
  17228 	MOVOU X0, 64(DX)
  17229 	MOVOU X0, 80(DX)
  17230 	MOVOU X0, 96(DX)
  17231 	MOVOU X0, 112(DX)
  17232 	ADDQ  $0x80, DX
  17233 	DECQ  CX
  17234 	JNZ   zero_loop_calcBlockSize
  17235 	MOVL  $0x00000000, 12(SP) // pending literal run starts at position 0
  17236 	MOVQ  src_len+8(FP), CX
  17237 	LEAQ  -9(CX), DX
  17238 	LEAQ  -8(CX), BX
  17239 	MOVL  BX, 8(SP) // search limit = src_len - 8
  17240 	SHRQ  $0x05, CX
  17241 	SUBL  CX, DX // DX = src_len - 9 - (src_len >> 5)
  17242 	LEAQ  (AX)(DX*1), DX // AX is still 0 here, so the value of DX is unchanged
  17243 	MOVQ  DX, (SP) // size limit
  17244 	MOVL  $0x00000001, CX // searching starts at position 1
  17245 	MOVL  CX, 16(SP) // initial match offset = 1
  17246 	MOVQ  src_base+0(FP), DX // DX = src base from here on
  17247 
  17248 search_loop_calcBlockSize:
  17249 	MOVL  CX, BX
  17250 	SUBL  12(SP), BX
  17251 	SHRL  $0x05, BX
  17252 	LEAL  4(CX)(BX*1), BX // BX = CX + 4 + ((CX - literal start) >> 5): step grows with the unmatched run
  17253 	CMPL  BX, 8(SP)
  17254 	JAE   emit_remainder_calcBlockSize // next position would reach the search limit
  17255 	MOVQ  (DX)(CX*1), SI // SI = 8 bytes at src[CX]
  17256 	MOVL  BX, 20(SP) // remember the next search position
  17257 	MOVQ  $0x0000cf1bbcdcbf9b, R8 // hash multiplier
  17258 	MOVQ  SI, R9
  17259 	MOVQ  SI, R10
  17260 	SHRQ  $0x08, R10 // R10 = bytes starting at CX+1
  17261 	SHLQ  $0x10, R9 // discard the top 2 bytes so 6 bytes feed the hash
  17262 	IMULQ R8, R9
  17263 	SHRQ  $0x33, R9 // keep the top 13 bits: table index for CX
  17264 	SHLQ  $0x10, R10
  17265 	IMULQ R8, R10
  17266 	SHRQ  $0x33, R10 // table index for CX+1
  17267 	MOVL  24(SP)(R9*4), BX // BX = candidate position for CX
  17268 	MOVL  24(SP)(R10*4), DI // DI = candidate position for CX+1
  17269 	MOVL  CX, 24(SP)(R9*4) // record CX in the table
  17270 	LEAL  1(CX), R9
  17271 	MOVL  R9, 24(SP)(R10*4) // record CX+1 in the table
  17272 	MOVQ  SI, R9
  17273 	SHRQ  $0x10, R9 // bytes starting at CX+2
  17274 	SHLQ  $0x10, R9
  17275 	IMULQ R8, R9
  17276 	SHRQ  $0x33, R9 // R9 = table index for CX+2 (used in no_repeat_found)
  17277 	MOVL  CX, R8
  17278 	SUBL  16(SP), R8 // R8 = CX - last match offset
  17279 	MOVL  1(DX)(R8*1), R10 // 4 bytes at src[CX + 1 - offset]
  17280 	MOVQ  SI, R8
  17281 	SHRQ  $0x08, R8 // low 32 bits = 4 bytes at src[CX+1]
  17282 	CMPL  R8, R10
  17283 	JNE   no_repeat_found_calcBlockSize
  17284 	LEAL  1(CX), SI // SI = start of the repeat match (CX+1)
  17285 	MOVL  12(SP), BX // BX = start of the pending literal run
  17286 	MOVL  SI, DI
  17287 	SUBL  16(SP), DI // DI = position the repeat refers back to
  17288 	JZ    repeat_extend_back_end_calcBlockSize // reference is at position 0: cannot extend backwards
  17289 
  17290 repeat_extend_back_loop_calcBlockSize:
  17291 	CMPL SI, BX
  17292 	JBE  repeat_extend_back_end_calcBlockSize // do not extend into bytes before the literal start
  17293 	MOVB -1(DX)(DI*1), R8
  17294 	MOVB -1(DX)(SI*1), R9
  17295 	CMPB R8, R9
  17296 	JNE  repeat_extend_back_end_calcBlockSize // preceding bytes differ
  17297 	LEAL -1(SI), SI
  17298 	DECL DI
  17299 	JNZ  repeat_extend_back_loop_calcBlockSize // stop once the reference reaches position 0
  17300 
  17301 repeat_extend_back_end_calcBlockSize:
  17302 	MOVL 12(SP), BX
  17303 	CMPL BX, SI
  17304 	JEQ  emit_literal_done_repeat_emit_calcBlockSize // no pending literals
  17305 	MOVL SI, DI
  17306 	MOVL SI, 12(SP) // literal run is now accounted for up to SI
  17307 	LEAQ (DX)(BX*1), R8 // address of the literals; R8 is overwritten before it is read again
  17308 	SUBL BX, DI // DI = literal length
  17309 	LEAL -1(DI), BX // BX = length - 1, selects the header size below
  17310 	CMPL BX, $0x3c
  17311 	JB   one_byte_repeat_emit_calcBlockSize
  17312 	CMPL BX, $0x00000100
  17313 	JB   two_bytes_repeat_emit_calcBlockSize
  17314 	CMPL BX, $0x00010000
  17315 	JB   three_bytes_repeat_emit_calcBlockSize
  17316 	CMPL BX, $0x01000000
  17317 	JB   four_bytes_repeat_emit_calcBlockSize
  17318 	ADDQ $0x05, AX // length-1 >= 1<<24: 5 header bytes
  17319 	JMP  memmove_long_repeat_emit_calcBlockSize
  17320 
  17321 four_bytes_repeat_emit_calcBlockSize:
  17322 	ADDQ $0x04, AX
  17323 	JMP  memmove_long_repeat_emit_calcBlockSize
  17324 
  17325 three_bytes_repeat_emit_calcBlockSize:
  17326 	ADDQ $0x03, AX
  17327 	JMP  memmove_long_repeat_emit_calcBlockSize
  17328 
  17329 two_bytes_repeat_emit_calcBlockSize:
  17330 	ADDQ $0x02, AX
  17331 	CMPL BX, $0x40
  17332 	JB   memmove_repeat_emit_calcBlockSize // both targets add DI to AX; the split changes nothing here
  17333 	JMP  memmove_long_repeat_emit_calcBlockSize
  17334 
  17335 one_byte_repeat_emit_calcBlockSize:
  17336 	ADDQ $0x01, AX
  17337 
  17338 memmove_repeat_emit_calcBlockSize:
  17339 	LEAQ (AX)(DI*1), AX // count the literal bytes themselves
  17340 	JMP  emit_literal_done_repeat_emit_calcBlockSize
  17341 
  17342 memmove_long_repeat_emit_calcBlockSize:
  17343 	LEAQ (AX)(DI*1), AX // count the literal bytes themselves
  17344 
  17345 emit_literal_done_repeat_emit_calcBlockSize:
  17346 	ADDL $0x05, CX // skip to CX+1 plus the 4 bytes already compared equal
  17347 	MOVL CX, BX
  17348 	SUBL 16(SP), BX // BX = CX - match offset
  17349 	MOVQ src_len+8(FP), DI
  17350 	SUBL CX, DI // DI = bytes left in src from CX
  17351 	LEAQ (DX)(CX*1), R8 // R8 = &src[CX]
  17352 	LEAQ (DX)(BX*1), BX // BX = &src[CX - offset]
  17353 
  17354 	// matchLen
  17355 	XORL R10, R10 // R10 = number of matching bytes found so far
  17356 	CMPL DI, $0x08
  17357 	JB   matchlen_match4_repeat_extend_calcBlockSize
  17358 
  17359 matchlen_loopback_repeat_extend_calcBlockSize:
  17360 	MOVQ  (R8)(R10*1), R9
  17361 	XORQ  (BX)(R10*1), R9 // non-zero bits mark differing bytes
  17362 	TESTQ R9, R9
  17363 	JZ    matchlen_loop_repeat_extend_calcBlockSize // all 8 bytes equal
  17364 
  17365 #ifdef GOAMD64_v3
  17366 	TZCNTQ R9, R9 // index of the lowest differing bit
  17367 
  17368 #else
  17369 	BSFQ R9, R9 // index of the lowest differing bit
  17370 
  17371 #endif
  17372 	SARQ $0x03, R9 // bit index -> byte index of the first mismatch
  17373 	LEAL (R10)(R9*1), R10
  17374 	JMP  repeat_extend_forward_end_calcBlockSize
  17375 
  17376 matchlen_loop_repeat_extend_calcBlockSize:
  17377 	LEAL -8(DI), DI
  17378 	LEAL 8(R10), R10
  17379 	CMPL DI, $0x08
  17380 	JAE  matchlen_loopback_repeat_extend_calcBlockSize
  17381 	JZ   repeat_extend_forward_end_calcBlockSize // NOTE(review): flags still come from the CMPL above; with JAE not taken DI != 8, so this looks unreachable
  17382 
  17383 matchlen_match4_repeat_extend_calcBlockSize:
  17384 	CMPL DI, $0x04
  17385 	JB   matchlen_match2_repeat_extend_calcBlockSize
  17386 	MOVL (R8)(R10*1), R9
  17387 	CMPL (BX)(R10*1), R9
  17388 	JNE  matchlen_match2_repeat_extend_calcBlockSize
  17389 	SUBL $0x04, DI
  17390 	LEAL 4(R10), R10
  17391 
  17392 matchlen_match2_repeat_extend_calcBlockSize:
  17393 	CMPL DI, $0x02
  17394 	JB   matchlen_match1_repeat_extend_calcBlockSize
  17395 	MOVW (R8)(R10*1), R9
  17396 	CMPW (BX)(R10*1), R9
  17397 	JNE  matchlen_match1_repeat_extend_calcBlockSize
  17398 	SUBL $0x02, DI
  17399 	LEAL 2(R10), R10
  17400 
  17401 matchlen_match1_repeat_extend_calcBlockSize:
  17402 	CMPL DI, $0x01
  17403 	JB   repeat_extend_forward_end_calcBlockSize
  17404 	MOVB (R8)(R10*1), R9
  17405 	CMPB (BX)(R10*1), R9
  17406 	JNE  repeat_extend_forward_end_calcBlockSize
  17407 	LEAL 1(R10), R10
  17408 
  17409 repeat_extend_forward_end_calcBlockSize:
  17410 	ADDL R10, CX // CX = first position after the match
  17411 	MOVL CX, BX
  17412 	SUBL SI, BX // BX = match length (SI is the start after backward extension)
  17413 	MOVL 16(SP), SI // SI = match offset
  17414 
  17415 	// emitCopy
  17416 	CMPL SI, $0x00010000
  17417 	JB   two_byte_offset_repeat_as_copy_calcBlockSize // offset fits in 16 bits
  17418 
  17419 four_bytes_loop_back_repeat_as_copy_calcBlockSize:
  17420 	CMPL BX, $0x40
  17421 	JBE  four_bytes_remain_repeat_as_copy_calcBlockSize
  17422 	LEAL -64(BX), BX // each 5-byte copy covers 64 bytes of the match
  17423 	ADDQ $0x05, AX
  17424 	CMPL BX, $0x04
  17425 	JB   four_bytes_remain_repeat_as_copy_calcBlockSize
  17426 	JMP  four_bytes_loop_back_repeat_as_copy_calcBlockSize
  17427 
  17428 four_bytes_remain_repeat_as_copy_calcBlockSize:
  17429 	TESTL BX, BX
  17430 	JZ    repeat_end_emit_calcBlockSize // nothing left to cover
  17431 	XORL  BX, BX // BX is reloaded at search_loop before its next read
  17432 	ADDQ  $0x05, AX // one more 5-byte copy for the remainder
  17433 	JMP   repeat_end_emit_calcBlockSize
  17434 
  17435 two_byte_offset_repeat_as_copy_calcBlockSize:
  17436 	CMPL BX, $0x40
  17437 	JBE  two_byte_offset_short_repeat_as_copy_calcBlockSize
  17438 	LEAL -60(BX), BX // each 3-byte copy in this loop covers 60 bytes of the match
  17439 	ADDQ $0x03, AX
  17440 	JMP  two_byte_offset_repeat_as_copy_calcBlockSize
  17441 
  17442 two_byte_offset_short_repeat_as_copy_calcBlockSize:
  17443 	MOVL BX, DI
  17444 	SHLL $0x02, DI // DI is not read again before it is overwritten in this function
  17445 	CMPL BX, $0x0c
  17446 	JAE  emit_copy_three_repeat_as_copy_calcBlockSize // length >= 12: 3 bytes
  17447 	CMPL SI, $0x00000800
  17448 	JAE  emit_copy_three_repeat_as_copy_calcBlockSize // offset >= 2048: 3 bytes
  17449 	ADDQ $0x02, AX // short length and small offset: 2 bytes
  17450 	JMP  repeat_end_emit_calcBlockSize
  17451 
  17452 emit_copy_three_repeat_as_copy_calcBlockSize:
  17453 	ADDQ $0x03, AX
  17454 
  17455 repeat_end_emit_calcBlockSize:
  17456 	MOVL CX, 12(SP) // next literal run starts right after the match
  17457 	JMP  search_loop_calcBlockSize
  17458 
  17459 no_repeat_found_calcBlockSize:
  17460 	CMPL (DX)(BX*1), SI // 4 bytes at the candidate for CX vs 4 bytes at src[CX]
  17461 	JEQ  candidate_match_calcBlockSize
  17462 	SHRQ $0x08, SI // SI now starts at src[CX+1]
  17463 	MOVL 24(SP)(R9*4), BX // BX = candidate position for CX+2
  17464 	LEAL 2(CX), R8
  17465 	CMPL (DX)(DI*1), SI // candidate for CX+1 vs 4 bytes at src[CX+1]
  17466 	JEQ  candidate2_match_calcBlockSize
  17467 	MOVL R8, 24(SP)(R9*4) // record CX+2 in the table
  17468 	SHRQ $0x08, SI // SI now starts at src[CX+2]
  17469 	CMPL (DX)(BX*1), SI // candidate for CX+2 vs 4 bytes at src[CX+2]
  17470 	JEQ  candidate3_match_calcBlockSize
  17471 	MOVL 20(SP), CX // no match: jump ahead to the saved next search position
  17472 	JMP  search_loop_calcBlockSize
  17473 
  17474 candidate3_match_calcBlockSize:
  17475 	ADDL $0x02, CX // match starts at CX+2; BX already holds its candidate
  17476 	JMP  candidate_match_calcBlockSize
  17477 
  17478 candidate2_match_calcBlockSize:
  17479 	MOVL R8, 24(SP)(R9*4) // record CX+2 in the table (this path skipped the store above)
  17480 	INCL CX // match starts at CX+1
  17481 	MOVL DI, BX // BX = candidate for CX+1
  17482 
  17483 candidate_match_calcBlockSize:
  17484 	MOVL  12(SP), SI // SI = start of the pending literal run
  17485 	TESTL BX, BX
  17486 	JZ    match_extend_back_end_calcBlockSize // candidate at position 0: cannot extend backwards
  17487 
  17488 match_extend_back_loop_calcBlockSize:
  17489 	CMPL CX, SI
  17490 	JBE  match_extend_back_end_calcBlockSize // do not extend into bytes before the literal start
  17491 	MOVB -1(DX)(BX*1), DI
  17492 	MOVB -1(DX)(CX*1), R8
  17493 	CMPB DI, R8
  17494 	JNE  match_extend_back_end_calcBlockSize // preceding bytes differ
  17495 	LEAL -1(CX), CX
  17496 	DECL BX
  17497 	JZ   match_extend_back_end_calcBlockSize // candidate reached position 0
  17498 	JMP  match_extend_back_loop_calcBlockSize
  17499 
  17500 match_extend_back_end_calcBlockSize:
  17501 	MOVL CX, SI
  17502 	SUBL 12(SP), SI // SI = pending literal length
  17503 	LEAQ 5(AX)(SI*1), SI // count so far + literals + 5 bytes of headroom
  17504 	CMPQ SI, (SP)
  17505 	JB   match_dst_size_check_calcBlockSize
  17506 	MOVQ $0x00000000, ret+24(FP) // at or above the size limit: return 0
  17507 	RET
  17508 
  17509 match_dst_size_check_calcBlockSize:
  17510 	MOVL CX, SI
  17511 	MOVL 12(SP), DI
  17512 	CMPL DI, SI
  17513 	JEQ  emit_literal_done_match_emit_calcBlockSize // no pending literals
  17514 	MOVL SI, R8
  17515 	MOVL SI, 12(SP) // literal run is now accounted for up to CX
  17516 	LEAQ (DX)(DI*1), SI // address of the literals; overwritten two lines below without being read
  17517 	SUBL DI, R8 // R8 = literal length
  17518 	LEAL -1(R8), SI // SI = length - 1, selects the header size below
  17519 	CMPL SI, $0x3c
  17520 	JB   one_byte_match_emit_calcBlockSize
  17521 	CMPL SI, $0x00000100
  17522 	JB   two_bytes_match_emit_calcBlockSize
  17523 	CMPL SI, $0x00010000
  17524 	JB   three_bytes_match_emit_calcBlockSize
  17525 	CMPL SI, $0x01000000
  17526 	JB   four_bytes_match_emit_calcBlockSize
  17527 	ADDQ $0x05, AX // length-1 >= 1<<24: 5 header bytes
  17528 	JMP  memmove_long_match_emit_calcBlockSize
  17529 
  17530 four_bytes_match_emit_calcBlockSize:
  17531 	ADDQ $0x04, AX
  17532 	JMP  memmove_long_match_emit_calcBlockSize
  17533 
  17534 three_bytes_match_emit_calcBlockSize:
  17535 	ADDQ $0x03, AX
  17536 	JMP  memmove_long_match_emit_calcBlockSize
  17537 
  17538 two_bytes_match_emit_calcBlockSize:
  17539 	ADDQ $0x02, AX
  17540 	CMPL SI, $0x40
  17541 	JB   memmove_match_emit_calcBlockSize // both targets add R8 to AX; the split changes nothing here
  17542 	JMP  memmove_long_match_emit_calcBlockSize
  17543 
  17544 one_byte_match_emit_calcBlockSize:
  17545 	ADDQ $0x01, AX
  17546 
  17547 memmove_match_emit_calcBlockSize:
  17548 	LEAQ (AX)(R8*1), AX // count the literal bytes themselves
  17549 	JMP  emit_literal_done_match_emit_calcBlockSize
  17550 
  17551 memmove_long_match_emit_calcBlockSize:
  17552 	LEAQ (AX)(R8*1), AX // count the literal bytes themselves
  17553 
  17554 emit_literal_done_match_emit_calcBlockSize:
  17555 match_nolit_loop_calcBlockSize:
  17556 	MOVL CX, SI
  17557 	SUBL BX, SI // SI = offset between current position and candidate
  17558 	MOVL SI, 16(SP) // becomes the most recent match offset
  17559 	ADDL $0x04, CX // the first 4 bytes were already compared equal
  17560 	ADDL $0x04, BX
  17561 	MOVQ src_len+8(FP), SI
  17562 	SUBL CX, SI // SI = bytes left in src from CX
  17563 	LEAQ (DX)(CX*1), DI // DI = &src[CX]
  17564 	LEAQ (DX)(BX*1), BX // BX = &src[candidate + 4]
  17565 
  17566 	// matchLen
  17567 	XORL R9, R9 // R9 = number of matching bytes found so far
  17568 	CMPL SI, $0x08
  17569 	JB   matchlen_match4_match_nolit_calcBlockSize
  17570 
  17571 matchlen_loopback_match_nolit_calcBlockSize:
  17572 	MOVQ  (DI)(R9*1), R8
  17573 	XORQ  (BX)(R9*1), R8 // non-zero bits mark differing bytes
  17574 	TESTQ R8, R8
  17575 	JZ    matchlen_loop_match_nolit_calcBlockSize // all 8 bytes equal
  17576 
  17577 #ifdef GOAMD64_v3
  17578 	TZCNTQ R8, R8 // index of the lowest differing bit
  17579 
  17580 #else
  17581 	BSFQ R8, R8 // index of the lowest differing bit
  17582 
  17583 #endif
  17584 	SARQ $0x03, R8 // bit index -> byte index of the first mismatch
  17585 	LEAL (R9)(R8*1), R9
  17586 	JMP  match_nolit_end_calcBlockSize
  17587 
  17588 matchlen_loop_match_nolit_calcBlockSize:
  17589 	LEAL -8(SI), SI
  17590 	LEAL 8(R9), R9
  17591 	CMPL SI, $0x08
  17592 	JAE  matchlen_loopback_match_nolit_calcBlockSize
  17593 	JZ   match_nolit_end_calcBlockSize // NOTE(review): flags still come from the CMPL above; with JAE not taken SI != 8, so this looks unreachable
  17594 
  17595 matchlen_match4_match_nolit_calcBlockSize:
  17596 	CMPL SI, $0x04
  17597 	JB   matchlen_match2_match_nolit_calcBlockSize
  17598 	MOVL (DI)(R9*1), R8
  17599 	CMPL (BX)(R9*1), R8
  17600 	JNE  matchlen_match2_match_nolit_calcBlockSize
  17601 	SUBL $0x04, SI
  17602 	LEAL 4(R9), R9
  17603 
  17604 matchlen_match2_match_nolit_calcBlockSize:
  17605 	CMPL SI, $0x02
  17606 	JB   matchlen_match1_match_nolit_calcBlockSize
  17607 	MOVW (DI)(R9*1), R8
  17608 	CMPW (BX)(R9*1), R8
  17609 	JNE  matchlen_match1_match_nolit_calcBlockSize
  17610 	SUBL $0x02, SI
  17611 	LEAL 2(R9), R9
  17612 
  17613 matchlen_match1_match_nolit_calcBlockSize:
  17614 	CMPL SI, $0x01
  17615 	JB   match_nolit_end_calcBlockSize
  17616 	MOVB (DI)(R9*1), R8
  17617 	CMPB (BX)(R9*1), R8
  17618 	JNE  match_nolit_end_calcBlockSize
  17619 	LEAL 1(R9), R9
  17620 
  17621 match_nolit_end_calcBlockSize:
  17622 	ADDL R9, CX // CX = first position after the match
  17623 	MOVL 16(SP), BX // BX = match offset
  17624 	ADDL $0x04, R9 // R9 = total match length, including the first 4 bytes
  17625 	MOVL CX, 12(SP) // next literal run starts right after the match
  17626 
  17627 	// emitCopy
  17628 	CMPL BX, $0x00010000
  17629 	JB   two_byte_offset_match_nolit_calcBlockSize // offset fits in 16 bits
  17630 
  17631 four_bytes_loop_back_match_nolit_calcBlockSize:
  17632 	CMPL R9, $0x40
  17633 	JBE  four_bytes_remain_match_nolit_calcBlockSize
  17634 	LEAL -64(R9), R9 // each 5-byte copy covers 64 bytes of the match
  17635 	ADDQ $0x05, AX
  17636 	CMPL R9, $0x04
  17637 	JB   four_bytes_remain_match_nolit_calcBlockSize
  17638 	JMP  four_bytes_loop_back_match_nolit_calcBlockSize
  17639 
  17640 four_bytes_remain_match_nolit_calcBlockSize:
  17641 	TESTL R9, R9
  17642 	JZ    match_nolit_emitcopy_end_calcBlockSize // nothing left to cover
  17643 	XORL  BX, BX // BX is reloaded on every path from here before its next read
  17644 	ADDQ  $0x05, AX // one more 5-byte copy for the remainder
  17645 	JMP   match_nolit_emitcopy_end_calcBlockSize
  17646 
  17647 two_byte_offset_match_nolit_calcBlockSize:
  17648 	CMPL R9, $0x40
  17649 	JBE  two_byte_offset_short_match_nolit_calcBlockSize
  17650 	LEAL -60(R9), R9 // each 3-byte copy in this loop covers 60 bytes of the match
  17651 	ADDQ $0x03, AX
  17652 	JMP  two_byte_offset_match_nolit_calcBlockSize
  17653 
  17654 two_byte_offset_short_match_nolit_calcBlockSize:
  17655 	MOVL R9, SI
  17656 	SHLL $0x02, SI // SI is overwritten at match_nolit_emitcopy_end before it is read again
  17657 	CMPL R9, $0x0c
  17658 	JAE  emit_copy_three_match_nolit_calcBlockSize // length >= 12: 3 bytes
  17659 	CMPL BX, $0x00000800
  17660 	JAE  emit_copy_three_match_nolit_calcBlockSize // offset >= 2048: 3 bytes
  17661 	ADDQ $0x02, AX // short length and small offset: 2 bytes
  17662 	JMP  match_nolit_emitcopy_end_calcBlockSize
  17663 
  17664 emit_copy_three_match_nolit_calcBlockSize:
  17665 	ADDQ $0x03, AX
  17666 
  17667 match_nolit_emitcopy_end_calcBlockSize:
  17668 	CMPL CX, 8(SP)
  17669 	JAE  emit_remainder_calcBlockSize // reached the search limit
  17670 	MOVQ -2(DX)(CX*1), SI // SI = 8 bytes at src[CX-2]
  17671 	CMPQ AX, (SP)
  17672 	JB   match_nolit_dst_ok_calcBlockSize
  17673 	MOVQ $0x00000000, ret+24(FP) // at or above the size limit: return 0
  17674 	RET
  17675 
  17676 match_nolit_dst_ok_calcBlockSize:
  17677 	MOVQ  $0x0000cf1bbcdcbf9b, R8 // hash multiplier
  17678 	MOVQ  SI, DI // DI = bytes starting at CX-2
  17679 	SHRQ  $0x10, SI // SI = bytes starting at CX
  17680 	MOVQ  SI, BX
  17681 	SHLQ  $0x10, DI
  17682 	IMULQ R8, DI
  17683 	SHRQ  $0x33, DI // table index for CX-2
  17684 	SHLQ  $0x10, BX
  17685 	IMULQ R8, BX
  17686 	SHRQ  $0x33, BX // table index for CX
  17687 	LEAL  -2(CX), R8
  17688 	LEAQ  24(SP)(BX*4), R9 // R9 = address of the table slot for CX
  17689 	MOVL  (R9), BX // BX = previous candidate stored in that slot
  17690 	MOVL  R8, 24(SP)(DI*4) // record CX-2 in the table
  17691 	MOVL  CX, (R9) // record CX in the table
  17692 	CMPL  (DX)(BX*1), SI // 4 bytes at the candidate vs 4 bytes at src[CX]
  17693 	JEQ   match_nolit_loop_calcBlockSize // immediate match with no literals in between
  17694 	INCL  CX
  17695 	JMP   search_loop_calcBlockSize
  17696 
  17697 emit_remainder_calcBlockSize:
  17698 	MOVQ src_len+8(FP), CX
  17699 	SUBL 12(SP), CX // CX = bytes from the pending literal start to the end of src
  17700 	LEAQ 5(AX)(CX*1), CX // count so far + remaining literals + 5 bytes of headroom
  17701 	CMPQ CX, (SP)
  17702 	JB   emit_remainder_ok_calcBlockSize
  17703 	MOVQ $0x00000000, ret+24(FP) // at or above the size limit: return 0
  17704 	RET
  17705 
  17706 emit_remainder_ok_calcBlockSize:
  17707 	MOVQ src_len+8(FP), CX
  17708 	MOVL 12(SP), BX
  17709 	CMPL BX, CX
  17710 	JEQ  emit_literal_done_emit_remainder_calcBlockSize // nothing pending
  17711 	MOVL CX, SI
  17712 	MOVL CX, 12(SP) // literal run is now accounted for up to src_len
  17713 	LEAQ (DX)(BX*1), CX // address of the literals; overwritten two lines below without being read
  17714 	SUBL BX, SI // SI = literal length
  17715 	LEAL -1(SI), CX // CX = length - 1, selects the header size below
  17716 	CMPL CX, $0x3c
  17717 	JB   one_byte_emit_remainder_calcBlockSize
  17718 	CMPL CX, $0x00000100
  17719 	JB   two_bytes_emit_remainder_calcBlockSize
  17720 	CMPL CX, $0x00010000
  17721 	JB   three_bytes_emit_remainder_calcBlockSize
  17722 	CMPL CX, $0x01000000
  17723 	JB   four_bytes_emit_remainder_calcBlockSize
  17724 	ADDQ $0x05, AX // length-1 >= 1<<24: 5 header bytes
  17725 	JMP  memmove_long_emit_remainder_calcBlockSize
  17726 
  17727 four_bytes_emit_remainder_calcBlockSize:
  17728 	ADDQ $0x04, AX
  17729 	JMP  memmove_long_emit_remainder_calcBlockSize
  17730 
  17731 three_bytes_emit_remainder_calcBlockSize:
  17732 	ADDQ $0x03, AX
  17733 	JMP  memmove_long_emit_remainder_calcBlockSize
  17734 
  17735 two_bytes_emit_remainder_calcBlockSize:
  17736 	ADDQ $0x02, AX
  17737 	CMPL CX, $0x40
  17738 	JB   memmove_emit_remainder_calcBlockSize // both targets add SI to AX; the split changes nothing here
  17739 	JMP  memmove_long_emit_remainder_calcBlockSize
  17740 
  17741 one_byte_emit_remainder_calcBlockSize:
  17742 	ADDQ $0x01, AX
  17743 
  17744 memmove_emit_remainder_calcBlockSize:
  17745 	LEAQ (AX)(SI*1), AX // count the literal bytes themselves
  17746 	JMP  emit_literal_done_emit_remainder_calcBlockSize
  17747 
  17748 memmove_long_emit_remainder_calcBlockSize:
  17749 	LEAQ (AX)(SI*1), AX // count the literal bytes themselves
  17750 
  17751 emit_literal_done_emit_remainder_calcBlockSize:
  17752 	MOVQ AX, ret+24(FP) // return the accumulated byte count
  17753 	RET
  17754 
  17755 // func calcBlockSizeSmall(src []byte) int
  17756 // Requires: BMI, SSE2
  17757 TEXT ·calcBlockSizeSmall(SB), $2072-32
  17758 	XORQ AX, AX
  17759 	MOVQ $0x00000010, CX
  17760 	LEAQ 24(SP), DX
  17761 	PXOR X0, X0
  17762 
  17763 zero_loop_calcBlockSizeSmall:
  17764 	MOVOU X0, (DX)
  17765 	MOVOU X0, 16(DX)
  17766 	MOVOU X0, 32(DX)
  17767 	MOVOU X0, 48(DX)
  17768 	MOVOU X0, 64(DX)
  17769 	MOVOU X0, 80(DX)
  17770 	MOVOU X0, 96(DX)
  17771 	MOVOU X0, 112(DX)
  17772 	ADDQ  $0x80, DX
  17773 	DECQ  CX
  17774 	JNZ   zero_loop_calcBlockSizeSmall
  17775 	MOVL  $0x00000000, 12(SP)
  17776 	MOVQ  src_len+8(FP), CX
  17777 	LEAQ  -9(CX), DX
  17778 	LEAQ  -8(CX), BX
  17779 	MOVL  BX, 8(SP)
  17780 	SHRQ  $0x05, CX
  17781 	SUBL  CX, DX
  17782 	LEAQ  (AX)(DX*1), DX
  17783 	MOVQ  DX, (SP)
  17784 	MOVL  $0x00000001, CX
  17785 	MOVL  CX, 16(SP)
  17786 	MOVQ  src_base+0(FP), DX
  17787 
  17788 search_loop_calcBlockSizeSmall:
  17789 	MOVL  CX, BX
  17790 	SUBL  12(SP), BX
  17791 	SHRL  $0x04, BX
  17792 	LEAL  4(CX)(BX*1), BX
  17793 	CMPL  BX, 8(SP)
  17794 	JAE   emit_remainder_calcBlockSizeSmall
  17795 	MOVQ  (DX)(CX*1), SI
  17796 	MOVL  BX, 20(SP)
  17797 	MOVQ  $0x9e3779b1, R8
  17798 	MOVQ  SI, R9
  17799 	MOVQ  SI, R10
  17800 	SHRQ  $0x08, R10
  17801 	SHLQ  $0x20, R9
  17802 	IMULQ R8, R9
  17803 	SHRQ  $0x37, R9
  17804 	SHLQ  $0x20, R10
  17805 	IMULQ R8, R10
  17806 	SHRQ  $0x37, R10
  17807 	MOVL  24(SP)(R9*4), BX
  17808 	MOVL  24(SP)(R10*4), DI
  17809 	MOVL  CX, 24(SP)(R9*4)
  17810 	LEAL  1(CX), R9
  17811 	MOVL  R9, 24(SP)(R10*4)
  17812 	MOVQ  SI, R9
  17813 	SHRQ  $0x10, R9
  17814 	SHLQ  $0x20, R9
  17815 	IMULQ R8, R9
  17816 	SHRQ  $0x37, R9
  17817 	MOVL  CX, R8
  17818 	SUBL  16(SP), R8
  17819 	MOVL  1(DX)(R8*1), R10
  17820 	MOVQ  SI, R8
  17821 	SHRQ  $0x08, R8
  17822 	CMPL  R8, R10
  17823 	JNE   no_repeat_found_calcBlockSizeSmall
  17824 	LEAL  1(CX), SI
  17825 	MOVL  12(SP), BX
  17826 	MOVL  SI, DI
  17827 	SUBL  16(SP), DI
  17828 	JZ    repeat_extend_back_end_calcBlockSizeSmall
  17829 
  17830 repeat_extend_back_loop_calcBlockSizeSmall:
  17831 	CMPL SI, BX
  17832 	JBE  repeat_extend_back_end_calcBlockSizeSmall
  17833 	MOVB -1(DX)(DI*1), R8
  17834 	MOVB -1(DX)(SI*1), R9
  17835 	CMPB R8, R9
  17836 	JNE  repeat_extend_back_end_calcBlockSizeSmall
  17837 	LEAL -1(SI), SI
  17838 	DECL DI
  17839 	JNZ  repeat_extend_back_loop_calcBlockSizeSmall
  17840 
  17841 repeat_extend_back_end_calcBlockSizeSmall:
  17842 	MOVL 12(SP), BX
  17843 	CMPL BX, SI
  17844 	JEQ  emit_literal_done_repeat_emit_calcBlockSizeSmall
  17845 	MOVL SI, DI
  17846 	MOVL SI, 12(SP)
  17847 	LEAQ (DX)(BX*1), R8
  17848 	SUBL BX, DI
  17849 	LEAL -1(DI), BX
  17850 	CMPL BX, $0x3c
  17851 	JB   one_byte_repeat_emit_calcBlockSizeSmall
  17852 	CMPL BX, $0x00000100
  17853 	JB   two_bytes_repeat_emit_calcBlockSizeSmall
  17854 	JB   three_bytes_repeat_emit_calcBlockSizeSmall
  17855 
  17856 three_bytes_repeat_emit_calcBlockSizeSmall:
  17857 	ADDQ $0x03, AX
  17858 	JMP  memmove_long_repeat_emit_calcBlockSizeSmall
  17859 
  17860 two_bytes_repeat_emit_calcBlockSizeSmall:
  17861 	ADDQ $0x02, AX
  17862 	CMPL BX, $0x40
  17863 	JB   memmove_repeat_emit_calcBlockSizeSmall
  17864 	JMP  memmove_long_repeat_emit_calcBlockSizeSmall
  17865 
  17866 one_byte_repeat_emit_calcBlockSizeSmall:
  17867 	ADDQ $0x01, AX
  17868 
  17869 memmove_repeat_emit_calcBlockSizeSmall:
  17870 	LEAQ (AX)(DI*1), AX
  17871 	JMP  emit_literal_done_repeat_emit_calcBlockSizeSmall
  17872 
  17873 memmove_long_repeat_emit_calcBlockSizeSmall:
  17874 	LEAQ (AX)(DI*1), AX
  17875 
  17876 emit_literal_done_repeat_emit_calcBlockSizeSmall:
  17877 	ADDL $0x05, CX
  17878 	MOVL CX, BX
  17879 	SUBL 16(SP), BX
  17880 	MOVQ src_len+8(FP), DI
  17881 	SUBL CX, DI
  17882 	LEAQ (DX)(CX*1), R8
  17883 	LEAQ (DX)(BX*1), BX
  17884 
  17885 	// matchLen
  17886 	XORL R10, R10
  17887 	CMPL DI, $0x08
  17888 	JB   matchlen_match4_repeat_extend_calcBlockSizeSmall
  17889 
  17890 matchlen_loopback_repeat_extend_calcBlockSizeSmall:
  17891 	MOVQ  (R8)(R10*1), R9
  17892 	XORQ  (BX)(R10*1), R9
  17893 	TESTQ R9, R9
  17894 	JZ    matchlen_loop_repeat_extend_calcBlockSizeSmall
  17895 
  17896 #ifdef GOAMD64_v3
  17897 	TZCNTQ R9, R9
  17898 
  17899 #else
  17900 	BSFQ R9, R9
  17901 
  17902 #endif
  17903 	SARQ $0x03, R9
  17904 	LEAL (R10)(R9*1), R10
  17905 	JMP  repeat_extend_forward_end_calcBlockSizeSmall
  17906 
  17907 matchlen_loop_repeat_extend_calcBlockSizeSmall:
  17908 	LEAL -8(DI), DI
  17909 	LEAL 8(R10), R10
  17910 	CMPL DI, $0x08
  17911 	JAE  matchlen_loopback_repeat_extend_calcBlockSizeSmall
  17912 	JZ   repeat_extend_forward_end_calcBlockSizeSmall
  17913 
  17914 matchlen_match4_repeat_extend_calcBlockSizeSmall:
  17915 	CMPL DI, $0x04
  17916 	JB   matchlen_match2_repeat_extend_calcBlockSizeSmall
  17917 	MOVL (R8)(R10*1), R9
  17918 	CMPL (BX)(R10*1), R9
  17919 	JNE  matchlen_match2_repeat_extend_calcBlockSizeSmall
  17920 	SUBL $0x04, DI
  17921 	LEAL 4(R10), R10
  17922 
  17923 matchlen_match2_repeat_extend_calcBlockSizeSmall:
  17924 	CMPL DI, $0x02
  17925 	JB   matchlen_match1_repeat_extend_calcBlockSizeSmall
  17926 	MOVW (R8)(R10*1), R9
  17927 	CMPW (BX)(R10*1), R9
  17928 	JNE  matchlen_match1_repeat_extend_calcBlockSizeSmall
  17929 	SUBL $0x02, DI
  17930 	LEAL 2(R10), R10
  17931 
  17932 matchlen_match1_repeat_extend_calcBlockSizeSmall:
  17933 	CMPL DI, $0x01
  17934 	JB   repeat_extend_forward_end_calcBlockSizeSmall
  17935 	MOVB (R8)(R10*1), R9
  17936 	CMPB (BX)(R10*1), R9
  17937 	JNE  repeat_extend_forward_end_calcBlockSizeSmall
  17938 	LEAL 1(R10), R10
  17939 
  17940 repeat_extend_forward_end_calcBlockSizeSmall:
  17941 	ADDL R10, CX
  17942 	MOVL CX, BX
  17943 	SUBL SI, BX
  17944 	MOVL 16(SP), SI
  17945 
  17946 	// emitCopy
  17947 two_byte_offset_repeat_as_copy_calcBlockSizeSmall:
  17948 	CMPL BX, $0x40
  17949 	JBE  two_byte_offset_short_repeat_as_copy_calcBlockSizeSmall
  17950 	LEAL -60(BX), BX
  17951 	ADDQ $0x03, AX
  17952 	JMP  two_byte_offset_repeat_as_copy_calcBlockSizeSmall
  17953 
  17954 two_byte_offset_short_repeat_as_copy_calcBlockSizeSmall:
  17955 	MOVL BX, SI
  17956 	SHLL $0x02, SI
  17957 	CMPL BX, $0x0c
  17958 	JAE  emit_copy_three_repeat_as_copy_calcBlockSizeSmall
  17959 	ADDQ $0x02, AX
  17960 	JMP  repeat_end_emit_calcBlockSizeSmall
  17961 
  17962 emit_copy_three_repeat_as_copy_calcBlockSizeSmall:
  17963 	ADDQ $0x03, AX
  17964 
  17965 repeat_end_emit_calcBlockSizeSmall:
  17966 	MOVL CX, 12(SP)
  17967 	JMP  search_loop_calcBlockSizeSmall
  17968 
  17969 no_repeat_found_calcBlockSizeSmall:
  17970 	CMPL (DX)(BX*1), SI
  17971 	JEQ  candidate_match_calcBlockSizeSmall
  17972 	SHRQ $0x08, SI
  17973 	MOVL 24(SP)(R9*4), BX
  17974 	LEAL 2(CX), R8
  17975 	CMPL (DX)(DI*1), SI
  17976 	JEQ  candidate2_match_calcBlockSizeSmall
  17977 	MOVL R8, 24(SP)(R9*4)
  17978 	SHRQ $0x08, SI
  17979 	CMPL (DX)(BX*1), SI
  17980 	JEQ  candidate3_match_calcBlockSizeSmall
  17981 	MOVL 20(SP), CX
  17982 	JMP  search_loop_calcBlockSizeSmall
  17983 
  17984 candidate3_match_calcBlockSizeSmall:
  17985 	ADDL $0x02, CX
  17986 	JMP  candidate_match_calcBlockSizeSmall
  17987 
  17988 candidate2_match_calcBlockSizeSmall:
  17989 	MOVL R8, 24(SP)(R9*4)
  17990 	INCL CX
  17991 	MOVL DI, BX
  17992 
  17993 candidate_match_calcBlockSizeSmall:
  17994 	MOVL  12(SP), SI
  17995 	TESTL BX, BX
  17996 	JZ    match_extend_back_end_calcBlockSizeSmall
  17997 
  17998 match_extend_back_loop_calcBlockSizeSmall:
  17999 	CMPL CX, SI
  18000 	JBE  match_extend_back_end_calcBlockSizeSmall
  18001 	MOVB -1(DX)(BX*1), DI
  18002 	MOVB -1(DX)(CX*1), R8
  18003 	CMPB DI, R8
  18004 	JNE  match_extend_back_end_calcBlockSizeSmall
  18005 	LEAL -1(CX), CX
  18006 	DECL BX
  18007 	JZ   match_extend_back_end_calcBlockSizeSmall
  18008 	JMP  match_extend_back_loop_calcBlockSizeSmall
  18009 
  18010 match_extend_back_end_calcBlockSizeSmall:
  18011 	MOVL CX, SI
  18012 	SUBL 12(SP), SI
  18013 	LEAQ 3(AX)(SI*1), SI
  18014 	CMPQ SI, (SP)
  18015 	JB   match_dst_size_check_calcBlockSizeSmall
  18016 	MOVQ $0x00000000, ret+24(FP)
  18017 	RET
  18018 
  18019 match_dst_size_check_calcBlockSizeSmall:
  18020 	MOVL CX, SI
  18021 	MOVL 12(SP), DI
  18022 	CMPL DI, SI
  18023 	JEQ  emit_literal_done_match_emit_calcBlockSizeSmall
  18024 	MOVL SI, R8
  18025 	MOVL SI, 12(SP)
  18026 	LEAQ (DX)(DI*1), SI
  18027 	SUBL DI, R8
  18028 	LEAL -1(R8), SI
  18029 	CMPL SI, $0x3c
  18030 	JB   one_byte_match_emit_calcBlockSizeSmall
  18031 	CMPL SI, $0x00000100
  18032 	JB   two_bytes_match_emit_calcBlockSizeSmall
  18033 	JB   three_bytes_match_emit_calcBlockSizeSmall
  18034 
  18035 three_bytes_match_emit_calcBlockSizeSmall:
  18036 	ADDQ $0x03, AX
  18037 	JMP  memmove_long_match_emit_calcBlockSizeSmall
  18038 
  18039 two_bytes_match_emit_calcBlockSizeSmall:
  18040 	ADDQ $0x02, AX
  18041 	CMPL SI, $0x40
  18042 	JB   memmove_match_emit_calcBlockSizeSmall
  18043 	JMP  memmove_long_match_emit_calcBlockSizeSmall
  18044 
  18045 one_byte_match_emit_calcBlockSizeSmall:
  18046 	ADDQ $0x01, AX
  18047 
  18048 memmove_match_emit_calcBlockSizeSmall:
  18049 	LEAQ (AX)(R8*1), AX
  18050 	JMP  emit_literal_done_match_emit_calcBlockSizeSmall
  18051 
  18052 memmove_long_match_emit_calcBlockSizeSmall:
  18053 	LEAQ (AX)(R8*1), AX
  18054 
  18055 emit_literal_done_match_emit_calcBlockSizeSmall:
  18056 match_nolit_loop_calcBlockSizeSmall:
  18057 	MOVL CX, SI
  18058 	SUBL BX, SI
  18059 	MOVL SI, 16(SP)
  18060 	ADDL $0x04, CX
  18061 	ADDL $0x04, BX
  18062 	MOVQ src_len+8(FP), SI
  18063 	SUBL CX, SI
  18064 	LEAQ (DX)(CX*1), DI
  18065 	LEAQ (DX)(BX*1), BX
  18066 
  18067 	// matchLen
  18068 	XORL R9, R9
  18069 	CMPL SI, $0x08
  18070 	JB   matchlen_match4_match_nolit_calcBlockSizeSmall
  18071 
  18072 matchlen_loopback_match_nolit_calcBlockSizeSmall:
  18073 	MOVQ  (DI)(R9*1), R8
  18074 	XORQ  (BX)(R9*1), R8
  18075 	TESTQ R8, R8
  18076 	JZ    matchlen_loop_match_nolit_calcBlockSizeSmall
  18077 
  18078 #ifdef GOAMD64_v3
  18079 	TZCNTQ R8, R8
  18080 
  18081 #else
  18082 	BSFQ R8, R8
  18083 
  18084 #endif
  18085 	SARQ $0x03, R8
  18086 	LEAL (R9)(R8*1), R9
  18087 	JMP  match_nolit_end_calcBlockSizeSmall
  18088 
  18089 matchlen_loop_match_nolit_calcBlockSizeSmall:
  18090 	LEAL -8(SI), SI
  18091 	LEAL 8(R9), R9
  18092 	CMPL SI, $0x08
  18093 	JAE  matchlen_loopback_match_nolit_calcBlockSizeSmall
  18094 	JZ   match_nolit_end_calcBlockSizeSmall
  18095 
  18096 matchlen_match4_match_nolit_calcBlockSizeSmall:
  18097 	CMPL SI, $0x04
  18098 	JB   matchlen_match2_match_nolit_calcBlockSizeSmall
  18099 	MOVL (DI)(R9*1), R8
  18100 	CMPL (BX)(R9*1), R8
  18101 	JNE  matchlen_match2_match_nolit_calcBlockSizeSmall
  18102 	SUBL $0x04, SI
  18103 	LEAL 4(R9), R9
  18104 
  18105 matchlen_match2_match_nolit_calcBlockSizeSmall:
  18106 	CMPL SI, $0x02
  18107 	JB   matchlen_match1_match_nolit_calcBlockSizeSmall
  18108 	MOVW (DI)(R9*1), R8
  18109 	CMPW (BX)(R9*1), R8
  18110 	JNE  matchlen_match1_match_nolit_calcBlockSizeSmall
  18111 	SUBL $0x02, SI
  18112 	LEAL 2(R9), R9
  18113 
  18114 matchlen_match1_match_nolit_calcBlockSizeSmall:
  18115 	CMPL SI, $0x01
  18116 	JB   match_nolit_end_calcBlockSizeSmall
  18117 	MOVB (DI)(R9*1), R8
  18118 	CMPB (BX)(R9*1), R8
  18119 	JNE  match_nolit_end_calcBlockSizeSmall
  18120 	LEAL 1(R9), R9
  18121 
  18122 match_nolit_end_calcBlockSizeSmall:
  18123 	ADDL R9, CX
  18124 	MOVL 16(SP), BX
  18125 	ADDL $0x04, R9
  18126 	MOVL CX, 12(SP)
  18127 
  18128 	// emitCopy
  18129 two_byte_offset_match_nolit_calcBlockSizeSmall:
  18130 	CMPL R9, $0x40
  18131 	JBE  two_byte_offset_short_match_nolit_calcBlockSizeSmall
  18132 	LEAL -60(R9), R9
  18133 	ADDQ $0x03, AX
  18134 	JMP  two_byte_offset_match_nolit_calcBlockSizeSmall
  18135 
  18136 two_byte_offset_short_match_nolit_calcBlockSizeSmall:
  18137 	MOVL R9, BX
  18138 	SHLL $0x02, BX
  18139 	CMPL R9, $0x0c
  18140 	JAE  emit_copy_three_match_nolit_calcBlockSizeSmall
  18141 	ADDQ $0x02, AX
  18142 	JMP  match_nolit_emitcopy_end_calcBlockSizeSmall
  18143 
  18144 emit_copy_three_match_nolit_calcBlockSizeSmall:
  18145 	ADDQ $0x03, AX
  18146 
  18147 match_nolit_emitcopy_end_calcBlockSizeSmall:
  18148 	CMPL CX, 8(SP)
  18149 	JAE  emit_remainder_calcBlockSizeSmall
  18150 	MOVQ -2(DX)(CX*1), SI
  18151 	CMPQ AX, (SP)
  18152 	JB   match_nolit_dst_ok_calcBlockSizeSmall
  18153 	MOVQ $0x00000000, ret+24(FP)
  18154 	RET
  18155 
  18156 match_nolit_dst_ok_calcBlockSizeSmall:
  18157 	MOVQ  $0x9e3779b1, R8
  18158 	MOVQ  SI, DI
  18159 	SHRQ  $0x10, SI
  18160 	MOVQ  SI, BX
  18161 	SHLQ  $0x20, DI
  18162 	IMULQ R8, DI
  18163 	SHRQ  $0x37, DI
  18164 	SHLQ  $0x20, BX
  18165 	IMULQ R8, BX
  18166 	SHRQ  $0x37, BX
  18167 	LEAL  -2(CX), R8
  18168 	LEAQ  24(SP)(BX*4), R9
  18169 	MOVL  (R9), BX
  18170 	MOVL  R8, 24(SP)(DI*4)
  18171 	MOVL  CX, (R9)
  18172 	CMPL  (DX)(BX*1), SI
  18173 	JEQ   match_nolit_loop_calcBlockSizeSmall
  18174 	INCL  CX
  18175 	JMP   search_loop_calcBlockSizeSmall
  18176 
  18177 emit_remainder_calcBlockSizeSmall:
  18178 	MOVQ src_len+8(FP), CX
  18179 	SUBL 12(SP), CX
  18180 	LEAQ 3(AX)(CX*1), CX
  18181 	CMPQ CX, (SP)
  18182 	JB   emit_remainder_ok_calcBlockSizeSmall
  18183 	MOVQ $0x00000000, ret+24(FP)
  18184 	RET
  18185 
  18186 emit_remainder_ok_calcBlockSizeSmall:
  18187 	MOVQ src_len+8(FP), CX
  18188 	MOVL 12(SP), BX
  18189 	CMPL BX, CX
  18190 	JEQ  emit_literal_done_emit_remainder_calcBlockSizeSmall
  18191 	MOVL CX, SI
  18192 	MOVL CX, 12(SP)
  18193 	LEAQ (DX)(BX*1), CX
  18194 	SUBL BX, SI
  18195 	LEAL -1(SI), CX
  18196 	CMPL CX, $0x3c
  18197 	JB   one_byte_emit_remainder_calcBlockSizeSmall
  18198 	CMPL CX, $0x00000100
  18199 	JB   two_bytes_emit_remainder_calcBlockSizeSmall
  18200 	JB   three_bytes_emit_remainder_calcBlockSizeSmall
  18201 
  18202 three_bytes_emit_remainder_calcBlockSizeSmall:
  18203 	ADDQ $0x03, AX
  18204 	JMP  memmove_long_emit_remainder_calcBlockSizeSmall
  18205 
  18206 two_bytes_emit_remainder_calcBlockSizeSmall:
  18207 	ADDQ $0x02, AX
  18208 	CMPL CX, $0x40
  18209 	JB   memmove_emit_remainder_calcBlockSizeSmall
  18210 	JMP  memmove_long_emit_remainder_calcBlockSizeSmall
  18211 
  18212 one_byte_emit_remainder_calcBlockSizeSmall:
  18213 	ADDQ $0x01, AX
  18214 
  18215 memmove_emit_remainder_calcBlockSizeSmall:
  18216 	LEAQ (AX)(SI*1), AX
  18217 	JMP  emit_literal_done_emit_remainder_calcBlockSizeSmall
  18218 
  18219 memmove_long_emit_remainder_calcBlockSizeSmall:
  18220 	LEAQ (AX)(SI*1), AX
  18221 
  18222 emit_literal_done_emit_remainder_calcBlockSizeSmall:
  18223 	MOVQ AX, ret+24(FP)
  18224 	RET
  18225 
  18226 // func emitLiteral(dst []byte, lit []byte) int
  18227 // Requires: SSE2
        //
        // Writes one literal chunk for lit at the start of dst: a 1-5 byte
        // tag/length header followed by a copy of the lit bytes. The value stored
        // to ret is len(lit) plus the header size, or 0 when lit is empty.
        // dst_len is never read here, so the caller presumably guarantees that
        // dst is large enough - TODO confirm against the Go call sites.
        //
        // Header layout, with n = len(lit)-1 (only the low 32 bits are used):
        //   n < 60       1 byte : n<<2
        //   n < 1<<8     2 bytes: 0xf0, n
        //   n < 1<<16    3 bytes: 0xf4, n (16-bit little endian)
        //   n < 1<<24    4 bytes: 0xf8, n (24-bit little endian)
        //   otherwise    5 bytes: 0xfc, n (32-bit little endian)
        //
        // Registers: AX = write cursor in dst, CX = lit base, DX = len(lit),
        // BX = byte count to return, SI = n.
  18228 TEXT ·emitLiteral(SB), NOSPLIT, $0-56
  18229 	MOVQ  lit_len+32(FP), DX // DX = len(lit)
  18230 	MOVQ  dst_base+0(FP), AX // AX = dst write cursor
  18231 	MOVQ  lit_base+24(FP), CX // CX = source pointer
  18232 	TESTQ DX, DX
  18233 	JZ    emit_literal_end_standalone_skip // empty literal: return 0
  18234 	MOVL  DX, BX // BX = len(lit); the header size is added per case below
  18235 	LEAL  -1(DX), SI // SI = n = len(lit)-1
  18236 	CMPL  SI, $0x3c // 60: largest n that fits in the tag byte itself
  18237 	JB    one_byte_standalone
  18238 	CMPL  SI, $0x00000100
  18239 	JB    two_bytes_standalone
  18240 	CMPL  SI, $0x00010000
  18241 	JB    three_bytes_standalone
  18242 	CMPL  SI, $0x01000000
  18243 	JB    four_bytes_standalone
  18244 	MOVB  $0xfc, (AX) // 5-byte header: tag 0xfc + 32-bit n
  18245 	MOVL  SI, 1(AX)
  18246 	ADDQ  $0x05, BX
  18247 	ADDQ  $0x05, AX
  18248 	JMP   memmove_long_standalone
  18249 
  18250 four_bytes_standalone:
  18251 	MOVL SI, DI
  18252 	SHRL $0x10, DI // DI = bits 16..23 of n
  18253 	MOVB $0xf8, (AX) // 4-byte header: tag 0xf8 + 24-bit n
  18254 	MOVW SI, 1(AX)
  18255 	MOVB DI, 3(AX)
  18256 	ADDQ $0x04, BX
  18257 	ADDQ $0x04, AX
  18258 	JMP  memmove_long_standalone
  18259 
  18260 three_bytes_standalone:
  18261 	MOVB $0xf4, (AX) // 3-byte header: tag 0xf4 + 16-bit n
  18262 	MOVW SI, 1(AX)
  18263 	ADDQ $0x03, BX
  18264 	ADDQ $0x03, AX
  18265 	JMP  memmove_long_standalone
  18266 
  18267 two_bytes_standalone:
  18268 	MOVB $0xf0, (AX) // 2-byte header: tag 0xf0 + 8-bit n
  18269 	MOVB SI, 1(AX)
  18270 	ADDQ $0x02, BX
  18271 	ADDQ $0x02, AX
  18272 	CMPL SI, $0x40 // n < 64, i.e. len(lit) <= 64: the short copy suffices
  18273 	JB   memmove_standalone
  18274 	JMP  memmove_long_standalone
  18275 
  18276 one_byte_standalone:
  18277 	SHLB $0x02, SI // tag = n<<2
  18278 	MOVB SI, (AX)
  18279 	ADDQ $0x01, BX
  18280 	ADDQ $0x01, AX
  18281 
  18282 memmove_standalone:
  18283 	// genMemMoveShort
        	// Copies DX bytes from CX to AX. On every path that reaches this label
        	// DX is in 1..64. Each size class loads a head and a tail that may
        	// overlap, so no byte loop is needed.
  18284 	CMPQ DX, $0x03
  18285 	JB   emit_lit_memmove_standalone_memmove_move_1or2
  18286 	JE   emit_lit_memmove_standalone_memmove_move_3 // still testing the flags of CMPQ DX, $3
  18287 	CMPQ DX, $0x08
  18288 	JB   emit_lit_memmove_standalone_memmove_move_4through7
  18289 	CMPQ DX, $0x10
  18290 	JBE  emit_lit_memmove_standalone_memmove_move_8through16
  18291 	CMPQ DX, $0x20
  18292 	JBE  emit_lit_memmove_standalone_memmove_move_17through32
  18293 	JMP  emit_lit_memmove_standalone_memmove_move_33through64
  18294 
  18295 emit_lit_memmove_standalone_memmove_move_1or2:
  18296 	MOVB (CX), SI // first byte
  18297 	MOVB -1(CX)(DX*1), CL // last byte (the same byte when DX == 1); CX is dead afterwards
  18298 	MOVB SI, (AX)
  18299 	MOVB CL, -1(AX)(DX*1)
  18300 	JMP  emit_literal_end_standalone
  18301 
  18302 emit_lit_memmove_standalone_memmove_move_3:
  18303 	MOVW (CX), SI // bytes 0..1
  18304 	MOVB 2(CX), CL // byte 2
  18305 	MOVW SI, (AX)
  18306 	MOVB CL, 2(AX)
  18307 	JMP  emit_literal_end_standalone
  18308 
  18309 emit_lit_memmove_standalone_memmove_move_4through7:
  18310 	MOVL (CX), SI // first 4 bytes
  18311 	MOVL -4(CX)(DX*1), CX // last 4 bytes (overlaps the first load when DX < 8)
  18312 	MOVL SI, (AX)
  18313 	MOVL CX, -4(AX)(DX*1)
  18314 	JMP  emit_literal_end_standalone
  18315 
  18316 emit_lit_memmove_standalone_memmove_move_8through16:
  18317 	MOVQ (CX), SI // first 8 bytes
  18318 	MOVQ -8(CX)(DX*1), CX // last 8 bytes
  18319 	MOVQ SI, (AX)
  18320 	MOVQ CX, -8(AX)(DX*1)
  18321 	JMP  emit_literal_end_standalone
  18322 
  18323 emit_lit_memmove_standalone_memmove_move_17through32:
  18324 	MOVOU (CX), X0 // first 16 bytes
  18325 	MOVOU -16(CX)(DX*1), X1 // last 16 bytes
  18326 	MOVOU X0, (AX)
  18327 	MOVOU X1, -16(AX)(DX*1)
  18328 	JMP   emit_literal_end_standalone
  18329 
  18330 emit_lit_memmove_standalone_memmove_move_33through64:
  18331 	MOVOU (CX), X0 // first 32 bytes
  18332 	MOVOU 16(CX), X1
  18333 	MOVOU -32(CX)(DX*1), X2 // last 32 bytes
  18334 	MOVOU -16(CX)(DX*1), X3
  18335 	MOVOU X0, (AX)
  18336 	MOVOU X1, 16(AX)
  18337 	MOVOU X2, -32(AX)(DX*1)
  18338 	MOVOU X3, -16(AX)(DX*1)
  18339 	JMP   emit_literal_end_standalone
  18340 	JMP emit_literal_end_standalone // NOTE(review): unreachable, directly follows an unconditional JMP
  18341 
  18342 memmove_long_standalone:
  18343 	// genMemMoveLong
        	// Copies DX bytes from CX to AX. The first and last 32 source bytes are
        	// parked in X0..X3 and stored last with unaligned moves; the middle is
        	// copied 32 bytes per iteration using aligned (MOVOA) stores to dst.
  18344 	MOVOU (CX), X0
  18345 	MOVOU 16(CX), X1
  18346 	MOVOU -32(CX)(DX*1), X2
  18347 	MOVOU -16(CX)(DX*1), X3
  18348 	MOVQ  DX, DI
  18349 	SHRQ  $0x05, DI // DI = DX / 32
  18350 	MOVQ  AX, SI
  18351 	ANDL  $0x0000001f, SI // SI = dst & 31
  18352 	MOVQ  $0x00000040, R8
  18353 	SUBQ  SI, R8 // R8 = 64 - (dst & 31), so AX+R8-32 is 32-byte aligned
  18354 	DECQ  DI
  18355 	JA    emit_lit_memmove_long_standalonelarge_forward_sse_loop_32 // NOTE(review): DECQ leaves CF alone, so this JA also depends on CF from the SUBQ above
  18356 	LEAQ  -32(CX)(R8*1), SI // SI = source address matching the aligned destination
  18357 	LEAQ  -32(AX)(R8*1), R9 // R9 = first 32-byte aligned destination address
  18358 
  18359 emit_lit_memmove_long_standalonelarge_big_loop_back:
  18360 	MOVOU (SI), X4
  18361 	MOVOU 16(SI), X5
  18362 	MOVOA X4, (R9)
  18363 	MOVOA X5, 16(R9)
  18364 	ADDQ  $0x20, R9
  18365 	ADDQ  $0x20, SI
  18366 	ADDQ  $0x20, R8 // keep R8 in step so the loop below resumes where this one stops
  18367 	DECQ  DI
  18368 	JNA   emit_lit_memmove_long_standalonelarge_big_loop_back
  18369 
  18370 emit_lit_memmove_long_standalonelarge_forward_sse_loop_32:
  18371 	MOVOU -32(CX)(R8*1), X4
  18372 	MOVOU -16(CX)(R8*1), X5
  18373 	MOVOA X4, -32(AX)(R8*1)
  18374 	MOVOA X5, -16(AX)(R8*1)
  18375 	ADDQ  $0x20, R8
  18376 	CMPQ  DX, R8
  18377 	JAE   emit_lit_memmove_long_standalonelarge_forward_sse_loop_32 // continue while R8 <= DX
  18378 	MOVOU X0, (AX) // finally store the saved head ...
  18379 	MOVOU X1, 16(AX)
  18380 	MOVOU X2, -32(AX)(DX*1) // ... and the saved tail
  18381 	MOVOU X3, -16(AX)(DX*1)
  18382 	JMP   emit_literal_end_standalone
  18383 	JMP emit_literal_end_standalone // NOTE(review): unreachable, directly follows an unconditional JMP
  18384 
  18385 emit_literal_end_standalone_skip:
  18386 	XORQ BX, BX // nothing written
  18387 
  18388 emit_literal_end_standalone:
  18389 	MOVQ BX, ret+48(FP) // return header size + len(lit)
  18390 	RET
  18391 
  18392 // func emitRepeat(dst []byte, offset int, length int) int
        //
        // Writes one or more "repeat" ops covering length bytes at the start of
        // dst and stores the number of bytes written to ret. Presumably this is
        // the S2 op that re-uses the previous copy offset - confirm against the
        // format description. dst_len is never read.
        //
        // Encodings chosen below (L = length on entry to the loop):
        //   L <= 8                      2 bytes: (L-4)<<2 | 1, 0
        //   L < 12 and offset < 2048    2 bytes: (L-4)<<2 | 1 | (offset>>8)<<5, offset&0xff
        //   L-4 < 260                   3 bytes: 0x15, 0x00, L-8
        //   L-4 < 65792                 4 bytes: 0x19, 0x00, (L-260) as 16-bit LE
        //   L-4 < 0x0100ffff            5 bytes: 0x1d, 0x00, (L-65540) as 24-bit LE
        //   otherwise                   5 bytes: 0x1d, 0x00, 0xfb, 0xff, 0xff, then
        //                               loop with L reduced by 0x0100ffff
        //
        // Registers: AX = write cursor, BX = bytes written, CX = offset,
        // DX = length (then length-4), SI = scratch.
  18393 TEXT ·emitRepeat(SB), NOSPLIT, $0-48
  18394 	XORQ BX, BX // BX = bytes written so far
  18395 	MOVQ dst_base+0(FP), AX
  18396 	MOVQ offset+24(FP), CX
  18397 	MOVQ length+32(FP), DX
  18398 
  18399 	// emitRepeat
  18400 emit_repeat_again_standalone:
  18401 	MOVL DX, SI // SI = L
  18402 	LEAL -4(DX), DX // DX = L-4
  18403 	CMPL SI, $0x08
  18404 	JBE  repeat_two_standalone
  18405 	CMPL SI, $0x0c
  18406 	JAE  cant_repeat_two_offset_standalone
  18407 	CMPL CX, $0x00000800 // offset must be below 2048 for the 2-byte form
  18408 	JB   repeat_two_offset_standalone
  18409 
  18410 cant_repeat_two_offset_standalone:
  18411 	CMPL DX, $0x00000104
  18412 	JB   repeat_three_standalone
  18413 	CMPL DX, $0x00010100
  18414 	JB   repeat_four_standalone
  18415 	CMPL DX, $0x0100ffff
  18416 	JB   repeat_five_standalone
  18417 	LEAL -16842747(DX), DX // DX = L - 0x0100ffff: what is left after this maximal op
  18418 	MOVL $0xfffb001d, (AX) // bytes 0x1d 0x00 0xfb 0xff ...
  18419 	MOVB $0xff, 4(AX) // ... 0xff
  18420 	ADDQ $0x05, AX
  18421 	ADDQ $0x05, BX
  18422 	JMP  emit_repeat_again_standalone // encode the remainder
  18423 
  18424 repeat_five_standalone:
  18425 	LEAL -65536(DX), DX
  18426 	MOVL DX, CX // offset is no longer needed; CX carries the high length byte
  18427 	MOVW $0x001d, (AX)
  18428 	MOVW DX, 2(AX) // low 16 bits
  18429 	SARL $0x10, CX
  18430 	MOVB CL, 4(AX) // bits 16..23
  18431 	ADDQ $0x05, BX
  18432 	ADDQ $0x05, AX
  18433 	JMP  gen_emit_repeat_end
  18434 
  18435 repeat_four_standalone:
  18436 	LEAL -256(DX), DX
  18437 	MOVW $0x0019, (AX)
  18438 	MOVW DX, 2(AX)
  18439 	ADDQ $0x04, BX
  18440 	ADDQ $0x04, AX
  18441 	JMP  gen_emit_repeat_end
  18442 
  18443 repeat_three_standalone:
  18444 	LEAL -4(DX), DX // DX = L-8
  18445 	MOVW $0x0015, (AX)
  18446 	MOVB DL, 2(AX)
  18447 	ADDQ $0x03, BX
  18448 	ADDQ $0x03, AX
  18449 	JMP  gen_emit_repeat_end
  18450 
  18451 repeat_two_standalone:
  18452 	SHLL $0x02, DX
  18453 	ORL  $0x01, DX // (L-4)<<2 | 1
  18454 	MOVW DX, (AX) // 16-bit store: tag byte followed by a zero byte
  18455 	ADDQ $0x02, BX
  18456 	ADDQ $0x02, AX
  18457 	JMP  gen_emit_repeat_end
  18458 
  18459 repeat_two_offset_standalone:
  18460 	XORQ SI, SI
  18461 	LEAL 1(SI)(DX*4), DX // DX = (L-4)<<2 | 1
  18462 	MOVB CL, 1(AX) // low 8 bits of offset
  18463 	SARL $0x08, CX
  18464 	SHLL $0x05, CX // high offset bits go into bits 5..7 of the tag
  18465 	ORL  CX, DX
  18466 	MOVB DL, (AX)
  18467 	ADDQ $0x02, BX
  18468 	ADDQ $0x02, AX
  18469 
  18470 gen_emit_repeat_end:
  18471 	MOVQ BX, ret+40(FP)
  18472 	RET
  18473 
  18474 // func emitCopy(dst []byte, offset int, length int) int
        //
        // Writes copy op(s) for (offset, length) at the start of dst and stores
        // the number of bytes written to ret. When one copy op cannot carry the
        // whole length, the first op is followed by repeat ops (three inlined
        // copies of the emitRepeat sequence below). dst_len is never read.
        //
        // Dispatch:
        //   offset >= 65536: 5-byte copy (tag + 32-bit offset). If length > 64,
        //                    a 64-byte copy (tag 0xff) is written first.
        //   offset <  65536, length <= 64: 2-byte copy when length < 12 and
        //                    offset < 2048, otherwise a 3-byte copy.
        //   offset <  65536, length >  64: offset < 2048 -> 2-byte copy of 8,
        //                    otherwise a 3-byte copy of 60 (tag 0xee); the rest
        //                    is emitted as repeats.
        //
        // Registers: AX = write cursor, BX = bytes written, CX = offset,
        // DX = remaining length, SI/DI = scratch.
  18475 TEXT ·emitCopy(SB), NOSPLIT, $0-48
  18476 	XORQ BX, BX // BX = bytes written so far
  18477 	MOVQ dst_base+0(FP), AX
  18478 	MOVQ offset+24(FP), CX
  18479 	MOVQ length+32(FP), DX
  18480 
  18481 	// emitCopy
  18482 	CMPL CX, $0x00010000
  18483 	JB   two_byte_offset_standalone // offset fits in 16 bits
  18484 	CMPL DX, $0x40
  18485 	JBE  four_bytes_remain_standalone // length <= 64: a single 5-byte op
  18486 	MOVB $0xff, (AX) // 5-byte copy consuming 64 bytes of length
  18487 	MOVL CX, 1(AX)
  18488 	LEAL -64(DX), DX
  18489 	ADDQ $0x05, BX
  18490 	ADDQ $0x05, AX
  18491 	CMPL DX, $0x04
  18492 	JB   four_bytes_remain_standalone // fewer than 4 left: not emitted as a repeat
  18493 
  18494 	// emitRepeat
  18495 emit_repeat_again_standalone_emit_copy:
  18496 	MOVL DX, SI // SI = L (remaining length)
  18497 	LEAL -4(DX), DX // DX = L-4
  18498 	CMPL SI, $0x08
  18499 	JBE  repeat_two_standalone_emit_copy
  18500 	CMPL SI, $0x0c
  18501 	JAE  cant_repeat_two_offset_standalone_emit_copy
  18502 	CMPL CX, $0x00000800
  18503 	JB   repeat_two_offset_standalone_emit_copy
  18504 
  18505 cant_repeat_two_offset_standalone_emit_copy:
  18506 	CMPL DX, $0x00000104
  18507 	JB   repeat_three_standalone_emit_copy
  18508 	CMPL DX, $0x00010100
  18509 	JB   repeat_four_standalone_emit_copy
  18510 	CMPL DX, $0x0100ffff
  18511 	JB   repeat_five_standalone_emit_copy
  18512 	LEAL -16842747(DX), DX // DX = L - 0x0100ffff: what is left after this maximal repeat
  18513 	MOVL $0xfffb001d, (AX)
  18514 	MOVB $0xff, 4(AX)
  18515 	ADDQ $0x05, AX
  18516 	ADDQ $0x05, BX
  18517 	JMP  emit_repeat_again_standalone_emit_copy
  18518 
  18519 repeat_five_standalone_emit_copy:
  18520 	LEAL -65536(DX), DX
  18521 	MOVL DX, CX // offset is dead here; CX carries the high length byte
  18522 	MOVW $0x001d, (AX)
  18523 	MOVW DX, 2(AX)
  18524 	SARL $0x10, CX
  18525 	MOVB CL, 4(AX)
  18526 	ADDQ $0x05, BX
  18527 	ADDQ $0x05, AX
  18528 	JMP  gen_emit_copy_end
  18529 
  18530 repeat_four_standalone_emit_copy:
  18531 	LEAL -256(DX), DX
  18532 	MOVW $0x0019, (AX)
  18533 	MOVW DX, 2(AX)
  18534 	ADDQ $0x04, BX
  18535 	ADDQ $0x04, AX
  18536 	JMP  gen_emit_copy_end
  18537 
  18538 repeat_three_standalone_emit_copy:
  18539 	LEAL -4(DX), DX
  18540 	MOVW $0x0015, (AX)
  18541 	MOVB DL, 2(AX)
  18542 	ADDQ $0x03, BX
  18543 	ADDQ $0x03, AX
  18544 	JMP  gen_emit_copy_end
  18545 
  18546 repeat_two_standalone_emit_copy:
  18547 	SHLL $0x02, DX
  18548 	ORL  $0x01, DX // (L-4)<<2 | 1
  18549 	MOVW DX, (AX)
  18550 	ADDQ $0x02, BX
  18551 	ADDQ $0x02, AX
  18552 	JMP  gen_emit_copy_end
  18553 
  18554 repeat_two_offset_standalone_emit_copy:
  18555 	XORQ SI, SI
  18556 	LEAL 1(SI)(DX*4), DX // (L-4)<<2 | 1
  18557 	MOVB CL, 1(AX) // low 8 bits of offset
  18558 	SARL $0x08, CX
  18559 	SHLL $0x05, CX // high offset bits into tag bits 5..7
  18560 	ORL  CX, DX
  18561 	MOVB DL, (AX)
  18562 	ADDQ $0x02, BX
  18563 	ADDQ $0x02, AX
  18564 	JMP  gen_emit_copy_end
  18565 
  18566 four_bytes_remain_standalone:
  18567 	TESTL DX, DX
  18568 	JZ    gen_emit_copy_end // nothing left to encode
  18569 	XORL  SI, SI
  18570 	LEAL  -1(SI)(DX*4), DX // tag = 4*length-1 = (length-1)<<2 | 3
  18571 	MOVB  DL, (AX)
  18572 	MOVL  CX, 1(AX) // 32-bit offset
  18573 	ADDQ  $0x05, BX
  18574 	ADDQ  $0x05, AX
  18575 	JMP   gen_emit_copy_end
  18576 
  18577 two_byte_offset_standalone:
  18578 	CMPL DX, $0x40
  18579 	JBE  two_byte_offset_short_standalone // length <= 64: one op is enough
  18580 	CMPL CX, $0x00000800
  18581 	JAE  long_offset_short_standalone
  18582 	MOVL $0x00000001, SI
  18583 	LEAL 16(SI), SI // SI = 17 = (8-4)<<2 | 1: 2-byte copy of length 8
  18584 	MOVB CL, 1(AX) // low 8 bits of offset
  18585 	MOVL CX, DI
  18586 	SHRL $0x08, DI
  18587 	SHLL $0x05, DI // high offset bits into tag bits 5..7
  18588 	ORL  DI, SI
  18589 	MOVB SI, (AX)
  18590 	ADDQ $0x02, BX
  18591 	ADDQ $0x02, AX
  18592 	SUBL $0x08, DX // 8 bytes of length consumed
  18593 
  18594 	// emitRepeat
  18595 	LEAL -4(DX), DX // DX = L-4, as the label jumped to expects
  18596 	JMP  cant_repeat_two_offset_standalone_emit_copy_short_2b // enter past the 2-byte repeat forms
  18597 
  18598 emit_repeat_again_standalone_emit_copy_short_2b:
  18599 	MOVL DX, SI
  18600 	LEAL -4(DX), DX
  18601 	CMPL SI, $0x08
  18602 	JBE  repeat_two_standalone_emit_copy_short_2b
  18603 	CMPL SI, $0x0c
  18604 	JAE  cant_repeat_two_offset_standalone_emit_copy_short_2b
  18605 	CMPL CX, $0x00000800
  18606 	JB   repeat_two_offset_standalone_emit_copy_short_2b
  18607 
  18608 cant_repeat_two_offset_standalone_emit_copy_short_2b:
  18609 	CMPL DX, $0x00000104
  18610 	JB   repeat_three_standalone_emit_copy_short_2b
  18611 	CMPL DX, $0x00010100
  18612 	JB   repeat_four_standalone_emit_copy_short_2b
  18613 	CMPL DX, $0x0100ffff
  18614 	JB   repeat_five_standalone_emit_copy_short_2b
  18615 	LEAL -16842747(DX), DX
  18616 	MOVL $0xfffb001d, (AX)
  18617 	MOVB $0xff, 4(AX)
  18618 	ADDQ $0x05, AX
  18619 	ADDQ $0x05, BX
  18620 	JMP  emit_repeat_again_standalone_emit_copy_short_2b
  18621 
  18622 repeat_five_standalone_emit_copy_short_2b:
  18623 	LEAL -65536(DX), DX
  18624 	MOVL DX, CX
  18625 	MOVW $0x001d, (AX)
  18626 	MOVW DX, 2(AX)
  18627 	SARL $0x10, CX
  18628 	MOVB CL, 4(AX)
  18629 	ADDQ $0x05, BX
  18630 	ADDQ $0x05, AX
  18631 	JMP  gen_emit_copy_end
  18632 
  18633 repeat_four_standalone_emit_copy_short_2b:
  18634 	LEAL -256(DX), DX
  18635 	MOVW $0x0019, (AX)
  18636 	MOVW DX, 2(AX)
  18637 	ADDQ $0x04, BX
  18638 	ADDQ $0x04, AX
  18639 	JMP  gen_emit_copy_end
  18640 
  18641 repeat_three_standalone_emit_copy_short_2b:
  18642 	LEAL -4(DX), DX
  18643 	MOVW $0x0015, (AX)
  18644 	MOVB DL, 2(AX)
  18645 	ADDQ $0x03, BX
  18646 	ADDQ $0x03, AX
  18647 	JMP  gen_emit_copy_end
  18648 
  18649 repeat_two_standalone_emit_copy_short_2b:
  18650 	SHLL $0x02, DX
  18651 	ORL  $0x01, DX
  18652 	MOVW DX, (AX)
  18653 	ADDQ $0x02, BX
  18654 	ADDQ $0x02, AX
  18655 	JMP  gen_emit_copy_end
  18656 
  18657 repeat_two_offset_standalone_emit_copy_short_2b:
  18658 	XORQ SI, SI
  18659 	LEAL 1(SI)(DX*4), DX
  18660 	MOVB CL, 1(AX)
  18661 	SARL $0x08, CX
  18662 	SHLL $0x05, CX
  18663 	ORL  CX, DX
  18664 	MOVB DL, (AX)
  18665 	ADDQ $0x02, BX
  18666 	ADDQ $0x02, AX
  18667 	JMP  gen_emit_copy_end
  18668 
  18669 long_offset_short_standalone:
  18670 	MOVB $0xee, (AX) // 3-byte copy consuming 60 bytes of length
  18671 	MOVW CX, 1(AX) // 16-bit offset
  18672 	LEAL -60(DX), DX
  18673 	ADDQ $0x03, AX
  18674 	ADDQ $0x03, BX
  18675 
  18676 	// emitRepeat
  18677 emit_repeat_again_standalone_emit_copy_short:
  18678 	MOVL DX, SI
  18679 	LEAL -4(DX), DX
  18680 	CMPL SI, $0x08
  18681 	JBE  repeat_two_standalone_emit_copy_short
  18682 	CMPL SI, $0x0c
  18683 	JAE  cant_repeat_two_offset_standalone_emit_copy_short
  18684 	CMPL CX, $0x00000800
  18685 	JB   repeat_two_offset_standalone_emit_copy_short
  18686 
  18687 cant_repeat_two_offset_standalone_emit_copy_short:
  18688 	CMPL DX, $0x00000104
  18689 	JB   repeat_three_standalone_emit_copy_short
  18690 	CMPL DX, $0x00010100
  18691 	JB   repeat_four_standalone_emit_copy_short
  18692 	CMPL DX, $0x0100ffff
  18693 	JB   repeat_five_standalone_emit_copy_short
  18694 	LEAL -16842747(DX), DX
  18695 	MOVL $0xfffb001d, (AX)
  18696 	MOVB $0xff, 4(AX)
  18697 	ADDQ $0x05, AX
  18698 	ADDQ $0x05, BX
  18699 	JMP  emit_repeat_again_standalone_emit_copy_short
  18700 
  18701 repeat_five_standalone_emit_copy_short:
  18702 	LEAL -65536(DX), DX
  18703 	MOVL DX, CX
  18704 	MOVW $0x001d, (AX)
  18705 	MOVW DX, 2(AX)
  18706 	SARL $0x10, CX
  18707 	MOVB CL, 4(AX)
  18708 	ADDQ $0x05, BX
  18709 	ADDQ $0x05, AX
  18710 	JMP  gen_emit_copy_end
  18711 
  18712 repeat_four_standalone_emit_copy_short:
  18713 	LEAL -256(DX), DX
  18714 	MOVW $0x0019, (AX)
  18715 	MOVW DX, 2(AX)
  18716 	ADDQ $0x04, BX
  18717 	ADDQ $0x04, AX
  18718 	JMP  gen_emit_copy_end
  18719 
  18720 repeat_three_standalone_emit_copy_short:
  18721 	LEAL -4(DX), DX
  18722 	MOVW $0x0015, (AX)
  18723 	MOVB DL, 2(AX)
  18724 	ADDQ $0x03, BX
  18725 	ADDQ $0x03, AX
  18726 	JMP  gen_emit_copy_end
  18727 
  18728 repeat_two_standalone_emit_copy_short:
  18729 	SHLL $0x02, DX
  18730 	ORL  $0x01, DX
  18731 	MOVW DX, (AX)
  18732 	ADDQ $0x02, BX
  18733 	ADDQ $0x02, AX
  18734 	JMP  gen_emit_copy_end
  18735 
  18736 repeat_two_offset_standalone_emit_copy_short:
  18737 	XORQ SI, SI
  18738 	LEAL 1(SI)(DX*4), DX
  18739 	MOVB CL, 1(AX)
  18740 	SARL $0x08, CX
  18741 	SHLL $0x05, CX
  18742 	ORL  CX, DX
  18743 	MOVB DL, (AX)
  18744 	ADDQ $0x02, BX
  18745 	ADDQ $0x02, AX
  18746 	JMP  gen_emit_copy_end
  18747 
  18748 two_byte_offset_short_standalone:
  18749 	MOVL DX, SI
  18750 	SHLL $0x02, SI // SI = length<<2, base for both tag forms below
  18751 	CMPL DX, $0x0c
  18752 	JAE  emit_copy_three_standalone // length >= 12 needs the 3-byte form
  18753 	CMPL CX, $0x00000800
  18754 	JAE  emit_copy_three_standalone // offset >= 2048 needs the 3-byte form
  18755 	LEAL -15(SI), SI // tag = 4*length-15 = (length-4)<<2 | 1
  18756 	MOVB CL, 1(AX) // low 8 bits of offset
  18757 	SHRL $0x08, CX
  18758 	SHLL $0x05, CX // high offset bits into tag bits 5..7
  18759 	ORL  CX, SI
  18760 	MOVB SI, (AX)
  18761 	ADDQ $0x02, BX
  18762 	ADDQ $0x02, AX
  18763 	JMP  gen_emit_copy_end
  18764 
  18765 emit_copy_three_standalone:
  18766 	LEAL -2(SI), SI // tag = 4*length-2 = (length-1)<<2 | 2
  18767 	MOVB SI, (AX)
  18768 	MOVW CX, 1(AX) // 16-bit offset
  18769 	ADDQ $0x03, BX
  18770 	ADDQ $0x03, AX
  18771 
  18772 gen_emit_copy_end:
  18773 	MOVQ BX, ret+40(FP)
  18774 	RET
  18775 
  18776 // func emitCopyNoRepeat(dst []byte, offset int, length int) int
        //
        // Writes copy op(s) for (offset, length) at the start of dst and stores
        // the number of bytes written to ret. Lengths above 64 are split into
        // several plain copy ops; no repeat ops are produced. dst_len is never
        // read.
        //
        // Registers: AX = write cursor, BX = bytes written, CX = offset,
        // DX = remaining length, SI = scratch.
  18777 TEXT ·emitCopyNoRepeat(SB), NOSPLIT, $0-48
  18778 	XORQ BX, BX // BX = bytes written so far
  18779 	MOVQ dst_base+0(FP), AX
  18780 	MOVQ offset+24(FP), CX
  18781 	MOVQ length+32(FP), DX
  18782 
  18783 	// emitCopy
  18784 	CMPL CX, $0x00010000
  18785 	JB   two_byte_offset_standalone_snappy // offset fits in 16 bits
  18786 
  18787 four_bytes_loop_back_standalone_snappy:
  18788 	CMPL DX, $0x40
  18789 	JBE  four_bytes_remain_standalone_snappy
  18790 	MOVB $0xff, (AX) // 5-byte copy consuming 64 bytes of length
  18791 	MOVL CX, 1(AX)
  18792 	LEAL -64(DX), DX
  18793 	ADDQ $0x05, BX
  18794 	ADDQ $0x05, AX
  18795 	CMPL DX, $0x04
  18796 	JB   four_bytes_remain_standalone_snappy
  18797 	JMP  four_bytes_loop_back_standalone_snappy
  18798 
  18799 four_bytes_remain_standalone_snappy:
  18800 	TESTL DX, DX
  18801 	JZ    gen_emit_copy_end_snappy // nothing left to encode
  18802 	XORL  SI, SI
  18803 	LEAL  -1(SI)(DX*4), DX // tag = 4*length-1 = (length-1)<<2 | 3
  18804 	MOVB  DL, (AX)
  18805 	MOVL  CX, 1(AX) // 32-bit offset
  18806 	ADDQ  $0x05, BX
  18807 	ADDQ  $0x05, AX
  18808 	JMP   gen_emit_copy_end_snappy
  18809 
  18810 two_byte_offset_standalone_snappy:
  18811 	CMPL DX, $0x40
  18812 	JBE  two_byte_offset_short_standalone_snappy
  18813 	MOVB $0xee, (AX) // 3-byte copy consuming 60 bytes of length
  18814 	MOVW CX, 1(AX) // 16-bit offset
  18815 	LEAL -60(DX), DX
  18816 	ADDQ $0x03, AX
  18817 	ADDQ $0x03, BX
  18818 	JMP  two_byte_offset_standalone_snappy // loop until length <= 64
  18819 
  18820 two_byte_offset_short_standalone_snappy:
  18821 	MOVL DX, SI
  18822 	SHLL $0x02, SI // SI = length<<2, base for both tag forms below
  18823 	CMPL DX, $0x0c
  18824 	JAE  emit_copy_three_standalone_snappy // length >= 12 needs the 3-byte form
  18825 	CMPL CX, $0x00000800
  18826 	JAE  emit_copy_three_standalone_snappy // offset >= 2048 needs the 3-byte form
  18827 	LEAL -15(SI), SI // tag = 4*length-15 = (length-4)<<2 | 1
  18828 	MOVB CL, 1(AX) // low 8 bits of offset
  18829 	SHRL $0x08, CX
  18830 	SHLL $0x05, CX // high offset bits into tag bits 5..7
  18831 	ORL  CX, SI
  18832 	MOVB SI, (AX)
  18833 	ADDQ $0x02, BX
  18834 	ADDQ $0x02, AX
  18835 	JMP  gen_emit_copy_end_snappy
  18836 
  18837 emit_copy_three_standalone_snappy:
  18838 	LEAL -2(SI), SI // tag = 4*length-2 = (length-1)<<2 | 2
  18839 	MOVB SI, (AX)
  18840 	MOVW CX, 1(AX) // 16-bit offset
  18841 	ADDQ $0x03, BX
  18842 	ADDQ $0x03, AX
  18843 
  18844 gen_emit_copy_end_snappy:
  18845 	MOVQ BX, ret+40(FP)
  18846 	RET
  18847 
  18848 // func matchLen(a []byte, b []byte) int
  18849 // Requires: BMI
        //
        // Stores to ret the number of leading bytes on which a and b agree,
        // examining at most len(a) bytes. Only a_len is read (and only its low
        // 32 bits take part in the comparisons); b_len is never consulted, so
        // callers presumably guarantee len(b) >= len(a) - TODO confirm.
        //
        // Registers: AX = a base, CX = b base, DX = bytes left to compare,
        // SI = bytes matched so far, BX = scratch.
  18850 TEXT ·matchLen(SB), NOSPLIT, $0-56
  18851 	MOVQ a_base+0(FP), AX
  18852 	MOVQ b_base+24(FP), CX
  18853 	MOVQ a_len+8(FP), DX
  18854 
  18855 	// matchLen
  18856 	XORL SI, SI // SI = match length so far
  18857 	CMPL DX, $0x08
  18858 	JB   matchlen_match4_standalone // fewer than 8 bytes: go straight to the tail checks
  18859 
  18860 matchlen_loopback_standalone:
  18861 	MOVQ  (AX)(SI*1), BX
  18862 	XORQ  (CX)(SI*1), BX // non-zero bits mark where the two 8-byte words differ
  18863 	TESTQ BX, BX
  18864 	JZ    matchlen_loop_standalone // all 8 bytes equal
  18865 
  18866 #ifdef GOAMD64_v3
  18867 	TZCNTQ BX, BX
  18868 
  18869 #else
  18870 	BSFQ BX, BX
  18871 
  18872 #endif
  18873 	SARQ $0x03, BX // lowest differing bit / 8 = index of the first differing byte
  18874 	LEAL (SI)(BX*1), SI
  18875 	JMP  gen_match_len_end
  18876 
  18877 matchlen_loop_standalone:
  18878 	LEAL -8(DX), DX // 8 fewer bytes to compare
  18879 	LEAL 8(SI), SI // 8 more bytes matched
  18880 	CMPL DX, $0x08
  18881 	JAE  matchlen_loopback_standalone
  18882 	JZ   gen_match_len_end // NOTE(review): never taken - reached only when DX < 8, so ZF from the CMPL is clear
  18883 
  18884 matchlen_match4_standalone:
  18885 	CMPL DX, $0x04
  18886 	JB   matchlen_match2_standalone
  18887 	MOVL (AX)(SI*1), BX
  18888 	CMPL (CX)(SI*1), BX
  18889 	JNE  matchlen_match2_standalone // mismatch inside these 4 bytes: narrow down below
  18890 	SUBL $0x04, DX
  18891 	LEAL 4(SI), SI
  18892 
  18893 matchlen_match2_standalone:
  18894 	CMPL DX, $0x02
  18895 	JB   matchlen_match1_standalone
  18896 	MOVW (AX)(SI*1), BX
  18897 	CMPW (CX)(SI*1), BX
  18898 	JNE  matchlen_match1_standalone
  18899 	SUBL $0x02, DX
  18900 	LEAL 2(SI), SI
  18901 
  18902 matchlen_match1_standalone:
  18903 	CMPL DX, $0x01
  18904 	JB   gen_match_len_end // no bytes left
  18905 	MOVB (AX)(SI*1), BL
  18906 	CMPB (CX)(SI*1), BL
  18907 	JNE  gen_match_len_end
  18908 	LEAL 1(SI), SI
  18909 
  18910 gen_match_len_end:
  18911 	MOVQ SI, ret+48(FP)
  18912 	RET
  18913 
  18914 // func cvtLZ4BlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
  18915 // Requires: SSE2
  18916 TEXT ·cvtLZ4BlockAsm(SB), NOSPLIT, $0-64
  18917 	XORQ SI, SI
  18918 	MOVQ dst_base+0(FP), AX
  18919 	MOVQ dst_len+8(FP), CX
  18920 	MOVQ src_base+24(FP), DX
  18921 	MOVQ src_len+32(FP), BX
  18922 	LEAQ (DX)(BX*1), BX
  18923 	LEAQ -10(AX)(CX*1), CX
  18924 	XORQ DI, DI
  18925 
  18926 lz4_s2_loop:
  18927 	CMPQ    DX, BX
  18928 	JAE     lz4_s2_corrupt
  18929 	CMPQ    AX, CX
  18930 	JAE     lz4_s2_dstfull
  18931 	MOVBQZX (DX), R8
  18932 	MOVQ    R8, R9
  18933 	MOVQ    R8, R10
  18934 	SHRQ    $0x04, R9
  18935 	ANDQ    $0x0f, R10
  18936 	CMPQ    R8, $0xf0
  18937 	JB      lz4_s2_ll_end
  18938 
  18939 lz4_s2_ll_loop:
  18940 	INCQ    DX
  18941 	CMPQ    DX, BX
  18942 	JAE     lz4_s2_corrupt
  18943 	MOVBQZX (DX), R8
  18944 	ADDQ    R8, R9
  18945 	CMPQ    R8, $0xff
  18946 	JEQ     lz4_s2_ll_loop
  18947 
  18948 lz4_s2_ll_end:
  18949 	LEAQ  (DX)(R9*1), R8
  18950 	ADDQ  $0x04, R10
  18951 	CMPQ  R8, BX
  18952 	JAE   lz4_s2_corrupt
  18953 	INCQ  DX
  18954 	INCQ  R8
  18955 	TESTQ R9, R9
  18956 	JZ    lz4_s2_lits_done
  18957 	LEAQ  (AX)(R9*1), R11
  18958 	CMPQ  R11, CX
  18959 	JAE   lz4_s2_dstfull
  18960 	ADDQ  R9, SI
  18961 	LEAL  -1(R9), R11
  18962 	CMPL  R11, $0x3c
  18963 	JB    one_byte_lz4_s2
  18964 	CMPL  R11, $0x00000100
  18965 	JB    two_bytes_lz4_s2
  18966 	CMPL  R11, $0x00010000
  18967 	JB    three_bytes_lz4_s2
  18968 	CMPL  R11, $0x01000000
  18969 	JB    four_bytes_lz4_s2
  18970 	MOVB  $0xfc, (AX)
  18971 	MOVL  R11, 1(AX)
  18972 	ADDQ  $0x05, AX
  18973 	JMP   memmove_long_lz4_s2
  18974 
  18975 four_bytes_lz4_s2:
  18976 	MOVL R11, R12
  18977 	SHRL $0x10, R12
  18978 	MOVB $0xf8, (AX)
  18979 	MOVW R11, 1(AX)
  18980 	MOVB R12, 3(AX)
  18981 	ADDQ $0x04, AX
  18982 	JMP  memmove_long_lz4_s2
  18983 
  18984 three_bytes_lz4_s2:
  18985 	MOVB $0xf4, (AX)
  18986 	MOVW R11, 1(AX)
  18987 	ADDQ $0x03, AX
  18988 	JMP  memmove_long_lz4_s2
  18989 
  18990 two_bytes_lz4_s2:
  18991 	MOVB $0xf0, (AX)
  18992 	MOVB R11, 1(AX)
  18993 	ADDQ $0x02, AX
  18994 	CMPL R11, $0x40
  18995 	JB   memmove_lz4_s2
  18996 	JMP  memmove_long_lz4_s2
  18997 
  18998 one_byte_lz4_s2:
  18999 	SHLB $0x02, R11
  19000 	MOVB R11, (AX)
  19001 	ADDQ $0x01, AX
  19002 
  19003 memmove_lz4_s2:
  19004 	LEAQ (AX)(R9*1), R11
  19005 
  19006 	// genMemMoveShort
  19007 	CMPQ R9, $0x08
  19008 	JBE  emit_lit_memmove_lz4_s2_memmove_move_8
  19009 	CMPQ R9, $0x10
  19010 	JBE  emit_lit_memmove_lz4_s2_memmove_move_8through16
  19011 	CMPQ R9, $0x20
  19012 	JBE  emit_lit_memmove_lz4_s2_memmove_move_17through32
  19013 	JMP  emit_lit_memmove_lz4_s2_memmove_move_33through64
  19014 
  19015 emit_lit_memmove_lz4_s2_memmove_move_8:
  19016 	MOVQ (DX), R12
  19017 	MOVQ R12, (AX)
  19018 	JMP  memmove_end_copy_lz4_s2
  19019 
  19020 emit_lit_memmove_lz4_s2_memmove_move_8through16:
  19021 	MOVQ (DX), R12
  19022 	MOVQ -8(DX)(R9*1), DX
  19023 	MOVQ R12, (AX)
  19024 	MOVQ DX, -8(AX)(R9*1)
  19025 	JMP  memmove_end_copy_lz4_s2
  19026 
  19027 emit_lit_memmove_lz4_s2_memmove_move_17through32:
  19028 	MOVOU (DX), X0
  19029 	MOVOU -16(DX)(R9*1), X1
  19030 	MOVOU X0, (AX)
  19031 	MOVOU X1, -16(AX)(R9*1)
  19032 	JMP   memmove_end_copy_lz4_s2
  19033 
  19034 emit_lit_memmove_lz4_s2_memmove_move_33through64:
  19035 	MOVOU (DX), X0
  19036 	MOVOU 16(DX), X1
  19037 	MOVOU -32(DX)(R9*1), X2
  19038 	MOVOU -16(DX)(R9*1), X3
  19039 	MOVOU X0, (AX)
  19040 	MOVOU X1, 16(AX)
  19041 	MOVOU X2, -32(AX)(R9*1)
  19042 	MOVOU X3, -16(AX)(R9*1)
  19043 
  19044 memmove_end_copy_lz4_s2:
  19045 	MOVQ R11, AX
  19046 	JMP  lz4_s2_lits_emit_done
  19047 
  19048 memmove_long_lz4_s2:
  19049 	LEAQ (AX)(R9*1), R11
  19050 
  19051 	// genMemMoveLong
  19052 	MOVOU (DX), X0
  19053 	MOVOU 16(DX), X1
  19054 	MOVOU -32(DX)(R9*1), X2
  19055 	MOVOU -16(DX)(R9*1), X3
  19056 	MOVQ  R9, R13
  19057 	SHRQ  $0x05, R13
  19058 	MOVQ  AX, R12
  19059 	ANDL  $0x0000001f, R12
  19060 	MOVQ  $0x00000040, R14
  19061 	SUBQ  R12, R14
  19062 	DECQ  R13
  19063 	JA    emit_lit_memmove_long_lz4_s2large_forward_sse_loop_32
  19064 	LEAQ  -32(DX)(R14*1), R12
  19065 	LEAQ  -32(AX)(R14*1), R15
  19066 
  19067 emit_lit_memmove_long_lz4_s2large_big_loop_back:
  19068 	MOVOU (R12), X4
  19069 	MOVOU 16(R12), X5
  19070 	MOVOA X4, (R15)
  19071 	MOVOA X5, 16(R15)
  19072 	ADDQ  $0x20, R15
  19073 	ADDQ  $0x20, R12
  19074 	ADDQ  $0x20, R14
  19075 	DECQ  R13
  19076 	JNA   emit_lit_memmove_long_lz4_s2large_big_loop_back
  19077 
  19078 emit_lit_memmove_long_lz4_s2large_forward_sse_loop_32:
  19079 	MOVOU -32(DX)(R14*1), X4
  19080 	MOVOU -16(DX)(R14*1), X5
  19081 	MOVOA X4, -32(AX)(R14*1)
  19082 	MOVOA X5, -16(AX)(R14*1)
  19083 	ADDQ  $0x20, R14
  19084 	CMPQ  R9, R14
  19085 	JAE   emit_lit_memmove_long_lz4_s2large_forward_sse_loop_32
  19086 	MOVOU X0, (AX)
  19087 	MOVOU X1, 16(AX)
  19088 	MOVOU X2, -32(AX)(R9*1)
  19089 	MOVOU X3, -16(AX)(R9*1)
  19090 	MOVQ  R11, AX
  19091 
  19092 lz4_s2_lits_emit_done:
  19093 	MOVQ R8, DX
  19094 
  19095 lz4_s2_lits_done:
  19096 	CMPQ DX, BX
  19097 	JNE  lz4_s2_match
  19098 	CMPQ R10, $0x04
  19099 	JEQ  lz4_s2_done
  19100 	JMP  lz4_s2_corrupt
  19101 
  19102 lz4_s2_match:
  19103 	LEAQ    2(DX), R8
  19104 	CMPQ    R8, BX
  19105 	JAE     lz4_s2_corrupt
  19106 	MOVWQZX (DX), R9
  19107 	MOVQ    R8, DX
  19108 	TESTQ   R9, R9
  19109 	JZ      lz4_s2_corrupt
  19110 	CMPQ    R9, SI
  19111 	JA      lz4_s2_corrupt
  19112 	CMPQ    R10, $0x13
  19113 	JNE     lz4_s2_ml_done
  19114 
  19115 lz4_s2_ml_loop:
  19116 	MOVBQZX (DX), R8
  19117 	INCQ    DX
  19118 	ADDQ    R8, R10
  19119 	CMPQ    DX, BX
  19120 	JAE     lz4_s2_corrupt
  19121 	CMPQ    R8, $0xff
  19122 	JEQ     lz4_s2_ml_loop
  19123 
  19124 lz4_s2_ml_done:
  19125 	ADDQ R10, SI
  19126 	CMPQ R9, DI
  19127 	JNE  lz4_s2_docopy
  19128 
  19129 	// emitRepeat
  19130 emit_repeat_again_lz4_s2:
  19131 	MOVL R10, R8
  19132 	LEAL -4(R10), R10
  19133 	CMPL R8, $0x08
  19134 	JBE  repeat_two_lz4_s2
  19135 	CMPL R8, $0x0c
  19136 	JAE  cant_repeat_two_offset_lz4_s2
  19137 	CMPL R9, $0x00000800
  19138 	JB   repeat_two_offset_lz4_s2
  19139 
  19140 cant_repeat_two_offset_lz4_s2:
  19141 	CMPL R10, $0x00000104
  19142 	JB   repeat_three_lz4_s2
  19143 	CMPL R10, $0x00010100
  19144 	JB   repeat_four_lz4_s2
  19145 	CMPL R10, $0x0100ffff
  19146 	JB   repeat_five_lz4_s2
  19147 	LEAL -16842747(R10), R10
  19148 	MOVL $0xfffb001d, (AX)
  19149 	MOVB $0xff, 4(AX)
  19150 	ADDQ $0x05, AX
  19151 	JMP  emit_repeat_again_lz4_s2
  19152 
  19153 repeat_five_lz4_s2:
  19154 	LEAL -65536(R10), R10
  19155 	MOVL R10, R9
  19156 	MOVW $0x001d, (AX)
  19157 	MOVW R10, 2(AX)
  19158 	SARL $0x10, R9
  19159 	MOVB R9, 4(AX)
  19160 	ADDQ $0x05, AX
  19161 	JMP  lz4_s2_loop
  19162 
  19163 repeat_four_lz4_s2:
  19164 	LEAL -256(R10), R10
  19165 	MOVW $0x0019, (AX)
  19166 	MOVW R10, 2(AX)
  19167 	ADDQ $0x04, AX
  19168 	JMP  lz4_s2_loop
  19169 
  19170 repeat_three_lz4_s2:
  19171 	LEAL -4(R10), R10
  19172 	MOVW $0x0015, (AX)
  19173 	MOVB R10, 2(AX)
  19174 	ADDQ $0x03, AX
  19175 	JMP  lz4_s2_loop
  19176 
  19177 repeat_two_lz4_s2:
  19178 	SHLL $0x02, R10
  19179 	ORL  $0x01, R10
  19180 	MOVW R10, (AX)
  19181 	ADDQ $0x02, AX
  19182 	JMP  lz4_s2_loop
  19183 
  19184 repeat_two_offset_lz4_s2:
  19185 	XORQ R8, R8
  19186 	LEAL 1(R8)(R10*4), R10
  19187 	MOVB R9, 1(AX)
  19188 	SARL $0x08, R9
  19189 	SHLL $0x05, R9
  19190 	ORL  R9, R10
  19191 	MOVB R10, (AX)
  19192 	ADDQ $0x02, AX
  19193 	JMP  lz4_s2_loop
  19194 
  19195 lz4_s2_docopy:
  19196 	MOVQ R9, DI
  19197 
  19198 	// emitCopy
  19199 	CMPL R10, $0x40
  19200 	JBE  two_byte_offset_short_lz4_s2
  19201 	CMPL R9, $0x00000800
  19202 	JAE  long_offset_short_lz4_s2
  19203 	MOVL $0x00000001, R8
  19204 	LEAL 16(R8), R8
  19205 	MOVB R9, 1(AX)
  19206 	MOVL R9, R11
  19207 	SHRL $0x08, R11
  19208 	SHLL $0x05, R11
  19209 	ORL  R11, R8
  19210 	MOVB R8, (AX)
  19211 	ADDQ $0x02, AX
  19212 	SUBL $0x08, R10
  19213 
  19214 	// emitRepeat
  19215 	LEAL -4(R10), R10
  19216 	JMP  cant_repeat_two_offset_lz4_s2_emit_copy_short_2b
  19217 
  19218 emit_repeat_again_lz4_s2_emit_copy_short_2b:
  19219 	MOVL R10, R8
  19220 	LEAL -4(R10), R10
  19221 	CMPL R8, $0x08
  19222 	JBE  repeat_two_lz4_s2_emit_copy_short_2b
  19223 	CMPL R8, $0x0c
  19224 	JAE  cant_repeat_two_offset_lz4_s2_emit_copy_short_2b
  19225 	CMPL R9, $0x00000800
  19226 	JB   repeat_two_offset_lz4_s2_emit_copy_short_2b
  19227 
  19228 cant_repeat_two_offset_lz4_s2_emit_copy_short_2b:
  19229 	CMPL R10, $0x00000104
  19230 	JB   repeat_three_lz4_s2_emit_copy_short_2b
  19231 	CMPL R10, $0x00010100
  19232 	JB   repeat_four_lz4_s2_emit_copy_short_2b
  19233 	CMPL R10, $0x0100ffff
  19234 	JB   repeat_five_lz4_s2_emit_copy_short_2b
  19235 	LEAL -16842747(R10), R10
  19236 	MOVL $0xfffb001d, (AX)
  19237 	MOVB $0xff, 4(AX)
  19238 	ADDQ $0x05, AX
  19239 	JMP  emit_repeat_again_lz4_s2_emit_copy_short_2b
  19240 
  19241 repeat_five_lz4_s2_emit_copy_short_2b:
  19242 	LEAL -65536(R10), R10
  19243 	MOVL R10, R9
  19244 	MOVW $0x001d, (AX)
  19245 	MOVW R10, 2(AX)
  19246 	SARL $0x10, R9
  19247 	MOVB R9, 4(AX)
  19248 	ADDQ $0x05, AX
  19249 	JMP  lz4_s2_loop
  19250 
  19251 repeat_four_lz4_s2_emit_copy_short_2b:
  19252 	LEAL -256(R10), R10
  19253 	MOVW $0x0019, (AX)
  19254 	MOVW R10, 2(AX)
  19255 	ADDQ $0x04, AX
  19256 	JMP  lz4_s2_loop
  19257 
  19258 repeat_three_lz4_s2_emit_copy_short_2b:
  19259 	LEAL -4(R10), R10
  19260 	MOVW $0x0015, (AX)
  19261 	MOVB R10, 2(AX)
  19262 	ADDQ $0x03, AX
  19263 	JMP  lz4_s2_loop
  19264 
  19265 repeat_two_lz4_s2_emit_copy_short_2b:
  19266 	SHLL $0x02, R10
  19267 	ORL  $0x01, R10
  19268 	MOVW R10, (AX)
  19269 	ADDQ $0x02, AX
  19270 	JMP  lz4_s2_loop
  19271 
  19272 repeat_two_offset_lz4_s2_emit_copy_short_2b:
  19273 	XORQ R8, R8
  19274 	LEAL 1(R8)(R10*4), R10
  19275 	MOVB R9, 1(AX)
  19276 	SARL $0x08, R9
  19277 	SHLL $0x05, R9
  19278 	ORL  R9, R10
  19279 	MOVB R10, (AX)
  19280 	ADDQ $0x02, AX
  19281 	JMP  lz4_s2_loop
  19282 
  19283 long_offset_short_lz4_s2:
  19284 	MOVB $0xee, (AX)
  19285 	MOVW R9, 1(AX)
  19286 	LEAL -60(R10), R10
  19287 	ADDQ $0x03, AX
  19288 
  19289 	// emitRepeat
  19290 emit_repeat_again_lz4_s2_emit_copy_short:
  19291 	MOVL R10, R8
  19292 	LEAL -4(R10), R10
  19293 	CMPL R8, $0x08
  19294 	JBE  repeat_two_lz4_s2_emit_copy_short
  19295 	CMPL R8, $0x0c
  19296 	JAE  cant_repeat_two_offset_lz4_s2_emit_copy_short
  19297 	CMPL R9, $0x00000800
  19298 	JB   repeat_two_offset_lz4_s2_emit_copy_short
  19299 
  19300 cant_repeat_two_offset_lz4_s2_emit_copy_short:
  19301 	CMPL R10, $0x00000104
  19302 	JB   repeat_three_lz4_s2_emit_copy_short
  19303 	CMPL R10, $0x00010100
  19304 	JB   repeat_four_lz4_s2_emit_copy_short
  19305 	CMPL R10, $0x0100ffff
  19306 	JB   repeat_five_lz4_s2_emit_copy_short
  19307 	LEAL -16842747(R10), R10
  19308 	MOVL $0xfffb001d, (AX)
  19309 	MOVB $0xff, 4(AX)
  19310 	ADDQ $0x05, AX
  19311 	JMP  emit_repeat_again_lz4_s2_emit_copy_short
  19312 
  19313 repeat_five_lz4_s2_emit_copy_short:
  19314 	LEAL -65536(R10), R10
  19315 	MOVL R10, R9
  19316 	MOVW $0x001d, (AX)
  19317 	MOVW R10, 2(AX)
  19318 	SARL $0x10, R9
  19319 	MOVB R9, 4(AX)
  19320 	ADDQ $0x05, AX
  19321 	JMP  lz4_s2_loop
  19322 
  19323 repeat_four_lz4_s2_emit_copy_short:
  19324 	LEAL -256(R10), R10
  19325 	MOVW $0x0019, (AX)
  19326 	MOVW R10, 2(AX)
  19327 	ADDQ $0x04, AX
  19328 	JMP  lz4_s2_loop
  19329 
  19330 repeat_three_lz4_s2_emit_copy_short:
  19331 	LEAL -4(R10), R10
  19332 	MOVW $0x0015, (AX)
  19333 	MOVB R10, 2(AX)
  19334 	ADDQ $0x03, AX
  19335 	JMP  lz4_s2_loop
  19336 
  19337 repeat_two_lz4_s2_emit_copy_short:
  19338 	SHLL $0x02, R10
  19339 	ORL  $0x01, R10
  19340 	MOVW R10, (AX)
  19341 	ADDQ $0x02, AX
  19342 	JMP  lz4_s2_loop
  19343 
  19344 repeat_two_offset_lz4_s2_emit_copy_short:
  19345 	XORQ R8, R8
  19346 	LEAL 1(R8)(R10*4), R10
  19347 	MOVB R9, 1(AX)
  19348 	SARL $0x08, R9
  19349 	SHLL $0x05, R9
  19350 	ORL  R9, R10
  19351 	MOVB R10, (AX)
  19352 	ADDQ $0x02, AX
  19353 	JMP  lz4_s2_loop
  19354 
  19355 two_byte_offset_short_lz4_s2:
  19356 	MOVL R10, R8
  19357 	SHLL $0x02, R8
  19358 	CMPL R10, $0x0c
  19359 	JAE  emit_copy_three_lz4_s2
  19360 	CMPL R9, $0x00000800
  19361 	JAE  emit_copy_three_lz4_s2
  19362 	LEAL -15(R8), R8
  19363 	MOVB R9, 1(AX)
  19364 	SHRL $0x08, R9
  19365 	SHLL $0x05, R9
  19366 	ORL  R9, R8
  19367 	MOVB R8, (AX)
  19368 	ADDQ $0x02, AX
  19369 	JMP  lz4_s2_loop
  19370 
  19371 emit_copy_three_lz4_s2:
  19372 	LEAL -2(R8), R8
  19373 	MOVB R8, (AX)
  19374 	MOVW R9, 1(AX)
  19375 	ADDQ $0x03, AX
  19376 	JMP  lz4_s2_loop
  19377 
  19378 lz4_s2_done:
  19379 	MOVQ dst_base+0(FP), CX
  19380 	SUBQ CX, AX
  19381 	MOVQ SI, uncompressed+48(FP)
  19382 	MOVQ AX, dstUsed+56(FP)
  19383 	RET
  19384 
  19385 lz4_s2_corrupt:
  19386 	XORQ AX, AX
  19387 	LEAQ -1(AX), SI
  19388 	MOVQ SI, uncompressed+48(FP)
  19389 	RET
  19390 
  19391 lz4_s2_dstfull:
  19392 	XORQ AX, AX
  19393 	LEAQ -2(AX), SI
  19394 	MOVQ SI, uncompressed+48(FP)
  19395 	RET
  19396 
  19397 // func cvtLZ4sBlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
  19398 // Requires: SSE2
        //
        // Walks the sequences in src (token byte, optional length-extension bytes,
        // literal bytes, 16-bit match offset, optional match-length extension bytes)
        // and re-emits them into dst as literal, copy and repeat ops.
        // NOTE(review): presumably src is an LZ4s block and the output is S2 encoding,
        // going by the function/label names and -pkg=s2 on the generator line - confirm.
        //
        // Results:
        //   uncompressed = sum of all literal and match lengths on success,
        //                  -1 if src is truncated or malformed,
        //                  -2 if dst does not have enough room.
        //   dstUsed      = number of bytes written to dst; only stored on success.
        //
        // Register roles:
        //   AX  dst write cursor          CX  dst limit (dst end - 10)
        //   DX  src read cursor           BX  src end
        //   SI  running uncompressed size DI  offset of the previous match (0 at start)
        //   R9  literal length, later the match offset
        //   R10 match length (token low nibble + 3, plus extension bytes)
        //   R8, R11-R15, X0-X5 scratch
  19399 TEXT ·cvtLZ4sBlockAsm(SB), NOSPLIT, $0-64
  19400 	XORQ SI, SI // uncompressed size so far = 0
  19401 	MOVQ dst_base+0(FP), AX
  19402 	MOVQ dst_len+8(FP), CX
  19403 	MOVQ src_base+24(FP), DX
  19404 	MOVQ src_len+32(FP), BX
  19405 	LEAQ (DX)(BX*1), BX // BX = one past the last src byte
  19406 	LEAQ -10(AX)(CX*1), CX // CX = dst end minus a 10-byte safety margin
  19407 	XORQ DI, DI // no previous match offset yet
  19408 
  19409 lz4s_s2_loop: // top of the per-sequence loop
  19410 	CMPQ    DX, BX
  19411 	JAE     lz4s_s2_corrupt // a token is expected but src is exhausted
  19412 	CMPQ    AX, CX
  19413 	JAE     lz4s_s2_dstfull
  19414 	MOVBQZX (DX), R8 // R8 = token byte
  19415 	MOVQ    R8, R9
  19416 	MOVQ    R8, R10
  19417 	SHRQ    $0x04, R9 // R9 = literal length (high nibble)
  19418 	ANDQ    $0x0f, R10 // R10 = match length nibble (low nibble)
  19419 	CMPQ    R8, $0xf0
  19420 	JB      lz4s_s2_ll_end // literal nibble < 15: no extension bytes follow
  19421 
  19422 lz4s_s2_ll_loop: // add literal-length extension bytes until one is not 0xff
  19423 	INCQ    DX
  19424 	CMPQ    DX, BX
  19425 	JAE     lz4s_s2_corrupt
  19426 	MOVBQZX (DX), R8
  19427 	ADDQ    R8, R9
  19428 	CMPQ    R8, $0xff
  19429 	JEQ     lz4s_s2_ll_loop
  19430 
  19431 lz4s_s2_ll_end: // DX still points at the last length byte (or the token)
  19432 	LEAQ  (DX)(R9*1), R8
  19433 	ADDQ  $0x03, R10 // match length = nibble + 3
  19434 	CMPQ  R8, BX
  19435 	JAE   lz4s_s2_corrupt // literals would run past the end of src
  19436 	INCQ  DX // DX = first literal byte
  19437 	INCQ  R8 // R8 = first byte after the literals
  19438 	TESTQ R9, R9
  19439 	JZ    lz4s_s2_lits_done // no literals in this sequence
  19440 	LEAQ  (AX)(R9*1), R11
  19441 	CMPQ  R11, CX
  19442 	JAE   lz4s_s2_dstfull
  19443 	ADDQ  R9, SI
  19444 	LEAL  -1(R9), R11 // R11 = length-1, the value stored in the literal header
  19445 	CMPL  R11, $0x3c
  19446 	JB    one_byte_lz4s_s2 // length-1 < 60 fits in the tag byte itself
  19447 	CMPL  R11, $0x00000100
  19448 	JB    two_bytes_lz4s_s2
  19449 	CMPL  R11, $0x00010000
  19450 	JB    three_bytes_lz4s_s2
  19451 	CMPL  R11, $0x01000000
  19452 	JB    four_bytes_lz4s_s2
  19453 	MOVB  $0xfc, (AX) // 5-byte header: tag 0xfc + 32-bit length-1
  19454 	MOVL  R11, 1(AX)
  19455 	ADDQ  $0x05, AX
  19456 	JMP   memmove_long_lz4s_s2
  19457 
  19458 four_bytes_lz4s_s2: // 4-byte header: tag 0xf8 + 24-bit length-1
  19459 	MOVL R11, R12
  19460 	SHRL $0x10, R12
  19461 	MOVB $0xf8, (AX)
  19462 	MOVW R11, 1(AX)
  19463 	MOVB R12, 3(AX)
  19464 	ADDQ $0x04, AX
  19465 	JMP  memmove_long_lz4s_s2
  19466 
  19467 three_bytes_lz4s_s2: // 3-byte header: tag 0xf4 + 16-bit length-1
  19468 	MOVB $0xf4, (AX)
  19469 	MOVW R11, 1(AX)
  19470 	ADDQ $0x03, AX
  19471 	JMP  memmove_long_lz4s_s2
  19472 
  19473 two_bytes_lz4s_s2: // 2-byte header: tag 0xf0 + 8-bit length-1
  19474 	MOVB $0xf0, (AX)
  19475 	MOVB R11, 1(AX)
  19476 	ADDQ $0x02, AX
  19477 	CMPL R11, $0x40
  19478 	JB   memmove_lz4s_s2 // length <= 64 still uses the short copy
  19479 	JMP  memmove_long_lz4s_s2
  19480 
  19481 one_byte_lz4s_s2: // 1-byte header: (length-1) << 2
  19482 	SHLB $0x02, R11
  19483 	MOVB R11, (AX)
  19484 	ADDQ $0x01, AX
  19485 
  19486 memmove_lz4s_s2: // copy R9 (1..64) literal bytes from DX to AX
  19487 	LEAQ (AX)(R9*1), R11 // R11 = dst cursor after the literals
  19488 
  19489 	// genMemMoveShort
  19490 	CMPQ R9, $0x08
  19491 	JBE  emit_lit_memmove_lz4s_s2_memmove_move_8
  19492 	CMPQ R9, $0x10
  19493 	JBE  emit_lit_memmove_lz4s_s2_memmove_move_8through16
  19494 	CMPQ R9, $0x20
  19495 	JBE  emit_lit_memmove_lz4s_s2_memmove_move_17through32
  19496 	JMP  emit_lit_memmove_lz4s_s2_memmove_move_33through64
  19497 
  19498 emit_lit_memmove_lz4s_s2_memmove_move_8: // length <= 8: always moves a full 8 bytes
  19499 	MOVQ (DX), R12
  19500 	MOVQ R12, (AX)
  19501 	JMP  memmove_end_copy_lz4s_s2
  19502 
  19503 emit_lit_memmove_lz4s_s2_memmove_move_8through16: // 9..16: two overlapping 8-byte moves
  19504 	MOVQ (DX), R12
  19505 	MOVQ -8(DX)(R9*1), DX // clobbers DX; it is reloaded from R8 at lz4s_s2_lits_emit_done
  19506 	MOVQ R12, (AX)
  19507 	MOVQ DX, -8(AX)(R9*1)
  19508 	JMP  memmove_end_copy_lz4s_s2
  19509 
  19510 emit_lit_memmove_lz4s_s2_memmove_move_17through32: // two overlapping 16-byte moves
  19511 	MOVOU (DX), X0
  19512 	MOVOU -16(DX)(R9*1), X1
  19513 	MOVOU X0, (AX)
  19514 	MOVOU X1, -16(AX)(R9*1)
  19515 	JMP   memmove_end_copy_lz4s_s2
  19516 
  19517 emit_lit_memmove_lz4s_s2_memmove_move_33through64: // first 32 + last 32 bytes, overlapping
  19518 	MOVOU (DX), X0
  19519 	MOVOU 16(DX), X1
  19520 	MOVOU -32(DX)(R9*1), X2
  19521 	MOVOU -16(DX)(R9*1), X3
  19522 	MOVOU X0, (AX)
  19523 	MOVOU X1, 16(AX)
  19524 	MOVOU X2, -32(AX)(R9*1)
  19525 	MOVOU X3, -16(AX)(R9*1)
  19526 
  19527 memmove_end_copy_lz4s_s2:
  19528 	MOVQ R11, AX // advance dst cursor past the copied literals
  19529 	JMP  lz4s_s2_lits_emit_done
  19530 
  19531 memmove_long_lz4s_s2: // copy R9 (> 64) literal bytes from DX to AX
  19532 	LEAQ (AX)(R9*1), R11 // R11 = dst cursor after the literals
  19533 
  19534 	// genMemMoveLong
  19535 	MOVOU (DX), X0 // save the first 32 and last 32 source bytes in X0-X3
  19536 	MOVOU 16(DX), X1
  19537 	MOVOU -32(DX)(R9*1), X2
  19538 	MOVOU -16(DX)(R9*1), X3
  19539 	MOVQ  R9, R13
  19540 	SHRQ  $0x05, R13 // R13 = number of whole 32-byte chunks
  19541 	MOVQ  AX, R12
  19542 	ANDL  $0x0000001f, R12 // R12 = dst misalignment within 32 bytes
  19543 	MOVQ  $0x00000040, R14
  19544 	SUBQ  R12, R14 // R14 = 64 - misalignment, so AX+R14-32 is 32-byte aligned
  19545 	DECQ  R13
  19546 	JA    emit_lit_memmove_long_lz4s_s2large_forward_sse_loop_32
  19547 	LEAQ  -32(DX)(R14*1), R12
  19548 	LEAQ  -32(AX)(R14*1), R15
  19549 
  19550 emit_lit_memmove_long_lz4s_s2large_big_loop_back: // 32-byte chunks via explicit pointers R12 (src) / R15 (dst)
  19551 	MOVOU (R12), X4
  19552 	MOVOU 16(R12), X5
  19553 	MOVOA X4, (R15)
  19554 	MOVOA X5, 16(R15)
  19555 	ADDQ  $0x20, R15
  19556 	ADDQ  $0x20, R12
  19557 	ADDQ  $0x20, R14
  19558 	DECQ  R13
  19559 	JNA   emit_lit_memmove_long_lz4s_s2large_big_loop_back
  19560 
  19561 emit_lit_memmove_long_lz4s_s2large_forward_sse_loop_32: // 32-byte chunks indexed by R14, aligned stores to dst
  19562 	MOVOU -32(DX)(R14*1), X4
  19563 	MOVOU -16(DX)(R14*1), X5
  19564 	MOVOA X4, -32(AX)(R14*1)
  19565 	MOVOA X5, -16(AX)(R14*1)
  19566 	ADDQ  $0x20, R14
  19567 	CMPQ  R9, R14
  19568 	JAE   emit_lit_memmove_long_lz4s_s2large_forward_sse_loop_32 // keep going while length >= R14
  19569 	MOVOU X0, (AX) // finally store the saved head and tail with unaligned moves
  19570 	MOVOU X1, 16(AX)
  19571 	MOVOU X2, -32(AX)(R9*1)
  19572 	MOVOU X3, -16(AX)(R9*1)
  19573 	MOVQ  R11, AX // advance dst cursor past the copied literals
  19574 
  19575 lz4s_s2_lits_emit_done:
  19576 	MOVQ R8, DX // src cursor = first byte after the literals
  19577 
  19578 lz4s_s2_lits_done:
  19579 	CMPQ DX, BX
  19580 	JNE  lz4s_s2_match
  19581 	CMPQ R10, $0x03 // at end of src: only valid if the match nibble was 0
  19582 	JEQ  lz4s_s2_done
  19583 	JMP  lz4s_s2_corrupt
  19584 
  19585 lz4s_s2_match:
  19586 	CMPQ    R10, $0x03
  19587 	JEQ     lz4s_s2_loop // match nibble 0: no offset is read, continue with the next token
  19588 	LEAQ    2(DX), R8
  19589 	CMPQ    R8, BX
  19590 	JAE     lz4s_s2_corrupt // offset must be followed by at least one more src byte
  19591 	MOVWQZX (DX), R9 // R9 = 16-bit match offset
  19592 	MOVQ    R8, DX
  19593 	TESTQ   R9, R9
  19594 	JZ      lz4s_s2_corrupt // offset 0 is rejected
  19595 	CMPQ    R9, SI
  19596 	JA      lz4s_s2_corrupt // offset larger than the data described so far
  19597 	CMPQ    R10, $0x12
  19598 	JNE     lz4s_s2_ml_done // nibble < 15 (15+3 == 0x12): no extension bytes
  19599 
  19600 lz4s_s2_ml_loop: // add match-length extension bytes until one is not 0xff
  19601 	MOVBQZX (DX), R8
  19602 	INCQ    DX
  19603 	ADDQ    R8, R10
  19604 	CMPQ    DX, BX
  19605 	JAE     lz4s_s2_corrupt
  19606 	CMPQ    R8, $0xff
  19607 	JEQ     lz4s_s2_ml_loop
  19608 
  19609 lz4s_s2_ml_done:
  19610 	ADDQ R10, SI
  19611 	CMPQ R9, DI
  19612 	JNE  lz4s_s2_docopy // offset differs from the previous match: emit a copy
  19613 
  19614 	// emitRepeat
  19615 emit_repeat_again_lz4_s2: // same offset as the previous match: emit repeat op(s) for R10 bytes
  19616 	MOVL R10, R8 // R8 = length, R10 = length-4
  19617 	LEAL -4(R10), R10
  19618 	CMPL R8, $0x08
  19619 	JBE  repeat_two_lz4_s2
  19620 	CMPL R8, $0x0c
  19621 	JAE  cant_repeat_two_offset_lz4_s2
  19622 	CMPL R9, $0x00000800
  19623 	JB   repeat_two_offset_lz4_s2 // length 9..11 and offset < 2048
  19624 
  19625 cant_repeat_two_offset_lz4_s2: // choose the 3-, 4- or 5-byte form by length-4
  19626 	CMPL R10, $0x00000104
  19627 	JB   repeat_three_lz4_s2
  19628 	CMPL R10, $0x00010100
  19629 	JB   repeat_four_lz4_s2
  19630 	CMPL R10, $0x0100ffff
  19631 	JB   repeat_five_lz4_s2
  19632 	LEAL -16842747(R10), R10 // too long for one op: emit bytes 1d 00 fb ff ff and go around again
  19633 	MOVL $0xfffb001d, (AX)
  19634 	MOVB $0xff, 4(AX)
  19635 	ADDQ $0x05, AX
  19636 	JMP  emit_repeat_again_lz4_s2
  19637 
  19638 repeat_five_lz4_s2: // 5 bytes: 1d 00 + 24-bit (length-4-65536)
  19639 	LEAL -65536(R10), R10
  19640 	MOVL R10, R9
  19641 	MOVW $0x001d, (AX)
  19642 	MOVW R10, 2(AX)
  19643 	SARL $0x10, R9
  19644 	MOVB R9, 4(AX)
  19645 	ADDQ $0x05, AX
  19646 	JMP  lz4s_s2_loop
  19647 
  19648 repeat_four_lz4_s2: // 4 bytes: 19 00 + 16-bit (length-4-256)
  19649 	LEAL -256(R10), R10
  19650 	MOVW $0x0019, (AX)
  19651 	MOVW R10, 2(AX)
  19652 	ADDQ $0x04, AX
  19653 	JMP  lz4s_s2_loop
  19654 
  19655 repeat_three_lz4_s2: // 3 bytes: 15 00 + 8-bit (length-8)
  19656 	LEAL -4(R10), R10
  19657 	MOVW $0x0015, (AX)
  19658 	MOVB R10, 2(AX)
  19659 	ADDQ $0x03, AX
  19660 	JMP  lz4s_s2_loop
  19661 
  19662 repeat_two_lz4_s2: // 2 bytes: ((length-4)<<2 | 1) as a 16-bit word, high byte 0
  19663 	SHLL $0x02, R10
  19664 	ORL  $0x01, R10
  19665 	MOVW R10, (AX)
  19666 	ADDQ $0x02, AX
  19667 	JMP  lz4s_s2_loop
  19668 
  19669 repeat_two_offset_lz4_s2: // 2 bytes: ((length-4)<<2 | 1 | (offset>>8)<<5), then low offset byte
  19670 	XORQ R8, R8
  19671 	LEAL 1(R8)(R10*4), R10
  19672 	MOVB R9, 1(AX)
  19673 	SARL $0x08, R9
  19674 	SHLL $0x05, R9
  19675 	ORL  R9, R10
  19676 	MOVB R10, (AX)
  19677 	ADDQ $0x02, AX
  19678 	JMP  lz4s_s2_loop
  19679 
  19680 lz4s_s2_docopy:
  19681 	MOVQ R9, DI // remember this offset for the repeat check on the next match
  19682 
  19683 	// emitCopy
  19684 	CMPL R10, $0x40
  19685 	JBE  two_byte_offset_short_lz4_s2 // length <= 64 fits in a single copy op
  19686 	CMPL R9, $0x00000800
  19687 	JAE  long_offset_short_lz4_s2
  19688 	MOVL $0x00000001, R8 // offset < 2048: 2-byte copy op with tag base 0x11, accounts for 8 bytes
  19689 	LEAL 16(R8), R8
  19690 	MOVB R9, 1(AX)
  19691 	MOVL R9, R11
  19692 	SHRL $0x08, R11
  19693 	SHLL $0x05, R11
  19694 	ORL  R11, R8
  19695 	MOVB R8, (AX)
  19696 	ADDQ $0x02, AX
  19697 	SUBL $0x08, R10
  19698 
  19699 	// emitRepeat
  19700 	LEAL -4(R10), R10 // remaining length is > 56 here, so enter past the short-form checks
  19701 	JMP  cant_repeat_two_offset_lz4_s2_emit_copy_short_2b
  19702 
  19703 emit_repeat_again_lz4_s2_emit_copy_short_2b: // same repeat ladder as emit_repeat_again_lz4_s2
  19704 	MOVL R10, R8
  19705 	LEAL -4(R10), R10
  19706 	CMPL R8, $0x08
  19707 	JBE  repeat_two_lz4_s2_emit_copy_short_2b
  19708 	CMPL R8, $0x0c
  19709 	JAE  cant_repeat_two_offset_lz4_s2_emit_copy_short_2b
  19710 	CMPL R9, $0x00000800
  19711 	JB   repeat_two_offset_lz4_s2_emit_copy_short_2b
  19712 
  19713 cant_repeat_two_offset_lz4_s2_emit_copy_short_2b:
  19714 	CMPL R10, $0x00000104
  19715 	JB   repeat_three_lz4_s2_emit_copy_short_2b
  19716 	CMPL R10, $0x00010100
  19717 	JB   repeat_four_lz4_s2_emit_copy_short_2b
  19718 	CMPL R10, $0x0100ffff
  19719 	JB   repeat_five_lz4_s2_emit_copy_short_2b
  19720 	LEAL -16842747(R10), R10
  19721 	MOVL $0xfffb001d, (AX)
  19722 	MOVB $0xff, 4(AX)
  19723 	ADDQ $0x05, AX
  19724 	JMP  emit_repeat_again_lz4_s2_emit_copy_short_2b
  19725 
  19726 repeat_five_lz4_s2_emit_copy_short_2b:
  19727 	LEAL -65536(R10), R10
  19728 	MOVL R10, R9
  19729 	MOVW $0x001d, (AX)
  19730 	MOVW R10, 2(AX)
  19731 	SARL $0x10, R9
  19732 	MOVB R9, 4(AX)
  19733 	ADDQ $0x05, AX
  19734 	JMP  lz4s_s2_loop
  19735 
  19736 repeat_four_lz4_s2_emit_copy_short_2b:
  19737 	LEAL -256(R10), R10
  19738 	MOVW $0x0019, (AX)
  19739 	MOVW R10, 2(AX)
  19740 	ADDQ $0x04, AX
  19741 	JMP  lz4s_s2_loop
  19742 
  19743 repeat_three_lz4_s2_emit_copy_short_2b:
  19744 	LEAL -4(R10), R10
  19745 	MOVW $0x0015, (AX)
  19746 	MOVB R10, 2(AX)
  19747 	ADDQ $0x03, AX
  19748 	JMP  lz4s_s2_loop
  19749 
  19750 repeat_two_lz4_s2_emit_copy_short_2b:
  19751 	SHLL $0x02, R10
  19752 	ORL  $0x01, R10
  19753 	MOVW R10, (AX)
  19754 	ADDQ $0x02, AX
  19755 	JMP  lz4s_s2_loop
  19756 
  19757 repeat_two_offset_lz4_s2_emit_copy_short_2b:
  19758 	XORQ R8, R8
  19759 	LEAL 1(R8)(R10*4), R10
  19760 	MOVB R9, 1(AX)
  19761 	SARL $0x08, R9
  19762 	SHLL $0x05, R9
  19763 	ORL  R9, R10
  19764 	MOVB R10, (AX)
  19765 	ADDQ $0x02, AX
  19766 	JMP  lz4s_s2_loop
  19767 
  19768 long_offset_short_lz4_s2: // length > 64, offset >= 2048: 3-byte op (0xee + 16-bit offset) accounts for 60 bytes
  19769 	MOVB $0xee, (AX)
  19770 	MOVW R9, 1(AX)
  19771 	LEAL -60(R10), R10
  19772 	ADDQ $0x03, AX
  19773 
  19774 	// emitRepeat
  19775 emit_repeat_again_lz4_s2_emit_copy_short: // remainder goes out as repeat op(s); same ladder as above
  19776 	MOVL R10, R8
  19777 	LEAL -4(R10), R10
  19778 	CMPL R8, $0x08
  19779 	JBE  repeat_two_lz4_s2_emit_copy_short
  19780 	CMPL R8, $0x0c
  19781 	JAE  cant_repeat_two_offset_lz4_s2_emit_copy_short
  19782 	CMPL R9, $0x00000800
  19783 	JB   repeat_two_offset_lz4_s2_emit_copy_short
  19784 
  19785 cant_repeat_two_offset_lz4_s2_emit_copy_short:
  19786 	CMPL R10, $0x00000104
  19787 	JB   repeat_three_lz4_s2_emit_copy_short
  19788 	CMPL R10, $0x00010100
  19789 	JB   repeat_four_lz4_s2_emit_copy_short
  19790 	CMPL R10, $0x0100ffff
  19791 	JB   repeat_five_lz4_s2_emit_copy_short
  19792 	LEAL -16842747(R10), R10
  19793 	MOVL $0xfffb001d, (AX)
  19794 	MOVB $0xff, 4(AX)
  19795 	ADDQ $0x05, AX
  19796 	JMP  emit_repeat_again_lz4_s2_emit_copy_short
  19797 
  19798 repeat_five_lz4_s2_emit_copy_short:
  19799 	LEAL -65536(R10), R10
  19800 	MOVL R10, R9
  19801 	MOVW $0x001d, (AX)
  19802 	MOVW R10, 2(AX)
  19803 	SARL $0x10, R9
  19804 	MOVB R9, 4(AX)
  19805 	ADDQ $0x05, AX
  19806 	JMP  lz4s_s2_loop
  19807 
  19808 repeat_four_lz4_s2_emit_copy_short:
  19809 	LEAL -256(R10), R10
  19810 	MOVW $0x0019, (AX)
  19811 	MOVW R10, 2(AX)
  19812 	ADDQ $0x04, AX
  19813 	JMP  lz4s_s2_loop
  19814 
  19815 repeat_three_lz4_s2_emit_copy_short:
  19816 	LEAL -4(R10), R10
  19817 	MOVW $0x0015, (AX)
  19818 	MOVB R10, 2(AX)
  19819 	ADDQ $0x03, AX
  19820 	JMP  lz4s_s2_loop
  19821 
  19822 repeat_two_lz4_s2_emit_copy_short:
  19823 	SHLL $0x02, R10
  19824 	ORL  $0x01, R10
  19825 	MOVW R10, (AX)
  19826 	ADDQ $0x02, AX
  19827 	JMP  lz4s_s2_loop
  19828 
  19829 repeat_two_offset_lz4_s2_emit_copy_short:
  19830 	XORQ R8, R8
  19831 	LEAL 1(R8)(R10*4), R10
  19832 	MOVB R9, 1(AX)
  19833 	SARL $0x08, R9
  19834 	SHLL $0x05, R9
  19835 	ORL  R9, R10
  19836 	MOVB R10, (AX)
  19837 	ADDQ $0x02, AX
  19838 	JMP  lz4s_s2_loop
  19839 
  19840 two_byte_offset_short_lz4_s2: // length <= 64: a single copy op
  19841 	MOVL R10, R8
  19842 	SHLL $0x02, R8 // R8 = length << 2
  19843 	CMPL R10, $0x0c
  19844 	JAE  emit_copy_three_lz4_s2
  19845 	CMPL R9, $0x00000800
  19846 	JAE  emit_copy_three_lz4_s2
  19847 	LEAL -15(R8), R8 // length < 12 and offset < 2048: tag = (length-4)<<2 | 1 | (offset>>8)<<5
  19848 	MOVB R9, 1(AX)
  19849 	SHRL $0x08, R9
  19850 	SHLL $0x05, R9
  19851 	ORL  R9, R8
  19852 	MOVB R8, (AX)
  19853 	ADDQ $0x02, AX
  19854 	JMP  lz4s_s2_loop
  19855 
  19856 emit_copy_three_lz4_s2: // 3 bytes: tag = (length-1)<<2 | 2, then 16-bit offset
  19857 	LEAL -2(R8), R8
  19858 	MOVB R8, (AX)
  19859 	MOVW R9, 1(AX)
  19860 	ADDQ $0x03, AX
  19861 	JMP  lz4s_s2_loop
  19862 
  19863 lz4s_s2_done: // success: return (SI, AX - dst_base)
  19864 	MOVQ dst_base+0(FP), CX
  19865 	SUBQ CX, AX
  19866 	MOVQ SI, uncompressed+48(FP)
  19867 	MOVQ AX, dstUsed+56(FP)
  19868 	RET
  19869 
  19870 lz4s_s2_corrupt: // malformed input: uncompressed = -1
  19871 	XORQ AX, AX
  19872 	LEAQ -1(AX), SI
  19873 	MOVQ SI, uncompressed+48(FP) // NOTE(review): dstUsed+56(FP) is not written on this path
  19874 	RET
  19875 
  19876 lz4s_s2_dstfull: // dst too small: uncompressed = -2
  19877 	XORQ AX, AX
  19878 	LEAQ -2(AX), SI
  19879 	MOVQ SI, uncompressed+48(FP) // NOTE(review): dstUsed+56(FP) is not written on this path
  19880 	RET
  19881 
  19882 // func cvtLZ4BlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
  19883 // Requires: SSE2
        //
        // Walks the sequences in src (token byte, optional length-extension bytes,
        // literal bytes, 16-bit match offset, optional match-length extension bytes)
        // and re-emits them into dst as literal and copy ops only; unlike
        // cvtLZ4sBlockAsm no repeat ops are produced and no previous offset is tracked.
        // NOTE(review): presumably src is an LZ4 block and the output is Snappy-compatible,
        // going by the function and label names - confirm against the generator.
        //
        // Results:
        //   uncompressed = sum of all literal and match lengths on success,
        //                  -1 if src is truncated or malformed,
        //                  -2 if dst does not have enough room.
        //   dstUsed      = number of bytes written to dst; only stored on success.
        //
        // Register roles:
        //   AX  dst write cursor          CX  dst limit (dst end - 10)
        //   DX  src read cursor           BX  src end
        //   SI  running uncompressed size
        //   R8  literal length, later the match offset
        //   R9  match length (token low nibble + 4, plus extension bytes)
        //   DI, R10-R14, X0-X5 scratch
  19884 TEXT ·cvtLZ4BlockSnappyAsm(SB), NOSPLIT, $0-64
  19885 	XORQ SI, SI // uncompressed size so far = 0
  19886 	MOVQ dst_base+0(FP), AX
  19887 	MOVQ dst_len+8(FP), CX
  19888 	MOVQ src_base+24(FP), DX
  19889 	MOVQ src_len+32(FP), BX
  19890 	LEAQ (DX)(BX*1), BX // BX = one past the last src byte
  19891 	LEAQ -10(AX)(CX*1), CX // CX = dst end minus a 10-byte safety margin
  19892 
  19893 lz4_snappy_loop: // top of the per-sequence loop
  19894 	CMPQ    DX, BX
  19895 	JAE     lz4_snappy_corrupt // a token is expected but src is exhausted
  19896 	CMPQ    AX, CX
  19897 	JAE     lz4_snappy_dstfull
  19898 	MOVBQZX (DX), DI // DI = token byte
  19899 	MOVQ    DI, R8
  19900 	MOVQ    DI, R9
  19901 	SHRQ    $0x04, R8 // R8 = literal length (high nibble)
  19902 	ANDQ    $0x0f, R9 // R9 = match length nibble (low nibble)
  19903 	CMPQ    DI, $0xf0
  19904 	JB      lz4_snappy_ll_end // literal nibble < 15: no extension bytes follow
  19905 
  19906 lz4_snappy_ll_loop: // add literal-length extension bytes until one is not 0xff
  19907 	INCQ    DX
  19908 	CMPQ    DX, BX
  19909 	JAE     lz4_snappy_corrupt
  19910 	MOVBQZX (DX), DI
  19911 	ADDQ    DI, R8
  19912 	CMPQ    DI, $0xff
  19913 	JEQ     lz4_snappy_ll_loop
  19914 
  19915 lz4_snappy_ll_end: // DX still points at the last length byte (or the token)
  19916 	LEAQ  (DX)(R8*1), DI
  19917 	ADDQ  $0x04, R9 // match length = nibble + 4
  19918 	CMPQ  DI, BX
  19919 	JAE   lz4_snappy_corrupt // literals would run past the end of src
  19920 	INCQ  DX // DX = first literal byte
  19921 	INCQ  DI // DI = first byte after the literals
  19922 	TESTQ R8, R8
  19923 	JZ    lz4_snappy_lits_done // no literals in this sequence
  19924 	LEAQ  (AX)(R8*1), R10
  19925 	CMPQ  R10, CX
  19926 	JAE   lz4_snappy_dstfull
  19927 	ADDQ  R8, SI
  19928 	LEAL  -1(R8), R10 // R10 = length-1, the value stored in the literal header
  19929 	CMPL  R10, $0x3c
  19930 	JB    one_byte_lz4_snappy // length-1 < 60 fits in the tag byte itself
  19931 	CMPL  R10, $0x00000100
  19932 	JB    two_bytes_lz4_snappy
  19933 	CMPL  R10, $0x00010000
  19934 	JB    three_bytes_lz4_snappy
  19935 	CMPL  R10, $0x01000000
  19936 	JB    four_bytes_lz4_snappy
  19937 	MOVB  $0xfc, (AX) // 5-byte header: tag 0xfc + 32-bit length-1
  19938 	MOVL  R10, 1(AX)
  19939 	ADDQ  $0x05, AX
  19940 	JMP   memmove_long_lz4_snappy
  19941 
  19942 four_bytes_lz4_snappy: // 4-byte header: tag 0xf8 + 24-bit length-1
  19943 	MOVL R10, R11
  19944 	SHRL $0x10, R11
  19945 	MOVB $0xf8, (AX)
  19946 	MOVW R10, 1(AX)
  19947 	MOVB R11, 3(AX)
  19948 	ADDQ $0x04, AX
  19949 	JMP  memmove_long_lz4_snappy
  19950 
  19951 three_bytes_lz4_snappy: // 3-byte header: tag 0xf4 + 16-bit length-1
  19952 	MOVB $0xf4, (AX)
  19953 	MOVW R10, 1(AX)
  19954 	ADDQ $0x03, AX
  19955 	JMP  memmove_long_lz4_snappy
  19956 
  19957 two_bytes_lz4_snappy: // 2-byte header: tag 0xf0 + 8-bit length-1
  19958 	MOVB $0xf0, (AX)
  19959 	MOVB R10, 1(AX)
  19960 	ADDQ $0x02, AX
  19961 	CMPL R10, $0x40
  19962 	JB   memmove_lz4_snappy // length <= 64 still uses the short copy
  19963 	JMP  memmove_long_lz4_snappy
  19964 
  19965 one_byte_lz4_snappy: // 1-byte header: (length-1) << 2
  19966 	SHLB $0x02, R10
  19967 	MOVB R10, (AX)
  19968 	ADDQ $0x01, AX
  19969 
  19970 memmove_lz4_snappy: // copy R8 (1..64) literal bytes from DX to AX
  19971 	LEAQ (AX)(R8*1), R10 // R10 = dst cursor after the literals
  19972 
  19973 	// genMemMoveShort
  19974 	CMPQ R8, $0x08
  19975 	JBE  emit_lit_memmove_lz4_snappy_memmove_move_8
  19976 	CMPQ R8, $0x10
  19977 	JBE  emit_lit_memmove_lz4_snappy_memmove_move_8through16
  19978 	CMPQ R8, $0x20
  19979 	JBE  emit_lit_memmove_lz4_snappy_memmove_move_17through32
  19980 	JMP  emit_lit_memmove_lz4_snappy_memmove_move_33through64
  19981 
  19982 emit_lit_memmove_lz4_snappy_memmove_move_8: // length <= 8: always moves a full 8 bytes
  19983 	MOVQ (DX), R11
  19984 	MOVQ R11, (AX)
  19985 	JMP  memmove_end_copy_lz4_snappy
  19986 
  19987 emit_lit_memmove_lz4_snappy_memmove_move_8through16: // 9..16: two overlapping 8-byte moves
  19988 	MOVQ (DX), R11
  19989 	MOVQ -8(DX)(R8*1), DX // clobbers DX; it is reloaded from DI at lz4_snappy_lits_emit_done
  19990 	MOVQ R11, (AX)
  19991 	MOVQ DX, -8(AX)(R8*1)
  19992 	JMP  memmove_end_copy_lz4_snappy
  19993 
  19994 emit_lit_memmove_lz4_snappy_memmove_move_17through32: // two overlapping 16-byte moves
  19995 	MOVOU (DX), X0
  19996 	MOVOU -16(DX)(R8*1), X1
  19997 	MOVOU X0, (AX)
  19998 	MOVOU X1, -16(AX)(R8*1)
  19999 	JMP   memmove_end_copy_lz4_snappy
  20000 
  20001 emit_lit_memmove_lz4_snappy_memmove_move_33through64: // first 32 + last 32 bytes, overlapping
  20002 	MOVOU (DX), X0
  20003 	MOVOU 16(DX), X1
  20004 	MOVOU -32(DX)(R8*1), X2
  20005 	MOVOU -16(DX)(R8*1), X3
  20006 	MOVOU X0, (AX)
  20007 	MOVOU X1, 16(AX)
  20008 	MOVOU X2, -32(AX)(R8*1)
  20009 	MOVOU X3, -16(AX)(R8*1)
  20010 
  20011 memmove_end_copy_lz4_snappy:
  20012 	MOVQ R10, AX // advance dst cursor past the copied literals
  20013 	JMP  lz4_snappy_lits_emit_done
  20014 
  20015 memmove_long_lz4_snappy: // copy R8 (> 64) literal bytes from DX to AX
  20016 	LEAQ (AX)(R8*1), R10 // R10 = dst cursor after the literals
  20017 
  20018 	// genMemMoveLong
  20019 	MOVOU (DX), X0 // save the first 32 and last 32 source bytes in X0-X3
  20020 	MOVOU 16(DX), X1
  20021 	MOVOU -32(DX)(R8*1), X2
  20022 	MOVOU -16(DX)(R8*1), X3
  20023 	MOVQ  R8, R12
  20024 	SHRQ  $0x05, R12 // R12 = number of whole 32-byte chunks
  20025 	MOVQ  AX, R11
  20026 	ANDL  $0x0000001f, R11 // R11 = dst misalignment within 32 bytes
  20027 	MOVQ  $0x00000040, R13
  20028 	SUBQ  R11, R13 // R13 = 64 - misalignment, so AX+R13-32 is 32-byte aligned
  20029 	DECQ  R12
  20030 	JA    emit_lit_memmove_long_lz4_snappylarge_forward_sse_loop_32
  20031 	LEAQ  -32(DX)(R13*1), R11
  20032 	LEAQ  -32(AX)(R13*1), R14
  20033 
  20034 emit_lit_memmove_long_lz4_snappylarge_big_loop_back: // 32-byte chunks via explicit pointers R11 (src) / R14 (dst)
  20035 	MOVOU (R11), X4
  20036 	MOVOU 16(R11), X5
  20037 	MOVOA X4, (R14)
  20038 	MOVOA X5, 16(R14)
  20039 	ADDQ  $0x20, R14
  20040 	ADDQ  $0x20, R11
  20041 	ADDQ  $0x20, R13
  20042 	DECQ  R12
  20043 	JNA   emit_lit_memmove_long_lz4_snappylarge_big_loop_back
  20044 
  20045 emit_lit_memmove_long_lz4_snappylarge_forward_sse_loop_32: // 32-byte chunks indexed by R13, aligned stores to dst
  20046 	MOVOU -32(DX)(R13*1), X4
  20047 	MOVOU -16(DX)(R13*1), X5
  20048 	MOVOA X4, -32(AX)(R13*1)
  20049 	MOVOA X5, -16(AX)(R13*1)
  20050 	ADDQ  $0x20, R13
  20051 	CMPQ  R8, R13
  20052 	JAE   emit_lit_memmove_long_lz4_snappylarge_forward_sse_loop_32 // keep going while length >= R13
  20053 	MOVOU X0, (AX) // finally store the saved head and tail with unaligned moves
  20054 	MOVOU X1, 16(AX)
  20055 	MOVOU X2, -32(AX)(R8*1)
  20056 	MOVOU X3, -16(AX)(R8*1)
  20057 	MOVQ  R10, AX // advance dst cursor past the copied literals
  20058 
  20059 lz4_snappy_lits_emit_done:
  20060 	MOVQ DI, DX // src cursor = first byte after the literals
  20061 
  20062 lz4_snappy_lits_done:
  20063 	CMPQ DX, BX
  20064 	JNE  lz4_snappy_match
  20065 	CMPQ R9, $0x04 // at end of src: only valid if the match nibble was 0
  20066 	JEQ  lz4_snappy_done
  20067 	JMP  lz4_snappy_corrupt
  20068 
  20069 lz4_snappy_match:
  20070 	LEAQ    2(DX), DI
  20071 	CMPQ    DI, BX
  20072 	JAE     lz4_snappy_corrupt // offset must be followed by at least one more src byte
  20073 	MOVWQZX (DX), R8 // R8 = 16-bit match offset
  20074 	MOVQ    DI, DX
  20075 	TESTQ   R8, R8
  20076 	JZ      lz4_snappy_corrupt // offset 0 is rejected
  20077 	CMPQ    R8, SI
  20078 	JA      lz4_snappy_corrupt // offset larger than the data described so far
  20079 	CMPQ    R9, $0x13
  20080 	JNE     lz4_snappy_ml_done // nibble < 15 (15+4 == 0x13): no extension bytes
  20081 
  20082 lz4_snappy_ml_loop: // add match-length extension bytes until one is not 0xff
  20083 	MOVBQZX (DX), DI
  20084 	INCQ    DX
  20085 	ADDQ    DI, R9
  20086 	CMPQ    DX, BX
  20087 	JAE     lz4_snappy_corrupt
  20088 	CMPQ    DI, $0xff
  20089 	JEQ     lz4_snappy_ml_loop
  20090 
  20091 lz4_snappy_ml_done:
  20092 	ADDQ R9, SI
  20093 
  20094 	// emitCopy
  20095 two_byte_offset_lz4_s2: // while length > 64: emit a 3-byte op (0xee + 16-bit offset) accounting for 60 bytes
  20096 	CMPL R9, $0x40
  20097 	JBE  two_byte_offset_short_lz4_s2
  20098 	MOVB $0xee, (AX)
  20099 	MOVW R8, 1(AX)
  20100 	LEAL -60(R9), R9
  20101 	ADDQ $0x03, AX
  20102 	CMPQ AX, CX
  20103 	JAE  lz4_snappy_loop // dst limit reached; DX < BX here, so the loop head exits via lz4_snappy_dstfull
  20104 	JMP  two_byte_offset_lz4_s2
  20105 
  20106 two_byte_offset_short_lz4_s2: // length <= 64: a single copy op
  20107 	MOVL R9, DI
  20108 	SHLL $0x02, DI // DI = length << 2
  20109 	CMPL R9, $0x0c
  20110 	JAE  emit_copy_three_lz4_s2
  20111 	CMPL R8, $0x00000800
  20112 	JAE  emit_copy_three_lz4_s2
  20113 	LEAL -15(DI), DI // length < 12 and offset < 2048: tag = (length-4)<<2 | 1 | (offset>>8)<<5
  20114 	MOVB R8, 1(AX)
  20115 	SHRL $0x08, R8
  20116 	SHLL $0x05, R8
  20117 	ORL  R8, DI
  20118 	MOVB DI, (AX)
  20119 	ADDQ $0x02, AX
  20120 	JMP  lz4_snappy_loop
  20121 
  20122 emit_copy_three_lz4_s2: // 3 bytes: tag = (length-1)<<2 | 2, then 16-bit offset
  20123 	LEAL -2(DI), DI
  20124 	MOVB DI, (AX)
  20125 	MOVW R8, 1(AX)
  20126 	ADDQ $0x03, AX
  20127 	JMP  lz4_snappy_loop
  20128 
  20129 lz4_snappy_done: // success: return (SI, AX - dst_base)
  20130 	MOVQ dst_base+0(FP), CX
  20131 	SUBQ CX, AX
  20132 	MOVQ SI, uncompressed+48(FP)
  20133 	MOVQ AX, dstUsed+56(FP)
  20134 	RET
  20135 
  20136 lz4_snappy_corrupt: // malformed input: uncompressed = -1
  20137 	XORQ AX, AX
  20138 	LEAQ -1(AX), SI
  20139 	MOVQ SI, uncompressed+48(FP) // NOTE(review): dstUsed+56(FP) is not written on this path
  20140 	RET
  20141 
  20142 lz4_snappy_dstfull: // dst too small: uncompressed = -2
  20143 	XORQ AX, AX
  20144 	LEAQ -2(AX), SI
  20145 	MOVQ SI, uncompressed+48(FP) // NOTE(review): dstUsed+56(FP) is not written on this path
  20146 	RET
  20147 
  20148 // func cvtLZ4sBlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
  20149 // Requires: SSE2
  20150 TEXT ·cvtLZ4sBlockSnappyAsm(SB), NOSPLIT, $0-64
  20151 	XORQ SI, SI
  20152 	MOVQ dst_base+0(FP), AX
  20153 	MOVQ dst_len+8(FP), CX
  20154 	MOVQ src_base+24(FP), DX
  20155 	MOVQ src_len+32(FP), BX
  20156 	LEAQ (DX)(BX*1), BX
  20157 	LEAQ -10(AX)(CX*1), CX
  20158 
  20159 lz4s_snappy_loop:
  20160 	CMPQ    DX, BX
  20161 	JAE     lz4s_snappy_corrupt
  20162 	CMPQ    AX, CX
  20163 	JAE     lz4s_snappy_dstfull
  20164 	MOVBQZX (DX), DI
  20165 	MOVQ    DI, R8
  20166 	MOVQ    DI, R9
  20167 	SHRQ    $0x04, R8
  20168 	ANDQ    $0x0f, R9
  20169 	CMPQ    DI, $0xf0
  20170 	JB      lz4s_snappy_ll_end
  20171 
  20172 lz4s_snappy_ll_loop:
  20173 	INCQ    DX
  20174 	CMPQ    DX, BX
  20175 	JAE     lz4s_snappy_corrupt
  20176 	MOVBQZX (DX), DI
  20177 	ADDQ    DI, R8
  20178 	CMPQ    DI, $0xff
  20179 	JEQ     lz4s_snappy_ll_loop
  20180 
  20181 lz4s_snappy_ll_end:
  20182 	LEAQ  (DX)(R8*1), DI
  20183 	ADDQ  $0x03, R9
  20184 	CMPQ  DI, BX
  20185 	JAE   lz4s_snappy_corrupt
  20186 	INCQ  DX
  20187 	INCQ  DI
  20188 	TESTQ R8, R8
  20189 	JZ    lz4s_snappy_lits_done
  20190 	LEAQ  (AX)(R8*1), R10
  20191 	CMPQ  R10, CX
  20192 	JAE   lz4s_snappy_dstfull
  20193 	ADDQ  R8, SI
  20194 	LEAL  -1(R8), R10
  20195 	CMPL  R10, $0x3c
  20196 	JB    one_byte_lz4s_snappy
  20197 	CMPL  R10, $0x00000100
  20198 	JB    two_bytes_lz4s_snappy
  20199 	CMPL  R10, $0x00010000
  20200 	JB    three_bytes_lz4s_snappy
  20201 	CMPL  R10, $0x01000000
  20202 	JB    four_bytes_lz4s_snappy
  20203 	MOVB  $0xfc, (AX)
  20204 	MOVL  R10, 1(AX)
  20205 	ADDQ  $0x05, AX
  20206 	JMP   memmove_long_lz4s_snappy
  20207 
  20208 four_bytes_lz4s_snappy:
  20209 	MOVL R10, R11
  20210 	SHRL $0x10, R11
  20211 	MOVB $0xf8, (AX)
  20212 	MOVW R10, 1(AX)
  20213 	MOVB R11, 3(AX)
  20214 	ADDQ $0x04, AX
  20215 	JMP  memmove_long_lz4s_snappy
  20216 
  20217 three_bytes_lz4s_snappy:
  20218 	MOVB $0xf4, (AX)
  20219 	MOVW R10, 1(AX)
  20220 	ADDQ $0x03, AX
  20221 	JMP  memmove_long_lz4s_snappy
  20222 
  20223 two_bytes_lz4s_snappy:
  20224 	MOVB $0xf0, (AX)
  20225 	MOVB R10, 1(AX)
  20226 	ADDQ $0x02, AX
  20227 	CMPL R10, $0x40
  20228 	JB   memmove_lz4s_snappy
  20229 	JMP  memmove_long_lz4s_snappy
  20230 
  20231 one_byte_lz4s_snappy:
  20232 	SHLB $0x02, R10
  20233 	MOVB R10, (AX)
  20234 	ADDQ $0x01, AX
  20235 
  20236 memmove_lz4s_snappy:
  20237 	LEAQ (AX)(R8*1), R10
  20238 
  20239 	// genMemMoveShort
  20240 	CMPQ R8, $0x08
  20241 	JBE  emit_lit_memmove_lz4s_snappy_memmove_move_8
  20242 	CMPQ R8, $0x10
  20243 	JBE  emit_lit_memmove_lz4s_snappy_memmove_move_8through16
  20244 	CMPQ R8, $0x20
  20245 	JBE  emit_lit_memmove_lz4s_snappy_memmove_move_17through32
  20246 	JMP  emit_lit_memmove_lz4s_snappy_memmove_move_33through64
  20247 
  20248 emit_lit_memmove_lz4s_snappy_memmove_move_8:
  20249 	MOVQ (DX), R11
  20250 	MOVQ R11, (AX)
  20251 	JMP  memmove_end_copy_lz4s_snappy
  20252 
  20253 emit_lit_memmove_lz4s_snappy_memmove_move_8through16:
  20254 	MOVQ (DX), R11
  20255 	MOVQ -8(DX)(R8*1), DX
  20256 	MOVQ R11, (AX)
  20257 	MOVQ DX, -8(AX)(R8*1)
  20258 	JMP  memmove_end_copy_lz4s_snappy
  20259 
  20260 emit_lit_memmove_lz4s_snappy_memmove_move_17through32:
  20261 	MOVOU (DX), X0
  20262 	MOVOU -16(DX)(R8*1), X1
  20263 	MOVOU X0, (AX)
  20264 	MOVOU X1, -16(AX)(R8*1)
  20265 	JMP   memmove_end_copy_lz4s_snappy
  20266 
  20267 emit_lit_memmove_lz4s_snappy_memmove_move_33through64:
  20268 	MOVOU (DX), X0
  20269 	MOVOU 16(DX), X1
  20270 	MOVOU -32(DX)(R8*1), X2
  20271 	MOVOU -16(DX)(R8*1), X3
  20272 	MOVOU X0, (AX)
  20273 	MOVOU X1, 16(AX)
  20274 	MOVOU X2, -32(AX)(R8*1)
  20275 	MOVOU X3, -16(AX)(R8*1)
  20276 
  20277 memmove_end_copy_lz4s_snappy:
  20278 	MOVQ R10, AX
  20279 	JMP  lz4s_snappy_lits_emit_done
  20280 
  20281 memmove_long_lz4s_snappy:
  20282 	LEAQ (AX)(R8*1), R10
  20283 
  20284 	// genMemMoveLong
  20285 	MOVOU (DX), X0
  20286 	MOVOU 16(DX), X1
  20287 	MOVOU -32(DX)(R8*1), X2
  20288 	MOVOU -16(DX)(R8*1), X3
  20289 	MOVQ  R8, R12
  20290 	SHRQ  $0x05, R12
  20291 	MOVQ  AX, R11
  20292 	ANDL  $0x0000001f, R11
  20293 	MOVQ  $0x00000040, R13
  20294 	SUBQ  R11, R13
  20295 	DECQ  R12
  20296 	JA    emit_lit_memmove_long_lz4s_snappylarge_forward_sse_loop_32
  20297 	LEAQ  -32(DX)(R13*1), R11
  20298 	LEAQ  -32(AX)(R13*1), R14
  20299 
  20300 emit_lit_memmove_long_lz4s_snappylarge_big_loop_back:
  20301 	MOVOU (R11), X4
  20302 	MOVOU 16(R11), X5
  20303 	MOVOA X4, (R14)
  20304 	MOVOA X5, 16(R14)
  20305 	ADDQ  $0x20, R14
  20306 	ADDQ  $0x20, R11
  20307 	ADDQ  $0x20, R13
  20308 	DECQ  R12
  20309 	JNA   emit_lit_memmove_long_lz4s_snappylarge_big_loop_back
  20310 
  20311 emit_lit_memmove_long_lz4s_snappylarge_forward_sse_loop_32:
  20312 	MOVOU -32(DX)(R13*1), X4
  20313 	MOVOU -16(DX)(R13*1), X5
  20314 	MOVOA X4, -32(AX)(R13*1)
  20315 	MOVOA X5, -16(AX)(R13*1)
  20316 	ADDQ  $0x20, R13
  20317 	CMPQ  R8, R13
  20318 	JAE   emit_lit_memmove_long_lz4s_snappylarge_forward_sse_loop_32
  20319 	MOVOU X0, (AX)
  20320 	MOVOU X1, 16(AX)
  20321 	MOVOU X2, -32(AX)(R8*1)
  20322 	MOVOU X3, -16(AX)(R8*1)
  20323 	MOVQ  R10, AX
  20324 
  20325 lz4s_snappy_lits_emit_done:
  20326 	MOVQ DI, DX
  20327 
  20328 lz4s_snappy_lits_done:
  20329 	CMPQ DX, BX
  20330 	JNE  lz4s_snappy_match
  20331 	CMPQ R9, $0x03
  20332 	JEQ  lz4s_snappy_done
  20333 	JMP  lz4s_snappy_corrupt
  20334 
  20335 lz4s_snappy_match:
  20336 	CMPQ    R9, $0x03
  20337 	JEQ     lz4s_snappy_loop
  20338 	LEAQ    2(DX), DI
  20339 	CMPQ    DI, BX
  20340 	JAE     lz4s_snappy_corrupt
  20341 	MOVWQZX (DX), R8
  20342 	MOVQ    DI, DX
  20343 	TESTQ   R8, R8
  20344 	JZ      lz4s_snappy_corrupt
  20345 	CMPQ    R8, SI
  20346 	JA      lz4s_snappy_corrupt
  20347 	CMPQ    R9, $0x12
  20348 	JNE     lz4s_snappy_ml_done
  20349 
  20350 lz4s_snappy_ml_loop:
  20351 	MOVBQZX (DX), DI
  20352 	INCQ    DX
  20353 	ADDQ    DI, R9
  20354 	CMPQ    DX, BX
  20355 	JAE     lz4s_snappy_corrupt
  20356 	CMPQ    DI, $0xff
  20357 	JEQ     lz4s_snappy_ml_loop
  20358 
  20359 lz4s_snappy_ml_done:
  20360 	ADDQ R9, SI
  20361 
  20362 	// emitCopy
  20363 two_byte_offset_lz4_s2:
  20364 	CMPL R9, $0x40
  20365 	JBE  two_byte_offset_short_lz4_s2
  20366 	MOVB $0xee, (AX)
  20367 	MOVW R8, 1(AX)
  20368 	LEAL -60(R9), R9
  20369 	ADDQ $0x03, AX
  20370 	CMPQ AX, CX
  20371 	JAE  lz4s_snappy_loop
  20372 	JMP  two_byte_offset_lz4_s2
  20373 
  20374 two_byte_offset_short_lz4_s2:
  20375 	MOVL R9, DI
  20376 	SHLL $0x02, DI
  20377 	CMPL R9, $0x0c
  20378 	JAE  emit_copy_three_lz4_s2
  20379 	CMPL R8, $0x00000800
  20380 	JAE  emit_copy_three_lz4_s2
  20381 	LEAL -15(DI), DI
  20382 	MOVB R8, 1(AX)
  20383 	SHRL $0x08, R8
  20384 	SHLL $0x05, R8
  20385 	ORL  R8, DI
  20386 	MOVB DI, (AX)
  20387 	ADDQ $0x02, AX
  20388 	JMP  lz4s_snappy_loop
  20389 
  20390 emit_copy_three_lz4_s2:
  20391 	LEAL -2(DI), DI
  20392 	MOVB DI, (AX)
  20393 	MOVW R8, 1(AX)
  20394 	ADDQ $0x03, AX
  20395 	JMP  lz4s_snappy_loop
  20396 
  20397 lz4s_snappy_done:
  20398 	MOVQ dst_base+0(FP), CX
  20399 	SUBQ CX, AX
  20400 	MOVQ SI, uncompressed+48(FP)
  20401 	MOVQ AX, dstUsed+56(FP)
  20402 	RET
  20403 
  20404 lz4s_snappy_corrupt:
  20405 	XORQ AX, AX
  20406 	LEAQ -1(AX), SI
  20407 	MOVQ SI, uncompressed+48(FP)
  20408 	RET
  20409 
  20410 lz4s_snappy_dstfull:
  20411 	XORQ AX, AX
  20412 	LEAQ -2(AX), SI
  20413 	MOVQ SI, uncompressed+48(FP)
  20414 	RET