gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

hex_encode_amd64.s (3929B)


      1 // Copyright 2016 Tom Thorogood. All rights reserved.
      2 // Use of this source code is governed by a
      3 // Modified BSD License license that can be found in
      4 // the LICENSE file.
      5 //
      6 // Copyright 2005-2016, Wojciech Muła. All rights reserved.
      7 // Use of this source code is governed by a
      8 // Simplified BSD License license that can be found in
      9 // the LICENSE file.
     10 //
     11 // This file is auto-generated - do not modify
     12 
     13 // +build amd64,!gccgo,!appengine
     14 
     15 #include "textflag.h"
     16 
     17 DATA encodeMask<>+0x00(SB)/8, $0x0f0f0f0f0f0f0f0f // bytes 0-7 of the mask
     18 DATA encodeMask<>+0x08(SB)/8, $0x0f0f0f0f0f0f0f0f // bytes 8-15 of the mask
     19 GLOBL encodeMask<>(SB),RODATA,$16
        // encodeMask<> is a file-local (<>), read-only, 16-byte constant with
        // 0x0f in every byte. The routines below AND it against a vector to keep
        // only the low 4 bits of each byte, so every byte becomes an index 0-15.
     20 
     21 TEXT ·encodeAVX(SB),NOSPLIT,$0
        // encodeAVX(dst, src, len, alpha): for each of the len bytes b at src,
        // stores alpha[b>>4] followed by alpha[b&0x0f] at dst, so 2*len bytes
        // are written. 16 bytes are read from alpha as the lookup table.
        // NOTE(review): the argument types are inferred from how the FP slots
        // are used below (three addresses and a count); the Go declaration is
        // not visible in this file - confirm against it.
        // NOTE(review): len == 0 is not special-cased. It reaches tail_in_1 /
        // tail_out_1, which read one byte from src and write two bytes to dst;
        // presumably the caller never passes 0 - confirm.
        //
        // Registers: DI = dst, SI = src, BX = bytes still to encode,
        // X15 = 16-byte alphabet, X0-X3 = scratch.
        // This variant uses VEX three-operand forms (VPAND, VPUNPCKHBW, VPSHUFB)
        // where a separate destination register saves a copy.
     22 	MOVQ dst+0(FP), DI
     23 	MOVQ src+8(FP), SI
     24 	MOVQ len+16(FP), BX
     25 	MOVQ alpha+24(FP), DX
     26 	MOVOU (DX), X15 // X15 = alphabet; kept for the whole call
     27 	CMPQ BX, $16
     28 	JB tail // fewer than 16 bytes: skip the 16-byte loop
        // bigloop works from the end of the input towards the start: each pass
        // encodes src[BX-16:BX] into dst[2*BX-32:2*BX] and then lowers BX by 16.
        // SI and DI are not changed here, so what is left afterwards (< 16
        // bytes) is the start of src and is handled by tail.
     29 bigloop:
     30 	MOVOU -16(SI)(BX*1), X0 // last 16 pending input bytes
     31 	VPAND encodeMask<>(SB), X0, X1 // X1 = low nibble of each byte
     32 	PSRLW $4, X0 // shift is per 16-bit word; stray bits are masked next
     33 	PAND encodeMask<>(SB), X0 // X0 = high nibble of each byte
     34 	VPUNPCKHBW X1, X0, X3 // X3 = (hi, lo) pairs for input bytes 8-15
     35 	PUNPCKLBW X1, X0 // X0 = (hi, lo) pairs for input bytes 0-7
     36 	VPSHUFB X0, X15, X1 // X1 = alphabet looked up by the indices in X0
     37 	VPSHUFB X3, X15, X2 // X2 = alphabet looked up by the indices in X3
     38 	MOVOU X2, -16(DI)(BX*2) // output for input bytes 8-15 of this chunk
     39 	MOVOU X1, -32(DI)(BX*2) // output for input bytes 0-7 of this chunk
     40 	SUBQ $16, BX
     41 	JZ ret // nothing left
     42 	CMPQ BX, $16
     43 	JAE bigloop
        // tail encodes the BX bytes at (SI). First it loads them into X0:
        // BX >= 8 takes one 8-byte chunk, BX < 8 takes exactly BX bytes through
        // the fall-through chains below (BX == 0 also lands on tail_in_1).
     44 tail:
     45 	CMPQ BX, $2
     46 	JB tail_in_1
     47 	JE tail_in_2
     48 	CMPQ BX, $4
     49 	JB tail_in_3
     50 	JE tail_in_4
     51 	CMPQ BX, $6
     52 	JB tail_in_5
     53 	JE tail_in_6
     54 	CMPQ BX, $8
     55 	JB tail_in_7
     56 tail_in_8:
     57 	MOVQ (SI), X0 // 8 input bytes
     58 	JMP tail_conv
        // 5-7 bytes: insert bytes 6, 5, 4 as needed, then bytes 0-3 as one dword.
        // X0 is not cleared first; lanes that are not loaded hold stale data,
        // and the matching output lanes are never stored.
     59 tail_in_7:
     60 	PINSRB $6, 6(SI), X0
     61 tail_in_6:
     62 	PINSRB $5, 5(SI), X0
     63 tail_in_5:
     64 	PINSRB $4, 4(SI), X0
     65 tail_in_4:
     66 	PINSRD $0, (SI), X0
     67 	JMP tail_conv
        // 1-3 bytes: insert bytes 2, 1, 0 as needed and fall into tail_conv.
     68 tail_in_3:
     69 	PINSRB $2, 2(SI), X0
     70 tail_in_2:
     71 	PINSRB $1, 1(SI), X0
     72 tail_in_1:
     73 	PINSRB $0, (SI), X0
        // Same nibble split and lookup as bigloop, but only the low 8 input
        // bytes are expanded, giving up to 16 output bytes in X1.
     74 tail_conv:
     75 	VPAND encodeMask<>(SB), X0, X1
     76 	PSRLW $4, X0
     77 	PAND encodeMask<>(SB), X0
     78 	PUNPCKLBW X1, X0
     79 	VPSHUFB X0, X15, X1
        // Store exactly 2*min(BX, 8) bytes of X1, dispatching on BX again.
     80 	CMPQ BX, $2
     81 	JB tail_out_1
     82 	JE tail_out_2
     83 	CMPQ BX, $4
     84 	JB tail_out_3
     85 	JE tail_out_4
     86 	CMPQ BX, $6
     87 	JB tail_out_5
     88 	JE tail_out_6
     89 	CMPQ BX, $8
     90 	JB tail_out_7
     91 tail_out_8:
     92 	MOVOU X1, (DI) // 16 output bytes for the 8-byte chunk
     93 	SUBQ $8, BX
     94 	JZ ret
     95 	ADDQ $8, SI // step past the chunk just encoded
     96 	ADDQ $16, DI
     97 	JMP tail // encode the bytes that remain
        // 5-7 input bytes: store output bytes 13..8 pairwise as needed, then
        // output bytes 0-7 with one 8-byte store.
     98 tail_out_7:
     99 	PEXTRB $13, X1, 13(DI)
    100 	PEXTRB $12, X1, 12(DI)
    101 tail_out_6:
    102 	PEXTRB $11, X1, 11(DI)
    103 	PEXTRB $10, X1, 10(DI)
    104 tail_out_5:
    105 	PEXTRB $9, X1, 9(DI)
    106 	PEXTRB $8, X1, 8(DI)
    107 tail_out_4:
    108 	MOVQ X1, (DI)
    109 	RET
        // 1-3 input bytes: store output bytes 5..0 pairwise as needed, then
        // fall through into ret.
    110 tail_out_3:
    111 	PEXTRB $5, X1, 5(DI)
    112 	PEXTRB $4, X1, 4(DI)
    113 tail_out_2:
    114 	PEXTRB $3, X1, 3(DI)
    115 	PEXTRB $2, X1, 2(DI)
    116 tail_out_1:
    117 	PEXTRB $1, X1, 1(DI)
    118 	PEXTRB $0, X1, (DI)
    119 ret:
    120 	RET
    121 
    122 TEXT ·encodeSSE(SB),NOSPLIT,$0
        // encodeSSE(dst, src, len, alpha): for each of the len bytes b at src,
        // stores alpha[b>>4] followed by alpha[b&0x0f] at dst, so 2*len bytes
        // are written. 16 bytes are read from alpha as the lookup table.
        // NOTE(review): the argument types are inferred from how the FP slots
        // are used below (three addresses and a count); the Go declaration is
        // not visible in this file - confirm against it.
        // NOTE(review): len == 0 is not special-cased. It reaches tail_in_1 /
        // tail_out_1, which read one byte from src and write two bytes to dst;
        // presumably the caller never passes 0 - confirm.
        //
        // Registers: DI = dst, SI = src, BX = bytes still to encode,
        // X15 = 16-byte alphabet, X0-X3 = scratch.
        // Only two-operand SSE forms are used here, so values that must survive
        // a destructive instruction are first copied with MOVOU.
    123 	MOVQ dst+0(FP), DI
    124 	MOVQ src+8(FP), SI
    125 	MOVQ len+16(FP), BX
    126 	MOVQ alpha+24(FP), DX
    127 	MOVOU (DX), X15 // X15 = alphabet; kept for the whole call
    128 	CMPQ BX, $16
    129 	JB tail // fewer than 16 bytes: skip the 16-byte loop
        // bigloop works from the end of the input towards the start: each pass
        // encodes src[BX-16:BX] into dst[2*BX-32:2*BX] and then lowers BX by 16.
        // SI and DI are not changed here, so what is left afterwards (< 16
        // bytes) is the start of src and is handled by tail.
    130 bigloop:
    131 	MOVOU -16(SI)(BX*1), X0 // last 16 pending input bytes
    132 	MOVOU X0, X1
    133 	PAND encodeMask<>(SB), X1 // X1 = low nibble of each byte
    134 	PSRLW $4, X0 // shift is per 16-bit word; stray bits are masked next
    135 	PAND encodeMask<>(SB), X0 // X0 = high nibble of each byte
    136 	MOVOU X0, X3 // keep the high nibbles for the upper-half unpack
    137 	PUNPCKHBW X1, X3 // X3 = (hi, lo) pairs for input bytes 8-15
    138 	PUNPCKLBW X1, X0 // X0 = (hi, lo) pairs for input bytes 0-7
    139 	MOVOU X15, X1 // PSHUFB overwrites its table operand, so copy it
    140 	PSHUFB X0, X1 // X1 = alphabet looked up by the indices in X0
    141 	MOVOU X15, X2
    142 	PSHUFB X3, X2 // X2 = alphabet looked up by the indices in X3
    143 	MOVOU X2, -16(DI)(BX*2) // output for input bytes 8-15 of this chunk
    144 	MOVOU X1, -32(DI)(BX*2) // output for input bytes 0-7 of this chunk
    145 	SUBQ $16, BX
    146 	JZ ret // nothing left
    147 	CMPQ BX, $16
    148 	JAE bigloop
        // tail encodes the BX bytes at (SI). First it loads them into X0:
        // BX >= 8 takes one 8-byte chunk, BX < 8 takes exactly BX bytes through
        // the fall-through chains below (BX == 0 also lands on tail_in_1).
    149 tail:
    150 	CMPQ BX, $2
    151 	JB tail_in_1
    152 	JE tail_in_2
    153 	CMPQ BX, $4
    154 	JB tail_in_3
    155 	JE tail_in_4
    156 	CMPQ BX, $6
    157 	JB tail_in_5
    158 	JE tail_in_6
    159 	CMPQ BX, $8
    160 	JB tail_in_7
    161 tail_in_8:
    162 	MOVQ (SI), X0 // 8 input bytes
    163 	JMP tail_conv
        // 5-7 bytes: insert bytes 6, 5, 4 as needed, then bytes 0-3 as one dword.
        // X0 is not cleared first; lanes that are not loaded hold stale data,
        // and the matching output lanes are never stored.
    164 tail_in_7:
    165 	PINSRB $6, 6(SI), X0
    166 tail_in_6:
    167 	PINSRB $5, 5(SI), X0
    168 tail_in_5:
    169 	PINSRB $4, 4(SI), X0
    170 tail_in_4:
    171 	PINSRD $0, (SI), X0
    172 	JMP tail_conv
        // 1-3 bytes: insert bytes 2, 1, 0 as needed and fall into tail_conv.
    173 tail_in_3:
    174 	PINSRB $2, 2(SI), X0
    175 tail_in_2:
    176 	PINSRB $1, 1(SI), X0
    177 tail_in_1:
    178 	PINSRB $0, (SI), X0
        // Same nibble split and lookup as bigloop, but only the low 8 input
        // bytes are expanded, giving up to 16 output bytes in X1.
    179 tail_conv:
    180 	MOVOU X0, X1
    181 	PAND encodeMask<>(SB), X1
    182 	PSRLW $4, X0
    183 	PAND encodeMask<>(SB), X0
    184 	PUNPCKLBW X1, X0
    185 	MOVOU X15, X1
    186 	PSHUFB X0, X1
        // Store exactly 2*min(BX, 8) bytes of X1, dispatching on BX again.
    187 	CMPQ BX, $2
    188 	JB tail_out_1
    189 	JE tail_out_2
    190 	CMPQ BX, $4
    191 	JB tail_out_3
    192 	JE tail_out_4
    193 	CMPQ BX, $6
    194 	JB tail_out_5
    195 	JE tail_out_6
    196 	CMPQ BX, $8
    197 	JB tail_out_7
    198 tail_out_8:
    199 	MOVOU X1, (DI) // 16 output bytes for the 8-byte chunk
    200 	SUBQ $8, BX
    201 	JZ ret
    202 	ADDQ $8, SI // step past the chunk just encoded
    203 	ADDQ $16, DI
    204 	JMP tail // encode the bytes that remain
        // 5-7 input bytes: store output bytes 13..8 pairwise as needed, then
        // output bytes 0-7 with one 8-byte store.
    205 tail_out_7:
    206 	PEXTRB $13, X1, 13(DI)
    207 	PEXTRB $12, X1, 12(DI)
    208 tail_out_6:
    209 	PEXTRB $11, X1, 11(DI)
    210 	PEXTRB $10, X1, 10(DI)
    211 tail_out_5:
    212 	PEXTRB $9, X1, 9(DI)
    213 	PEXTRB $8, X1, 8(DI)
    214 tail_out_4:
    215 	MOVQ X1, (DI)
    216 	RET
        // 1-3 input bytes: store output bytes 5..0 pairwise as needed, then
        // fall through into ret.
    217 tail_out_3:
    218 	PEXTRB $5, X1, 5(DI)
    219 	PEXTRB $4, X1, 4(DI)
    220 tail_out_2:
    221 	PEXTRB $3, X1, 3(DI)
    222 	PEXTRB $2, X1, 2(DI)
    223 tail_out_1:
    224 	PEXTRB $1, X1, 1(DI)
    225 	PEXTRB $0, X1, (DI)
    226 ret:
    227 	RET