hex_encode_amd64.s (3929B)
// Copyright 2016 Tom Thorogood. All rights reserved.
// Use of this source code is governed by a
// Modified BSD License license that can be found in
// the LICENSE file.
//
// Copyright 2005-2016, Wojciech Muła. All rights reserved.
// Use of this source code is governed by a
// Simplified BSD License license that can be found in
// the LICENSE file.
//
// This file is auto-generated - do not modify

// +build amd64,!gccgo,!appengine

#include "textflag.h"

// NOTE(review): the header above says this file is generated. The zero-length
// guard added to both functions below should also be applied to the generator,
// which is not part of this file.

// encodeMask is sixteen 0x0f bytes; ANDing with it keeps only the low nibble
// of every byte lane.
DATA encodeMask<>+0x00(SB)/8, $0x0f0f0f0f0f0f0f0f
DATA encodeMask<>+0x08(SB)/8, $0x0f0f0f0f0f0f0f0f
GLOBL encodeMask<>(SB),RODATA,$16

// encodeAVX writes 2*len bytes to dst: for every src byte b it stores
// alpha[b>>4] followed by alpha[b&0x0f].
//
// Arguments (each read as 8 bytes from the argument frame):
//	dst+0(FP)    output pointer; 2*len bytes are written
//	src+8(FP)    input pointer; len bytes are read
//	len+16(FP)   number of input bytes; 0 returns without touching memory
//	alpha+24(FP) pointer to a 16-byte table indexed by nibble value
//
// Registers: DI = dst, SI = src, BX = input bytes still to encode, DX = alpha,
// X15 = lookup table, X0-X3 = scratch.
//
// Input is consumed in 16-byte blocks from the END of src towards the start;
// whatever is left at the start (1..15 bytes) is handled by the tail code.
//
// Uses VEX-encoded instructions (AVX) together with PINSRB/PINSRD/PEXTRB
// (SSE4.1). Presumably the caller selects this routine only after checking
// CPU support - that check is not visible in this file.
TEXT ·encodeAVX(SB),NOSPLIT,$0
	MOVQ dst+0(FP), DI
	MOVQ src+8(FP), SI
	MOVQ len+16(FP), BX
	MOVQ alpha+24(FP), DX

	// Empty input: without this guard the tail path below would read one
	// byte from src and store two bytes to dst.
	TESTQ BX, BX
	JZ ret

	MOVOU (DX), X15 // X15 = 16-byte nibble -> output byte table

	CMPQ BX, $16
	JB tail

bigloop:
	// BX >= 16 here. Encode src[BX-16 : BX] into dst[2*BX-32 : 2*BX].
	MOVOU -16(SI)(BX*1), X0
	VPAND encodeMask<>(SB), X0, X1 // X1 = low nibble of each byte
	PSRLW $4, X0
	PAND encodeMask<>(SB), X0      // X0 = high nibble of each byte (mask drops bits shifted in from the neighbouring byte)
	VPUNPCKHBW X1, X0, X3          // X3 = hi,lo nibble pairs for input bytes 8..15
	PUNPCKLBW X1, X0               // X0 = hi,lo nibble pairs for input bytes 0..7
	VPSHUFB X0, X15, X1            // table lookup, first 16 output bytes
	VPSHUFB X3, X15, X2            // table lookup, last 16 output bytes
	MOVOU X2, -16(DI)(BX*2)
	MOVOU X1, -32(DI)(BX*2)
	SUBQ $16, BX
	JZ ret
	CMPQ BX, $16
	JAE bigloop

tail:
	// 1 <= BX <= 15 here; SI/DI point at the still-unprocessed start of the
	// buffers. Dispatch on BX: values of 8 or more fall through to tail_in_8.
	CMPQ BX, $2
	JB tail_in_1
	JE tail_in_2
	CMPQ BX, $4
	JB tail_in_3
	JE tail_in_4
	CMPQ BX, $6
	JB tail_in_5
	JE tail_in_6
	CMPQ BX, $8
	JB tail_in_7

tail_in_8:
	MOVQ (SI), X0 // load 8 input bytes
	JMP tail_conv

	// The tail_in_N labels below fall through into each other so that exactly
	// the first N input bytes are loaded. Lanes of X0 that are not written
	// keep whatever they held before; they are never stored.
tail_in_7:
	PINSRB $6, 6(SI), X0
tail_in_6:
	PINSRB $5, 5(SI), X0
tail_in_5:
	PINSRB $4, 4(SI), X0
tail_in_4:
	PINSRD $0, (SI), X0 // bytes 0..3 in one load
	JMP tail_conv
tail_in_3:
	PINSRB $2, 2(SI), X0
tail_in_2:
	PINSRB $1, 1(SI), X0
tail_in_1:
	PINSRB $0, (SI), X0

tail_conv:
	// Same nibble split and table lookup as bigloop, low 8 input bytes only.
	VPAND encodeMask<>(SB), X0, X1
	PSRLW $4, X0
	PAND encodeMask<>(SB), X0
	PUNPCKLBW X1, X0
	VPSHUFB X0, X15, X1 // X1 = up to 16 output bytes

	// Dispatch on BX again to store exactly 2*min(BX, 8) bytes.
	CMPQ BX, $2
	JB tail_out_1
	JE tail_out_2
	CMPQ BX, $4
	JB tail_out_3
	JE tail_out_4
	CMPQ BX, $6
	JB tail_out_5
	JE tail_out_6
	CMPQ BX, $8
	JB tail_out_7

tail_out_8:
	MOVOU X1, (DI) // 16 output bytes for 8 input bytes
	SUBQ $8, BX
	JZ ret
	// 1..7 input bytes remain: advance past what was just encoded and go
	// round the tail once more.
	ADDQ $8, SI
	ADDQ $16, DI
	JMP tail

	// The tail_out_N labels fall through so that output bytes 2N-1 .. 8 are
	// stored one at a time and bytes 7 .. 0 with a single 8-byte store.
tail_out_7:
	PEXTRB $13, X1, 13(DI)
	PEXTRB $12, X1, 12(DI)
tail_out_6:
	PEXTRB $11, X1, 11(DI)
	PEXTRB $10, X1, 10(DI)
tail_out_5:
	PEXTRB $9, X1, 9(DI)
	PEXTRB $8, X1, 8(DI)
tail_out_4:
	MOVQ X1, (DI)
	RET
tail_out_3:
	PEXTRB $5, X1, 5(DI)
	PEXTRB $4, X1, 4(DI)
tail_out_2:
	PEXTRB $3, X1, 3(DI)
	PEXTRB $2, X1, 2(DI)
tail_out_1:
	PEXTRB $1, X1, 1(DI)
	PEXTRB $0, X1, (DI)
ret:
	RET

// encodeSSE writes 2*len bytes to dst: for every src byte b it stores
// alpha[b>>4] followed by alpha[b&0x0f]. It performs the same steps as
// encodeAVX using only legacy (non-VEX) encodings, so the three-operand
// forms are replaced by an explicit register copy plus a two-operand
// instruction.
//
// Arguments (each read as 8 bytes from the argument frame):
//	dst+0(FP)    output pointer; 2*len bytes are written
//	src+8(FP)    input pointer; len bytes are read
//	len+16(FP)   number of input bytes; 0 returns without touching memory
//	alpha+24(FP) pointer to a 16-byte table indexed by nibble value
//
// Registers: DI = dst, SI = src, BX = input bytes still to encode, DX = alpha,
// X15 = lookup table, X0-X3 = scratch.
//
// Uses PSHUFB (SSSE3) and PINSRB/PINSRD/PEXTRB (SSE4.1). Presumably the caller
// selects this routine only after checking CPU support - that check is not
// visible in this file.
TEXT ·encodeSSE(SB),NOSPLIT,$0
	MOVQ dst+0(FP), DI
	MOVQ src+8(FP), SI
	MOVQ len+16(FP), BX
	MOVQ alpha+24(FP), DX

	// Empty input: without this guard the tail path below would read one
	// byte from src and store two bytes to dst.
	TESTQ BX, BX
	JZ ret

	MOVOU (DX), X15 // X15 = 16-byte nibble -> output byte table

	CMPQ BX, $16
	JB tail

bigloop:
	// BX >= 16 here. Encode src[BX-16 : BX] into dst[2*BX-32 : 2*BX].
	MOVOU -16(SI)(BX*1), X0
	MOVOU X0, X1
	PAND encodeMask<>(SB), X1 // X1 = low nibble of each byte
	PSRLW $4, X0
	PAND encodeMask<>(SB), X0 // X0 = high nibble of each byte (mask drops bits shifted in from the neighbouring byte)
	MOVOU X0, X3
	PUNPCKHBW X1, X3          // X3 = hi,lo nibble pairs for input bytes 8..15
	PUNPCKLBW X1, X0          // X0 = hi,lo nibble pairs for input bytes 0..7
	MOVOU X15, X1
	PSHUFB X0, X1             // table lookup, first 16 output bytes
	MOVOU X15, X2
	PSHUFB X3, X2             // table lookup, last 16 output bytes
	MOVOU X2, -16(DI)(BX*2)
	MOVOU X1, -32(DI)(BX*2)
	SUBQ $16, BX
	JZ ret
	CMPQ BX, $16
	JAE bigloop

tail:
	// 1 <= BX <= 15 here; SI/DI point at the still-unprocessed start of the
	// buffers. Dispatch on BX: values of 8 or more fall through to tail_in_8.
	CMPQ BX, $2
	JB tail_in_1
	JE tail_in_2
	CMPQ BX, $4
	JB tail_in_3
	JE tail_in_4
	CMPQ BX, $6
	JB tail_in_5
	JE tail_in_6
	CMPQ BX, $8
	JB tail_in_7

tail_in_8:
	MOVQ (SI), X0 // load 8 input bytes
	JMP tail_conv

	// The tail_in_N labels below fall through into each other so that exactly
	// the first N input bytes are loaded. Lanes of X0 that are not written
	// keep whatever they held before; they are never stored.
tail_in_7:
	PINSRB $6, 6(SI), X0
tail_in_6:
	PINSRB $5, 5(SI), X0
tail_in_5:
	PINSRB $4, 4(SI), X0
tail_in_4:
	PINSRD $0, (SI), X0 // bytes 0..3 in one load
	JMP tail_conv
tail_in_3:
	PINSRB $2, 2(SI), X0
tail_in_2:
	PINSRB $1, 1(SI), X0
tail_in_1:
	PINSRB $0, (SI), X0

tail_conv:
	// Same nibble split and table lookup as bigloop, low 8 input bytes only.
	MOVOU X0, X1
	PAND encodeMask<>(SB), X1
	PSRLW $4, X0
	PAND encodeMask<>(SB), X0
	PUNPCKLBW X1, X0
	MOVOU X15, X1
	PSHUFB X0, X1 // X1 = up to 16 output bytes

	// Dispatch on BX again to store exactly 2*min(BX, 8) bytes.
	CMPQ BX, $2
	JB tail_out_1
	JE tail_out_2
	CMPQ BX, $4
	JB tail_out_3
	JE tail_out_4
	CMPQ BX, $6
	JB tail_out_5
	JE tail_out_6
	CMPQ BX, $8
	JB tail_out_7

tail_out_8:
	MOVOU X1, (DI) // 16 output bytes for 8 input bytes
	SUBQ $8, BX
	JZ ret
	// 1..7 input bytes remain: advance past what was just encoded and go
	// round the tail once more.
	ADDQ $8, SI
	ADDQ $16, DI
	JMP tail

	// The tail_out_N labels fall through so that output bytes 2N-1 .. 8 are
	// stored one at a time and bytes 7 .. 0 with a single 8-byte store.
tail_out_7:
	PEXTRB $13, X1, 13(DI)
	PEXTRB $12, X1, 12(DI)
tail_out_6:
	PEXTRB $11, X1, 11(DI)
	PEXTRB $10, X1, 10(DI)
tail_out_5:
	PEXTRB $9, X1, 9(DI)
	PEXTRB $8, X1, 8(DI)
tail_out_4:
	MOVQ X1, (DI)
	RET
tail_out_3:
	PEXTRB $5, X1, 5(DI)
	PEXTRB $4, X1, 4(DI)
tail_out_2:
	PEXTRB $3, X1, 3(DI)
	PEXTRB $2, X1, 2(DI)
tail_out_1:
	PEXTRB $1, X1, 1(DI)
	PEXTRB $0, X1, (DI)
ret:
	RET