hex_decode_amd64.s (6151B)
// Copyright 2016 Tom Thorogood. All rights reserved.
// Use of this source code is governed by a
// Modified BSD License license that can be found in
// the LICENSE file.
//
// Copyright 2005-2016, Wojciech Muła. All rights reserved.
// Use of this source code is governed by a
// Simplified BSD License license that can be found in
// the LICENSE file.
//
// This file is auto-generated - do not modify

// +build amd64,!gccgo,!appengine

#include "textflag.h"

// Overview
//
// Both routines below convert ASCII hexadecimal text to bytes, 16 input
// characters (8 output bytes) per iteration, followed by a tail step for the
// remaining 0..15 input bytes. For every chunk they:
//   1. build a per-byte "invalid" mask using signed byte compares,
//   2. bail out to the invalid label if any checked lane is flagged,
//   3. turn each character into its 4-bit value,
//   4. gather even-indexed nibbles (high) and odd-indexed nibbles (low) with
//      PSHUFB and merge them into output bytes.
//
// Frame layout read/written by the code: dst+0(FP), src+8(FP), len+16(FP),
// n+24(FP), ok+32(FP).
// NOTE(review): the matching Go declaration is not visible here; presumably
// func(dst, src *byte, len uint64) (n uint64, ok bool) - confirm.

// decodeBase holds the two per-byte subtrahends used to convert a lowercased
// character to its nibble value:
//   +0x00: 0x30 ('0') in every byte, subtracted from every lane.
//   +0x10: 0x27 in every byte, additionally subtracted from letter lanes
//          ('a' - 0x30 - 0x27 == 10).
DATA decodeBase<>+0x00(SB)/8, $0x3030303030303030
DATA decodeBase<>+0x08(SB)/8, $0x3030303030303030
DATA decodeBase<>+0x10(SB)/8, $0x2727272727272727
DATA decodeBase<>+0x18(SB)/8, $0x2727272727272727
GLOBL decodeBase<>(SB),RODATA,$32

// decodeToLower is 0x20 in every byte; OR-ing it in folds 'A'-'F' onto
// 'a'-'f' and leaves '0'-'9' (which already have that bit set) unchanged.
DATA decodeToLower<>+0x00(SB)/8, $0x2020202020202020
DATA decodeToLower<>+0x08(SB)/8, $0x2020202020202020
GLOBL decodeToLower<>(SB),RODATA,$16

// decodeHigh is a PSHUFB control that gathers the even-indexed bytes
// (0, 2, ..., 14) into the low 8 bytes; the 0xff entries zero the upper 8.
// The even-indexed characters become the high nibbles of the output.
DATA decodeHigh<>+0x00(SB)/8, $0x0e0c0a0806040200
DATA decodeHigh<>+0x08(SB)/8, $0xffffffffffffffff
GLOBL decodeHigh<>(SB),RODATA,$16

// decodeLow is the matching PSHUFB control for the odd-indexed bytes
// (1, 3, ..., 15), which become the low nibbles of the output.
DATA decodeLow<>+0x00(SB)/8, $0x0f0d0b0907050301
DATA decodeLow<>+0x08(SB)/8, $0xffffffffffffffff
GLOBL decodeLow<>(SB),RODATA,$16

// decodeValid holds the range bounds, each XOR-ed with 0x80 so they can be
// used with the signed compare PCMPGTB:
//   +0x00: 0xb0 == '0' ^ 0x80
//   +0x10: 0xb9 == '9' ^ 0x80
//   +0x20: 0xe1 == 'a' ^ 0x80
//   +0x30: 0xe6 == 'f' ^ 0x80
DATA decodeValid<>+0x00(SB)/8, $0xb0b0b0b0b0b0b0b0
DATA decodeValid<>+0x08(SB)/8, $0xb0b0b0b0b0b0b0b0
DATA decodeValid<>+0x10(SB)/8, $0xb9b9b9b9b9b9b9b9
DATA decodeValid<>+0x18(SB)/8, $0xb9b9b9b9b9b9b9b9
DATA decodeValid<>+0x20(SB)/8, $0xe1e1e1e1e1e1e1e1
DATA decodeValid<>+0x28(SB)/8, $0xe1e1e1e1e1e1e1e1
DATA decodeValid<>+0x30(SB)/8, $0xe6e6e6e6e6e6e6e6
DATA decodeValid<>+0x38(SB)/8, $0xe6e6e6e6e6e6e6e6
GLOBL decodeValid<>(SB),RODATA,$64

// decodeToSigned is 0x80 in every byte; XOR-ing it in maps unsigned byte
// order onto signed byte order for PCMPGTB.
DATA decodeToSigned<>+0x00(SB)/8, $0x8080808080808080
DATA decodeToSigned<>+0x08(SB)/8, $0x8080808080808080
GLOBL decodeToSigned<>(SB),RODATA,$16

// decodeAVX decodes len bytes of hex text at src into len/2 bytes at dst,
// using the three-operand VEX forms (VPXOR, VPCMPGTB, VPSHUFB) where a
// non-destructive result is needed.
//
// Results:
//   ok+32(FP) is set to 1 when every checked input byte was a hex digit.
//   On the first invalid byte, ok+32(FP) is set to 0 and n+24(FP) receives
//   that byte's offset from the start of src. n+24(FP) is not written on
//   the success path.
//
// Register use: DI = dst cursor, SI = src cursor, BX = input bytes left,
// R15 = original src (for the error offset), DX = 16-bit lane mask of the
// lanes to validate, X14/X15 = cached '0' and 'a' bounds.
//
// NOTE(review): with len == 0 the code still reaches tail_in_2/tail_out_2,
// which reads 2 bytes from src and writes 1 byte to dst; with odd len the
// last PINSRW reads one byte past len. Presumably the caller guarantees a
// non-zero, even len - confirm against the Go wrapper.
TEXT ·decodeAVX(SB),NOSPLIT,$0
	MOVQ dst+0(FP), DI
	MOVQ src+8(FP), SI
	MOVQ len+16(FP), BX
	// Keep the original src so the invalid path can compute an offset.
	MOVQ SI, R15
	// X14 = '0' bound, X15 = 'a' bound (both signed-biased).
	MOVOU decodeValid<>(SB), X14
	MOVOU decodeValid<>+0x20(SB), X15
	// All 16 lanes are validated in the main loop.
	MOVW $65535, DX
	CMPQ BX, $16
	JB tail
bigloop:
	MOVOU (SI), X0
	// X1 = raw input, signed-biased.
	VPXOR decodeToSigned<>(SB), X0, X1
	// X0 = lowercased input; X2 = lowercased input, signed-biased.
	POR decodeToLower<>(SB), X0
	VPXOR decodeToSigned<>(SB), X0, X2
	// X3 = lanes where raw < '0'.
	VPCMPGTB X1, X14, X3
	// X1 = lanes where raw > '9'.
	PCMPGTB decodeValid<>+0x10(SB), X1
	// X4 = lanes where lowercased < 'a' (i.e. not a letter lane).
	VPCMPGTB X2, X15, X4
	// X2 = lanes where lowercased > 'f'.
	PCMPGTB decodeValid<>+0x30(SB), X2
	// X1 = lanes strictly between '9' and 'a'.
	PAND X4, X1
	// X3 = union of all three invalid conditions.
	POR X2, X3
	POR X1, X3
	// One bit per lane; any bit inside the DX mask means invalid input.
	PMOVMSKB X3, AX
	TESTW AX, DX
	JNZ invalid
	// Subtract '0' from every lane ...
	PSUBB decodeBase<>(SB), X0
	// ... and a further 0x27 from letter lanes only (X4 = ~X4 & 0x27).
	PANDN decodeBase<>+0x10(SB), X4
	PSUBB X4, X0
	// X3 = low nibbles (odd input bytes), X0 = high nibbles (even bytes).
	VPSHUFB decodeLow<>(SB), X0, X3
	PSHUFB decodeHigh<>(SB), X0
	// Each lane is < 16, so a word shift by 4 cannot spill across bytes.
	PSLLW $4, X0
	POR X3, X0
	// Store the 8 decoded bytes.
	MOVQ X0, (DI)
	SUBQ $16, BX
	JZ ret
	ADDQ $16, SI
	ADDQ $8, DI
	CMPQ BX, $16
	JAE bigloop
tail:
	// DX = 0xffff >> (16 - BX): only the low BX lanes are validated, so
	// stale data in the unused upper lanes of X0 is ignored.
	MOVQ $16, CX
	SUBQ BX, CX
	SHRW CX, DX
	// Dispatch on the remaining length; the labels below fall through so
	// that exactly the remaining input words are loaded into X0.
	CMPQ BX, $4
	JB tail_in_2
	JE tail_in_4
	CMPQ BX, $8
	JB tail_in_6
	JE tail_in_8
	CMPQ BX, $12
	JB tail_in_10
	JE tail_in_12
tail_in_14:
	PINSRW $6, 12(SI), X0
tail_in_12:
	PINSRW $5, 10(SI), X0
tail_in_10:
	PINSRW $4, 8(SI), X0
tail_in_8:
	// Bytes 0..7 are loaded with a single 64-bit insert.
	PINSRQ $0, (SI), X0
	JMP tail_conv
tail_in_6:
	PINSRW $2, 4(SI), X0
tail_in_4:
	PINSRW $1, 2(SI), X0
tail_in_2:
	PINSRW $0, (SI), X0
tail_conv:
	// Same validation and conversion sequence as in bigloop, but checked
	// against the narrowed DX mask.
	VPXOR decodeToSigned<>(SB), X0, X1
	POR decodeToLower<>(SB), X0
	VPXOR decodeToSigned<>(SB), X0, X2
	VPCMPGTB X1, X14, X3
	PCMPGTB decodeValid<>+0x10(SB), X1
	VPCMPGTB X2, X15, X4
	PCMPGTB decodeValid<>+0x30(SB), X2
	PAND X4, X1
	POR X2, X3
	POR X1, X3
	PMOVMSKB X3, AX
	TESTW AX, DX
	JNZ invalid
	PSUBB decodeBase<>(SB), X0
	PANDN decodeBase<>+0x10(SB), X4
	PSUBB X4, X0
	VPSHUFB decodeLow<>(SB), X0, X3
	PSHUFB decodeHigh<>(SB), X0
	PSLLW $4, X0
	POR X3, X0
	// Dispatch again on BX to store exactly BX/2 output bytes; the labels
	// fall through from the highest output byte down.
	CMPQ BX, $4
	JB tail_out_2
	JE tail_out_4
	CMPQ BX, $8
	JB tail_out_6
	JE tail_out_8
	CMPQ BX, $12
	JB tail_out_10
	JE tail_out_12
tail_out_14:
	PEXTRB $6, X0, 6(DI)
tail_out_12:
	PEXTRB $5, X0, 5(DI)
tail_out_10:
	PEXTRB $4, X0, 4(DI)
tail_out_8:
	// Output bytes 0..3 are stored with a single 32-bit move.
	MOVL X0, (DI)
	JMP ret
tail_out_6:
	PEXTRB $2, X0, 2(DI)
tail_out_4:
	PEXTRB $1, X0, 1(DI)
tail_out_2:
	PEXTRB $0, X0, (DI)
ret:
	MOVB $1, ok+32(FP)
	RET
invalid:
	// AX = index of the lowest flagged lane within the current chunk. The
	// DX mask covers the low lanes, so the lowest set bit is a checked one.
	BSFW AX, AX
	// SI - R15 = offset of the current chunk from the start of src.
	SUBQ R15, SI
	ADDQ SI, AX
	MOVQ AX, n+24(FP)
	MOVB $0, ok+32(FP)
	RET

// decodeSSE is the same algorithm as decodeAVX written with two-operand
// SSE instructions only: every place decodeAVX uses a three-operand VEX
// instruction, this version first copies the source register with MOVOU
// and then applies the destructive form.
//
// Results:
//   ok+32(FP) is set to 1 when every checked input byte was a hex digit.
//   On the first invalid byte, ok+32(FP) is set to 0 and n+24(FP) receives
//   that byte's offset from the start of src. n+24(FP) is not written on
//   the success path.
//
// Register use matches decodeAVX: DI = dst cursor, SI = src cursor,
// BX = input bytes left, R15 = original src, DX = lane mask,
// X14/X15 = cached '0' and 'a' bounds.
//
// NOTE(review): with len == 0 the code still reaches tail_in_2/tail_out_2,
// which reads 2 bytes from src and writes 1 byte to dst; with odd len the
// last PINSRW reads one byte past len. Presumably the caller guarantees a
// non-zero, even len - confirm against the Go wrapper.
TEXT ·decodeSSE(SB),NOSPLIT,$0
	MOVQ dst+0(FP), DI
	MOVQ src+8(FP), SI
	MOVQ len+16(FP), BX
	// Keep the original src so the invalid path can compute an offset.
	MOVQ SI, R15
	// X14 = '0' bound, X15 = 'a' bound (both signed-biased).
	MOVOU decodeValid<>(SB), X14
	MOVOU decodeValid<>+0x20(SB), X15
	// All 16 lanes are validated in the main loop.
	MOVW $65535, DX
	CMPQ BX, $16
	JB tail
bigloop:
	MOVOU (SI), X0
	// X1 = raw input, signed-biased.
	MOVOU X0, X1
	PXOR decodeToSigned<>(SB), X1
	// X0 = lowercased input; X2 = lowercased input, signed-biased.
	POR decodeToLower<>(SB), X0
	MOVOU X0, X2
	PXOR decodeToSigned<>(SB), X2
	// X3 = lanes where raw < '0'.
	MOVOU X14, X3
	PCMPGTB X1, X3
	// X1 = lanes where raw > '9'.
	PCMPGTB decodeValid<>+0x10(SB), X1
	// X4 = lanes where lowercased < 'a' (i.e. not a letter lane).
	MOVOU X15, X4
	PCMPGTB X2, X4
	// X2 = lanes where lowercased > 'f'.
	PCMPGTB decodeValid<>+0x30(SB), X2
	// X1 = lanes strictly between '9' and 'a'.
	PAND X4, X1
	// X3 = union of all three invalid conditions.
	POR X2, X3
	POR X1, X3
	// One bit per lane; any bit inside the DX mask means invalid input.
	PMOVMSKB X3, AX
	TESTW AX, DX
	JNZ invalid
	// Subtract '0' from every lane ...
	PSUBB decodeBase<>(SB), X0
	// ... and a further 0x27 from letter lanes only (X4 = ~X4 & 0x27).
	PANDN decodeBase<>+0x10(SB), X4
	PSUBB X4, X0
	// X3 = low nibbles (odd input bytes), X0 = high nibbles (even bytes).
	MOVOU X0, X3
	PSHUFB decodeLow<>(SB), X3
	PSHUFB decodeHigh<>(SB), X0
	// Each lane is < 16, so a word shift by 4 cannot spill across bytes.
	PSLLW $4, X0
	POR X3, X0
	// Store the 8 decoded bytes.
	MOVQ X0, (DI)
	SUBQ $16, BX
	JZ ret
	ADDQ $16, SI
	ADDQ $8, DI
	CMPQ BX, $16
	JAE bigloop
tail:
	// DX = 0xffff >> (16 - BX): only the low BX lanes are validated, so
	// stale data in the unused upper lanes of X0 is ignored.
	MOVQ $16, CX
	SUBQ BX, CX
	SHRW CX, DX
	// Dispatch on the remaining length; the labels below fall through so
	// that exactly the remaining input words are loaded into X0.
	CMPQ BX, $4
	JB tail_in_2
	JE tail_in_4
	CMPQ BX, $8
	JB tail_in_6
	JE tail_in_8
	CMPQ BX, $12
	JB tail_in_10
	JE tail_in_12
tail_in_14:
	PINSRW $6, 12(SI), X0
tail_in_12:
	PINSRW $5, 10(SI), X0
tail_in_10:
	PINSRW $4, 8(SI), X0
tail_in_8:
	// Bytes 0..7 are loaded with a single 64-bit insert.
	PINSRQ $0, (SI), X0
	JMP tail_conv
tail_in_6:
	PINSRW $2, 4(SI), X0
tail_in_4:
	PINSRW $1, 2(SI), X0
tail_in_2:
	PINSRW $0, (SI), X0
tail_conv:
	// Same validation and conversion sequence as in bigloop, but checked
	// against the narrowed DX mask.
	MOVOU X0, X1
	PXOR decodeToSigned<>(SB), X1
	POR decodeToLower<>(SB), X0
	MOVOU X0, X2
	PXOR decodeToSigned<>(SB), X2
	MOVOU X14, X3
	PCMPGTB X1, X3
	PCMPGTB decodeValid<>+0x10(SB), X1
	MOVOU X15, X4
	PCMPGTB X2, X4
	PCMPGTB decodeValid<>+0x30(SB), X2
	PAND X4, X1
	POR X2, X3
	POR X1, X3
	PMOVMSKB X3, AX
	TESTW AX, DX
	JNZ invalid
	PSUBB decodeBase<>(SB), X0
	PANDN decodeBase<>+0x10(SB), X4
	PSUBB X4, X0
	MOVOU X0, X3
	PSHUFB decodeLow<>(SB), X3
	PSHUFB decodeHigh<>(SB), X0
	PSLLW $4, X0
	POR X3, X0
	// Dispatch again on BX to store exactly BX/2 output bytes; the labels
	// fall through from the highest output byte down.
	CMPQ BX, $4
	JB tail_out_2
	JE tail_out_4
	CMPQ BX, $8
	JB tail_out_6
	JE tail_out_8
	CMPQ BX, $12
	JB tail_out_10
	JE tail_out_12
tail_out_14:
	PEXTRB $6, X0, 6(DI)
tail_out_12:
	PEXTRB $5, X0, 5(DI)
tail_out_10:
	PEXTRB $4, X0, 4(DI)
tail_out_8:
	// Output bytes 0..3 are stored with a single 32-bit move.
	MOVL X0, (DI)
	JMP ret
tail_out_6:
	PEXTRB $2, X0, 2(DI)
tail_out_4:
	PEXTRB $1, X0, 1(DI)
tail_out_2:
	PEXTRB $0, X0, (DI)
ret:
	MOVB $1, ok+32(FP)
	RET
invalid:
	// AX = index of the lowest flagged lane within the current chunk. The
	// DX mask covers the low lanes, so the lowest set bit is a checked one.
	BSFW AX, AX
	// SI - R15 = offset of the current chunk from the start of src.
	SUBQ R15, SI
	ADDQ SI, AX
	MOVQ AX, n+24(FP)
	MOVB $0, ok+32(FP)
	RET