gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

sha256block_amd64.s (8178B)


      1 //+build !noasm,!appengine,gc
      2 
      3 // SHA-256 block function implemented with the x86 SHA extensions (SHA-NI)
      4 
      5 // Kristofer Peterson, (C) 2018.
      6 //
      7 // Licensed under the Apache License, Version 2.0 (the "License");
      8 // you may not use this file except in compliance with the License.
      9 // You may obtain a copy of the License at
     10 //
     11 //     http://www.apache.org/licenses/LICENSE-2.0
     12 //
     13 // Unless required by applicable law or agreed to in writing, software
     14 // distributed under the License is distributed on an "AS IS" BASIS,
     15 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     16 // See the License for the specific language governing permissions and
     17 // limitations under the License.
     18 //
     19 
     20 #include "textflag.h"
     21 
     22 DATA K<>+0x00(SB)/4, $0x428a2f98 // SHA-256 round constants, four per 16-byte row; this row: rounds 0-3
     23 DATA K<>+0x04(SB)/4, $0x71374491
     24 DATA K<>+0x08(SB)/4, $0xb5c0fbcf
     25 DATA K<>+0x0c(SB)/4, $0xe9b5dba5
     26 DATA K<>+0x10(SB)/4, $0x3956c25b // rounds 4-7
     27 DATA K<>+0x14(SB)/4, $0x59f111f1
     28 DATA K<>+0x18(SB)/4, $0x923f82a4
     29 DATA K<>+0x1c(SB)/4, $0xab1c5ed5
     30 DATA K<>+0x20(SB)/4, $0xd807aa98 // rounds 8-11
     31 DATA K<>+0x24(SB)/4, $0x12835b01
     32 DATA K<>+0x28(SB)/4, $0x243185be
     33 DATA K<>+0x2c(SB)/4, $0x550c7dc3
     34 DATA K<>+0x30(SB)/4, $0x72be5d74 // rounds 12-15
     35 DATA K<>+0x34(SB)/4, $0x80deb1fe
     36 DATA K<>+0x38(SB)/4, $0x9bdc06a7
     37 DATA K<>+0x3c(SB)/4, $0xc19bf174
     38 DATA K<>+0x40(SB)/4, $0xe49b69c1 // rounds 16-19
     39 DATA K<>+0x44(SB)/4, $0xefbe4786
     40 DATA K<>+0x48(SB)/4, $0x0fc19dc6
     41 DATA K<>+0x4c(SB)/4, $0x240ca1cc
     42 DATA K<>+0x50(SB)/4, $0x2de92c6f // rounds 20-23
     43 DATA K<>+0x54(SB)/4, $0x4a7484aa
     44 DATA K<>+0x58(SB)/4, $0x5cb0a9dc
     45 DATA K<>+0x5c(SB)/4, $0x76f988da
     46 DATA K<>+0x60(SB)/4, $0x983e5152 // rounds 24-27
     47 DATA K<>+0x64(SB)/4, $0xa831c66d
     48 DATA K<>+0x68(SB)/4, $0xb00327c8
     49 DATA K<>+0x6c(SB)/4, $0xbf597fc7
     50 DATA K<>+0x70(SB)/4, $0xc6e00bf3 // rounds 28-31
     51 DATA K<>+0x74(SB)/4, $0xd5a79147
     52 DATA K<>+0x78(SB)/4, $0x06ca6351
     53 DATA K<>+0x7c(SB)/4, $0x14292967
     54 DATA K<>+0x80(SB)/4, $0x27b70a85 // rounds 32-35
     55 DATA K<>+0x84(SB)/4, $0x2e1b2138
     56 DATA K<>+0x88(SB)/4, $0x4d2c6dfc
     57 DATA K<>+0x8c(SB)/4, $0x53380d13
     58 DATA K<>+0x90(SB)/4, $0x650a7354 // rounds 36-39
     59 DATA K<>+0x94(SB)/4, $0x766a0abb
     60 DATA K<>+0x98(SB)/4, $0x81c2c92e
     61 DATA K<>+0x9c(SB)/4, $0x92722c85
     62 DATA K<>+0xa0(SB)/4, $0xa2bfe8a1 // rounds 40-43
     63 DATA K<>+0xa4(SB)/4, $0xa81a664b
     64 DATA K<>+0xa8(SB)/4, $0xc24b8b70
     65 DATA K<>+0xac(SB)/4, $0xc76c51a3
     66 DATA K<>+0xb0(SB)/4, $0xd192e819 // rounds 44-47
     67 DATA K<>+0xb4(SB)/4, $0xd6990624
     68 DATA K<>+0xb8(SB)/4, $0xf40e3585
     69 DATA K<>+0xbc(SB)/4, $0x106aa070
     70 DATA K<>+0xc0(SB)/4, $0x19a4c116 // rounds 48-51
     71 DATA K<>+0xc4(SB)/4, $0x1e376c08
     72 DATA K<>+0xc8(SB)/4, $0x2748774c
     73 DATA K<>+0xcc(SB)/4, $0x34b0bcb5
     74 DATA K<>+0xd0(SB)/4, $0x391c0cb3 // rounds 52-55
     75 DATA K<>+0xd4(SB)/4, $0x4ed8aa4a
     76 DATA K<>+0xd8(SB)/4, $0x5b9cca4f
     77 DATA K<>+0xdc(SB)/4, $0x682e6ff3
     78 DATA K<>+0xe0(SB)/4, $0x748f82ee // rounds 56-59
     79 DATA K<>+0xe4(SB)/4, $0x78a5636f
     80 DATA K<>+0xe8(SB)/4, $0x84c87814
     81 DATA K<>+0xec(SB)/4, $0x8cc70208
     82 DATA K<>+0xf0(SB)/4, $0x90befffa // rounds 60-63
     83 DATA K<>+0xf4(SB)/4, $0xa4506ceb
     84 DATA K<>+0xf8(SB)/4, $0xbef9a3f7
     85 DATA K<>+0xfc(SB)/4, $0xc67178f2
     86 GLOBL K<>(SB), RODATA|NOPTR, $256 // 64 x 4 bytes, read-only, pointer-free; read as n*16(BX) by blockIntelSha
     87 
     88 DATA SHUF_MASK<>+0x00(SB)/8, $0x0405060700010203 // PSHUFB control: reverses the four bytes of each 32-bit lane (lanes 0-1)
     89 DATA SHUF_MASK<>+0x08(SB)/8, $0x0c0d0e0f08090a0b // same byte reversal for lanes 2-3
     90 GLOBL SHUF_MASK<>(SB), RODATA|NOPTR, $16 // loaded once into X15 by blockIntelSha
     91 
     92 // blockIntelSha folds every complete 64-byte block of the message into the
     93 // SHA-256 state at h using the x86 SHA extensions (SHA256RNDS2, SHA256MSG1
     94 // and SHA256MSG2, emitted below as raw opcode bytes).
     95 //
     96 // Arguments (32 bytes, no locals):
     97 //   h+0(FP)            pointer to the eight 32-bit state words, updated in place
     98 //   message_base+8(FP) pointer to the input bytes
     99 //   message_len+16(FP) input length in bytes; a trailing partial block is ignored
    100 //
    101 // NOTE(review): the Go declaration is not in this file; the frame layout
    102 // suggests func blockIntelSha(h *[8]uint32, message []uint8) - confirm.
    103 //
    104 // Inputs shorter than one block return at once and leave the state untouched.
    105 // Without that check base+len-64 wraps around for a nil/empty message, the
    106 // unsigned loop test succeeds and the loop reads from an invalid address.
    107 //
    108 // No CPU feature check is done here; the caller must only use this routine
    109 // when the SHA, SSSE3 (PSHUFB, PALIGNR) and SSE4.1 (PBLENDW) extensions exist.
    110 //
    111 // Register Usage
    112 // BX  base address of constant table (constant)
    113 // DX  hash_state (constant)
    114 // SI  hash_data.data
    115 // DI  hash_data.data + hash_data.length - 64 (constant)
    116 // X0  scratch
    117 // X1  scratch
    118 // X2  working hash state // ABEF
    119 // X3  working hash state // CDGH
    120 // X4  first 16 bytes of block
    121 // X5  second 16 bytes of block
    122 // X6  third 16 bytes of block
    123 // X7  fourth 16 bytes of block
    124 // X12 saved hash state // ABEF
    125 // X13 saved hash state // CDGH
    126 // X15 data shuffle mask (constant)
    127
    128 TEXT ·blockIntelSha(SB), NOSPLIT, $0-32
    129 	MOVQ      h+0(FP), DX
    130 	MOVQ      message_base+8(FP), SI
    131 	MOVQ      message_len+16(FP), DI
    132
    133 	// Nothing to do without at least one complete block. Bail out before
    134 	// forming the end pointer so it can never wrap below the base address.
    135 	CMPQ      DI, $64
    136 	JB        DONE
    137
    138 	LEAQ      -64(SI)(DI*1), DI // DI = start address of the last complete block candidate
    139
    140 	// Load state words a..h and rearrange them into the ABEF / CDGH layout
    141 	// that SHA256RNDS2 operates on.
    142 	MOVOU     (DX), X2
    143 	MOVOU     16(DX), X1
    144 	MOVO      X2, X3
    145 	PUNPCKLLQ X1, X2
    146 	PUNPCKHLQ X1, X3
    147 	PSHUFD    $0x27, X2, X2
    148 	PSHUFD    $0x27, X3, X3
    149 	MOVO      SHUF_MASK<>(SB), X15
    150 	LEAQ      K<>(SB), BX
    151
    152 	JMP TEST
    153
    154 LOOP:
    155 	// keep the incoming state for the addition that follows round 63
    156 	MOVO X2, X12
    157 	MOVO X3, X13
    158
    159 	// load block and shuffle
    160 	MOVOU  (SI), X4
    161 	MOVOU  16(SI), X5
    162 	MOVOU  32(SI), X6
    163 	MOVOU  48(SI), X7
    164 	PSHUFB X15, X4
    165 	PSHUFB X15, X5
    166 	PSHUFB X15, X6
    167 	PSHUFB X15, X7
    168
    169 #define ROUND456 \
    170 	PADDL  X5, X0                    \
    171 	LONG   $0xdacb380f               \ // SHA256RNDS2 XMM3, XMM2
    172 	MOVO   X5, X1                    \
    173 	LONG   $0x0f3a0f66; WORD $0x04cc \ // PALIGNR XMM1, XMM4, 4
    174 	PADDL  X1, X6                    \
    175 	LONG   $0xf5cd380f               \ // SHA256MSG2 XMM6, XMM5
    176 	PSHUFD $0x4e, X0, X0             \
    177 	LONG   $0xd3cb380f               \ // SHA256RNDS2 XMM2, XMM3
    178 	LONG   $0xe5cc380f               // SHA256MSG1 XMM4, XMM5
    179
    180 #define ROUND567 \
    181 	PADDL  X6, X0                    \
    182 	LONG   $0xdacb380f               \ // SHA256RNDS2 XMM3, XMM2
    183 	MOVO   X6, X1                    \
    184 	LONG   $0x0f3a0f66; WORD $0x04cd \ // PALIGNR XMM1, XMM5, 4
    185 	PADDL  X1, X7                    \
    186 	LONG   $0xfecd380f               \ // SHA256MSG2 XMM7, XMM6
    187 	PSHUFD $0x4e, X0, X0             \
    188 	LONG   $0xd3cb380f               \ // SHA256RNDS2 XMM2, XMM3
    189 	LONG   $0xeecc380f               // SHA256MSG1 XMM5, XMM6
    190
    191 #define ROUND674 \
    192 	PADDL  X7, X0                    \
    193 	LONG   $0xdacb380f               \ // SHA256RNDS2 XMM3, XMM2
    194 	MOVO   X7, X1                    \
    195 	LONG   $0x0f3a0f66; WORD $0x04ce \ // PALIGNR XMM1, XMM6, 4
    196 	PADDL  X1, X4                    \
    197 	LONG   $0xe7cd380f               \ // SHA256MSG2 XMM4, XMM7
    198 	PSHUFD $0x4e, X0, X0             \
    199 	LONG   $0xd3cb380f               \ // SHA256RNDS2 XMM2, XMM3
    200 	LONG   $0xf7cc380f               // SHA256MSG1 XMM6, XMM7
    201
    202 #define ROUND745 \
    203 	PADDL  X4, X0                    \
    204 	LONG   $0xdacb380f               \ // SHA256RNDS2 XMM3, XMM2
    205 	MOVO   X4, X1                    \
    206 	LONG   $0x0f3a0f66; WORD $0x04cf \ // PALIGNR XMM1, XMM7, 4
    207 	PADDL  X1, X5                    \
    208 	LONG   $0xeccd380f               \ // SHA256MSG2 XMM5, XMM4
    209 	PSHUFD $0x4e, X0, X0             \
    210 	LONG   $0xd3cb380f               \ // SHA256RNDS2 XMM2, XMM3
    211 	LONG   $0xfccc380f               // SHA256MSG1 XMM7, XMM4
    212
    213 	// rounds 0-3
    214 	MOVO   (BX), X0
    215 	PADDL  X4, X0
    216 	LONG   $0xdacb380f   // SHA256RNDS2 XMM3, XMM2
    217 	PSHUFD $0x4e, X0, X0
    218 	LONG   $0xd3cb380f   // SHA256RNDS2 XMM2, XMM3
    219
    220 	// rounds 4-7
    221 	MOVO   1*16(BX), X0
    222 	PADDL  X5, X0
    223 	LONG   $0xdacb380f   // SHA256RNDS2 XMM3, XMM2
    224 	PSHUFD $0x4e, X0, X0
    225 	LONG   $0xd3cb380f   // SHA256RNDS2 XMM2, XMM3
    226 	LONG   $0xe5cc380f   // SHA256MSG1 XMM4, XMM5
    227
    228 	// rounds 8-11
    229 	MOVO   2*16(BX), X0
    230 	PADDL  X6, X0
    231 	LONG   $0xdacb380f   // SHA256RNDS2 XMM3, XMM2
    232 	PSHUFD $0x4e, X0, X0
    233 	LONG   $0xd3cb380f   // SHA256RNDS2 XMM2, XMM3
    234 	LONG   $0xeecc380f   // SHA256MSG1 XMM5, XMM6
    235
    236 	MOVO 3*16(BX), X0; ROUND674  // rounds 12-15
    237 	MOVO 4*16(BX), X0; ROUND745  // rounds 16-19
    238 	MOVO 5*16(BX), X0; ROUND456  // rounds 20-23
    239 	MOVO 6*16(BX), X0; ROUND567  // rounds 24-27
    240 	MOVO 7*16(BX), X0; ROUND674  // rounds 28-31
    241 	MOVO 8*16(BX), X0; ROUND745  // rounds 32-35
    242 	MOVO 9*16(BX), X0; ROUND456  // rounds 36-39
    243 	MOVO 10*16(BX), X0; ROUND567 // rounds 40-43
    244 	MOVO 11*16(BX), X0; ROUND674 // rounds 44-47
    245 	MOVO 12*16(BX), X0; ROUND745 // rounds 48-51
    246
    247 	// rounds 52-55
    248 	MOVO   13*16(BX), X0
    249 	PADDL  X5, X0
    250 	LONG   $0xdacb380f               // SHA256RNDS2 XMM3, XMM2
    251 	MOVO   X5, X1
    252 	LONG   $0x0f3a0f66; WORD $0x04cc // PALIGNR XMM1, XMM4, 4
    253 	PADDL  X1, X6
    254 	LONG   $0xf5cd380f               // SHA256MSG2 XMM6, XMM5
    255 	PSHUFD $0x4e, X0, X0
    256 	LONG   $0xd3cb380f               // SHA256RNDS2 XMM2, XMM3
    257
    258 	// rounds 56-59
    259 	MOVO   14*16(BX), X0
    260 	PADDL  X6, X0
    261 	LONG   $0xdacb380f               // SHA256RNDS2 XMM3, XMM2
    262 	MOVO   X6, X1
    263 	LONG   $0x0f3a0f66; WORD $0x04cd // PALIGNR XMM1, XMM5, 4
    264 	PADDL  X1, X7
    265 	LONG   $0xfecd380f               // SHA256MSG2 XMM7, XMM6
    266 	PSHUFD $0x4e, X0, X0
    267 	LONG   $0xd3cb380f               // SHA256RNDS2 XMM2, XMM3
    268
    269 	// rounds 60-63
    270 	MOVO   15*16(BX), X0
    271 	PADDL  X7, X0
    272 	LONG   $0xdacb380f   // SHA256RNDS2 XMM3, XMM2
    273 	PSHUFD $0x4e, X0, X0
    274 	LONG   $0xd3cb380f   // SHA256RNDS2 XMM2, XMM3
    275
    276 	PADDL X12, X2 // add the state saved at loop entry (ABEF)
    277 	PADDL X13, X3 // add the state saved at loop entry (CDGH)
    278
    279 	ADDQ $64, SI // advance to the next block
    280
    281 TEST:
    282 	CMPQ SI, DI
    283 	JBE  LOOP // unsigned: another complete block starts at SI
    284
    285 	// convert ABEF / CDGH back to the a..h word order and store the state
    286 	PSHUFD $0x4e, X3, X0
    287 	LONG   $0x0e3a0f66; WORD $0xf0c2 // PBLENDW XMM0, XMM2, 0xf0
    288 	PSHUFD $0x4e, X2, X1
    289 	LONG   $0x0e3a0f66; WORD $0x0fcb // PBLENDW XMM1, XMM3, 0x0f
    290 	PSHUFD $0x1b, X0, X0
    291 	PSHUFD $0x1b, X1, X1
    292
    293 	MOVOU X0, (DX)
    294 	MOVOU X1, 16(DX)
    295
    296 DONE:
    297 	RET
    266 	RET