gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

chacha_s390x.s (5492B)


      1 // Copyright 2018 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 //go:build gc && !purego
      6 // +build gc,!purego
      7 
      8 #include "go_asm.h"
      9 #include "textflag.h"
     10 
     11 // This is an implementation of the ChaCha20 encryption algorithm as
     12 // specified in RFC 7539. It uses vector instructions to compute
     13 // 4 keystream blocks in parallel (256 bytes) which are then XORed
     14 // with the bytes in the input slice.
     15 
     16 GLOBL ·constants<>(SB), RODATA|NOPTR, $32 // 32 read-only bytes: BSWAP mask at +0x00, J0 at +0x10
     17 // BSWAP: swap bytes in each 4-byte element (VPERM byte-select mask; stored big-endian the bytes read 3,2,1,0, 7,6,5,4, ...)
     18 DATA ·constants<>+0x00(SB)/4, $0x03020100
     19 DATA ·constants<>+0x04(SB)/4, $0x07060504
     20 DATA ·constants<>+0x08(SB)/4, $0x0b0a0908
     21 DATA ·constants<>+0x0c(SB)/4, $0x0f0e0d0c
     22 // J0: [j0, j1, j2, j3] (the four constant words of the ChaCha state; as little-endian bytes they spell "expand 32-byte k")
     23 DATA ·constants<>+0x10(SB)/4, $0x61707865 // "expa"
     24 DATA ·constants<>+0x14(SB)/4, $0x3320646e // "nd 3"
     25 DATA ·constants<>+0x18(SB)/4, $0x79622d32 // "2-by"
     26 DATA ·constants<>+0x1c(SB)/4, $0x6b206574 // "te k"
     27 
     28 #define BSWAP V5 // byte-reversal mask used by PERMUTE/XORV (loaded from constants+0x00)
     29 #define J0    V6 // constant words j0..j3 (loaded from constants+0x10)
     30 #define KEY0  V7 // key words 0-3
     31 #define KEY1  V8 // key words 4-7
     32 #define NONCE V9 // nonce words, placed in elements 1-3 by the setup code
     33 #define CTR   V10 // block counters, one per parallel block
     34 #define M0    V11 // M0-M3: input data in XORV; also used as scratch by SHUFFLE and the setup code
     35 #define M1    V12
     36 #define M2    V13
     37 #define M3    V14
     38 #define INC   V15 // counter increment vector
     39 #define X0    V16 // X0-X15: working state; Xi holds state word i of four blocks, one block per 32-bit element
     40 #define X1    V17
     41 #define X2    V18
     42 #define X3    V19
     43 #define X4    V20
     44 #define X5    V21
     45 #define X6    V22
     46 #define X7    V23
     47 #define X8    V24
     48 #define X9    V25
     49 #define X10   V26
     50 #define X11   V27
     51 #define X12   V28
     52 #define X13   V29
     53 #define X14   V30
     54 #define X15   V31
     55 
     56 #define NUM_ROUNDS 20 // total rounds; the main loop runs NUM_ROUNDS/2 times with two ROUND4 calls per iteration
     57 
     58 #define ROUND4(a0, a1, a2, a3, b0, b1, b2, b3, c0, c1, c2, c3, d0, d1, d2, d3) \ // four independent quarter-rounds (groups a, b, c, d), interleaved one step at a time; per group g the steps are listed below
     59 	VAF    a1, a0, a0  \ // g0 += g1 (32-bit element-wise add)
     60 	VAF    b1, b0, b0  \
     61 	VAF    c1, c0, c0  \
     62 	VAF    d1, d0, d0  \
     63 	VX     a0, a2, a2  \ // g2 ^= g0
     64 	VX     b0, b2, b2  \
     65 	VX     c0, c2, c2  \
     66 	VX     d0, d2, d2  \
     67 	VERLLF $16, a2, a2 \ // g2 = each 32-bit element rotated left by 16
     68 	VERLLF $16, b2, b2 \
     69 	VERLLF $16, c2, c2 \
     70 	VERLLF $16, d2, d2 \
     71 	VAF    a2, a3, a3  \ // g3 += g2
     72 	VAF    b2, b3, b3  \
     73 	VAF    c2, c3, c3  \
     74 	VAF    d2, d3, d3  \
     75 	VX     a3, a1, a1  \ // g1 ^= g3
     76 	VX     b3, b1, b1  \
     77 	VX     c3, c1, c1  \
     78 	VX     d3, d1, d1  \
     79 	VERLLF $12, a1, a1 \ // g1 = each 32-bit element rotated left by 12
     80 	VERLLF $12, b1, b1 \
     81 	VERLLF $12, c1, c1 \
     82 	VERLLF $12, d1, d1 \
     83 	VAF    a1, a0, a0  \ // g0 += g1
     84 	VAF    b1, b0, b0  \
     85 	VAF    c1, c0, c0  \
     86 	VAF    d1, d0, d0  \
     87 	VX     a0, a2, a2  \ // g2 ^= g0
     88 	VX     b0, b2, b2  \
     89 	VX     c0, c2, c2  \
     90 	VX     d0, d2, d2  \
     91 	VERLLF $8, a2, a2  \ // g2 = each 32-bit element rotated left by 8
     92 	VERLLF $8, b2, b2  \
     93 	VERLLF $8, c2, c2  \
     94 	VERLLF $8, d2, d2  \
     95 	VAF    a2, a3, a3  \ // g3 += g2
     96 	VAF    b2, b3, b3  \
     97 	VAF    c2, c3, c3  \
     98 	VAF    d2, d3, d3  \
     99 	VX     a3, a1, a1  \ // g1 ^= g3
    100 	VX     b3, b1, b1  \
    101 	VX     c3, c1, c1  \
    102 	VX     d3, d1, d1  \
    103 	VERLLF $7, a1, a1  \ // g1 = each 32-bit element rotated left by 7
    104 	VERLLF $7, b1, b1  \
    105 	VERLLF $7, c1, c1  \
    106 	VERLLF $7, d1, d1
    107 
    108 #define PERMUTE(mask, v0, v1, v2, v3) \ // rearrange the bytes of each of v0..v3 in place, using mask as the VPERM byte-select vector
    109 	VPERM v0, v0, mask, v0 \ // both VPERM sources are the same register, so mask only ever selects bytes of v0 itself
    110 	VPERM v1, v1, mask, v1 \
    111 	VPERM v2, v2, mask, v2 \
    112 	VPERM v3, v3, mask, v3
    113 
    114 #define ADDV(x, v0, v1, v2, v3) \ // add vector x to each of v0..v3 in place (32-bit element-wise add); x is not modified
    115 	VAF x, v0, v0 \
    116 	VAF x, v1, v1 \
    117 	VAF x, v2, v2 \
    118 	VAF x, v3, v3
    119 
    120 #define XORV(off, dst, src, v0, v1, v2, v3) \ // dst[off:off+64] = src[off:off+64] XOR byte-swapped (v0, v1, v2, v3); clobbers M0-M3 and leaves v0..v3 byte-swapped
    121 	VLM  off(src), M0, M3          \ // M0..M3 = 64 bytes of input starting at src+off
    122 	PERMUTE(BSWAP, v0, v1, v2, v3) \ // reverse the bytes of every 32-bit keystream word
    123 	VX   v0, M0, M0                \ // XOR the keystream into the loaded input
    124 	VX   v1, M1, M1                \
    125 	VX   v2, M2, M2                \
    126 	VX   v3, M3, M3                \
    127 	VSTM M0, M3, off(dst) // write the 64 result bytes to dst+off
    128 
    129 #define SHUFFLE(a, b, c, d, t, u, v, w) \ // transpose the 4x4 matrix of 32-bit words whose rows are a, b, c, d (in place); t, u, v, w are overwritten as scratch
    130 	VMRHF a, c, t \ // t = {a[0], c[0], a[1], c[1]}
    131 	VMRHF b, d, u \ // u = {b[0], d[0], b[1], d[1]}
    132 	VMRLF a, c, v \ // v = {a[2], c[2], a[3], c[3]}
    133 	VMRLF b, d, w \ // w = {b[2], d[2], b[3], d[3]}
    134 	VMRHF t, u, a \ // a = {a[0], b[0], c[0], d[0]}
    135 	VMRLF t, u, b \ // b = {a[1], b[1], c[1], d[1]}
    136 	VMRHF v, w, c \ // c = {a[2], b[2], c[2], d[2]}
    137 	VMRLF v, w, d // d = {a[3], b[3], c[3], d[3]}
    138 
    139 // func xorKeyStreamVX(dst, src []byte, key *[8]uint32, nonce *[3]uint32, counter *uint32)
    140 TEXT ·xorKeyStreamVX(SB), NOSPLIT, $0 // XORs src with keystream into dst, 256 bytes (four 64-byte blocks) per pass, then writes the advanced counter back through counter
    141 	MOVD $·constants<>(SB), R1 // R1=&constants (BSWAP mask followed by J0)
    142 	MOVD dst+0(FP), R2         // R2=&dst[0]
    143 	LMG  src+24(FP), R3, R4    // R3=&src[0] R4=len(src)
    144 	MOVD key+48(FP), R5        // R5=key
    145 	MOVD nonce+56(FP), R6      // R6=nonce
    146 	MOVD counter+64(FP), R7    // R7=counter
    147 
    148 	// load BSWAP and J0 (32 consecutive bytes of the constants table)
    149 	VLM (R1), BSWAP, J0
    150 
    151 	// setup: KEY0/KEY1 = the eight key words; NONCE ends up with the nonce words in elements 1-3
    152 	MOVD  $95, R0 // length operand for VLL below. NOTE(review): VLL takes the highest byte index to load, so 95 presumably loads a full 16 bytes (4 past the 12-byte nonce); the extra word is shifted out by VSRLB - confirm the over-read is intended
    153 	VLM   (R5), KEY0, KEY1
    154 	VLL   R0, (R6), NONCE
    155 	VZERO M0
    156 	VLEIB $7, $32, M0 // byte 7 of M0 = 32: the shift amount consumed by VSRLB (32 bits = 4 bytes)
    157 	VSRLB M0, NONCE, NONCE // shift right by whole bytes so element 0 becomes zero and the nonce words move to elements 1-3
    158 
    159 	// initialize counter values: CTR = {c, c+1, c+2, c+3}, one counter per parallel block
    160 	VLREPF (R7), CTR // load *counter and replicate it into all four elements
    161 	VZERO  INC
    162 	VLEIF  $1, $1, INC
    163 	VLEIF  $2, $2, INC
    164 	VLEIF  $3, $3, INC // INC = {0, 1, 2, 3}
    165 	VAF    INC, CTR, CTR
    166 	VREPIF $4, INC // INC = {4, 4, 4, 4}: the counter step applied after each 4-block pass
    167 
    168 chacha: // start of one 256-byte pass
    169 	VREPF $0, J0, X0 // broadcast each initial state word across a vector: Xi = word i for all four blocks
    170 	VREPF $1, J0, X1
    171 	VREPF $2, J0, X2
    172 	VREPF $3, J0, X3
    173 	VREPF $0, KEY0, X4
    174 	VREPF $1, KEY0, X5
    175 	VREPF $2, KEY0, X6
    176 	VREPF $3, KEY0, X7
    177 	VREPF $0, KEY1, X8
    178 	VREPF $1, KEY1, X9
    179 	VREPF $2, KEY1, X10
    180 	VREPF $3, KEY1, X11
    181 	VLR   CTR, X12 // word 12 is the block counter, the only word that differs between the four blocks
    182 	VREPF $1, NONCE, X13
    183 	VREPF $2, NONCE, X14
    184 	VREPF $3, NONCE, X15
    185 
    186 	MOVD $(NUM_ROUNDS/2), R1 // R1 = loop count; each iteration below performs two rounds
    187 
    188 loop:
    189 	ROUND4(X0, X4, X12,  X8, X1, X5, X13,  X9, X2, X6, X14, X10, X3, X7, X15, X11) // column round: words (0,4,8,12), (1,5,9,13), (2,6,10,14), (3,7,11,15)
    190 	ROUND4(X0, X5, X15, X10, X1, X6, X12, X11, X2, X7, X13, X8,  X3, X4, X14, X9) // diagonal round: words (0,5,10,15), (1,6,11,12), (2,7,8,13), (3,4,9,14)
    191 
    192 	ADD $-1, R1
    193 	BNE loop
    194 
    195 	// decrement length (R4 is only tested by the CMPBNE at the end of the pass)
    196 	ADD $-256, R4
    197 
    198 	// rearrange vectors: transpose each group of four so a register holds four consecutive words of one block, then add the initial state
    199 	SHUFFLE(X0, X1, X2, X3, M0, M1, M2, M3)
    200 	ADDV(J0, X0, X1, X2, X3)
    201 	SHUFFLE(X4, X5, X6, X7, M0, M1, M2, M3)
    202 	ADDV(KEY0, X4, X5, X6, X7)
    203 	SHUFFLE(X8, X9, X10, X11, M0, M1, M2, M3)
    204 	ADDV(KEY1, X8, X9, X10, X11)
    205 	VAF CTR, X12, X12 // the counters differ per block, so add them before the transpose; element 0 of NONCE is zero, so ADDV(NONCE, ...) below adds nothing to the counter word
    206 	SHUFFLE(X12, X13, X14, X15, M0, M1, M2, M3)
    207 	ADDV(NONCE, X12, X13, X14, X15)
    208 
    209 	// increment counters (by INC = 4 in every element, ready for the next pass)
    210 	VAF INC, CTR, CTR
    211 
    212 	// xor keystream with plaintext; after the transposes (Xk, Xk+4, Xk+8, Xk+12) hold the 16 words of block k
    213 	XORV(0*64, R2, R3, X0, X4,  X8, X12)
    214 	XORV(1*64, R2, R3, X1, X5,  X9, X13)
    215 	XORV(2*64, R2, R3, X2, X6, X10, X14)
    216 	XORV(3*64, R2, R3, X3, X7, X11, X15)
    217 
    218 	// increment pointers
    219 	MOVD $256(R2), R2
    220 	MOVD $256(R3), R3
    221 
    222 	CMPBNE  R4, $0, chacha // repeat until R4 is exactly 0; the loop therefore only terminates correctly when len(src) is a non-zero multiple of 256
    223 
    224 	VSTEF $0, CTR, (R7) // *counter = element 0 of CTR, i.e. the counter value following the last block processed
    225 	RET