gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

sha256block_arm64.s (7528B)


      1 //+build !noasm,!appengine,gc
      2 
      3 // ARM64 version of SHA256
      4 
      5 //
      6 // Minio Cloud Storage, (C) 2016 Minio, Inc.
      7 //
      8 // Licensed under the Apache License, Version 2.0 (the "License");
      9 // you may not use this file except in compliance with the License.
     10 // You may obtain a copy of the License at
     11 //
     12 //     http://www.apache.org/licenses/LICENSE-2.0
     13 //
     14 // Unless required by applicable law or agreed to in writing, software
     15 // distributed under the License is distributed on an "AS IS" BASIS,
     16 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     17 // See the License for the specific language governing permissions and
     18 // limitations under the License.
     19 //
     20 
     21 //
     22 // Based on implementation as found in https://github.com/jocover/sha256-armv8
     23 //
     24 // Use github.com/minio/asm2plan9s on this file to assemble ARM instructions to
     25 // their Plan9 equivalents
     26 //
     27 
     28 TEXT ·blockArmSha2(SB), 7, $0 // SHA-256 block function built on the ARMv8 SHA2 instructions (sha256h/sha256h2/sha256su0/sha256su1); flags 7 = NOPROF|DUPOK|NOSPLIT in Go's textflag.h, frame size 0
     29 	MOVD h+0(FP), R0 // R0 = address of the 32-byte hash state (read at entry, written back before returning)
     30 	MOVD message+24(FP), R1 // R1 = address of the input data; offset 24 is consistent with h being a 3-word slice header (Go declaration not visible here -- confirm)
     31 	MOVD message_len+32(FP), R2 // R2 = length of message in bytes
     32 	SUBS $64, R2 // R2 -= 64 and set flags; from here on R2 = bytes left after the block about to be processed
     33 	BMI  complete // fewer than 64 bytes of input: return without touching the hash state
     34 
     35 	// Load constants table pointer (R3 is post-incremented by the ld1 loads below)
     36 	MOVD $·constants(SB), R3
     37 
     38 	// Cache constants table in registers v16 - v31 (16 registers x 4 words = 256 bytes), interleaved with loading the hash state into v0/v1
     39 	WORD $0x4cdf2870 // ld1	{v16.4s-v19.4s}, [x3], #64
     40 	WORD $0x4cdf7800 // ld1	{v0.4s}, [x0], #16 -- v0 = state words 0-3; x0 advanced by 16
     41 	WORD $0x4cdf2874 // ld1	{v20.4s-v23.4s}, [x3], #64
     42 
     43 	WORD $0x4c407801 // ld1	{v1.4s}, [x0] -- v1 = state words 4-7
     44 	WORD $0x4cdf2878 // ld1	{v24.4s-v27.4s}, [x3], #64
     45 	WORD $0xd1004000 // sub	x0, x0, #0x10 -- undo the post-increment so x0 points at the start of the state again
     46 	WORD $0x4cdf287c // ld1	{v28.4s-v31.4s}, [x3], #64
     47 
     48 loop:
     49 	// Main loop: one 64-byte block per iteration. v2/v3 = working state, v4 = saved copy of v2, v5-v8 = message schedule, v9/v10 = schedule + constants.
     50 	WORD $0x4cdf2025 // ld1	{v5.16b-v8.16b}, [x1], #64 -- load one 64-byte block; x1 advances to the next block
     51 	WORD $0x4ea01c02 // mov	v2.16b, v0.16b -- v2 = working copy of state words 0-3
     52 	WORD $0x4ea11c23 // mov	v3.16b, v1.16b -- v3 = working copy of state words 4-7
     53 	WORD $0x6e2008a5 // rev32	v5.16b, v5.16b -- byte-swap each 32-bit word (SHA-256 message words are big-endian)
     54 	WORD $0x6e2008c6 // rev32	v6.16b, v6.16b
     55 	WORD $0x4eb084a9 // add	v9.4s, v5.4s, v16.4s -- v9 = W[0..3] + K[0..3]
     56 	WORD $0x6e2008e7 // rev32	v7.16b, v7.16b
     57 	WORD $0x4eb184ca // add	v10.4s, v6.4s, v17.4s -- v10 = W[4..7] + K[4..7]
     58 	WORD $0x4ea21c44 // mov	v4.16b, v2.16b -- keep the pre-round v2: sha256h overwrites q2 and sha256h2 still needs the old value
     59 	WORD $0x5e094062 // sha256h	q2, q3, v9.4s -- rounds 0-3 (each sha256h/sha256h2 pair performs four rounds)
     60 	WORD $0x5e095083 // sha256h2	q3, q4, v9.4s
     61 	WORD $0x5e2828c5 // sha256su0	v5.4s, v6.4s -- first half of the schedule update that turns v5 into W[16..19]
     62 	WORD $0x6e200908 // rev32	v8.16b, v8.16b
     63 	WORD $0x4eb284e9 // add	v9.4s, v7.4s, v18.4s -- v9 = W[8..11] + K[8..11]
     64 	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
     65 	WORD $0x5e0a4062 // sha256h	q2, q3, v10.4s -- rounds 4-7
     66 	WORD $0x5e0a5083 // sha256h2	q3, q4, v10.4s
     67 	WORD $0x5e2828e6 // sha256su0	v6.4s, v7.4s
     68 	WORD $0x5e0860e5 // sha256su1	v5.4s, v7.4s, v8.4s -- second half of the update: v5 = W[16..19]
     69 	WORD $0x4eb3850a // add	v10.4s, v8.4s, v19.4s -- v10 = W[12..15] + K[12..15]
     70 	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
     71 	WORD $0x5e094062 // sha256h	q2, q3, v9.4s -- rounds 8-11
     72 	WORD $0x5e095083 // sha256h2	q3, q4, v9.4s
     73 	WORD $0x5e282907 // sha256su0	v7.4s, v8.4s
     74 	WORD $0x5e056106 // sha256su1	v6.4s, v8.4s, v5.4s -- v6 = W[20..23]
     75 	WORD $0x4eb484a9 // add	v9.4s, v5.4s, v20.4s -- v9 = W[16..19] + K[16..19]
     76 	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
     77 	WORD $0x5e0a4062 // sha256h	q2, q3, v10.4s -- rounds 12-15
     78 	WORD $0x5e0a5083 // sha256h2	q3, q4, v10.4s
     79 	WORD $0x5e2828a8 // sha256su0	v8.4s, v5.4s
     80 	WORD $0x5e0660a7 // sha256su1	v7.4s, v5.4s, v6.4s -- v7 = W[24..27]
     81 	WORD $0x4eb584ca // add	v10.4s, v6.4s, v21.4s -- v10 = W[20..23] + K[20..23]
     82 	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
     83 	WORD $0x5e094062 // sha256h	q2, q3, v9.4s -- rounds 16-19
     84 	WORD $0x5e095083 // sha256h2	q3, q4, v9.4s
     85 	WORD $0x5e2828c5 // sha256su0	v5.4s, v6.4s
     86 	WORD $0x5e0760c8 // sha256su1	v8.4s, v6.4s, v7.4s -- v8 = W[28..31]
     87 	WORD $0x4eb684e9 // add	v9.4s, v7.4s, v22.4s -- v9 = W[24..27] + K[24..27]
     88 	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
     89 	WORD $0x5e0a4062 // sha256h	q2, q3, v10.4s -- rounds 20-23
     90 	WORD $0x5e0a5083 // sha256h2	q3, q4, v10.4s
     91 	WORD $0x5e2828e6 // sha256su0	v6.4s, v7.4s
     92 	WORD $0x5e0860e5 // sha256su1	v5.4s, v7.4s, v8.4s -- v5 = W[32..35]
     93 	WORD $0x4eb7850a // add	v10.4s, v8.4s, v23.4s -- v10 = W[28..31] + K[28..31]
     94 	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
     95 	WORD $0x5e094062 // sha256h	q2, q3, v9.4s -- rounds 24-27
     96 	WORD $0x5e095083 // sha256h2	q3, q4, v9.4s
     97 	WORD $0x5e282907 // sha256su0	v7.4s, v8.4s
     98 	WORD $0x5e056106 // sha256su1	v6.4s, v8.4s, v5.4s -- v6 = W[36..39]
     99 	WORD $0x4eb884a9 // add	v9.4s, v5.4s, v24.4s -- v9 = W[32..35] + K[32..35]
    100 	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
    101 	WORD $0x5e0a4062 // sha256h	q2, q3, v10.4s -- rounds 28-31
    102 	WORD $0x5e0a5083 // sha256h2	q3, q4, v10.4s
    103 	WORD $0x5e2828a8 // sha256su0	v8.4s, v5.4s
    104 	WORD $0x5e0660a7 // sha256su1	v7.4s, v5.4s, v6.4s -- v7 = W[40..43]
    105 	WORD $0x4eb984ca // add	v10.4s, v6.4s, v25.4s -- v10 = W[36..39] + K[36..39]
    106 	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
    107 	WORD $0x5e094062 // sha256h	q2, q3, v9.4s -- rounds 32-35
    108 	WORD $0x5e095083 // sha256h2	q3, q4, v9.4s
    109 	WORD $0x5e2828c5 // sha256su0	v5.4s, v6.4s
    110 	WORD $0x5e0760c8 // sha256su1	v8.4s, v6.4s, v7.4s -- v8 = W[44..47]
    111 	WORD $0x4eba84e9 // add	v9.4s, v7.4s, v26.4s -- v9 = W[40..43] + K[40..43]
    112 	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
    113 	WORD $0x5e0a4062 // sha256h	q2, q3, v10.4s -- rounds 36-39
    114 	WORD $0x5e0a5083 // sha256h2	q3, q4, v10.4s
    115 	WORD $0x5e2828e6 // sha256su0	v6.4s, v7.4s
    116 	WORD $0x5e0860e5 // sha256su1	v5.4s, v7.4s, v8.4s -- v5 = W[48..51]
    117 	WORD $0x4ebb850a // add	v10.4s, v8.4s, v27.4s -- v10 = W[44..47] + K[44..47]
    118 	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
    119 	WORD $0x5e094062 // sha256h	q2, q3, v9.4s -- rounds 40-43
    120 	WORD $0x5e095083 // sha256h2	q3, q4, v9.4s
    121 	WORD $0x5e282907 // sha256su0	v7.4s, v8.4s
    122 	WORD $0x5e056106 // sha256su1	v6.4s, v8.4s, v5.4s -- v6 = W[52..55]
    123 	WORD $0x4ebc84a9 // add	v9.4s, v5.4s, v28.4s -- v9 = W[48..51] + K[48..51]
    124 	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
    125 	WORD $0x5e0a4062 // sha256h	q2, q3, v10.4s -- rounds 44-47
    126 	WORD $0x5e0a5083 // sha256h2	q3, q4, v10.4s
    127 	WORD $0x5e2828a8 // sha256su0	v8.4s, v5.4s
    128 	WORD $0x5e0660a7 // sha256su1	v7.4s, v5.4s, v6.4s -- v7 = W[56..59]
    129 	WORD $0x4ebd84ca // add	v10.4s, v6.4s, v29.4s -- v10 = W[52..55] + K[52..55]
    130 	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
    131 	WORD $0x5e094062 // sha256h	q2, q3, v9.4s -- rounds 48-51
    132 	WORD $0x5e095083 // sha256h2	q3, q4, v9.4s
    133 	WORD $0x5e0760c8 // sha256su1	v8.4s, v6.4s, v7.4s -- v8 = W[60..63]; last schedule update, no further sha256su0 needed
    134 	WORD $0x4ebe84e9 // add	v9.4s, v7.4s, v30.4s -- v9 = W[56..59] + K[56..59]
    135 	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
    136 	WORD $0x5e0a4062 // sha256h	q2, q3, v10.4s -- rounds 52-55
    137 	WORD $0x5e0a5083 // sha256h2	q3, q4, v10.4s
    138 	WORD $0x4ebf850a // add	v10.4s, v8.4s, v31.4s -- v10 = W[60..63] + K[60..63]
    139 	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
    140 	WORD $0x5e094062 // sha256h	q2, q3, v9.4s -- rounds 56-59
    141 	WORD $0x5e095083 // sha256h2	q3, q4, v9.4s
    142 	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
    143 	WORD $0x5e0a4062 // sha256h	q2, q3, v10.4s -- rounds 60-63
    144 	WORD $0x5e0a5083 // sha256h2	q3, q4, v10.4s
    145 	WORD $0x4ea38421 // add	v1.4s, v1.4s, v3.4s -- fold the working state back into the hash state (words 4-7)
    146 	WORD $0x4ea28400 // add	v0.4s, v0.4s, v2.4s -- fold the working state back into the hash state (words 0-3)
    147 
    148 	SUBS $64, R2 // consume one block from the remaining byte count
    149 	BPL  loop // another full 64-byte block is available; a trailing partial block is never processed
    150 
    151 	// Store result: write the updated 32-byte hash state back through x0
    152 	WORD $0x4c00a800 // st1	{v0.4s, v1.4s}, [x0]
    153 
    154 complete:
    155 	RET
    156 
    157 // Constants table: the 64 SHA-256 round constants K[0..63], packed two per 8-byte entry with the lower-indexed constant in the low 32 bits
    158 DATA ·constants+0x0(SB)/8, $0x71374491428a2f98 // K[1]:K[0]
    159 DATA ·constants+0x8(SB)/8, $0xe9b5dba5b5c0fbcf // K[3]:K[2]
    160 DATA ·constants+0x10(SB)/8, $0x59f111f13956c25b // K[5]:K[4]
    161 DATA ·constants+0x18(SB)/8, $0xab1c5ed5923f82a4 // K[7]:K[6]
    162 DATA ·constants+0x20(SB)/8, $0x12835b01d807aa98 // K[9]:K[8]
    163 DATA ·constants+0x28(SB)/8, $0x550c7dc3243185be // K[11]:K[10]
    164 DATA ·constants+0x30(SB)/8, $0x80deb1fe72be5d74 // K[13]:K[12]
    165 DATA ·constants+0x38(SB)/8, $0xc19bf1749bdc06a7 // K[15]:K[14]
    166 DATA ·constants+0x40(SB)/8, $0xefbe4786e49b69c1 // K[17]:K[16]
    167 DATA ·constants+0x48(SB)/8, $0x240ca1cc0fc19dc6 // K[19]:K[18]
    168 DATA ·constants+0x50(SB)/8, $0x4a7484aa2de92c6f // K[21]:K[20]
    169 DATA ·constants+0x58(SB)/8, $0x76f988da5cb0a9dc // K[23]:K[22]
    170 DATA ·constants+0x60(SB)/8, $0xa831c66d983e5152 // K[25]:K[24]
    171 DATA ·constants+0x68(SB)/8, $0xbf597fc7b00327c8 // K[27]:K[26]
    172 DATA ·constants+0x70(SB)/8, $0xd5a79147c6e00bf3 // K[29]:K[28]
    173 DATA ·constants+0x78(SB)/8, $0x1429296706ca6351 // K[31]:K[30]
    174 DATA ·constants+0x80(SB)/8, $0x2e1b213827b70a85 // K[33]:K[32]
    175 DATA ·constants+0x88(SB)/8, $0x53380d134d2c6dfc // K[35]:K[34]
    176 DATA ·constants+0x90(SB)/8, $0x766a0abb650a7354 // K[37]:K[36]
    177 DATA ·constants+0x98(SB)/8, $0x92722c8581c2c92e // K[39]:K[38]
    178 DATA ·constants+0xa0(SB)/8, $0xa81a664ba2bfe8a1 // K[41]:K[40]
    179 DATA ·constants+0xa8(SB)/8, $0xc76c51a3c24b8b70 // K[43]:K[42]
    180 DATA ·constants+0xb0(SB)/8, $0xd6990624d192e819 // K[45]:K[44]
    181 DATA ·constants+0xb8(SB)/8, $0x106aa070f40e3585 // K[47]:K[46]
    182 DATA ·constants+0xc0(SB)/8, $0x1e376c0819a4c116 // K[49]:K[48]
    183 DATA ·constants+0xc8(SB)/8, $0x34b0bcb52748774c // K[51]:K[50]
    184 DATA ·constants+0xd0(SB)/8, $0x4ed8aa4a391c0cb3 // K[53]:K[52]
    185 DATA ·constants+0xd8(SB)/8, $0x682e6ff35b9cca4f // K[55]:K[54]
    186 DATA ·constants+0xe0(SB)/8, $0x78a5636f748f82ee // K[57]:K[56]
    187 DATA ·constants+0xe8(SB)/8, $0x8cc7020884c87814 // K[59]:K[58]
    188 DATA ·constants+0xf0(SB)/8, $0xa4506ceb90befffa // K[61]:K[60]
    189 DATA ·constants+0xf8(SB)/8, $0xc67178f2bef9a3f7 // K[63]:K[62]
    190 
    191 GLOBL ·constants(SB), 8, $256 // flag 8 = RODATA in Go's textflag.h; 256 bytes = 64 constants x 4 bytes
    192