sha256block_arm64.s (7528B)
//+build !noasm,!appengine,gc

// ARM64 version of SHA256

//
// Minio Cloud Storage, (C) 2016 Minio, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

//
// Based on implementation as found in https://github.com/jocover/sha256-armv8
//
// Use github.com/minio/asm2plan9s on this file to assemble ARM instructions to
// their Plan9 equivalents
//

// blockArmSha2 runs the SHA-256 compression function over every complete
// 64-byte block of the message, using the ARMv8 SHA-256 instructions
// (emitted as raw WORD encodings; the trailing comment on each WORD gives
// the A64 mnemonic it encodes).
//
// Arguments (read via the FP pseudo-register):
//   h+0(FP)            pointer to the hash state; 32 bytes are loaded from it
//                      and 32 bytes are stored back to it
//   message+24(FP)     pointer to the input data
//   message_len+32(FP) input length in bytes
// NOTE(review): offsets 0/24/32 are consistent with two Go slice headers,
// i.e. func blockArmSha2(h []uint32, message []uint8) -- confirm against the
// Go declaration, which is not part of this file.
//
// Behaviour:
//   - If the length is below 64 the routine returns at once; h is neither
//     read nor written.
//   - Otherwise floor(len/64) blocks are consumed; trailing bytes that do
//     not fill a whole block are ignored.
//   - The state is kept in v0/v1 across blocks and stored to h only once,
//     after the last block.
//
// Register use:
//   R0 = h, R1 = message cursor (post-incremented), R2 = remaining length
//   minus 64, R3 = constants table cursor
//   v0, v1   running hash state
//   v2, v3   working state for the current block
//   v4       copy of v2 taken before each sha256h (sha256h overwrites q2,
//            and sha256h2 takes the previous value as its q4 operand)
//   v5-v8    message schedule words
//   v9, v10  schedule words + round constants, used alternately
//   v16-v31  the 64 round constants
//
// TEXT flag 7 is NOPROF|DUPOK|NOSPLIT in Go's textflag.h; $0 declares a
// zero-byte local frame.
TEXT ·blockArmSha2(SB), 7, $0
	MOVD h+0(FP), R0
	MOVD message+24(FP), R1
	MOVD message_len+32(FP), R2 // length of message
	SUBS $64, R2                // R2 = length - 64; negative means no full block
	BMI  complete

	// Load constants table pointer
	MOVD $·constants(SB), R3

	// Cache constants table in registers v16 - v31
	// (the loads of the current hash state into v0/v1 are interleaved with
	// the four 64-byte table loads)
	WORD $0x4cdf2870 // ld1 {v16.4s-v19.4s}, [x3], #64
	WORD $0x4cdf7800 // ld1 {v0.4s}, [x0], #16
	WORD $0x4cdf2874 // ld1 {v20.4s-v23.4s}, [x3], #64

	WORD $0x4c407801 // ld1 {v1.4s}, [x0]
	WORD $0x4cdf2878 // ld1 {v24.4s-v27.4s}, [x3], #64
	WORD $0xd1004000 // sub x0, x0, #0x10
	// x0 points at the start of h again (undoes the #16 post-increment) so
	// the final st1 writes the state back in place.
	WORD $0x4cdf287c // ld1 {v28.4s-v31.4s}, [x3], #64

loop:
	// Main loop: one 64-byte block per iteration.
	//
	// The block is loaded into v5-v8 and each 32-bit word is byte-swapped
	// with rev32. The body then repeats one pattern 16 times, once per
	// constant register v16..v31:
	//   add       v9|v10 = schedule words + constants
	//   mov       v4 = v2
	//   sha256h   / sha256h2   advance the working state (v2, v3)
	// interleaved with sha256su0/sha256su1, which extend the message
	// schedule in v5-v8 for later steps. The last steps have no
	// sha256su0/su1 because no further schedule words are needed.
	// The instruction order is hand-scheduled; do not reorder.
	WORD $0x4cdf2025 // ld1 {v5.16b-v8.16b}, [x1], #64
	WORD $0x4ea01c02 // mov v2.16b, v0.16b
	WORD $0x4ea11c23 // mov v3.16b, v1.16b
	WORD $0x6e2008a5 // rev32 v5.16b, v5.16b
	WORD $0x6e2008c6 // rev32 v6.16b, v6.16b
	WORD $0x4eb084a9 // add v9.4s, v5.4s, v16.4s
	WORD $0x6e2008e7 // rev32 v7.16b, v7.16b
	WORD $0x4eb184ca // add v10.4s, v6.4s, v17.4s
	WORD $0x4ea21c44 // mov v4.16b, v2.16b
	WORD $0x5e094062 // sha256h q2, q3, v9.4s
	WORD $0x5e095083 // sha256h2 q3, q4, v9.4s
	WORD $0x5e2828c5 // sha256su0 v5.4s, v6.4s
	WORD $0x6e200908 // rev32 v8.16b, v8.16b
	WORD $0x4eb284e9 // add v9.4s, v7.4s, v18.4s
	WORD $0x4ea21c44 // mov v4.16b, v2.16b
	WORD $0x5e0a4062 // sha256h q2, q3, v10.4s
	WORD $0x5e0a5083 // sha256h2 q3, q4, v10.4s
	WORD $0x5e2828e6 // sha256su0 v6.4s, v7.4s
	WORD $0x5e0860e5 // sha256su1 v5.4s, v7.4s, v8.4s
	WORD $0x4eb3850a // add v10.4s, v8.4s, v19.4s
	WORD $0x4ea21c44 // mov v4.16b, v2.16b
	WORD $0x5e094062 // sha256h q2, q3, v9.4s
	WORD $0x5e095083 // sha256h2 q3, q4, v9.4s
	WORD $0x5e282907 // sha256su0 v7.4s, v8.4s
	WORD $0x5e056106 // sha256su1 v6.4s, v8.4s, v5.4s
	WORD $0x4eb484a9 // add v9.4s, v5.4s, v20.4s
	WORD $0x4ea21c44 // mov v4.16b, v2.16b
	WORD $0x5e0a4062 // sha256h q2, q3, v10.4s
	WORD $0x5e0a5083 // sha256h2 q3, q4, v10.4s
	WORD $0x5e2828a8 // sha256su0 v8.4s, v5.4s
	WORD $0x5e0660a7 // sha256su1 v7.4s, v5.4s, v6.4s
	WORD $0x4eb584ca // add v10.4s, v6.4s, v21.4s
	WORD $0x4ea21c44 // mov v4.16b, v2.16b
	WORD $0x5e094062 // sha256h q2, q3, v9.4s
	WORD $0x5e095083 // sha256h2 q3, q4, v9.4s
	WORD $0x5e2828c5 // sha256su0 v5.4s, v6.4s
	WORD $0x5e0760c8 // sha256su1 v8.4s, v6.4s, v7.4s
	WORD $0x4eb684e9 // add v9.4s, v7.4s, v22.4s
	WORD $0x4ea21c44 // mov v4.16b, v2.16b
	WORD $0x5e0a4062 // sha256h q2, q3, v10.4s
	WORD $0x5e0a5083 // sha256h2 q3, q4, v10.4s
	WORD $0x5e2828e6 // sha256su0 v6.4s, v7.4s
	WORD $0x5e0860e5 // sha256su1 v5.4s, v7.4s, v8.4s
	WORD $0x4eb7850a // add v10.4s, v8.4s, v23.4s
	WORD $0x4ea21c44 // mov v4.16b, v2.16b
	WORD $0x5e094062 // sha256h q2, q3, v9.4s
	WORD $0x5e095083 // sha256h2 q3, q4, v9.4s
	WORD $0x5e282907 // sha256su0 v7.4s, v8.4s
	WORD $0x5e056106 // sha256su1 v6.4s, v8.4s, v5.4s
	WORD $0x4eb884a9 // add v9.4s, v5.4s, v24.4s
	WORD $0x4ea21c44 // mov v4.16b, v2.16b
	WORD $0x5e0a4062 // sha256h q2, q3, v10.4s
	WORD $0x5e0a5083 // sha256h2 q3, q4, v10.4s
	WORD $0x5e2828a8 // sha256su0 v8.4s, v5.4s
	WORD $0x5e0660a7 // sha256su1 v7.4s, v5.4s, v6.4s
	WORD $0x4eb984ca // add v10.4s, v6.4s, v25.4s
	WORD $0x4ea21c44 // mov v4.16b, v2.16b
	WORD $0x5e094062 // sha256h q2, q3, v9.4s
	WORD $0x5e095083 // sha256h2 q3, q4, v9.4s
	WORD $0x5e2828c5 // sha256su0 v5.4s, v6.4s
	WORD $0x5e0760c8 // sha256su1 v8.4s, v6.4s, v7.4s
	WORD $0x4eba84e9 // add v9.4s, v7.4s, v26.4s
	WORD $0x4ea21c44 // mov v4.16b, v2.16b
	WORD $0x5e0a4062 // sha256h q2, q3, v10.4s
	WORD $0x5e0a5083 // sha256h2 q3, q4, v10.4s
	WORD $0x5e2828e6 // sha256su0 v6.4s, v7.4s
	WORD $0x5e0860e5 // sha256su1 v5.4s, v7.4s, v8.4s
	WORD $0x4ebb850a // add v10.4s, v8.4s, v27.4s
	WORD $0x4ea21c44 // mov v4.16b, v2.16b
	WORD $0x5e094062 // sha256h q2, q3, v9.4s
	WORD $0x5e095083 // sha256h2 q3, q4, v9.4s
	WORD $0x5e282907 // sha256su0 v7.4s, v8.4s
	WORD $0x5e056106 // sha256su1 v6.4s, v8.4s, v5.4s
	WORD $0x4ebc84a9 // add v9.4s, v5.4s, v28.4s
	WORD $0x4ea21c44 // mov v4.16b, v2.16b
	WORD $0x5e0a4062 // sha256h q2, q3, v10.4s
	WORD $0x5e0a5083 // sha256h2 q3, q4, v10.4s
	WORD $0x5e2828a8 // sha256su0 v8.4s, v5.4s
	WORD $0x5e0660a7 // sha256su1 v7.4s, v5.4s, v6.4s
	WORD $0x4ebd84ca // add v10.4s, v6.4s, v29.4s
	WORD $0x4ea21c44 // mov v4.16b, v2.16b
	WORD $0x5e094062 // sha256h q2, q3, v9.4s
	WORD $0x5e095083 // sha256h2 q3, q4, v9.4s
	WORD $0x5e0760c8 // sha256su1 v8.4s, v6.4s, v7.4s
	WORD $0x4ebe84e9 // add v9.4s, v7.4s, v30.4s
	WORD $0x4ea21c44 // mov v4.16b, v2.16b
	WORD $0x5e0a4062 // sha256h q2, q3, v10.4s
	WORD $0x5e0a5083 // sha256h2 q3, q4, v10.4s
	WORD $0x4ebf850a // add v10.4s, v8.4s, v31.4s
	WORD $0x4ea21c44 // mov v4.16b, v2.16b
	WORD $0x5e094062 // sha256h q2, q3, v9.4s
	WORD $0x5e095083 // sha256h2 q3, q4, v9.4s
	WORD $0x4ea21c44 // mov v4.16b, v2.16b
	WORD $0x5e0a4062 // sha256h q2, q3, v10.4s
	WORD $0x5e0a5083 // sha256h2 q3, q4, v10.4s
	// Fold this block's working state into the running hash state.
	WORD $0x4ea38421 // add v1.4s, v1.4s, v3.4s
	WORD $0x4ea28400 // add v0.4s, v0.4s, v2.4s

	// Another iteration only if at least 64 more bytes remain.
	SUBS $64, R2
	BPL  loop

	// Store result
	WORD $0x4c00a800 // st1 {v0.4s, v1.4s}, [x0]

complete:
	RET

// Constants table
// 256 bytes holding the 64 32-bit SHA-256 round constants K[0..63].
// Each 8-byte DATA entry packs two consecutive constants with the
// lower-indexed one in the low 32 bits (K[0] = 0x428a2f98,
// K[1] = 0x71374491, ...); this relies on the target storing the quadword
// little-endian so that K[i] lands at byte offset 4*i.
// GLOBL flag 8 is RODATA in Go's textflag.h.
DATA ·constants+0x0(SB)/8, $0x71374491428a2f98
DATA ·constants+0x8(SB)/8, $0xe9b5dba5b5c0fbcf
DATA ·constants+0x10(SB)/8, $0x59f111f13956c25b
DATA ·constants+0x18(SB)/8, $0xab1c5ed5923f82a4
DATA ·constants+0x20(SB)/8, $0x12835b01d807aa98
DATA ·constants+0x28(SB)/8, $0x550c7dc3243185be
DATA ·constants+0x30(SB)/8, $0x80deb1fe72be5d74
DATA ·constants+0x38(SB)/8, $0xc19bf1749bdc06a7
DATA ·constants+0x40(SB)/8, $0xefbe4786e49b69c1
DATA ·constants+0x48(SB)/8, $0x240ca1cc0fc19dc6
DATA ·constants+0x50(SB)/8, $0x4a7484aa2de92c6f
DATA ·constants+0x58(SB)/8, $0x76f988da5cb0a9dc
DATA ·constants+0x60(SB)/8, $0xa831c66d983e5152
DATA ·constants+0x68(SB)/8, $0xbf597fc7b00327c8
DATA ·constants+0x70(SB)/8, $0xd5a79147c6e00bf3
DATA ·constants+0x78(SB)/8, $0x1429296706ca6351
DATA ·constants+0x80(SB)/8, $0x2e1b213827b70a85
DATA ·constants+0x88(SB)/8, $0x53380d134d2c6dfc
DATA ·constants+0x90(SB)/8, $0x766a0abb650a7354
DATA ·constants+0x98(SB)/8, $0x92722c8581c2c92e
DATA ·constants+0xa0(SB)/8, $0xa81a664ba2bfe8a1
DATA ·constants+0xa8(SB)/8, $0xc76c51a3c24b8b70
DATA ·constants+0xb0(SB)/8, $0xd6990624d192e819
DATA ·constants+0xb8(SB)/8, $0x106aa070f40e3585
DATA ·constants+0xc0(SB)/8, $0x1e376c0819a4c116
DATA ·constants+0xc8(SB)/8, $0x34b0bcb52748774c
DATA ·constants+0xd0(SB)/8, $0x4ed8aa4a391c0cb3
DATA ·constants+0xd8(SB)/8, $0x682e6ff35b9cca4f
DATA ·constants+0xe0(SB)/8, $0x78a5636f748f82ee
DATA ·constants+0xe8(SB)/8, $0x8cc7020884c87814
DATA ·constants+0xf0(SB)/8, $0xa4506ceb90befffa
DATA ·constants+0xf8(SB)/8, $0xc67178f2bef9a3f7

GLOBL ·constants(SB), 8, $256