encode.go (11779B)
// Copyright 2011 The Snappy-Go Authors. All rights reserved.
// Copyright (c) 2019 Klaus Post. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package s2

import (
	"encoding/binary"
	"math"
	"math/bits"
)

// Encode returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// The blocks will require the same amount of memory to decode as was used to encode them,
// and do not allow for concurrent decoding.
// Also note that blocks do not contain CRC information, so corruption may be undetected.
//
// If you need to encode larger amounts of data, consider using
// the streaming interface, which gives all of these features.
func Encode(dst, src []byte) []byte {
	if n := MaxEncodedLen(len(src)); n < 0 {
		panic(ErrTooLarge)
	} else if cap(dst) < n {
		dst = make([]byte, n)
	} else {
		dst = dst[:n]
	}

	// The block starts with the varint-encoded length of the decompressed bytes.
	d := binary.PutUvarint(dst, uint64(len(src)))

	if len(src) == 0 {
		return dst[:d]
	}
	if len(src) < minNonLiteralBlockSize {
		d += emitLiteral(dst[d:], src)
		return dst[:d]
	}
	n := encodeBlock(dst[d:], src)
	if n > 0 {
		d += n
		return dst[:d]
	}
	// Not compressible
	d += emitLiteral(dst[d:], src)
	return dst[:d]
}

// EstimateBlockSize will perform a very fast compression
// without outputting the result and return the compressed output size.
// The function returns -1 if no improvement could be achieved.
// Using actual compression will most often produce better compression than the estimate.
func EstimateBlockSize(src []byte) (d int) {
	if len(src) < 6 || int64(len(src)) > 0xffffffff {
		return -1
	}
	if len(src) <= 1024 {
		d = calcBlockSizeSmall(src)
	} else {
		d = calcBlockSize(src)
	}

	if d == 0 {
		return -1
	}
	// Size of the varint encoded block size.
	d += (bits.Len64(uint64(len(src))) + 7) / 7

	if d >= len(src) {
		return -1
	}
	return d
}
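
// The helper below is an illustrative sketch only; the name compressIfSmaller
// is hypothetical and not part of this package's API. It shows one way
// EstimateBlockSize can be combined with Encode to skip compression for data
// that is unlikely to shrink.
func compressIfSmaller(data []byte) ([]byte, bool) {
	// A fast, rough check of whether compression is expected to help at all.
	if EstimateBlockSize(data) < 0 {
		return data, false
	}
	// Passing a nil dst lets Encode allocate a sufficiently large buffer.
	return Encode(nil, data), true
}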

// EncodeBetter returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// EncodeBetter compresses better than Encode but typically with a
// 10-40% speed decrease on both compression and decompression.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// The blocks will require the same amount of memory to decode as was used to encode them,
// and do not allow for concurrent decoding.
// Also note that blocks do not contain CRC information, so corruption may be undetected.
//
// If you need to encode larger amounts of data, consider using
// the streaming interface, which gives all of these features.
func EncodeBetter(dst, src []byte) []byte {
	if n := MaxEncodedLen(len(src)); n < 0 {
		panic(ErrTooLarge)
	} else if len(dst) < n {
		dst = make([]byte, n)
	}

	// The block starts with the varint-encoded length of the decompressed bytes.
	d := binary.PutUvarint(dst, uint64(len(src)))

	if len(src) == 0 {
		return dst[:d]
	}
	if len(src) < minNonLiteralBlockSize {
		d += emitLiteral(dst[d:], src)
		return dst[:d]
	}
	n := encodeBlockBetter(dst[d:], src)
	if n > 0 {
		d += n
		return dst[:d]
	}
	// Not compressible
	d += emitLiteral(dst[d:], src)
	return dst[:d]
}

// EncodeBest returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// EncodeBest compresses as well as reasonably possible, but with a
// big speed decrease.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// The blocks will require the same amount of memory to decode as was used to encode them,
// and do not allow for concurrent decoding.
// Also note that blocks do not contain CRC information, so corruption may be undetected.
//
// If you need to encode larger amounts of data, consider using
// the streaming interface, which gives all of these features.
func EncodeBest(dst, src []byte) []byte {
	if n := MaxEncodedLen(len(src)); n < 0 {
		panic(ErrTooLarge)
	} else if len(dst) < n {
		dst = make([]byte, n)
	}

	// The block starts with the varint-encoded length of the decompressed bytes.
	d := binary.PutUvarint(dst, uint64(len(src)))

	if len(src) == 0 {
		return dst[:d]
	}
	if len(src) < minNonLiteralBlockSize {
		d += emitLiteral(dst[d:], src)
		return dst[:d]
	}
	n := encodeBlockBest(dst[d:], src, nil)
	if n > 0 {
		d += n
		return dst[:d]
	}
	// Not compressible
	d += emitLiteral(dst[d:], src)
	return dst[:d]
}

// EncodeSnappy returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// The output is Snappy compatible and will likely decompress faster.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// The blocks will require the same amount of memory to decode as was used to encode them,
// and do not allow for concurrent decoding.
// Also note that blocks do not contain CRC information, so corruption may be undetected.
//
// If you need to encode larger amounts of data, consider using
// the streaming interface, which gives all of these features.
func EncodeSnappy(dst, src []byte) []byte {
	if n := MaxEncodedLen(len(src)); n < 0 {
		panic(ErrTooLarge)
	} else if cap(dst) < n {
		dst = make([]byte, n)
	} else {
		dst = dst[:n]
	}

	// The block starts with the varint-encoded length of the decompressed bytes.
	d := binary.PutUvarint(dst, uint64(len(src)))

	if len(src) == 0 {
		return dst[:d]
	}
	if len(src) < minNonLiteralBlockSize {
		d += emitLiteral(dst[d:], src)
		return dst[:d]
	}

	n := encodeBlockSnappy(dst[d:], src)
	if n > 0 {
		d += n
		return dst[:d]
	}
	// Not compressible
	d += emitLiteral(dst[d:], src)
	return dst[:d]
}
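
// The helper below is an illustrative sketch only; the name snappyRoundTrip is
// hypothetical and not part of this package's API. EncodeSnappy emits the
// Snappy block format, and this package's Decode (defined in decode.go) should
// be able to read it back, as should a standalone Snappy decoder.
func snappyRoundTrip(data []byte) ([]byte, error) {
	compressed := EncodeSnappy(nil, data)
	return Decode(nil, compressed)
}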

// EncodeSnappyBetter returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// The output is Snappy compatible and will likely decompress faster.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// The blocks will require the same amount of memory to decode as was used to encode them,
// and do not allow for concurrent decoding.
// Also note that blocks do not contain CRC information, so corruption may be undetected.
//
// If you need to encode larger amounts of data, consider using
// the streaming interface, which gives all of these features.
func EncodeSnappyBetter(dst, src []byte) []byte {
	if n := MaxEncodedLen(len(src)); n < 0 {
		panic(ErrTooLarge)
	} else if cap(dst) < n {
		dst = make([]byte, n)
	} else {
		dst = dst[:n]
	}

	// The block starts with the varint-encoded length of the decompressed bytes.
	d := binary.PutUvarint(dst, uint64(len(src)))

	if len(src) == 0 {
		return dst[:d]
	}
	if len(src) < minNonLiteralBlockSize {
		d += emitLiteral(dst[d:], src)
		return dst[:d]
	}

	n := encodeBlockBetterSnappy(dst[d:], src)
	if n > 0 {
		d += n
		return dst[:d]
	}
	// Not compressible
	d += emitLiteral(dst[d:], src)
	return dst[:d]
}

// EncodeSnappyBest returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// The output is Snappy compatible and will likely decompress faster.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// The blocks will require the same amount of memory to decode as was used to encode them,
// and do not allow for concurrent decoding.
// Also note that blocks do not contain CRC information, so corruption may be undetected.
//
// If you need to encode larger amounts of data, consider using
// the streaming interface, which gives all of these features.
func EncodeSnappyBest(dst, src []byte) []byte {
	if n := MaxEncodedLen(len(src)); n < 0 {
		panic(ErrTooLarge)
	} else if cap(dst) < n {
		dst = make([]byte, n)
	} else {
		dst = dst[:n]
	}

	// The block starts with the varint-encoded length of the decompressed bytes.
	d := binary.PutUvarint(dst, uint64(len(src)))

	if len(src) == 0 {
		return dst[:d]
	}
	if len(src) < minNonLiteralBlockSize {
		d += emitLiteral(dst[d:], src)
		return dst[:d]
	}

	n := encodeBlockBestSnappy(dst[d:], src)
	if n > 0 {
		d += n
		return dst[:d]
	}
	// Not compressible
	d += emitLiteral(dst[d:], src)
	return dst[:d]
}
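
// The helper below is an illustrative sketch only; the name encodeReusing is
// hypothetical and not part of this package's API. Because the encoders above
// only allocate when dst is too small, a caller can size a reusable buffer
// with MaxEncodedLen (defined further down) and avoid per-call allocations.
func encodeReusing(dst, src []byte) []byte {
	if n := MaxEncodedLen(len(src)); n >= 0 && cap(dst) >= n {
		// Reslice the reusable buffer so the encoder sees enough room.
		dst = dst[:n]
	}
	return EncodeBetter(dst, src)
}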

// ConcatBlocks will concatenate the supplied blocks and append them to the supplied destination.
// If the destination is nil or too small, a new one will be allocated.
// The blocks are not validated, so garbage in = garbage out.
// dst may not overlap block data.
// Any data in dst is preserved as is, so it will not be considered a block.
func ConcatBlocks(dst []byte, blocks ...[]byte) ([]byte, error) {
	totalSize := uint64(0)
	compSize := 0
	for _, b := range blocks {
		l, hdr, err := decodedLen(b)
		if err != nil {
			return nil, err
		}
		totalSize += uint64(l)
		compSize += len(b) - hdr
	}
	if totalSize == 0 {
		dst = append(dst, 0)
		return dst, nil
	}
	if totalSize > math.MaxUint32 {
		return nil, ErrTooLarge
	}
	var tmp [binary.MaxVarintLen32]byte
	hdrSize := binary.PutUvarint(tmp[:], totalSize)
	wantSize := hdrSize + compSize

	if cap(dst)-len(dst) < wantSize {
		dst = append(make([]byte, 0, wantSize+len(dst)), dst...)
	}
	dst = append(dst, tmp[:hdrSize]...)
	for _, b := range blocks {
		_, hdr, err := decodedLen(b)
		if err != nil {
			return nil, err
		}
		dst = append(dst, b[hdr:]...)
	}
	return dst, nil
}

// inputMargin is the minimum number of extra input bytes to keep, inside
// encodeBlock's inner loop. On some architectures, this margin lets us
// implement a fast path for emitLiteral, where the copy of short (<= 16 byte)
// literals can be implemented as a single load to and store from a 16-byte
// register. That literal's actual length can be as short as 1 byte, so this
// can copy up to 15 bytes too much, but that's OK as subsequent iterations of
// the encoding loop will fix up the copy overrun, and this inputMargin ensures
// that we don't overrun the dst and src buffers.
const inputMargin = 8

// minNonLiteralBlockSize is the minimum size of the input to encodeBlock that
// will be accepted by the encoder.
const minNonLiteralBlockSize = 32

const intReduction = 2 - (1 << (^uint(0) >> 63)) // 1 (32 bits) or 0 (64 bits)

// MaxBlockSize is the maximum value where MaxEncodedLen will return a valid block size.
// Blocks this big are highly discouraged, though.
// Half the size on 32-bit systems.
const MaxBlockSize = (1<<(32-intReduction) - 1) - binary.MaxVarintLen32 - 5

// MaxEncodedLen returns the maximum length of a snappy block, given its
// uncompressed length.
//
// It will return a negative value if srcLen is too large to encode.
// 32-bit platforms will have lower thresholds for rejecting big content.
func MaxEncodedLen(srcLen int) int {
	n := uint64(srcLen)
	if intReduction == 1 {
		// 32 bits
		if n > math.MaxInt32 {
			// Also includes negative.
			return -1
		}
	} else if n > 0xffffffff {
		// 64 bits
		// Also includes negative.
		return -1
	}
	// Size of the varint encoded block size.
	n = n + uint64((bits.Len64(n)+7)/7)

	// Add maximum size of encoding block as literals.
	n += uint64(literalExtraSize(int64(srcLen)))
	if intReduction == 1 {
		// 32 bits
		if n > math.MaxInt32 {
			return -1
		}
	} else if n > 0xffffffff {
		// 64 bits
		// Also includes negative.
		return -1
	}
	return int(n)
}
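
// The helper below is an illustrative sketch only; the name encodeConcatenated
// is hypothetical and not part of this package's API. It shows ConcatBlocks
// joining independently encoded blocks into one block that should decode to
// the concatenation of the inputs.
func encodeConcatenated(parts ...[]byte) ([]byte, error) {
	blocks := make([][]byte, 0, len(parts))
	for _, p := range parts {
		// Each part is encoded as its own block; ConcatBlocks merges the payloads
		// under a single decompressed-length header.
		blocks = append(blocks, Encode(nil, p))
	}
	return ConcatBlocks(nil, blocks...)
}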