encode.go (11779B)
// Copyright 2011 The Snappy-Go Authors. All rights reserved.
// Copyright (c) 2019 Klaus Post. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package s2

import (
	"encoding/binary"
	"math"
	"math/bits"
)

// Encode returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// The blocks will require the same amount of memory to decode as was used to encode them,
// and do not allow for concurrent decoding.
// Also note that blocks do not contain CRC information, so corruption may be undetected.
//
// If you need to encode larger amounts of data, consider using
// the streaming interface, which gives all of these features.
func Encode(dst, src []byte) []byte {
	if n := MaxEncodedLen(len(src)); n < 0 {
		panic(ErrTooLarge)
	} else if cap(dst) < n {
		dst = make([]byte, n)
	} else {
		dst = dst[:n]
	}

	// The block starts with the varint-encoded length of the decompressed bytes.
	d := binary.PutUvarint(dst, uint64(len(src)))

	if len(src) == 0 {
		return dst[:d]
	}
	if len(src) < minNonLiteralBlockSize {
		d += emitLiteral(dst[d:], src)
		return dst[:d]
	}
	n := encodeBlock(dst[d:], src)
	if n > 0 {
		d += n
		return dst[:d]
	}
	// Not compressible
	d += emitLiteral(dst[d:], src)
	return dst[:d]
}

// EstimateBlockSize will perform a very fast compression
// without outputting the result and return the compressed output size.
// The function returns -1 if no improvement could be achieved.
// Using actual compression will most often produce better compression than the estimate.
func EstimateBlockSize(src []byte) (d int) {
	if len(src) < 6 || int64(len(src)) > 0xffffffff {
		return -1
	}
	if len(src) <= 1024 {
		d = calcBlockSizeSmall(src)
	} else {
		d = calcBlockSize(src)
	}

	if d == 0 {
		return -1
	}
	// Size of the varint encoded block size.
	d += (bits.Len64(uint64(len(src))) + 7) / 7

	if d >= len(src) {
		return -1
	}
	return d
}
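
// The helper below is an illustrative sketch only; the name compressIfSmaller
// is hypothetical and not part of this package's API. It shows one way
// EstimateBlockSize can be combined with Encode to skip compression for data
// that is unlikely to shrink.
func compressIfSmaller(data []byte) ([]byte, bool) {
	// A fast, rough check of whether compression is expected to help at all.
	if EstimateBlockSize(data) < 0 {
		return data, false
	}
	// Passing a nil dst lets Encode allocate a sufficiently large buffer.
	return Encode(nil, data), true
}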

// EncodeBetter returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// EncodeBetter compresses better than Encode but typically with a
// 10-40% speed decrease on both compression and decompression.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// The blocks will require the same amount of memory to decode as was used to encode them,
// and do not allow for concurrent decoding.
// Also note that blocks do not contain CRC information, so corruption may be undetected.
//
// If you need to encode larger amounts of data, consider using
// the streaming interface, which gives all of these features.
func EncodeBetter(dst, src []byte) []byte {
	if n := MaxEncodedLen(len(src)); n < 0 {
		panic(ErrTooLarge)
	} else if len(dst) < n {
		dst = make([]byte, n)
	}

	// The block starts with the varint-encoded length of the decompressed bytes.
	d := binary.PutUvarint(dst, uint64(len(src)))

	if len(src) == 0 {
		return dst[:d]
	}
	if len(src) < minNonLiteralBlockSize {
		d += emitLiteral(dst[d:], src)
		return dst[:d]
	}
	n := encodeBlockBetter(dst[d:], src)
	if n > 0 {
		d += n
		return dst[:d]
	}
	// Not compressible
	d += emitLiteral(dst[d:], src)
	return dst[:d]
}

// EncodeBest returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// EncodeBest compresses as well as reasonably possible, but with a
// big speed decrease.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// The blocks will require the same amount of memory to decode as was used to encode them,
// and do not allow for concurrent decoding.
// Also note that blocks do not contain CRC information, so corruption may be undetected.
//
// If you need to encode larger amounts of data, consider using
// the streaming interface, which gives all of these features.
func EncodeBest(dst, src []byte) []byte {
	if n := MaxEncodedLen(len(src)); n < 0 {
		panic(ErrTooLarge)
	} else if len(dst) < n {
		dst = make([]byte, n)
	}

	// The block starts with the varint-encoded length of the decompressed bytes.
	d := binary.PutUvarint(dst, uint64(len(src)))

	if len(src) == 0 {
		return dst[:d]
	}
	if len(src) < minNonLiteralBlockSize {
		d += emitLiteral(dst[d:], src)
		return dst[:d]
	}
	n := encodeBlockBest(dst[d:], src, nil)
	if n > 0 {
		d += n
		return dst[:d]
	}
	// Not compressible
	d += emitLiteral(dst[d:], src)
	return dst[:d]
}

// EncodeSnappy returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// The output is Snappy compatible and will likely decompress faster.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// The blocks will require the same amount of memory to decode as was used to encode them,
// and do not allow for concurrent decoding.
// Also note that blocks do not contain CRC information, so corruption may be undetected.
//
// If you need to encode larger amounts of data, consider using
// the streaming interface, which gives all of these features.
func EncodeSnappy(dst, src []byte) []byte {
	if n := MaxEncodedLen(len(src)); n < 0 {
		panic(ErrTooLarge)
	} else if cap(dst) < n {
		dst = make([]byte, n)
	} else {
		dst = dst[:n]
	}

	// The block starts with the varint-encoded length of the decompressed bytes.
	d := binary.PutUvarint(dst, uint64(len(src)))

	if len(src) == 0 {
		return dst[:d]
	}
	if len(src) < minNonLiteralBlockSize {
		d += emitLiteral(dst[d:], src)
		return dst[:d]
	}

	n := encodeBlockSnappy(dst[d:], src)
	if n > 0 {
		d += n
		return dst[:d]
	}
	// Not compressible
	d += emitLiteral(dst[d:], src)
	return dst[:d]
}
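
// The helper below is an illustrative sketch only; the name snappyRoundTrip is
// hypothetical and not part of this package's API. EncodeSnappy emits the
// Snappy block format, and this package's Decode (defined in decode.go) should
// be able to read it back, as should a standalone Snappy decoder.
func snappyRoundTrip(data []byte) ([]byte, error) {
	compressed := EncodeSnappy(nil, data)
	return Decode(nil, compressed)
}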

// EncodeSnappyBetter returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// The output is Snappy compatible and will likely decompress faster.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// The blocks will require the same amount of memory to decode as was used to encode them,
// and do not allow for concurrent decoding.
// Also note that blocks do not contain CRC information, so corruption may be undetected.
//
// If you need to encode larger amounts of data, consider using
// the streaming interface, which gives all of these features.
func EncodeSnappyBetter(dst, src []byte) []byte {
	if n := MaxEncodedLen(len(src)); n < 0 {
		panic(ErrTooLarge)
	} else if cap(dst) < n {
		dst = make([]byte, n)
	} else {
		dst = dst[:n]
	}

	// The block starts with the varint-encoded length of the decompressed bytes.
	d := binary.PutUvarint(dst, uint64(len(src)))

	if len(src) == 0 {
		return dst[:d]
	}
	if len(src) < minNonLiteralBlockSize {
		d += emitLiteral(dst[d:], src)
		return dst[:d]
	}

	n := encodeBlockBetterSnappy(dst[d:], src)
	if n > 0 {
		d += n
		return dst[:d]
	}
	// Not compressible
	d += emitLiteral(dst[d:], src)
	return dst[:d]
}

// EncodeSnappyBest returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// The output is Snappy compatible and will likely decompress faster.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// The blocks will require the same amount of memory to decode as was used to encode them,
// and do not allow for concurrent decoding.
// Also note that blocks do not contain CRC information, so corruption may be undetected.
//
// If you need to encode larger amounts of data, consider using
// the streaming interface, which gives all of these features.
func EncodeSnappyBest(dst, src []byte) []byte {
	if n := MaxEncodedLen(len(src)); n < 0 {
		panic(ErrTooLarge)
	} else if cap(dst) < n {
		dst = make([]byte, n)
	} else {
		dst = dst[:n]
	}

	// The block starts with the varint-encoded length of the decompressed bytes.
	d := binary.PutUvarint(dst, uint64(len(src)))

	if len(src) == 0 {
		return dst[:d]
	}
	if len(src) < minNonLiteralBlockSize {
		d += emitLiteral(dst[d:], src)
		return dst[:d]
	}

	n := encodeBlockBestSnappy(dst[d:], src)
	if n > 0 {
		d += n
		return dst[:d]
	}
	// Not compressible
	d += emitLiteral(dst[d:], src)
	return dst[:d]
}
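
// The helper below is an illustrative sketch only; the name encodeReusing is
// hypothetical and not part of this package's API. Because the encoders above
// only allocate when dst is too small, a caller can size a reusable buffer
// with MaxEncodedLen (defined further down) and avoid per-call allocations.
func encodeReusing(dst, src []byte) []byte {
	if n := MaxEncodedLen(len(src)); n >= 0 && cap(dst) >= n {
		// Reslice the reusable buffer so the encoder sees enough room.
		dst = dst[:n]
	}
	return EncodeBetter(dst, src)
}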

// ConcatBlocks will concatenate the supplied blocks and append them to the supplied destination.
// If the destination is nil or too small, a new one will be allocated.
// The blocks are not validated, so garbage in = garbage out.
// dst may not overlap block data.
// Any data in dst is preserved as is, so it will not be considered a block.
func ConcatBlocks(dst []byte, blocks ...[]byte) ([]byte, error) {
	totalSize := uint64(0)
	compSize := 0
	for _, b := range blocks {
		l, hdr, err := decodedLen(b)
		if err != nil {
			return nil, err
		}
		totalSize += uint64(l)
		compSize += len(b) - hdr
	}
	if totalSize == 0 {
		dst = append(dst, 0)
		return dst, nil
	}
	if totalSize > math.MaxUint32 {
		return nil, ErrTooLarge
	}
	var tmp [binary.MaxVarintLen32]byte
	hdrSize := binary.PutUvarint(tmp[:], totalSize)
	wantSize := hdrSize + compSize

	if cap(dst)-len(dst) < wantSize {
		dst = append(make([]byte, 0, wantSize+len(dst)), dst...)
	}
	dst = append(dst, tmp[:hdrSize]...)
	for _, b := range blocks {
		_, hdr, err := decodedLen(b)
		if err != nil {
			return nil, err
		}
		dst = append(dst, b[hdr:]...)
	}
	return dst, nil
}

// inputMargin is the minimum number of extra input bytes to keep, inside
// encodeBlock's inner loop. On some architectures, this margin lets us
// implement a fast path for emitLiteral, where the copy of short (<= 16 byte)
// literals can be implemented as a single load to and store from a 16-byte
// register. That literal's actual length can be as short as 1 byte, so this
// can copy up to 15 bytes too much, but that's OK as subsequent iterations of
// the encoding loop will fix up the copy overrun, and this inputMargin ensures
// that we don't overrun the dst and src buffers.
const inputMargin = 8

// minNonLiteralBlockSize is the minimum size of the input to encodeBlock that
// will be accepted by the encoder.
const minNonLiteralBlockSize = 32

const intReduction = 2 - (1 << (^uint(0) >> 63)) // 1 (32 bits) or 0 (64 bits)

// MaxBlockSize is the maximum value where MaxEncodedLen will return a valid block size.
// Blocks this big are highly discouraged, though.
// Half the size on 32-bit systems.
const MaxBlockSize = (1<<(32-intReduction) - 1) - binary.MaxVarintLen32 - 5

// MaxEncodedLen returns the maximum length of a snappy block, given its
// uncompressed length.
//
// It will return a negative value if srcLen is too large to encode.
// 32-bit platforms will have lower thresholds for rejecting big content.
func MaxEncodedLen(srcLen int) int {
	n := uint64(srcLen)
	if intReduction == 1 {
		// 32 bits
		if n > math.MaxInt32 {
			// Also includes negative.
			return -1
		}
	} else if n > 0xffffffff {
		// 64 bits
		// Also includes negative.
		return -1
	}
	// Size of the varint encoded block size.
	n = n + uint64((bits.Len64(n)+7)/7)

	// Add maximum size of encoding block as literals.
	n += uint64(literalExtraSize(int64(srcLen)))
	if intReduction == 1 {
		// 32 bits
		if n > math.MaxInt32 {
			return -1
		}
	} else if n > 0xffffffff {
		// 64 bits
		// Also includes negative.
		return -1
	}
	return int(n)
}
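
// The helper below is an illustrative sketch only; the name encodeConcatenated
// is hypothetical and not part of this package's API. It shows ConcatBlocks
// joining independently encoded blocks into one block that should decode to
// the concatenation of the inputs.
func encodeConcatenated(parts ...[]byte) ([]byte, error) {
	blocks := make([][]byte, 0, len(parts))
	for _, p := range parts {
		// Each part is encoded as its own block; ConcatBlocks merges the payloads
		// under a single decompressed-length header.
		blocks = append(blocks, Encode(nil, p))
	}
	return ConcatBlocks(nil, blocks...)
}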