gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

s2.go (4732B)


      1 // Copyright 2011 The Snappy-Go Authors. All rights reserved.
      2 // Copyright (c) 2019 Klaus Post. All rights reserved.
      3 // Use of this source code is governed by a BSD-style
      4 // license that can be found in the LICENSE file.
      5 
      6 // Package s2 implements the S2 compression format.
      7 //
      8 // S2 is an extension of Snappy. Similar to Snappy, S2 is aimed at high throughput,
      9 // which is why it features concurrent compression for bigger payloads.
     10 //
     11 // Decoding is compatible with Snappy compressed content,
     12 // but content compressed with S2 cannot be decompressed by Snappy.
     13 //
     14 // For more information on Snappy/S2 differences see README in: https://github.com/klauspost/compress/tree/master/s2
     15 //
     16 // There are actually two S2 formats: block and stream. They are related,
     17 // but different: trying to decompress block-compressed data as a S2 stream
     18 // will fail, and vice versa. The block format is the Decode and Encode
     19 // functions and the stream format is the Reader and Writer types.
     20 //
     21 // A "better" compression option is available. This will trade some compression
     22 // speed for a better compression ratio.
     23 //
     24 // The block format, the more common case, is used when the complete size (the
     25 // number of bytes) of the original data is known upfront, at the time
     26 // compression starts. The stream format, also known as the framing format, is
     27 // for when that isn't always true.
     28 //
     29 // Blocks do not offer much data protection, so it is up to you to
     30 // add data validation of decompressed blocks.
     31 //
     32 // Streams perform CRC validation of the decompressed data.
     33 // Stream compression will also be performed on multiple CPU cores concurrently,
     34 // significantly improving throughput.
     35 package s2
     36 
     37 import (
     38 	"bytes"
     39 	"hash/crc32"
     40 )
     41 
     42 /*
     43 Each encoded block begins with the varint-encoded length of the decoded data,
     44 followed by a sequence of chunks. Chunks begin and end on byte boundaries. The
     45 first byte of each chunk is broken into its 2 least and 6 most significant bits
     46 called l and m: l ranges in [0, 4) and m ranges in [0, 64). l is the chunk tag.
     47 Zero means a literal tag. All other values mean a copy tag.
     48 
     49 For literal tags:
     50   - If m < 60, the next 1 + m bytes are literal bytes.
     51   - Otherwise, let n be the little-endian unsigned integer denoted by the next
     52     m - 59 bytes. The next 1 + n bytes after that are literal bytes.
     53 
     54 For copy tags, length bytes are copied from offset bytes ago, in the style of
     55 Lempel-Ziv compression algorithms. In particular:
     56   - For l == 1, the offset ranges in [0, 1<<11) and the length in [4, 12).
     57     The length is 4 + the low 3 bits of m. The high 3 bits of m form bits 8-10
     58     of the offset. The next byte is bits 0-7 of the offset.
     59   - For l == 2, the offset ranges in [0, 1<<16) and the length in [1, 65).
     60     The length is 1 + m. The offset is the little-endian unsigned integer
     61     denoted by the next 2 bytes.
     62   - For l == 3, the offset ranges in [0, 1<<32) and the length in
     63     [1, 65). The length is 1 + m. The offset is the little-endian unsigned
     64     integer denoted by the next 4 bytes.
     65 */
     66 const (
     67 	tagLiteral = 0x00
     68 	tagCopy1   = 0x01
     69 	tagCopy2   = 0x02
     70 	tagCopy4   = 0x03
     71 )
     72 
     73 const (
     74 	checksumSize     = 4
     75 	chunkHeaderSize  = 4
     76 	magicChunk       = "\xff\x06\x00\x00" + magicBody
     77 	magicChunkSnappy = "\xff\x06\x00\x00" + magicBodySnappy
     78 	magicBodySnappy  = "sNaPpY"
     79 	magicBody        = "S2sTwO"
     80 
     81 	// maxBlockSize is the maximum size of the input to encodeBlock.
     82 	//
     83 	// For the framing format (Writer type instead of Encode function),
     84 	// this is the maximum uncompressed size of a block.
     85 	maxBlockSize = 4 << 20
     86 
     87 	// minBlockSize is the minimum size of block setting when creating a writer.
     88 	minBlockSize = 4 << 10
     89 
     90 	skippableFrameHeader = 4
     91 	maxChunkSize         = 1<<24 - 1 // 16777215
     92 
     93 	// Default block size
     94 	defaultBlockSize = 1 << 20
     95 
     96 	// maxSnappyBlockSize is the maximum snappy block size.
     97 	maxSnappyBlockSize = 1 << 16
     98 
     99 	obufHeaderLen = checksumSize + chunkHeaderSize
    100 )
    101 
    102 const (
    103 	chunkTypeCompressedData   = 0x00
    104 	chunkTypeUncompressedData = 0x01
    105 	ChunkTypeIndex            = 0x99
    106 	chunkTypePadding          = 0xfe
    107 	chunkTypeStreamIdentifier = 0xff
    108 )
    109 
    110 var crcTable = crc32.MakeTable(crc32.Castagnoli)
    111 
    112 // crc implements the checksum specified in section 3 of
    113 // https://github.com/google/snappy/blob/master/framing_format.txt
    114 func crc(b []byte) uint32 {
    115 	c := crc32.Update(0, crcTable, b)
    116 	return c>>15 | c<<17 + 0xa282ead8
    117 }
    118 
    119 // literalExtraSize returns the extra size of encoding n literals.
    120 // n should be >= 0 and <= math.MaxUint32.
    121 func literalExtraSize(n int64) int64 {
    122 	if n == 0 {
    123 		return 0
    124 	}
    125 	switch {
    126 	case n < 60:
    127 		return 1
    128 	case n < 1<<8:
    129 		return 2
    130 	case n < 1<<16:
    131 		return 3
    132 	case n < 1<<24:
    133 		return 4
    134 	default:
    135 		return 5
    136 	}
    137 }
    138 
    139 type byter interface {
    140 	Bytes() []byte
    141 }
    142 
    143 var _ byter = &bytes.Buffer{}