gtsocial-umbx


stateless.go (7993B)


package flate

import (
	"io"
	"math"
	"sync"
)

const (
	maxStatelessBlock = math.MaxInt16
	// The dictionary is counted against the maxStatelessBlock budget, so limit its size.
	maxStatelessDict = 8 << 10

	slTableBits  = 13
	slTableSize  = 1 << slTableBits
	slTableShift = 32 - slTableBits
)
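
// Block positions are tracked in int16 values (see tableEntry in statelessEnc),
// which is why a block plus its dictionary must fit in math.MaxInt16 bytes.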

type statelessWriter struct {
	dst    io.Writer
	closed bool
}

func (s *statelessWriter) Close() error {
	if s.closed {
		return nil
	}
	s.closed = true
	// Emit EOF block
	return StatelessDeflate(s.dst, nil, true, nil)
}

func (s *statelessWriter) Write(p []byte) (n int, err error) {
	err = StatelessDeflate(s.dst, p, false, nil)
	if err != nil {
		return 0, err
	}
	return len(p), nil
}

func (s *statelessWriter) Reset(w io.Writer) {
	s.dst = w
	s.closed = false
}

// NewStatelessWriter will compress input without maintaining any state
// between Write calls, so no memory is retained between them.
// Compression ratio and speed will therefore be suboptimal, and the size
// of individual Write calls will directly affect the output size.
func NewStatelessWriter(dst io.Writer) io.WriteCloser {
	return &statelessWriter{dst: dst}
}
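
// A minimal usage sketch (the bytes.Buffer destination is illustrative, not
// part of this file): the writer emits a raw DEFLATE stream that
// flate.NewReader can decompress.
//
//	var buf bytes.Buffer
//	w := NewStatelessWriter(&buf)
//	_, _ = w.Write([]byte("hello world"))
//	_ = w.Close() // writes the final EOF block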

// bitWriterPool contains bit writers that can be reused.
var bitWriterPool = sync.Pool{
	New: func() interface{} {
		return newHuffmanBitWriter(nil)
	},
}

// StatelessDeflate allows compressing directly to a Writer without retaining state.
// Everything will have been flushed when the function returns.
// Up to 8KB of an optional dictionary can be given; it is presumed to precede the block.
// Longer dictionaries will be truncated and will still produce valid output.
// Sending a nil dictionary is perfectly fine.
func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error {
	var dst tokens
	bw := bitWriterPool.Get().(*huffmanBitWriter)
	bw.reset(out)
	defer func() {
		// don't keep a reference to our output
		bw.reset(nil)
		bitWriterPool.Put(bw)
	}()
	if eof && len(in) == 0 {
		// Just write an EOF block.
		// Could be faster...
		bw.writeStoredHeader(0, true)
		bw.flush()
		return bw.err
	}

	// Truncate dict
	if len(dict) > maxStatelessDict {
		dict = dict[len(dict)-maxStatelessDict:]
	}

	// For subsequent loops, keep shallow dict reference to avoid alloc+copy.
	var inDict []byte

	for len(in) > 0 {
		todo := in
		if len(inDict) > 0 {
			if len(todo) > maxStatelessBlock-maxStatelessDict {
				todo = todo[:maxStatelessBlock-maxStatelessDict]
			}
		} else if len(todo) > maxStatelessBlock-len(dict) {
			todo = todo[:maxStatelessBlock-len(dict)]
		}
		inOrg := in
		in = in[len(todo):]
		uncompressed := todo
		if len(dict) > 0 {
			// combine dict and source
			bufLen := len(todo) + len(dict)
			combined := make([]byte, bufLen)
			copy(combined, dict)
			copy(combined[len(dict):], todo)
			todo = combined
		}
		// Compress
		if len(inDict) == 0 {
			statelessEnc(&dst, todo, int16(len(dict)))
		} else {
			statelessEnc(&dst, inDict[:maxStatelessDict+len(todo)], maxStatelessDict)
		}
		isEof := eof && len(in) == 0

		if dst.n == 0 {
			bw.writeStoredHeader(len(uncompressed), isEof)
			if bw.err != nil {
				return bw.err
			}
			bw.writeBytes(uncompressed)
		} else if int(dst.n) > len(uncompressed)-len(uncompressed)>>4 {
			// If tokenization removed less than 1/16th of the input,
			// Huffman-compress the block as plain literals.
			bw.writeBlockHuff(isEof, uncompressed, len(in) == 0)
		} else {
			bw.writeBlockDynamic(&dst, isEof, uncompressed, len(in) == 0)
		}
		if len(in) > 0 {
			// Retain a dict if we have more
			inDict = inOrg[len(uncompressed)-maxStatelessDict:]
			dict = nil
			dst.Reset()
		}
		if bw.err != nil {
			return bw.err
		}
	}
	if !eof {
		// Align, only a stored block can do that.
		bw.writeStoredHeader(0, false)
	}
	bw.flush()
	return bw.err
}
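
// A hedged usage sketch (buf, payload, and dict are illustrative names, not
// part of this file): compressing one payload with a dictionary presumed to
// precede it.
//
//	var buf bytes.Buffer
//	if err := StatelessDeflate(&buf, payload, true, dict); err != nil {
//		// handle the underlying writer's error
//	}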
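// hashSL is a multiplicative hash: multiplying by a large odd constant mixes
// the input bits upward, and the shift keeps the top slTableBits bits as the
// table index. The constant is the same one used by Snappy.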
func hashSL(u uint32) uint32 {
	return (u * 0x1e35a7bd) >> slTableShift
}

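// load3216 and load6416 read little-endian 32- and 64-bit values starting at
// offset i. The re-slicing below lets the compiler prove the bounds once, so
// the byte-by-byte expression can compile down to a single read.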
func load3216(b []byte, i int16) uint32 {
	// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
	b = b[i:]
	b = b[:4]
	return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
}

func load6416(b []byte, i int16) uint64 {
	// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
	b = b[i:]
	b = b[:8]
	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
		uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
}

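// statelessEnc is a single-pass, Snappy-style LZ77 encoder over a small hash
// table on the stack. src[:startAt] is dictionary: it is indexed so matches
// can reach back into it, but only src[startAt:] is emitted as output.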
func statelessEnc(dst *tokens, src []byte, startAt int16) {
	const (
		inputMargin            = 12 - 1
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)

	type tableEntry struct {
		offset int16
	}

	var table [slTableSize]tableEntry

	// This check isn't in the Snappy implementation, but there, the caller
	// instead of the callee handles this case.
	if len(src)-int(startAt) < minNonLiteralBlockSize {
		// We do not fill the token table.
		// This will be picked up by caller.
		dst.n = 0
		return
	}
	// Index until startAt
	if startAt > 0 {
		cv := load3232(src, 0)
		for i := int16(0); i < startAt; i++ {
			table[hashSL(cv)] = tableEntry{offset: i}
			cv = (cv >> 8) | (uint32(src[i+4]) << 24)
		}
	}

	s := startAt + 1
	nextEmit := startAt
	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := int16(len(src) - inputMargin)

	// nextEmit is where in src the next emitLiteral should start from.
	cv := load3216(src, s)

	for {
		const skipLog = 5
		const doEvery = 2

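		// The search step grows with the distance since the last emit:
		// step = doEvery + (s-nextEmit)>>skipLog, so incompressible input
		// is skipped over progressively faster.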
		nextS := s
		var candidate tableEntry
		for {
			nextHash := hashSL(cv)
			candidate = table[nextHash]
			nextS = s + doEvery + (s-nextEmit)>>skipLog
			if nextS > sLimit || nextS <= 0 {
				goto emitRemainder
			}

			now := load6416(src, nextS)
			table[nextHash] = tableEntry{offset: s}
			nextHash = hashSL(uint32(now))

			if cv == load3216(src, candidate.offset) {
				table[nextHash] = tableEntry{offset: nextS}
				break
			}

			// Do one right away...
			cv = uint32(now)
			s = nextS
			nextS++
			candidate = table[nextHash]
			now >>= 8
			table[nextHash] = tableEntry{offset: s}

			if cv == load3216(src, candidate.offset) {
				table[nextHash] = tableEntry{offset: nextS}
				break
			}
			cv = uint32(now)
			s = nextS
		}

		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.
		for {
			// Invariant: we have a 4-byte match at s, and no need to emit any
			// literal bytes prior to s.

			// Extend the 4-byte match as long as possible.
			t := candidate.offset
			l := int16(matchLen(src[s+4:], src[t+4:]) + 4)

			// Extend backwards
			for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
				s--
				t--
				l++
			}
			if nextEmit < s {
				// Manually inlined emitLiteral: append each byte as a
				// literal token and update the literal histogram.
				for _, v := range src[nextEmit:s] {
					dst.tokens[dst.n] = token(v)
					dst.litHist[v]++
					dst.n++
				}
			}

			// Save the match found
			dst.AddMatchLong(int32(l), uint32(s-t-baseMatchOffset))
			s += l
			nextEmit = s
			if nextS >= s {
				s = nextS + 1
			}
			if s >= sLimit {
				goto emitRemainder
			}

			// We could immediately start working at s now, but to improve
			// compression we first update the hash table at s-2 and at s. If
			// another emitCopy is not our next move, also calculate nextHash
			// at s+1. At least on GOARCH=amd64, these three hash calculations
			// are faster as one load64 call (with some shifts) instead of
			// three load32 calls.
			x := load6416(src, s-2)
			o := s - 2
			prevHash := hashSL(uint32(x))
			table[prevHash] = tableEntry{offset: o}
			x >>= 16
			currHash := hashSL(uint32(x))
			candidate = table[currHash]
			table[currHash] = tableEntry{offset: o + 2}

			if uint32(x) != load3216(src, candidate.offset) {
				cv = uint32(x >> 8)
				s++
				break
			}
		}
	}

emitRemainder:
	if int(nextEmit) < len(src) {
		// If nothing was added, don't encode literals.
		if dst.n == 0 {
			return
		}
		emitLiteral(dst, src[nextEmit:])
	}
}