stateless.go (7993B)
1 package flate 2 3 import ( 4 "io" 5 "math" 6 "sync" 7 ) 8 9 const ( 10 maxStatelessBlock = math.MaxInt16 11 // dictionary will be taken from maxStatelessBlock, so limit it. 12 maxStatelessDict = 8 << 10 13 14 slTableBits = 13 15 slTableSize = 1 << slTableBits 16 slTableShift = 32 - slTableBits 17 ) 18 19 type statelessWriter struct { 20 dst io.Writer 21 closed bool 22 } 23 24 func (s *statelessWriter) Close() error { 25 if s.closed { 26 return nil 27 } 28 s.closed = true 29 // Emit EOF block 30 return StatelessDeflate(s.dst, nil, true, nil) 31 } 32 33 func (s *statelessWriter) Write(p []byte) (n int, err error) { 34 err = StatelessDeflate(s.dst, p, false, nil) 35 if err != nil { 36 return 0, err 37 } 38 return len(p), nil 39 } 40 41 func (s *statelessWriter) Reset(w io.Writer) { 42 s.dst = w 43 s.closed = false 44 } 45 46 // NewStatelessWriter will do compression but without maintaining any state 47 // between Write calls. 48 // There will be no memory kept between Write calls, 49 // but compression and speed will be suboptimal. 50 // Because of this, the size of actual Write calls will affect output size. 51 func NewStatelessWriter(dst io.Writer) io.WriteCloser { 52 return &statelessWriter{dst: dst} 53 } 54 55 // bitWriterPool contains bit writers that can be reused. 56 var bitWriterPool = sync.Pool{ 57 New: func() interface{} { 58 return newHuffmanBitWriter(nil) 59 }, 60 } 61 62 // StatelessDeflate allows compressing directly to a Writer without retaining state. 63 // When returning everything will be flushed. 64 // Up to 8KB of an optional dictionary can be given which is presumed to precede the block. 65 // Longer dictionaries will be truncated and will still produce valid output. 66 // Sending nil dictionary is perfectly fine. 
func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error {
	var dst tokens
	// Borrow a bit writer from the pool so no allocation state is kept
	// between calls; it is returned (with output detached) on exit.
	bw := bitWriterPool.Get().(*huffmanBitWriter)
	bw.reset(out)
	defer func() {
		// don't keep a reference to our output
		bw.reset(nil)
		bitWriterPool.Put(bw)
	}()
	if eof && len(in) == 0 {
		// Just write an EOF block.
		// Could be faster...
		bw.writeStoredHeader(0, true)
		bw.flush()
		return bw.err
	}

	// Truncate dict to at most maxStatelessDict bytes, keeping the tail
	// (the bytes closest to the input are the useful ones).
	if len(dict) > maxStatelessDict {
		dict = dict[len(dict)-maxStatelessDict:]
	}

	// For subsequent loops, keep shallow dict reference to avoid alloc+copy.
	var inDict []byte

	for len(in) > 0 {
		// Clamp this block so that dictionary + payload fit within
		// maxStatelessBlock (offsets are encoded as int16 below).
		todo := in
		if len(inDict) > 0 {
			if len(todo) > maxStatelessBlock-maxStatelessDict {
				todo = todo[:maxStatelessBlock-maxStatelessDict]
			}
		} else if len(todo) > maxStatelessBlock-len(dict) {
			todo = todo[:maxStatelessBlock-len(dict)]
		}
		inOrg := in
		in = in[len(todo):]
		uncompressed := todo
		if len(dict) > 0 {
			// combine dict and source; only needed on the first block,
			// later blocks reference inOrg directly via inDict.
			bufLen := len(todo) + len(dict)
			combined := make([]byte, bufLen)
			copy(combined, dict)
			copy(combined[len(dict):], todo)
			todo = combined
		}
		// Compress, telling the encoder where the real payload starts
		// (everything before that index is dictionary context only).
		if len(inDict) == 0 {
			statelessEnc(&dst, todo, int16(len(dict)))
		} else {
			statelessEnc(&dst, inDict[:maxStatelessDict+len(todo)], maxStatelessDict)
		}
		isEof := eof && len(in) == 0

		if dst.n == 0 {
			// Encoder produced no tokens (input too small to match);
			// emit the block stored (uncompressed).
			bw.writeStoredHeader(len(uncompressed), isEof)
			if bw.err != nil {
				return bw.err
			}
			bw.writeBytes(uncompressed)
		} else if int(dst.n) > len(uncompressed)-len(uncompressed)>>4 {
			// If we removed less than 1/16th, huffman compress the block.
			bw.writeBlockHuff(isEof, uncompressed, len(in) == 0)
		} else {
			bw.writeBlockDynamic(&dst, isEof, uncompressed, len(in) == 0)
		}
		if len(in) > 0 {
			// Retain a dict if we have more; slice into the original
			// input so no copy is needed. dst is reset for the next block.
			inDict = inOrg[len(uncompressed)-maxStatelessDict:]
			dict = nil
			dst.Reset()
		}
		if bw.err != nil {
			return bw.err
		}
	}
	if !eof {
		// Align, only a stored block can do that.
		bw.writeStoredHeader(0, false)
	}
	bw.flush()
	return bw.err
}

// hashSL maps 4 packed input bytes to an index into the encoder's
// match table using a multiplicative hash; the shift keeps the top
// slTableBits bits.
func hashSL(u uint32) uint32 {
	return (u * 0x1e35a7bd) >> slTableShift
}

// load3216 reads 4 bytes little-endian from b starting at index i.
// Indexes are int16 because stateless blocks never exceed math.MaxInt16.
func load3216(b []byte, i int16) uint32 {
	// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
	b = b[i:]
	b = b[:4]
	return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
}

// load6416 reads 8 bytes little-endian from b starting at index i.
func load6416(b []byte, i int16) uint64 {
	// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
	b = b[i:]
	b = b[:8]
	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
		uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
}

// statelessEnc tokenizes src into dst using a single-pass LZ-style
// matcher with a stack-allocated hash table. Bytes before startAt are
// treated as dictionary: they are indexed for matching but not emitted.
// If the input (past startAt) is too small, dst.n is left at 0 and the
// caller stores the block instead.
func statelessEnc(dst *tokens, src []byte, startAt int16) {
	const (
		inputMargin            = 12 - 1
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)

	type tableEntry struct {
		offset int16
	}

	// Hash table lives on the stack; this is what makes the encoder stateless.
	var table [slTableSize]tableEntry

	// This check isn't in the Snappy implementation, but there, the caller
	// instead of the callee handles this case.
	if len(src)-int(startAt) < minNonLiteralBlockSize {
		// We do not fill the token table.
		// This will be picked up by caller.
		dst.n = 0
		return
	}
	// Index until startAt so dictionary bytes are available as match targets.
	if startAt > 0 {
		cv := load3232(src, 0)
		for i := int16(0); i < startAt; i++ {
			table[hashSL(cv)] = tableEntry{offset: i}
			cv = (cv >> 8) | (uint32(src[i+4]) << 24)
		}
	}

	s := startAt + 1
	nextEmit := startAt
	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := int16(len(src) - inputMargin)

	// nextEmit is where in src the next emitLiteral should start from.
	cv := load3216(src, s)

	for {
		const skipLog = 5
		const doEvery = 2

		nextS := s
		var candidate tableEntry
		// Scan forward, skipping faster the longer we go without a match.
		for {
			nextHash := hashSL(cv)
			candidate = table[nextHash]
			nextS = s + doEvery + (s-nextEmit)>>skipLog
			// nextS <= 0 guards against int16 overflow of the position.
			if nextS > sLimit || nextS <= 0 {
				goto emitRemainder
			}

			now := load6416(src, nextS)
			table[nextHash] = tableEntry{offset: s}
			nextHash = hashSL(uint32(now))

			if cv == load3216(src, candidate.offset) {
				table[nextHash] = tableEntry{offset: nextS}
				break
			}

			// Do one right away...
			cv = uint32(now)
			s = nextS
			nextS++
			candidate = table[nextHash]
			now >>= 8
			table[nextHash] = tableEntry{offset: s}

			if cv == load3216(src, candidate.offset) {
				table[nextHash] = tableEntry{offset: nextS}
				break
			}
			cv = uint32(now)
			s = nextS
		}

		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.
		for {
			// Invariant: we have a 4-byte match at s, and no need to emit any
			// literal bytes prior to s.

			// Extend the 4-byte match as long as possible.
			t := candidate.offset
			l := int16(matchLen(src[s+4:], src[t+4:]) + 4)

			// Extend backwards
			for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
				s--
				t--
				l++
			}
			if nextEmit < s {
				// Inlined literal emission (same effect as emitLiteral,
				// kept inline here; the dead branch shows the equivalent call).
				if false {
					emitLiteral(dst, src[nextEmit:s])
				} else {
					for _, v := range src[nextEmit:s] {
						dst.tokens[dst.n] = token(v)
						dst.litHist[v]++
						dst.n++
					}
				}
			}

			// Save the match found
			dst.AddMatchLong(int32(l), uint32(s-t-baseMatchOffset))
			s += l
			nextEmit = s
			if nextS >= s {
				s = nextS + 1
			}
			if s >= sLimit {
				goto emitRemainder
			}

			// We could immediately start working at s now, but to improve
			// compression we first update the hash table at s-2 and at s. If
			// another emitCopy is not our next move, also calculate nextHash
			// at s+1. At least on GOARCH=amd64, these three hash calculations
			// are faster as one load64 call (with some shifts) instead of
			// three load32 calls.
			x := load6416(src, s-2)
			o := s - 2
			prevHash := hashSL(uint32(x))
			table[prevHash] = tableEntry{offset: o}
			x >>= 16
			currHash := hashSL(uint32(x))
			candidate = table[currHash]
			table[currHash] = tableEntry{offset: o + 2}

			if uint32(x) != load3216(src, candidate.offset) {
				cv = uint32(x >> 8)
				s++
				break
			}
		}
	}

emitRemainder:
	if int(nextEmit) < len(src) {
		// If nothing was added, don't encode literals.
		if dst.n == 0 {
			return
		}
		emitLiteral(dst, src[nextEmit:])
	}
}