deflate.go
// Copyright 2009 The Go Authors. All rights reserved.
// Copyright (c) 2015 Klaus Post
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package flate

import (
	"encoding/binary"
	"fmt"
	"io"
	"math"
)

const (
	NoCompression      = 0
	BestSpeed          = 1
	BestCompression    = 9
	DefaultCompression = -1

	// HuffmanOnly disables Lempel-Ziv match searching and only performs Huffman
	// entropy encoding. This mode is useful in compressing data that has
	// already been compressed with an LZ style algorithm (e.g. Snappy or LZ4)
	// that lacks an entropy encoder. Compression gains are achieved when
	// certain bytes in the input stream occur more frequently than others.
	//
	// Note that HuffmanOnly produces a compressed output that is
	// RFC 1951 compliant. That is, any valid DEFLATE decompressor will
	// continue to be able to decompress this output.
	HuffmanOnly         = -2
	ConstantCompression = HuffmanOnly // compatibility alias.

	logWindowSize    = 15
	windowSize       = 1 << logWindowSize
	windowMask       = windowSize - 1
	logMaxOffsetSize = 15  // Standard DEFLATE
	minMatchLength   = 4   // The smallest match that the compressor looks for
	maxMatchLength   = 258 // The longest match for the compressor
	minOffsetSize    = 1   // The shortest offset that makes any sense

	// The maximum number of tokens we will encode at a time.
	// Smaller sizes usually create less optimal blocks.
	// Bigger can make context switching slow.
	// We use this for levels 7-9, so we make it big.
	maxFlateBlockTokens = 1 << 15
	maxStoreBlockSize   = 65535
	hashBits            = 17 // After 17 performance degrades
	hashSize            = 1 << hashBits
	hashMask            = (1 << hashBits) - 1
	hashShift           = (hashBits + minMatchLength - 1) / minMatchLength
	maxHashOffset       = 1 << 28

	skipNever = math.MaxInt32

	debugDeflate = false
)

type compressionLevel struct {
	good, lazy, nice, chain, fastSkipHashing, level int
}

// Compression levels have been rebalanced from zlib deflate defaults
// to give a bigger spread in speed and compression.
// See https://blog.klauspost.com/rebalancing-deflate-compression-levels/
var levels = []compressionLevel{
	{}, // 0
	// Levels 1-6 use a specialized algorithm - values not used
	{0, 0, 0, 0, 0, 1},
	{0, 0, 0, 0, 0, 2},
	{0, 0, 0, 0, 0, 3},
	{0, 0, 0, 0, 0, 4},
	{0, 0, 0, 0, 0, 5},
	{0, 0, 0, 0, 0, 6},
	// Levels 7-9 use increasingly more lazy matching
	// and increasingly stringent conditions for "good enough".
	{8, 12, 16, 24, skipNever, 7},
	{16, 30, 40, 64, skipNever, 8},
	{32, 258, 258, 1024, skipNever, 9},
}

// advancedState contains state for the advanced levels, with bigger hash tables, etc.
type advancedState struct {
	// deflate state
	length         int
	offset         int
	maxInsertIndex int
	chainHead      int
	hashOffset     int

	ii uint16 // position of last match, intended to overflow to reset.

	// input window: unprocessed data is window[index:windowEnd]
	index          int
	estBitsPerByte int
	hashMatch      [maxMatchLength + minMatchLength]uint32

	// Input hash chains
	// hashHead[hashValue] contains the largest inputIndex with the specified hash value
	// If hashHead[hashValue] is within the current window, then
	// hashPrev[hashHead[hashValue] & windowMask] contains the previous index
	// with the same hash value.
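	//
	// Illustrative sketch (not executed here) of how a chain is walked, mirroring
	// the traversal in findMatch below; minIndex stands for the oldest valid
	// window position:
	//
	//	i := int(s.hashHead[h]) - s.hashOffset
	//	for i >= minIndex {
	//		// d.window[i:] is an earlier position with the same hash value.
	//		i = int(s.hashPrev[i&windowMask]) - s.hashOffset
	//	}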
	hashHead [hashSize]uint32
	hashPrev [windowSize]uint32
}

type compressor struct {
	compressionLevel

	h *huffmanEncoder
	w *huffmanBitWriter

	// compression algorithm
	fill func(*compressor, []byte) int // copy data to window
	step func(*compressor)             // process window

	window     []byte
	windowEnd  int
	blockStart int // window index where current tokens start
	err        error

	// queued output tokens
	tokens tokens
	fast   fastEnc
	state  *advancedState

	sync          bool // requesting flush
	byteAvailable bool // if true, still need to process window[index-1].
}

func (d *compressor) fillDeflate(b []byte) int {
	s := d.state
	if s.index >= 2*windowSize-(minMatchLength+maxMatchLength) {
		// shift the window by windowSize
		//copy(d.window[:], d.window[windowSize:2*windowSize])
		*(*[windowSize]byte)(d.window) = *(*[windowSize]byte)(d.window[windowSize:])
		s.index -= windowSize
		d.windowEnd -= windowSize
		if d.blockStart >= windowSize {
			d.blockStart -= windowSize
		} else {
			d.blockStart = math.MaxInt32
		}
		s.hashOffset += windowSize
		if s.hashOffset > maxHashOffset {
			delta := s.hashOffset - 1
			s.hashOffset -= delta
			s.chainHead -= delta
			// Iterate over slices instead of arrays to avoid copying
			// the entire table onto the stack (Issue #18625).
			for i, v := range s.hashPrev[:] {
				if int(v) > delta {
					s.hashPrev[i] = uint32(int(v) - delta)
				} else {
					s.hashPrev[i] = 0
				}
			}
			for i, v := range s.hashHead[:] {
				if int(v) > delta {
					s.hashHead[i] = uint32(int(v) - delta)
				} else {
					s.hashHead[i] = 0
				}
			}
		}
	}
	n := copy(d.window[d.windowEnd:], b)
	d.windowEnd += n
	return n
}

func (d *compressor) writeBlock(tok *tokens, index int, eof bool) error {
	if index > 0 || eof {
		var window []byte
		if d.blockStart <= index {
			window = d.window[d.blockStart:index]
		}
		d.blockStart = index
		//d.w.writeBlock(tok, eof, window)
		d.w.writeBlockDynamic(tok, eof, window, d.sync)
		return d.w.err
	}
	return nil
}

// writeBlockSkip writes the current block and uses the number of tokens
// to determine if the block should be stored on no matches, or
// only huffman encoded.
func (d *compressor) writeBlockSkip(tok *tokens, index int, eof bool) error {
	if index > 0 || eof {
		if d.blockStart <= index {
			window := d.window[d.blockStart:index]
			// If we removed less than a 64th of all literals
			// we huffman compress the block.
			if int(tok.n) > len(window)-int(tok.n>>6) {
				d.w.writeBlockHuff(eof, window, d.sync)
			} else {
				// Write a dynamic huffman block.
				d.w.writeBlockDynamic(tok, eof, window, d.sync)
			}
		} else {
			d.w.writeBlock(tok, eof, nil)
		}
		d.blockStart = index
		return d.w.err
	}
	return nil
}

// fillWindow will fill the current window with the supplied
// dictionary and calculate all hashes.
// This is much faster than doing a full encode.
// Should only be used after a start/reset.
func (d *compressor) fillWindow(b []byte) {
	// Do not fill window if we are in store-only or huffman mode.
	if d.level <= 0 {
		return
	}
	if d.fast != nil {
		// encode the last data, but discard the result
		if len(b) > maxMatchOffset {
			b = b[len(b)-maxMatchOffset:]
		}
		d.fast.Encode(&d.tokens, b)
		d.tokens.Reset()
		return
	}
	s := d.state
	// If we are given too much, cut it.
	if len(b) > windowSize {
		b = b[len(b)-windowSize:]
	}
	// Add all to window.
	n := copy(d.window[d.windowEnd:], b)

	// Calculate 256 hashes at a time (more L1 cache hits)
	loops := (n + 256 - minMatchLength) / 256
	for j := 0; j < loops; j++ {
		startindex := j * 256
		end := startindex + 256 + minMatchLength - 1
		if end > n {
			end = n
		}
		tocheck := d.window[startindex:end]
		dstSize := len(tocheck) - minMatchLength + 1

		if dstSize <= 0 {
			continue
		}

		dst := s.hashMatch[:dstSize]
		bulkHash4(tocheck, dst)
		var newH uint32
		for i, val := range dst {
			di := i + startindex
			newH = val & hashMask
			// Get previous value with the same hash.
			// Our chain should point to the previous value.
			s.hashPrev[di&windowMask] = s.hashHead[newH]
			// Set the head of the hash chain to us.
			s.hashHead[newH] = uint32(di + s.hashOffset)
		}
	}
	// Update window information.
	d.windowEnd += n
	s.index = n
}

// Try to find a match starting at index whose length is greater than prevSize.
// We only look at chainCount possibilities before giving up.
// pos = s.index, prevHead = s.chainHead-s.hashOffset, prevLength=minMatchLength-1, lookahead
func (d *compressor) findMatch(pos int, prevHead int, lookahead int) (length, offset int, ok bool) {
	minMatchLook := maxMatchLength
	if lookahead < minMatchLook {
		minMatchLook = lookahead
	}

	win := d.window[0 : pos+minMatchLook]

	// We quit when we get a match that's at least nice long
	nice := len(win) - pos
	if d.nice < nice {
		nice = d.nice
	}

	// If we've got a match that's good enough, only look in 1/4 the chain.
	tries := d.chain
	length = minMatchLength - 1

	wEnd := win[pos+length]
	wPos := win[pos:]
	minIndex := pos - windowSize
	if minIndex < 0 {
		minIndex = 0
	}
	offset = 0

	if d.chain < 100 {
		for i := prevHead; tries > 0; tries-- {
			if wEnd == win[i+length] {
				n := matchLen(win[i:i+minMatchLook], wPos)
				if n > length {
					length = n
					offset = pos - i
					ok = true
					if n >= nice {
						// The match is good enough that we don't try to find a better one.
						break
					}
					wEnd = win[pos+n]
				}
			}
			if i <= minIndex {
				// hashPrev[i & windowMask] has already been overwritten, so stop now.
				break
			}
			i = int(d.state.hashPrev[i&windowMask]) - d.state.hashOffset
			if i < minIndex {
				break
			}
		}
		return
	}

	// Minimum gain to accept a match.
	cGain := 4

	// Some like it higher (CSV), some like it lower (JSON)
	const baseCost = 3
	// Base is 4 bytes with an additional cost.
	// Matches must be better than this.

	for i := prevHead; tries > 0; tries-- {
		if wEnd == win[i+length] {
			n := matchLen(win[i:i+minMatchLook], wPos)
			if n > length {
				// Calculate gain. Estimate
				newGain := d.h.bitLengthRaw(wPos[:n]) - int(offsetExtraBits[offsetCode(uint32(pos-i))]) - baseCost - int(lengthExtraBits[lengthCodes[(n-3)&255]])

				//fmt.Println("gain:", newGain, "prev:", cGain, "raw:", d.h.bitLengthRaw(wPos[:n]), "this-len:", n, "prev-len:", length)
				if newGain > cGain {
					length = n
					offset = pos - i
					cGain = newGain
					ok = true
					if n >= nice {
						// The match is good enough that we don't try to find a better one.
						break
					}
					wEnd = win[pos+n]
				}
			}
		}
		if i <= minIndex {
			// hashPrev[i & windowMask] has already been overwritten, so stop now.
			break
		}
		i = int(d.state.hashPrev[i&windowMask]) - d.state.hashOffset
		if i < minIndex {
			break
		}
	}
	return
}

func (d *compressor) writeStoredBlock(buf []byte) error {
	if d.w.writeStoredHeader(len(buf), false); d.w.err != nil {
		return d.w.err
	}
	d.w.writeBytes(buf)
	return d.w.err
}

// hash4 returns a hash representation of the first 4 bytes
// of the supplied slice.
// The caller must ensure that len(b) >= 4.
func hash4(b []byte) uint32 {
	return hash4u(binary.LittleEndian.Uint32(b), hashBits)
}

// hash4u returns the hash of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <32.
func hash4u(u uint32, h uint8) uint32 {
	return (u * prime4bytes) >> (32 - h)
}

// bulkHash4 will compute hashes using the same
// algorithm as hash4.
func bulkHash4(b []byte, dst []uint32) {
	if len(b) < 4 {
		return
	}
	hb := binary.LittleEndian.Uint32(b)

	dst[0] = hash4u(hb, hashBits)
	end := len(b) - 4 + 1
	for i := 1; i < end; i++ {
		hb = (hb >> 8) | uint32(b[i+3])<<24
		dst[i] = hash4u(hb, hashBits)
	}
}

func (d *compressor) initDeflate() {
	d.window = make([]byte, 2*windowSize)
	d.byteAvailable = false
	d.err = nil
	if d.state == nil {
		return
	}
	s := d.state
	s.index = 0
	s.hashOffset = 1
	s.length = minMatchLength - 1
	s.offset = 0
	s.chainHead = -1
}

// deflateLazy is the same as deflate, but with d.fastSkipHashing == skipNever,
// meaning it always has lazy matching on.
func (d *compressor) deflateLazy() {
	s := d.state
	// Sanity enables additional runtime tests.
	// It's intended to be used during development
	// to supplement the currently ad-hoc unit tests.
	const sanity = debugDeflate

	if d.windowEnd-s.index < minMatchLength+maxMatchLength && !d.sync {
		return
	}
	if d.windowEnd != s.index && d.chain > 100 {
		// Get literal huffman coder.
		if d.h == nil {
			d.h = newHuffmanEncoder(maxFlateBlockTokens)
		}
		var tmp [256]uint16
		for _, v := range d.window[s.index:d.windowEnd] {
			tmp[v]++
		}
		d.h.generate(tmp[:], 15)
	}

	s.maxInsertIndex = d.windowEnd - (minMatchLength - 1)

	for {
		if sanity && s.index > d.windowEnd {
			panic("index > windowEnd")
		}
		lookahead := d.windowEnd - s.index
		if lookahead < minMatchLength+maxMatchLength {
			if !d.sync {
				return
			}
			if sanity && s.index > d.windowEnd {
				panic("index > windowEnd")
			}
			if lookahead == 0 {
				// Flush current output block if any.
				if d.byteAvailable {
					// There is still one pending token that needs to be flushed
					d.tokens.AddLiteral(d.window[s.index-1])
					d.byteAvailable = false
				}
				if d.tokens.n > 0 {
					if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
						return
					}
					d.tokens.Reset()
				}
				return
			}
		}
		if s.index < s.maxInsertIndex {
			// Update the hash
			hash := hash4(d.window[s.index:])
			ch := s.hashHead[hash]
			s.chainHead = int(ch)
			s.hashPrev[s.index&windowMask] = ch
			s.hashHead[hash] = uint32(s.index + s.hashOffset)
		}
		prevLength := s.length
		prevOffset := s.offset
		s.length = minMatchLength - 1
		s.offset = 0
		minIndex := s.index - windowSize
		if minIndex < 0 {
			minIndex = 0
		}

		if s.chainHead-s.hashOffset >= minIndex && lookahead > prevLength && prevLength < d.lazy {
			if newLength, newOffset, ok := d.findMatch(s.index, s.chainHead-s.hashOffset, lookahead); ok {
				s.length = newLength
				s.offset = newOffset
			}
		}

		if prevLength >= minMatchLength && s.length <= prevLength {
			// No better match, but check for better match at end...
			//
			// Skip forward a number of bytes.
			// Offset of 2 seems to yield best results. 3 is sometimes better.
			const checkOff = 2

			// Check all, except full length
			if prevLength < maxMatchLength-checkOff {
				prevIndex := s.index - 1
				if prevIndex+prevLength < s.maxInsertIndex {
					end := lookahead
					if lookahead > maxMatchLength+checkOff {
						end = maxMatchLength + checkOff
					}
					end += prevIndex

					// Hash at match end.
					h := hash4(d.window[prevIndex+prevLength:])
					ch2 := int(s.hashHead[h]) - s.hashOffset - prevLength
					if prevIndex-ch2 != prevOffset && ch2 > minIndex+checkOff {
						length := matchLen(d.window[prevIndex+checkOff:end], d.window[ch2+checkOff:])
						// It seems like a pure length metric is best.
						if length > prevLength {
							prevLength = length
							prevOffset = prevIndex - ch2

							// Extend back...
							for i := checkOff - 1; i >= 0; i-- {
								if prevLength >= maxMatchLength || d.window[prevIndex+i] != d.window[ch2+i] {
									// Emit tokens we "owe"
									for j := 0; j <= i; j++ {
										d.tokens.AddLiteral(d.window[prevIndex+j])
										if d.tokens.n == maxFlateBlockTokens {
											// The block includes the current character
											if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
												return
											}
											d.tokens.Reset()
										}
										s.index++
										if s.index < s.maxInsertIndex {
											h := hash4(d.window[s.index:])
											ch := s.hashHead[h]
											s.chainHead = int(ch)
											s.hashPrev[s.index&windowMask] = ch
											s.hashHead[h] = uint32(s.index + s.hashOffset)
										}
									}
									break
								} else {
									prevLength++
								}
							}
						} else if false {
							// Check one further ahead.
							// Only rarely better, disabled for now.
							prevIndex++
							h := hash4(d.window[prevIndex+prevLength:])
							ch2 := int(s.hashHead[h]) - s.hashOffset - prevLength
							if prevIndex-ch2 != prevOffset && ch2 > minIndex+checkOff {
								length := matchLen(d.window[prevIndex+checkOff:end], d.window[ch2+checkOff:])
								// It seems like a pure length metric is best.
								if length > prevLength+checkOff {
									prevLength = length
									prevOffset = prevIndex - ch2
									prevIndex--

									// Extend back...
									for i := checkOff; i >= 0; i-- {
										if prevLength >= maxMatchLength || d.window[prevIndex+i] != d.window[ch2+i-1] {
											// Emit tokens we "owe"
											for j := 0; j <= i; j++ {
												d.tokens.AddLiteral(d.window[prevIndex+j])
												if d.tokens.n == maxFlateBlockTokens {
													// The block includes the current character
													if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
														return
													}
													d.tokens.Reset()
												}
												s.index++
												if s.index < s.maxInsertIndex {
													h := hash4(d.window[s.index:])
													ch := s.hashHead[h]
													s.chainHead = int(ch)
													s.hashPrev[s.index&windowMask] = ch
													s.hashHead[h] = uint32(s.index + s.hashOffset)
												}
											}
											break
										} else {
											prevLength++
										}
									}
								}
							}
						}
					}
				}
			}
			// There was a match at the previous step, and the current match is
			// not better. Output the previous match.
			d.tokens.AddMatch(uint32(prevLength-3), uint32(prevOffset-minOffsetSize))

			// Insert in the hash table all strings up to the end of the match.
			// index and index-1 are already inserted. If there is not enough
			// lookahead, the last two strings are not inserted into the hash
			// table.
			newIndex := s.index + prevLength - 1
			// Calculate missing hashes
			end := newIndex
			if end > s.maxInsertIndex {
				end = s.maxInsertIndex
			}
			end += minMatchLength - 1
			startindex := s.index + 1
			if startindex > s.maxInsertIndex {
				startindex = s.maxInsertIndex
			}
			tocheck := d.window[startindex:end]
			dstSize := len(tocheck) - minMatchLength + 1
			if dstSize > 0 {
				dst := s.hashMatch[:dstSize]
				bulkHash4(tocheck, dst)
				var newH uint32
				for i, val := range dst {
					di := i + startindex
					newH = val & hashMask
					// Get previous value with the same hash.
					// Our chain should point to the previous value.
					s.hashPrev[di&windowMask] = s.hashHead[newH]
					// Set the head of the hash chain to us.
					s.hashHead[newH] = uint32(di + s.hashOffset)
				}
			}

			s.index = newIndex
			d.byteAvailable = false
			s.length = minMatchLength - 1
			if d.tokens.n == maxFlateBlockTokens {
				// The block includes the current character
				if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
					return
				}
				d.tokens.Reset()
			}
			s.ii = 0
		} else {
			// Reset, if we got a match this run.
			if s.length >= minMatchLength {
				s.ii = 0
			}
			// We have a byte waiting. Emit it.
			if d.byteAvailable {
				s.ii++
				d.tokens.AddLiteral(d.window[s.index-1])
				if d.tokens.n == maxFlateBlockTokens {
					if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
						return
					}
					d.tokens.Reset()
				}
				s.index++

				// If we have a long run of no matches, skip additional bytes
				// Resets when s.ii overflows after 64KB.
				if n := int(s.ii) - d.chain; n > 0 {
					n = 1 + int(n>>6)
					for j := 0; j < n; j++ {
						if s.index >= d.windowEnd-1 {
							break
						}
						d.tokens.AddLiteral(d.window[s.index-1])
						if d.tokens.n == maxFlateBlockTokens {
							if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
								return
							}
							d.tokens.Reset()
						}
						// Index...
						if s.index < s.maxInsertIndex {
							h := hash4(d.window[s.index:])
							ch := s.hashHead[h]
							s.chainHead = int(ch)
							s.hashPrev[s.index&windowMask] = ch
							s.hashHead[h] = uint32(s.index + s.hashOffset)
						}
						s.index++
					}
					// Flush last byte
					d.tokens.AddLiteral(d.window[s.index-1])
					d.byteAvailable = false
					// s.length = minMatchLength - 1 // not needed, since s.ii is reset above, so it should never be > minMatchLength
					if d.tokens.n == maxFlateBlockTokens {
						if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
							return
						}
						d.tokens.Reset()
					}
				}
			} else {
				s.index++
				d.byteAvailable = true
			}
		}
	}
}

func (d *compressor) store() {
	if d.windowEnd > 0 && (d.windowEnd == maxStoreBlockSize || d.sync) {
		d.err = d.writeStoredBlock(d.window[:d.windowEnd])
		d.windowEnd = 0
	}
}

// fillBlock will fill the buffer with data for huffman-only compression.
// The number of bytes copied is returned.
func (d *compressor) fillBlock(b []byte) int {
	n := copy(d.window[d.windowEnd:], b)
	d.windowEnd += n
	return n
}

// storeHuff will compress and store the currently added data,
// if enough has been accumulated or we are at the end of the stream.
// Any error that occurred will be in d.err
func (d *compressor) storeHuff() {
	if d.windowEnd < len(d.window) && !d.sync || d.windowEnd == 0 {
		return
	}
	d.w.writeBlockHuff(false, d.window[:d.windowEnd], d.sync)
	d.err = d.w.err
	d.windowEnd = 0
}

// storeFast will compress and store the currently added data,
// if enough has been accumulated or we are at the end of the stream.
// Any error that occurred will be in d.err
func (d *compressor) storeFast() {
	// We only compress if we have maxStoreBlockSize.
	if d.windowEnd < len(d.window) {
		if !d.sync {
			return
		}
		// Handle extremely small sizes.
		if d.windowEnd < 128 {
			if d.windowEnd == 0 {
				return
			}
			if d.windowEnd <= 32 {
				d.err = d.writeStoredBlock(d.window[:d.windowEnd])
			} else {
				d.w.writeBlockHuff(false, d.window[:d.windowEnd], true)
				d.err = d.w.err
			}
			d.tokens.Reset()
			d.windowEnd = 0
			d.fast.Reset()
			return
		}
	}

	d.fast.Encode(&d.tokens, d.window[:d.windowEnd])
	// If we made zero matches, store the block as is.
	if d.tokens.n == 0 {
		d.err = d.writeStoredBlock(d.window[:d.windowEnd])
		// If we removed less than 1/16th, huffman compress the block.
	} else if int(d.tokens.n) > d.windowEnd-(d.windowEnd>>4) {
		d.w.writeBlockHuff(false, d.window[:d.windowEnd], d.sync)
		d.err = d.w.err
	} else {
		d.w.writeBlockDynamic(&d.tokens, false, d.window[:d.windowEnd], d.sync)
		d.err = d.w.err
	}
	d.tokens.Reset()
	d.windowEnd = 0
}

// write will add input bytes to the stream.
// Unless an error occurs all bytes will be consumed.
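// Internally write loops until b is drained: when the window is full, or a
// flush/close has been requested (d.sync), d.step compresses the buffered
// window, and d.fill then copies more of b into the window.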
func (d *compressor) write(b []byte) (n int, err error) {
	if d.err != nil {
		return 0, d.err
	}
	n = len(b)
	for len(b) > 0 {
		if d.windowEnd == len(d.window) || d.sync {
			d.step(d)
		}
		b = b[d.fill(d, b):]
		if d.err != nil {
			return 0, d.err
		}
	}
	return n, d.err
}

func (d *compressor) syncFlush() error {
	d.sync = true
	if d.err != nil {
		return d.err
	}
	d.step(d)
	if d.err == nil {
		d.w.writeStoredHeader(0, false)
		d.w.flush()
		d.err = d.w.err
	}
	d.sync = false
	return d.err
}

func (d *compressor) init(w io.Writer, level int) (err error) {
	d.w = newHuffmanBitWriter(w)

	switch {
	case level == NoCompression:
		d.window = make([]byte, maxStoreBlockSize)
		d.fill = (*compressor).fillBlock
		d.step = (*compressor).store
	case level == ConstantCompression:
		d.w.logNewTablePenalty = 10
		d.window = make([]byte, 32<<10)
		d.fill = (*compressor).fillBlock
		d.step = (*compressor).storeHuff
	case level == DefaultCompression:
		level = 5
		fallthrough
	case level >= 1 && level <= 6:
		d.w.logNewTablePenalty = 7
		d.fast = newFastEnc(level)
		d.window = make([]byte, maxStoreBlockSize)
		d.fill = (*compressor).fillBlock
		d.step = (*compressor).storeFast
	case 7 <= level && level <= 9:
		d.w.logNewTablePenalty = 8
		d.state = &advancedState{}
		d.compressionLevel = levels[level]
		d.initDeflate()
		d.fill = (*compressor).fillDeflate
		d.step = (*compressor).deflateLazy
	default:
		return fmt.Errorf("flate: invalid compression level %d: want value in range [-2, 9]", level)
	}
	d.level = level
	return nil
}

// reset the state of the compressor.
func (d *compressor) reset(w io.Writer) {
	d.w.reset(w)
	d.sync = false
	d.err = nil
	// We only need to reset a few things for Snappy.
	if d.fast != nil {
		d.fast.Reset()
		d.windowEnd = 0
		d.tokens.Reset()
		return
	}
	switch d.compressionLevel.chain {
	case 0:
		// level was NoCompression or ConstantCompression.
		d.windowEnd = 0
	default:
		s := d.state
		s.chainHead = -1
		for i := range s.hashHead {
			s.hashHead[i] = 0
		}
		for i := range s.hashPrev {
			s.hashPrev[i] = 0
		}
		s.hashOffset = 1
		s.index, d.windowEnd = 0, 0
		d.blockStart, d.byteAvailable = 0, false
		d.tokens.Reset()
		s.length = minMatchLength - 1
		s.offset = 0
		s.ii = 0
		s.maxInsertIndex = 0
	}
}

func (d *compressor) close() error {
	if d.err != nil {
		return d.err
	}
	d.sync = true
	d.step(d)
	if d.err != nil {
		return d.err
	}
	if d.w.writeStoredHeader(0, true); d.w.err != nil {
		return d.w.err
	}
	d.w.flush()
	d.w.reset(nil)
	return d.w.err
}

// NewWriter returns a new Writer compressing data at the given level.
// Following zlib, levels range from 1 (BestSpeed) to 9 (BestCompression);
// higher levels typically run slower but compress more.
// Level 0 (NoCompression) does not attempt any compression; it only adds the
// necessary DEFLATE framing.
// Level -1 (DefaultCompression) uses the default compression level.
// Level -2 (ConstantCompression) will use Huffman compression only, giving
// a very fast compression for all types of input, but sacrificing considerable
// compression efficiency.
//
// If level is in the range [-2, 9] then the error returned will be nil.
// Otherwise the error returned will be non-nil.
func NewWriter(w io.Writer, level int) (*Writer, error) {
	var dw Writer
	if err := dw.d.init(w, level); err != nil {
		return nil, err
	}
	return &dw, nil
}

// NewWriterDict is like NewWriter but initializes the new
// Writer with a preset dictionary. The returned Writer behaves
// as if the dictionary had been written to it without producing
// any compressed output. The compressed data written to w
// can only be decompressed by a Reader initialized with the
// same dictionary.
func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) {
	zw, err := NewWriter(w, level)
	if err != nil {
		return nil, err
	}
	zw.d.fillWindow(dict)
	zw.dict = append(zw.dict, dict...) // duplicate dictionary for Reset method.
	return zw, err
}

// A Writer takes data written to it and writes the compressed
// form of that data to an underlying writer (see NewWriter).
type Writer struct {
	d    compressor
	dict []byte
}

// Write writes data to w, which will eventually write the
// compressed form of data to its underlying writer.
func (w *Writer) Write(data []byte) (n int, err error) {
	return w.d.write(data)
}

// Flush flushes any pending data to the underlying writer.
// It is useful mainly in compressed network protocols, to ensure that
// a remote reader has enough data to reconstruct a packet.
// Flush does not return until the data has been written.
// Calling Flush when there is no pending data still causes the Writer
// to emit a sync marker of at least 4 bytes.
// If the underlying writer returns an error, Flush returns that error.
//
// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH.
func (w *Writer) Flush() error {
	// For more about flushing:
	// http://www.bolet.org/~pornin/deflate-flush.html
	return w.d.syncFlush()
}

// Close flushes and closes the writer.
func (w *Writer) Close() error {
	return w.d.close()
}

// Reset discards the writer's state and makes it equivalent to
// the result of NewWriter or NewWriterDict called with dst
// and w's level and dictionary.
func (w *Writer) Reset(dst io.Writer) {
	if len(w.dict) > 0 {
		// w was created with NewWriterDict
		w.d.reset(dst)
		if dst != nil {
			w.d.fillWindow(w.dict)
		}
	} else {
		// w was created with NewWriter
		w.d.reset(dst)
	}
}

// ResetDict discards the writer's state and makes it equivalent to
// the result of NewWriter or NewWriterDict called with dst
// and w's level, but sets a specific dictionary.
func (w *Writer) ResetDict(dst io.Writer, dict []byte) {
	w.dict = dict
	w.d.reset(dst)
	w.d.fillWindow(w.dict)
}
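// Minimal usage sketch of the Writer API defined in this file (illustrative
// only; assumes the standard "bytes" package for the destination buffer):
//
//	var buf bytes.Buffer
//	zw, err := NewWriter(&buf, BestSpeed)
//	if err != nil {
//		// level was outside [-2, 9]
//	}
//	_, _ = zw.Write([]byte("hello hello hello"))
//	_ = zw.Flush() // emit a sync marker so a reader can decode what was written so far
//	_ = zw.Close() // terminate the DEFLATE stream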