// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package flate

import (
	"encoding/binary"
	"fmt"
	"io"
	"math"
)

const (
	// The largest offset code.
	offsetCodeCount = 30

	// The special code used to mark the end of a block.
	endBlockMarker = 256

	// The first length code.
	lengthCodesStart = 257

	// The number of codegen codes.
	codegenCodeCount = 19
	badCode          = 255

	// maxPredefinedTokens is the maximum number of tokens
	// where we check if fixed size is smaller.
	maxPredefinedTokens = 250

	// bufferFlushSize indicates the buffer size
	// after which bytes are flushed to the writer.
	// Should preferably be a multiple of 6, since
	// we accumulate 6 bytes between writes to the buffer.
	bufferFlushSize = 246

	// bufferSize is the actual output byte buffer size.
	// It must have additional headroom for a flush
	// which can contain up to 8 bytes.
	bufferSize = bufferFlushSize + 8
)

// Minimum length code that emits bits.
const lengthExtraBitsMinCode = 8

// The number of extra bits needed by length code X - LENGTH_CODES_START.
var lengthExtraBits = [32]uint8{
	/* 257 */ 0, 0, 0,
	/* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2,
	/* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,
	/* 280 */ 4, 5, 5, 5, 5, 0,
}

// The length indicated by length code X - LENGTH_CODES_START.
var lengthBase = [32]uint8{
	0, 1, 2, 3, 4, 5, 6, 7, 8, 10,
	12, 14, 16, 20, 24, 28, 32, 40, 48, 56,
	64, 80, 96, 112, 128, 160, 192, 224, 255,
}

// Minimum offset code that emits bits.
const offsetExtraBitsMinCode = 4

// offset code word extra bits.
var offsetExtraBits = [32]int8{
	0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
	4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
	9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
	/* extended window */
	14, 14,
}

var offsetCombined = [32]uint32{}

func init() {
	var offsetBase = [32]uint32{
		/* normal deflate */
		0x000000, 0x000001, 0x000002, 0x000003, 0x000004,
		0x000006, 0x000008, 0x00000c, 0x000010, 0x000018,
		0x000020, 0x000030, 0x000040, 0x000060, 0x000080,
		0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300,
		0x000400, 0x000600, 0x000800, 0x000c00, 0x001000,
		0x001800, 0x002000, 0x003000, 0x004000, 0x006000,

		/* extended window */
		0x008000, 0x00c000,
	}

	for i := range offsetCombined[:] {
		// Don't use extended window values...
		if offsetExtraBits[i] == 0 || offsetBase[i] > 0x006000 {
			continue
		}
		offsetCombined[i] = uint32(offsetExtraBits[i]) | (offsetBase[i] << 8)
	}
}
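
// Illustration, not part of the upstream file: offsetCombined packs an
// offset code's extra-bit count into the low byte and its base offset into
// the upper 24 bits, so the hot loop in writeTokens can fetch both with a
// single load. This hypothetical helper unpacks one entry; entries for
// codes without extra bits (and the extended window) are left zero by the
// init above.
func splitOffsetCombined(code uint32) (extraBits uint8, base uint32) {
	v := offsetCombined[code&31]
	return uint8(v), v >> 8
}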
// The odd order in which the codegen code sizes are written.
var codegenOrder = []uint32{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}

type huffmanBitWriter struct {
	// writer is the underlying writer.
	// Do not use it directly; use the write method, which ensures
	// that Write errors are sticky.
	writer io.Writer

	// Data waiting to be written is bytes[0:nbytes]
	// and then the low nbits of bits.
	bits            uint64
	nbits           uint8
	nbytes          uint8
	lastHuffMan     bool
	literalEncoding *huffmanEncoder
	tmpLitEncoding  *huffmanEncoder
	offsetEncoding  *huffmanEncoder
	codegenEncoding *huffmanEncoder
	err             error
	lastHeader      int
	// logNewTablePenalty is the log2 of the penalty added to the estimated
	// size of a new table. At 0 the penalty equals the estimate itself, so
	// a reused block can be up to 2x the optimal size before a new table
	// is generated.
	logNewTablePenalty uint
	bytes              [256 + 8]byte
	literalFreq        [lengthCodesStart + 32]uint16
	offsetFreq         [32]uint16
	codegenFreq        [codegenCodeCount]uint16

	// codegen must have an extra space for the final symbol.
	codegen [literalCount + offsetCodeCount + 1]uint8
}

// Huffman reuse.
//
// The huffmanBitWriter supports reusing huffman tables and thereby combining block sections.
//
// This is controlled by several variables:
//
// If lastHeader is non-zero the Huffman table can be reused.
// This also indicates that a Huffman table has been generated that can output all
// possible symbols.
// It also indicates that an EOB has not yet been emitted, so if a new table is generated
// an EOB with the previous table must be written.
//
// If lastHuffMan is set, a table for outputting literals has been generated and offsets are invalid.
//
// An incoming block estimates the output size of a fresh table by calculating the
// optimal size and adding a penalty in 'logNewTablePenalty'.
// A Huffman table is not optimal, which is why we add a penalty, and generating a new table
// is slower both for compression and decompression.

func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter {
	return &huffmanBitWriter{
		writer:          w,
		literalEncoding: newHuffmanEncoder(literalCount),
		tmpLitEncoding:  newHuffmanEncoder(literalCount),
		codegenEncoding: newHuffmanEncoder(codegenCodeCount),
		offsetEncoding:  newHuffmanEncoder(offsetCodeCount),
	}
}

func (w *huffmanBitWriter) reset(writer io.Writer) {
	w.writer = writer
	w.bits, w.nbits, w.nbytes, w.err = 0, 0, 0, nil
	w.lastHeader = 0
	w.lastHuffMan = false
}

func (w *huffmanBitWriter) canReuse(t *tokens) (ok bool) {
	a := t.offHist[:offsetCodeCount]
	b := w.offsetEncoding.codes
	b = b[:len(a)]
	for i, v := range a {
		if v != 0 && b[i].zero() {
			return false
		}
	}

	a = t.extraHist[:literalCount-256]
	b = w.literalEncoding.codes[256:literalCount]
	b = b[:len(a)]
	for i, v := range a {
		if v != 0 && b[i].zero() {
			return false
		}
	}

	a = t.litHist[:256]
	b = w.literalEncoding.codes[:len(a)]
	for i, v := range a {
		if v != 0 && b[i].zero() {
			return false
		}
	}
	return true
}

func (w *huffmanBitWriter) flush() {
	if w.err != nil {
		w.nbits = 0
		return
	}
	if w.lastHeader > 0 {
		// We owe an EOB
		w.writeCode(w.literalEncoding.codes[endBlockMarker])
		w.lastHeader = 0
	}
	n := w.nbytes
	for w.nbits != 0 {
		w.bytes[n] = byte(w.bits)
		w.bits >>= 8
		if w.nbits > 8 { // Avoid underflow
			w.nbits -= 8
		} else {
			w.nbits = 0
		}
		n++
	}
	w.bits = 0
	w.write(w.bytes[:n])
	w.nbytes = 0
}

func (w *huffmanBitWriter) write(b []byte) {
	if w.err != nil {
		return
	}
	_, w.err = w.writer.Write(b)
}

func (w *huffmanBitWriter) writeBits(b int32, nb uint8) {
	w.bits |= uint64(b) << (w.nbits & 63)
	w.nbits += nb
	if w.nbits >= 48 {
		w.writeOutBits()
	}
}
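
// Illustration, not part of the upstream file: writeBits packs values
// LSB-first into the 64-bit accumulator, writeOutBits spills six bytes at
// a time once 48 bits are pending, and flush pads the final partial byte
// with zeros. A minimal sketch writing five bits to an arbitrary sink:
func exampleWriteBits(dst io.Writer) {
	w := newHuffmanBitWriter(dst)
	w.writeBits(0b101, 3) // becomes the low three bits of the first byte
	w.writeBits(0b01, 2)  // stacked above them: accumulator is now 0b01101
	w.flush()             // pads to a byte boundary; dst receives 0x0d
}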
func (w *huffmanBitWriter) writeBytes(bytes []byte) {
	if w.err != nil {
		return
	}
	n := w.nbytes
	if w.nbits&7 != 0 {
		w.err = InternalError("writeBytes with unfinished bits")
		return
	}
	for w.nbits != 0 {
		w.bytes[n] = byte(w.bits)
		w.bits >>= 8
		w.nbits -= 8
		n++
	}
	if n != 0 {
		w.write(w.bytes[:n])
	}
	w.nbytes = 0
	w.write(bytes)
}

// RFC 1951 3.2.7 specifies a special run-length encoding for specifying
// the literal and offset lengths arrays (which are concatenated into a single
// array). This method generates that run-length encoding.
//
// The result is written into the codegen array, and the frequencies
// of each code are written into the codegenFreq array.
// Codes 0-15 are single byte codes. Codes 16-18 are followed by additional
// information. Code badCode is an end marker.
//
//	numLiterals     The number of literals in literalEncoding
//	numOffsets      The number of offsets in offsetEncoding
//	litEnc, offEnc  The literal and offset encoder to use
func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int, litEnc, offEnc *huffmanEncoder) {
	for i := range w.codegenFreq {
		w.codegenFreq[i] = 0
	}
	// Note that we are using codegen both as a temporary variable for holding
	// a copy of the frequencies, and as the place where we put the result.
	// This is fine because the output is always shorter than the input used
	// so far.
	codegen := w.codegen[:] // cache
	// Copy the concatenated code sizes to codegen. Put a marker at the end.
	cgnl := codegen[:numLiterals]
	for i := range cgnl {
		cgnl[i] = litEnc.codes[i].len()
	}

	cgnl = codegen[numLiterals : numLiterals+numOffsets]
	for i := range cgnl {
		cgnl[i] = offEnc.codes[i].len()
	}
	codegen[numLiterals+numOffsets] = badCode

	size := codegen[0]
	count := 1
	outIndex := 0
	for inIndex := 1; size != badCode; inIndex++ {
		// INVARIANT: We have seen "count" copies of size that have not yet
		// had output generated for them.
		nextSize := codegen[inIndex]
		if nextSize == size {
			count++
			continue
		}
		// We need to generate codegen indicating "count" of size.
		if size != 0 {
			codegen[outIndex] = size
			outIndex++
			w.codegenFreq[size]++
			count--
			for count >= 3 {
				n := 6
				if n > count {
					n = count
				}
				codegen[outIndex] = 16
				outIndex++
				codegen[outIndex] = uint8(n - 3)
				outIndex++
				w.codegenFreq[16]++
				count -= n
			}
		} else {
			for count >= 11 {
				n := 138
				if n > count {
					n = count
				}
				codegen[outIndex] = 18
				outIndex++
				codegen[outIndex] = uint8(n - 11)
				outIndex++
				w.codegenFreq[18]++
				count -= n
			}
			if count >= 3 {
				// count >= 3 && count <= 10
				codegen[outIndex] = 17
				outIndex++
				codegen[outIndex] = uint8(count - 3)
				outIndex++
				w.codegenFreq[17]++
				count = 0
			}
		}
		count--
		for ; count >= 0; count-- {
			codegen[outIndex] = size
			outIndex++
			w.codegenFreq[size]++
		}
		// Set up invariant for next time through the loop.
		size = nextSize
		count = 1
	}
	// Marker indicating the end of the codegen.
	codegen[outIndex] = badCode
}
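
// Worked example (illustration, not upstream documentation): the code-length
// sequence {4, 4, 4, 4, 4, 0, 0, 0, 3} is emitted by generateCodegen as
//
//	4       literal code length
//	16, 1   "repeat previous length 3+1 = 4 times"
//	17, 0   "repeat zero 3+0 = 3 times"
//	3       literal code length
//
// so four codegen symbols (plus two repeat arguments) describe nine entries.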
func (w *huffmanBitWriter) codegens() int {
	numCodegens := len(w.codegenFreq)
	for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 {
		numCodegens--
	}
	return numCodegens
}

func (w *huffmanBitWriter) headerSize() (size, numCodegens int) {
	numCodegens = len(w.codegenFreq)
	for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 {
		numCodegens--
	}
	return 3 + 5 + 5 + 4 + (3 * numCodegens) +
		w.codegenEncoding.bitLength(w.codegenFreq[:]) +
		int(w.codegenFreq[16])*2 +
		int(w.codegenFreq[17])*3 +
		int(w.codegenFreq[18])*7, numCodegens
}

// dynamicReuseSize returns the size, in bits, of the dynamically encoded
// data when reusing the supplied encoders, excluding the header.
func (w *huffmanBitWriter) dynamicReuseSize(litEnc, offEnc *huffmanEncoder) (size int) {
	size = litEnc.bitLength(w.literalFreq[:]) +
		offEnc.bitLength(w.offsetFreq[:])
	return size
}

// dynamicSize returns the size of dynamically encoded data in bits.
func (w *huffmanBitWriter) dynamicSize(litEnc, offEnc *huffmanEncoder, extraBits int) (size, numCodegens int) {
	header, numCodegens := w.headerSize()
	size = header +
		litEnc.bitLength(w.literalFreq[:]) +
		offEnc.bitLength(w.offsetFreq[:]) +
		extraBits
	return size, numCodegens
}

// extraBitSize will return the number of bits that will be written
// as "extra" bits on matches.
func (w *huffmanBitWriter) extraBitSize() int {
	total := 0
	for i, n := range w.literalFreq[257:literalCount] {
		total += int(n) * int(lengthExtraBits[i&31])
	}
	for i, n := range w.offsetFreq[:offsetCodeCount] {
		total += int(n) * int(offsetExtraBits[i&31])
	}
	return total
}

// fixedSize returns the size of data encoded with the fixed Huffman tables, in bits.
func (w *huffmanBitWriter) fixedSize(extraBits int) int {
	return 3 +
		fixedLiteralEncoding.bitLength(w.literalFreq[:]) +
		fixedOffsetEncoding.bitLength(w.offsetFreq[:]) +
		extraBits
}

// storedSize calculates the stored size, including header.
// The function returns the size in bits and whether the input
// fits inside a single stored block.
func (w *huffmanBitWriter) storedSize(in []byte) (int, bool) {
	if in == nil {
		return 0, false
	}
	if len(in) <= maxStoreBlockSize {
		return (len(in) + 5) * 8, true
	}
	return 0, false
}

func (w *huffmanBitWriter) writeCode(c hcode) {
	// The function does not get inlined if we "& 63" the shift.
	w.bits |= c.code64() << (w.nbits & 63)
	w.nbits += c.len()
	if w.nbits >= 48 {
		w.writeOutBits()
	}
}

// writeOutBits will write bits to the buffer.
func (w *huffmanBitWriter) writeOutBits() {
	bits := w.bits
	w.bits >>= 48
	w.nbits -= 48
	n := w.nbytes

	// We overwrite eight bytes but only advance six; this is faster than
	// copying exactly six bytes, and bufferSize reserves the headroom.
	binary.LittleEndian.PutUint64(w.bytes[n:], bits)
	n += 6

	if n >= bufferFlushSize {
		if w.err != nil {
			n = 0
			return
		}
		w.write(w.bytes[:n])
		n = 0
	}

	w.nbytes = n
}
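
// Illustration, not part of the upstream file: the constant part of the
// headerSize formula above mirrors the RFC 1951 3.2.7 dynamic header layout
// that writeDynamicHeader emits below: 3 bits BFINAL+BTYPE, 5 bits HLIT
// (numLiterals-257), 5 bits HDIST (numOffsets-1), 4 bits HCLEN
// (numCodegens-4), then 3 bits per code-length code in codegenOrder,
// followed by the run-length-encoded code lengths themselves (codes 16, 17
// and 18 carrying 2, 3 and 7 extra bits respectively).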
// Write the header of a dynamic Huffman block to the output stream.
//
//	numLiterals  The number of literals specified in codegen
//	numOffsets   The number of offsets specified in codegen
//	numCodegens  The number of codegens used in codegen
func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, numCodegens int, isEof bool) {
	if w.err != nil {
		return
	}
	var firstBits int32 = 4
	if isEof {
		firstBits = 5
	}
	w.writeBits(firstBits, 3)
	w.writeBits(int32(numLiterals-257), 5)
	w.writeBits(int32(numOffsets-1), 5)
	w.writeBits(int32(numCodegens-4), 4)

	for i := 0; i < numCodegens; i++ {
		value := uint(w.codegenEncoding.codes[codegenOrder[i]].len())
		w.writeBits(int32(value), 3)
	}

	i := 0
	for {
		var codeWord = uint32(w.codegen[i])
		i++
		if codeWord == badCode {
			break
		}
		w.writeCode(w.codegenEncoding.codes[codeWord])

		switch codeWord {
		case 16:
			w.writeBits(int32(w.codegen[i]), 2)
			i++
		case 17:
			w.writeBits(int32(w.codegen[i]), 3)
			i++
		case 18:
			w.writeBits(int32(w.codegen[i]), 7)
			i++
		}
	}
}

// writeStoredHeader will write a stored header.
// If the stored block is only used for EOF,
// it is replaced with a fixed huffman block.
func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) {
	if w.err != nil {
		return
	}
	if w.lastHeader > 0 {
		// We owe an EOB
		w.writeCode(w.literalEncoding.codes[endBlockMarker])
		w.lastHeader = 0
	}

	// To write EOF, use a fixed encoding block. 10 bits instead of 5 bytes.
	if length == 0 && isEof {
		w.writeFixedHeader(isEof)
		// EOB: 7 bits, value: 0
		w.writeBits(0, 7)
		w.flush()
		return
	}

	var flag int32
	if isEof {
		flag = 1
	}
	w.writeBits(flag, 3)
	w.flush()
	w.writeBits(int32(length), 16)
	w.writeBits(int32(^uint16(length)), 16)
}
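
// Illustration, not part of the upstream file: after the 3-bit stored
// header is padded to a byte boundary by flush, the length is written as
// LEN followed by its one's complement NLEN, both little-endian, so a
// decoder can sanity-check the header. For example, length 1024 emits
// 0x0400 then 0xFBFF. Together with the padded header byte this is the
// five-byte overhead priced by storedSize.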
func (w *huffmanBitWriter) writeFixedHeader(isEof bool) {
	if w.err != nil {
		return
	}
	if w.lastHeader > 0 {
		// We owe an EOB
		w.writeCode(w.literalEncoding.codes[endBlockMarker])
		w.lastHeader = 0
	}

	// Indicate that we are a fixed Huffman block
	var value int32 = 2
	if isEof {
		value = 3
	}
	w.writeBits(value, 3)
}

// writeBlock will write a block of tokens with the smallest encoding.
// The original input can be supplied, and if the huffman encoded data
// is larger than the original bytes, the data will be written as a
// stored block.
// If the input is nil, the tokens will always be Huffman encoded.
func (w *huffmanBitWriter) writeBlock(tokens *tokens, eof bool, input []byte) {
	if w.err != nil {
		return
	}

	tokens.AddEOB()
	if w.lastHeader > 0 {
		// We owe an EOB
		w.writeCode(w.literalEncoding.codes[endBlockMarker])
		w.lastHeader = 0
	}
	numLiterals, numOffsets := w.indexTokens(tokens, false)
	w.generate()
	var extraBits int
	storedSize, storable := w.storedSize(input)
	if storable {
		extraBits = w.extraBitSize()
	}

	// Figure out smallest code.
	// Fixed Huffman baseline.
	var literalEncoding = fixedLiteralEncoding
	var offsetEncoding = fixedOffsetEncoding
	var size = math.MaxInt32
	if tokens.n < maxPredefinedTokens {
		size = w.fixedSize(extraBits)
	}

	// Dynamic Huffman?
	var numCodegens int

	// Generate codegen and codegenFrequencies, which indicates how to encode
	// the literalEncoding and the offsetEncoding.
	w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding)
	w.codegenEncoding.generate(w.codegenFreq[:], 7)
	dynamicSize, numCodegens := w.dynamicSize(w.literalEncoding, w.offsetEncoding, extraBits)

	if dynamicSize < size {
		size = dynamicSize
		literalEncoding = w.literalEncoding
		offsetEncoding = w.offsetEncoding
	}

	// Stored bytes?
	if storable && storedSize <= size {
		w.writeStoredHeader(len(input), eof)
		w.writeBytes(input)
		return
	}

	// Huffman.
	if literalEncoding == fixedLiteralEncoding {
		w.writeFixedHeader(eof)
	} else {
		w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
	}

	// Write the tokens.
	w.writeTokens(tokens.Slice(), literalEncoding.codes, offsetEncoding.codes)
}
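
// Illustration, not part of the upstream file: a minimal sketch of how a
// caller might use writeBlock, assuming t already holds the block's tokens.
func exampleWriteBlock(w *huffmanBitWriter, t *tokens, original []byte, last bool) {
	// Passing the original bytes enables the stored fallback when Huffman
	// coding would expand the data; passing nil forces Huffman output.
	w.writeBlock(t, last, original)
	w.flush() // pad and push any pending bits to the underlying writer
}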
// writeBlockDynamic encodes a block using a dynamic Huffman table.
// This should be used if the symbols used have a disproportionate
// histogram distribution.
// If input is supplied and the compression savings are below 1/16th of the
// input size the block is stored.
func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []byte, sync bool) {
	if w.err != nil {
		return
	}

	sync = sync || eof
	if sync {
		tokens.AddEOB()
	}

	// We cannot reuse a pure Huffman table, and EOF must be marked.
	if (w.lastHuffMan || eof) && w.lastHeader > 0 {
		// We will not try to reuse.
		w.writeCode(w.literalEncoding.codes[endBlockMarker])
		w.lastHeader = 0
		w.lastHuffMan = false
	}

	// fillReuse enables filling of empty values.
	// This will make encodings always reusable without testing.
	// However, this does not appear to be beneficial in most cases.
	const fillReuse = false

	// Check if we can reuse...
	if !fillReuse && w.lastHeader > 0 && !w.canReuse(tokens) {
		w.writeCode(w.literalEncoding.codes[endBlockMarker])
		w.lastHeader = 0
	}

	numLiterals, numOffsets := w.indexTokens(tokens, !sync)
	extraBits := 0
	ssize, storable := w.storedSize(input)

	const usePrefs = true
	if storable || w.lastHeader > 0 {
		extraBits = w.extraBitSize()
	}

	var size int

	// Check if we should reuse.
	if w.lastHeader > 0 {
		// Estimate size for using a new table.
		// Use the previous header size as the best estimate.
		newSize := w.lastHeader + tokens.EstimatedBits()
		newSize += int(w.literalEncoding.codes[endBlockMarker].len()) + newSize>>w.logNewTablePenalty

		// The estimated size is calculated as an optimal table.
		// We add a penalty to make it more realistic and re-use a bit more.
		reuseSize := w.dynamicReuseSize(w.literalEncoding, w.offsetEncoding) + extraBits

		// Check if a new table is better.
		if newSize < reuseSize {
			// Write the EOB we owe.
			w.writeCode(w.literalEncoding.codes[endBlockMarker])
			size = newSize
			w.lastHeader = 0
		} else {
			size = reuseSize
		}

		if tokens.n < maxPredefinedTokens {
			if preSize := w.fixedSize(extraBits) + 7; usePrefs && preSize < size {
				// Check if we get a reasonable size decrease.
				if storable && ssize <= size {
					w.writeStoredHeader(len(input), eof)
					w.writeBytes(input)
					return
				}
				w.writeFixedHeader(eof)
				if !sync {
					tokens.AddEOB()
				}
				w.writeTokens(tokens.Slice(), fixedLiteralEncoding.codes, fixedOffsetEncoding.codes)
				return
			}
		}
		// Check if we get a reasonable size decrease.
		if storable && ssize <= size {
			w.writeStoredHeader(len(input), eof)
			w.writeBytes(input)
			return
		}
	}

	// We want a new block/table
	if w.lastHeader == 0 {
		if fillReuse && !sync {
			w.fillTokens()
			numLiterals, numOffsets = maxNumLit, maxNumDist
		} else {
			w.literalFreq[endBlockMarker] = 1
		}

		w.generate()
		// Generate codegen and codegenFrequencies, which indicates how to encode
		// the literalEncoding and the offsetEncoding.
		w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding)
		w.codegenEncoding.generate(w.codegenFreq[:], 7)

		var numCodegens int
		if fillReuse && !sync {
			// Reindex for accurate size...
			w.indexTokens(tokens, true)
		}
		size, numCodegens = w.dynamicSize(w.literalEncoding, w.offsetEncoding, extraBits)

		// Store predefined, if we don't get a reasonable improvement.
		if tokens.n < maxPredefinedTokens {
			if preSize := w.fixedSize(extraBits); usePrefs && preSize <= size {
				// Store bytes, if we don't get an improvement.
				if storable && ssize <= preSize {
					w.writeStoredHeader(len(input), eof)
					w.writeBytes(input)
					return
				}
				w.writeFixedHeader(eof)
				if !sync {
					tokens.AddEOB()
				}
				w.writeTokens(tokens.Slice(), fixedLiteralEncoding.codes, fixedOffsetEncoding.codes)
				return
			}
		}

		if storable && ssize <= size {
			// Store bytes, if we don't get an improvement.
			w.writeStoredHeader(len(input), eof)
			w.writeBytes(input)
			return
		}

		// Write Huffman table.
		w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
		if !sync {
			w.lastHeader, _ = w.headerSize()
		}
		w.lastHuffMan = false
	}

	if sync {
		w.lastHeader = 0
	}
	// Write the tokens.
	w.writeTokens(tokens.Slice(), w.literalEncoding.codes, w.offsetEncoding.codes)
}
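
// Illustration, not part of the upstream file: the new-table estimate above
// is inflated by newSize >> logNewTablePenalty, i.e. by 1/2^p of the
// optimal size. With logNewTablePenalty = 4, for instance, a fresh table
// must beat reuse by roughly 6% (on top of the estimated header and the
// owed EOB) before it is chosen, biasing the writer toward reuse.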
func (w *huffmanBitWriter) fillTokens() {
	for i, v := range w.literalFreq[:literalCount] {
		if v == 0 {
			w.literalFreq[i] = 1
		}
	}
	for i, v := range w.offsetFreq[:offsetCodeCount] {
		if v == 0 {
			w.offsetFreq[i] = 1
		}
	}
}

// indexTokens indexes a slice of tokens and updates
// literalFreq and offsetFreq.
// The number of literal and offset tokens is returned.
func (w *huffmanBitWriter) indexTokens(t *tokens, filled bool) (numLiterals, numOffsets int) {
	//copy(w.literalFreq[:], t.litHist[:])
	*(*[256]uint16)(w.literalFreq[:]) = t.litHist
	//copy(w.literalFreq[256:], t.extraHist[:])
	*(*[32]uint16)(w.literalFreq[256:]) = t.extraHist
	w.offsetFreq = t.offHist

	if t.n == 0 {
		return
	}
	if filled {
		return maxNumLit, maxNumDist
	}
	// get the number of literals
	numLiterals = len(w.literalFreq)
	for w.literalFreq[numLiterals-1] == 0 {
		numLiterals--
	}
	// get the number of offsets
	numOffsets = len(w.offsetFreq)
	for numOffsets > 0 && w.offsetFreq[numOffsets-1] == 0 {
		numOffsets--
	}
	if numOffsets == 0 {
		// We haven't found a single match. If we want to go with the dynamic encoding,
		// we should count at least one offset to be sure that the offset huffman tree could be encoded.
		w.offsetFreq[0] = 1
		numOffsets = 1
	}
	return
}

func (w *huffmanBitWriter) generate() {
	w.literalEncoding.generate(w.literalFreq[:literalCount], 15)
	w.offsetEncoding.generate(w.offsetFreq[:offsetCodeCount], 15)
}

// writeTokens writes a slice of tokens to the output.
// codes for literal and offset encoding must be supplied.
func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) {
	if w.err != nil {
		return
	}
	if len(tokens) == 0 {
		return
	}

	// Only the last token may be endBlockMarker.
	var deferEOB bool
	if tokens[len(tokens)-1] == endBlockMarker {
		tokens = tokens[:len(tokens)-1]
		deferEOB = true
	}

	// Create slices up to the next power of two to avoid bounds checks.
	lits := leCodes[:256]
	offs := oeCodes[:32]
	lengths := leCodes[lengthCodesStart:]
	lengths = lengths[:32]

	// Go 1.16 LOVES having these on stack.
	bits, nbits, nbytes := w.bits, w.nbits, w.nbytes

	for _, t := range tokens {
		if t < 256 {
			//w.writeCode(lits[t.literal()])
			c := lits[t]
			bits |= c.code64() << (nbits & 63)
			nbits += c.len()
			if nbits >= 48 {
				binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
				//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
				bits >>= 48
				nbits -= 48
				nbytes += 6
				if nbytes >= bufferFlushSize {
					if w.err != nil {
						nbytes = 0
						return
					}
					_, w.err = w.writer.Write(w.bytes[:nbytes])
					nbytes = 0
				}
			}
			continue
		}

		// Write the length
		length := t.length()
		lengthCode := lengthCode(length) & 31
		if false {
			w.writeCode(lengths[lengthCode])
		} else {
			// inlined
			c := lengths[lengthCode]
			bits |= c.code64() << (nbits & 63)
			nbits += c.len()
			if nbits >= 48 {
				binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
				//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
				bits >>= 48
				nbits -= 48
				nbytes += 6
				if nbytes >= bufferFlushSize {
					if w.err != nil {
						nbytes = 0
						return
					}
					_, w.err = w.writer.Write(w.bytes[:nbytes])
					nbytes = 0
				}
			}
		}

		if lengthCode >= lengthExtraBitsMinCode {
			extraLengthBits := lengthExtraBits[lengthCode]
			//w.writeBits(extraLength, extraLengthBits)
			extraLength := int32(length - lengthBase[lengthCode])
			bits |= uint64(extraLength) << (nbits & 63)
			nbits += extraLengthBits
			if nbits >= 48 {
				binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
				//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
				bits >>= 48
				nbits -= 48
				nbytes += 6
				if nbytes >= bufferFlushSize {
					if w.err != nil {
						nbytes = 0
						return
					}
					_, w.err = w.writer.Write(w.bytes[:nbytes])
					nbytes = 0
				}
			}
		}
		// Write the offset
		offset := t.offset()
		offsetCode := (offset >> 16) & 31
		if false {
			w.writeCode(offs[offsetCode])
		} else {
			// inlined
			c := offs[offsetCode]
			bits |= c.code64() << (nbits & 63)
			nbits += c.len()
			if nbits >= 48 {
				binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
				//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
				bits >>= 48
				nbits -= 48
				nbytes += 6
				if nbytes >= bufferFlushSize {
					if w.err != nil {
						nbytes = 0
						return
					}
					_, w.err = w.writer.Write(w.bytes[:nbytes])
					nbytes = 0
				}
			}
		}

		if offsetCode >= offsetExtraBitsMinCode {
			offsetComb := offsetCombined[offsetCode]
			//w.writeBits(extraOffset, extraOffsetBits)
			bits |= uint64((offset-(offsetComb>>8))&matchOffsetOnlyMask) << (nbits & 63)
			nbits += uint8(offsetComb)
			if nbits >= 48 {
				binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
				//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
				bits >>= 48
				nbits -= 48
				nbytes += 6
				if nbytes >= bufferFlushSize {
					if w.err != nil {
						nbytes = 0
						return
					}
					_, w.err = w.writer.Write(w.bytes[:nbytes])
					nbytes = 0
				}
			}
		}
	}
	// Restore...
	w.bits, w.nbits, w.nbytes = bits, nbits, nbytes

	if deferEOB {
		w.writeCode(leCodes[endBlockMarker])
	}
}
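
// Worked example (illustration, not upstream documentation): token lengths
// are stored as the match length minus 3. A stored length of 18 (a 21-byte
// match) maps to length code index 12, i.e. symbol 257+12 = 269:
// lengthBase[12] = 16 and lengthExtraBits[12] = 2, so writeTokens emits the
// Huffman code for symbol 269 followed by the extra value 18-16 = 2 in two
// bits, matching RFC 1951's table (code 269: lengths 19-22, 2 extra bits).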
// huffOffset is a static offset encoder used for huffman only encoding.
// It can be reused since we will not be encoding offset values.
var huffOffset *huffmanEncoder

func init() {
	w := newHuffmanBitWriter(nil)
	w.offsetFreq[0] = 1
	huffOffset = newHuffmanEncoder(offsetCodeCount)
	huffOffset.generate(w.offsetFreq[:offsetCodeCount], 15)
}
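
// Illustration, not part of the upstream file: a minimal sketch of driving
// the literal-only path below. Huffman-only blocks never emit offsets, so
// the shared huffOffset encoder above supplies the single offset code the
// dynamic header must still describe.
func exampleHuffOnly(w *huffmanBitWriter, data []byte, last bool) {
	// sync=true ends the block with an EOB so a decoder can consume it
	// immediately; with sync=false the table may be reused by later calls.
	w.writeBlockHuff(last, data, true)
	w.flush()
}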
// writeBlockHuff encodes a block of bytes as either
// Huffman encoded literals or uncompressed bytes if the
// result gains very little from compression.
func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
	if w.err != nil {
		return
	}

	// Clear histogram
	for i := range w.literalFreq[:] {
		w.literalFreq[i] = 0
	}
	if !w.lastHuffMan {
		for i := range w.offsetFreq[:] {
			w.offsetFreq[i] = 0
		}
	}

	const numLiterals = endBlockMarker + 1
	const numOffsets = 1

	// Add everything as literals
	// We have to estimate the header size.
	// Assume header is around 70 bytes:
	// https://stackoverflow.com/a/25454430
	const guessHeaderSizeBits = 70 * 8
	histogram(input, w.literalFreq[:numLiterals])
	ssize, storable := w.storedSize(input)
	if storable && len(input) > 1024 {
		// Quick check for incompressible content.
		abs := float64(0)
		avg := float64(len(input)) / 256
		max := float64(len(input) * 2)
		for _, v := range w.literalFreq[:256] {
			diff := float64(v) - avg
			abs += diff * diff
			if abs > max {
				break
			}
		}
		if abs < max {
			if debugDeflate {
				fmt.Println("stored", abs, "<", max)
			}
			// No chance we can compress this...
			w.writeStoredHeader(len(input), eof)
			w.writeBytes(input)
			return
		}
	}
	w.literalFreq[endBlockMarker] = 1
	w.tmpLitEncoding.generate(w.literalFreq[:numLiterals], 15)
	estBits := w.tmpLitEncoding.canReuseBits(w.literalFreq[:numLiterals])
	if estBits < math.MaxInt32 {
		estBits += w.lastHeader
		if w.lastHeader == 0 {
			estBits += guessHeaderSizeBits
		}
		estBits += estBits >> w.logNewTablePenalty
	}

	// Store bytes, if we don't get a reasonable improvement.
	if storable && ssize <= estBits {
		if debugDeflate {
			fmt.Println("stored,", ssize, "<=", estBits)
		}
		w.writeStoredHeader(len(input), eof)
		w.writeBytes(input)
		return
	}

	if w.lastHeader > 0 {
		reuseSize := w.literalEncoding.canReuseBits(w.literalFreq[:256])

		if estBits < reuseSize {
			if debugDeflate {
				fmt.Println("NOT reusing, reuse:", reuseSize/8, "> new:", estBits/8, "header est:", w.lastHeader/8, "bytes")
			}
			// We owe an EOB
			w.writeCode(w.literalEncoding.codes[endBlockMarker])
			w.lastHeader = 0
		} else if debugDeflate {
			fmt.Println("reusing, reuse:", reuseSize/8, "> new:", estBits/8, "- header est:", w.lastHeader/8)
		}
	}

	count := 0
	if w.lastHeader == 0 {
		// Use the temp encoding, so swap.
		w.literalEncoding, w.tmpLitEncoding = w.tmpLitEncoding, w.literalEncoding
		// Generate codegen and codegenFrequencies, which indicates how to encode
		// the literalEncoding and the offsetEncoding.
		w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, huffOffset)
		w.codegenEncoding.generate(w.codegenFreq[:], 7)
		numCodegens := w.codegens()

		// Huffman.
		w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
		w.lastHuffMan = true
		w.lastHeader, _ = w.headerSize()
		if debugDeflate {
			count += w.lastHeader
			fmt.Println("header:", count/8)
		}
	}

	encoding := w.literalEncoding.codes[:256]
	// Go 1.16 LOVES having these on stack. At least 1.5x the speed.
	bits, nbits, nbytes := w.bits, w.nbits, w.nbytes

	if debugDeflate {
		count -= int(nbytes)*8 + int(nbits)
	}
	// Unroll, write 3 codes/loop.
	// Fastest number of unrolls.
	for len(input) > 3 {
		// We must have at least 48 bits free.
		if nbits >= 8 {
			n := nbits >> 3
			binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
			bits >>= (n * 8) & 63
			nbits -= n * 8
			nbytes += n
		}
		if nbytes >= bufferFlushSize {
			if w.err != nil {
				nbytes = 0
				return
			}
			if debugDeflate {
				count += int(nbytes) * 8
			}
			_, w.err = w.writer.Write(w.bytes[:nbytes])
			nbytes = 0
		}
		a, b := encoding[input[0]], encoding[input[1]]
		bits |= a.code64() << (nbits & 63)
		bits |= b.code64() << ((nbits + a.len()) & 63)
		c := encoding[input[2]]
		nbits += b.len() + a.len()
		bits |= c.code64() << (nbits & 63)
		nbits += c.len()
		input = input[3:]
	}

	// Remaining...
	for _, t := range input {
		if nbits >= 48 {
			binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
			//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
			bits >>= 48
			nbits -= 48
			nbytes += 6
			if nbytes >= bufferFlushSize {
				if w.err != nil {
					nbytes = 0
					return
				}
				if debugDeflate {
					count += int(nbytes) * 8
				}
				_, w.err = w.writer.Write(w.bytes[:nbytes])
				nbytes = 0
			}
		}
		// Bitwriting inlined, ~30% speedup
		c := encoding[t]
		bits |= c.code64() << (nbits & 63)

		nbits += c.len()
		if debugDeflate {
			count += int(c.len())
		}
	}
	// Restore...
	w.bits, w.nbits, w.nbytes = bits, nbits, nbytes

	if debugDeflate {
		nb := count + int(nbytes)*8 + int(nbits)
		fmt.Println("wrote", nb, "bits,", nb/8, "bytes.")
	}
	// Flush if needed to have space.
	if w.nbits >= 48 {
		w.writeOutBits()
	}

	if eof || sync {
		w.writeCode(w.literalEncoding.codes[endBlockMarker])
		w.lastHeader = 0
		w.lastHuffMan = false
	}
}
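
// Illustration, not part of the upstream file: the quick incompressibility
// check in writeBlockHuff sums the squared deviations of the byte histogram
// from a uniform distribution. Uniformly random input keeps each count near
// len(input)/256, so the sum stays below the 2*len(input) threshold and the
// block is stored; skewed (compressible) input pushes the sum past the
// threshold quickly, which is why the loop can break early.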