gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

huffman_bit_writer.go (30951B)


      1 // Copyright 2009 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package flate
      6 
      7 import (
      8 	"encoding/binary"
      9 	"fmt"
     10 	"io"
     11 	"math"
     12 )
     13 
const (
	// The largest offset code.
	offsetCodeCount = 30

	// The special code used to mark the end of a block.
	endBlockMarker = 256

	// The first length code.
	lengthCodesStart = 257

	// The number of codegen codes.
	codegenCodeCount = 19
	// badCode marks the end of the codegen sequence; it is never a valid code size.
	badCode          = 255

	// maxPredefinedTokens is the maximum number of tokens
	// where we check if fixed size is smaller.
	maxPredefinedTokens = 250

	// bufferFlushSize indicates the buffer size
	// after which bytes are flushed to the writer.
	// Should preferably be a multiple of 6, since
	// we accumulate 6 bytes between writes to the buffer.
	bufferFlushSize = 246

	// bufferSize is the actual output byte buffer size.
	// It must have additional headroom for a flush
	// which can contain up to 8 bytes.
	bufferSize = bufferFlushSize + 8
)
     43 
// Minimum length code that emits bits.
const lengthExtraBitsMinCode = 8

// The number of extra bits needed by length code X - LENGTH_CODES_START.
var lengthExtraBits = [32]uint8{
	/* 257 */ 0, 0, 0,
	/* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2,
	/* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,
	/* 280 */ 4, 5, 5, 5, 5, 0,
}

// The length indicated by length code X - LENGTH_CODES_START.
var lengthBase = [32]uint8{
	0, 1, 2, 3, 4, 5, 6, 7, 8, 10,
	12, 14, 16, 20, 24, 28, 32, 40, 48, 56,
	64, 80, 96, 112, 128, 160, 192, 224, 255,
}

// Minimum offset code that emits bits.
const offsetExtraBitsMinCode = 4

// offset code word extra bits.
var offsetExtraBits = [32]int8{
	0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
	4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
	9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
	/* extended window */
	14, 14,
}

// offsetCombined packs, for each offset code, the extra-bit count in the
// low 8 bits and the offset base value shifted left by 8. Filled in by init.
var offsetCombined = [32]uint32{}
     75 
     76 func init() {
     77 	var offsetBase = [32]uint32{
     78 		/* normal deflate */
     79 		0x000000, 0x000001, 0x000002, 0x000003, 0x000004,
     80 		0x000006, 0x000008, 0x00000c, 0x000010, 0x000018,
     81 		0x000020, 0x000030, 0x000040, 0x000060, 0x000080,
     82 		0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300,
     83 		0x000400, 0x000600, 0x000800, 0x000c00, 0x001000,
     84 		0x001800, 0x002000, 0x003000, 0x004000, 0x006000,
     85 
     86 		/* extended window */
     87 		0x008000, 0x00c000,
     88 	}
     89 
     90 	for i := range offsetCombined[:] {
     91 		// Don't use extended window values...
     92 		if offsetExtraBits[i] == 0 || offsetBase[i] > 0x006000 {
     93 			continue
     94 		}
     95 		offsetCombined[i] = uint32(offsetExtraBits[i]) | (offsetBase[i] << 8)
     96 	}
     97 }
     98 
// The odd order in which the codegen code sizes are written,
// as specified by RFC 1951 section 3.2.7.
var codegenOrder = []uint32{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
    101 
type huffmanBitWriter struct {
	// writer is the underlying writer.
	// Do not use it directly; use the write method, which ensures
	// that Write errors are sticky.
	writer io.Writer

	// Data waiting to be written is bytes[0:nbytes]
	// and then the low nbits of bits.
	bits            uint64
	nbits           uint8
	nbytes          uint8
	lastHuffMan     bool
	literalEncoding *huffmanEncoder
	tmpLitEncoding  *huffmanEncoder
	offsetEncoding  *huffmanEncoder
	codegenEncoding *huffmanEncoder
	err             error
	// lastHeader, when > 0, is the bit size of the last written dynamic
	// header; it also signals that an EOB for that table is still owed.
	lastHeader      int
	// logNewTablePenalty is the log2 of the penalty added when estimating
	// the cost of a new table: the estimate is increased by
	// estimate>>logNewTablePenalty (see writeBlockDynamic). At 0 the
	// penalty equals the estimate itself, so a reused table is strongly favored.
	logNewTablePenalty uint
	bytes              [256 + 8]byte
	literalFreq        [lengthCodesStart + 32]uint16
	offsetFreq         [32]uint16
	codegenFreq        [codegenCodeCount]uint16

	// codegen must have an extra space for the final symbol.
	codegen [literalCount + offsetCodeCount + 1]uint8
}
    130 
    131 // Huffman reuse.
    132 //
    133 // The huffmanBitWriter supports reusing huffman tables and thereby combining block sections.
    134 //
    135 // This is controlled by several variables:
    136 //
    137 // If lastHeader is non-zero the Huffman table can be reused.
    138 // This also indicates that a Huffman table has been generated that can output all
    139 // possible symbols.
// It also indicates that an EOB has not yet been emitted, so if a new table is generated
    141 // an EOB with the previous table must be written.
    142 //
    143 // If lastHuffMan is set, a table for outputting literals has been generated and offsets are invalid.
    144 //
// An incoming block estimates the output size of a new ('fresh') table by calculating the
// optimal size and adding a penalty in 'logNewTablePenalty'.
    147 // A Huffman table is not optimal, which is why we add a penalty, and generating a new table
    148 // is slower both for compression and decompression.
    149 
    150 func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter {
    151 	return &huffmanBitWriter{
    152 		writer:          w,
    153 		literalEncoding: newHuffmanEncoder(literalCount),
    154 		tmpLitEncoding:  newHuffmanEncoder(literalCount),
    155 		codegenEncoding: newHuffmanEncoder(codegenCodeCount),
    156 		offsetEncoding:  newHuffmanEncoder(offsetCodeCount),
    157 	}
    158 }
    159 
    160 func (w *huffmanBitWriter) reset(writer io.Writer) {
    161 	w.writer = writer
    162 	w.bits, w.nbits, w.nbytes, w.err = 0, 0, 0, nil
    163 	w.lastHeader = 0
    164 	w.lastHuffMan = false
    165 }
    166 
    167 func (w *huffmanBitWriter) canReuse(t *tokens) (ok bool) {
    168 	a := t.offHist[:offsetCodeCount]
    169 	b := w.offsetEncoding.codes
    170 	b = b[:len(a)]
    171 	for i, v := range a {
    172 		if v != 0 && b[i].zero() {
    173 			return false
    174 		}
    175 	}
    176 
    177 	a = t.extraHist[:literalCount-256]
    178 	b = w.literalEncoding.codes[256:literalCount]
    179 	b = b[:len(a)]
    180 	for i, v := range a {
    181 		if v != 0 && b[i].zero() {
    182 			return false
    183 		}
    184 	}
    185 
    186 	a = t.litHist[:256]
    187 	b = w.literalEncoding.codes[:len(a)]
    188 	for i, v := range a {
    189 		if v != 0 && b[i].zero() {
    190 			return false
    191 		}
    192 	}
    193 	return true
    194 }
    195 
// flush writes any owed EOB and then drains all buffered bits and bytes
// to the underlying writer, leaving the bit accumulator empty.
func (w *huffmanBitWriter) flush() {
	if w.err != nil {
		w.nbits = 0
		return
	}
	if w.lastHeader > 0 {
		// We owe an EOB
		w.writeCode(w.literalEncoding.codes[endBlockMarker])
		w.lastHeader = 0
	}
	n := w.nbytes
	// Emit the accumulator one byte at a time; the final byte may be
	// only partially filled.
	for w.nbits != 0 {
		w.bytes[n] = byte(w.bits)
		w.bits >>= 8
		if w.nbits > 8 { // Avoid underflow
			w.nbits -= 8
		} else {
			w.nbits = 0
		}
		n++
	}
	w.bits = 0
	w.write(w.bytes[:n])
	w.nbytes = 0
}
    221 
    222 func (w *huffmanBitWriter) write(b []byte) {
    223 	if w.err != nil {
    224 		return
    225 	}
    226 	_, w.err = w.writer.Write(b)
    227 }
    228 
// writeBits appends the low nb bits of b to the bit accumulator,
// flushing to the byte buffer once at least 48 bits are pending.
func (w *huffmanBitWriter) writeBits(b int32, nb uint8) {
	// Masking with 63 keeps the shift amount provably in range.
	w.bits |= uint64(b) << (w.nbits & 63)
	w.nbits += nb
	if w.nbits >= 48 {
		w.writeOutBits()
	}
}
    236 
// writeBytes flushes any buffered whole bytes and then writes bytes
// directly to the underlying writer. The bit accumulator must be
// byte-aligned when this is called.
func (w *huffmanBitWriter) writeBytes(bytes []byte) {
	if w.err != nil {
		return
	}
	n := w.nbytes
	if w.nbits&7 != 0 {
		w.err = InternalError("writeBytes with unfinished bits")
		return
	}
	// Drain the (byte-aligned) accumulator into the buffer.
	for w.nbits != 0 {
		w.bytes[n] = byte(w.bits)
		w.bits >>= 8
		w.nbits -= 8
		n++
	}
	if n != 0 {
		w.write(w.bytes[:n])
	}
	w.nbytes = 0
	w.write(bytes)
}
    258 
// RFC 1951 3.2.7 specifies a special run-length encoding for specifying
// the literal and offset lengths arrays (which are concatenated into a single
// array).  This method generates that run-length encoding.
//
// The result is written into the codegen array, and the frequencies
// of each code is written into the codegenFreq array.
// Codes 0-15 are single byte codes. Codes 16-18 are followed by additional
// information. Code badCode is an end marker
//
//	numLiterals      The number of literals in literalEncoding
//	numOffsets       The number of offsets in offsetEncoding
//	litenc, offenc   The literal and offset encoder to use
func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int, litEnc, offEnc *huffmanEncoder) {
	for i := range w.codegenFreq {
		w.codegenFreq[i] = 0
	}
	// Note that we are using codegen both as a temporary variable for holding
	// a copy of the frequencies, and as the place where we put the result.
	// This is fine because the output is always shorter than the input used
	// so far.
	codegen := w.codegen[:] // cache
	// Copy the concatenated code sizes to codegen. Put a marker at the end.
	cgnl := codegen[:numLiterals]
	for i := range cgnl {
		cgnl[i] = litEnc.codes[i].len()
	}

	cgnl = codegen[numLiterals : numLiterals+numOffsets]
	for i := range cgnl {
		cgnl[i] = offEnc.codes[i].len()
	}
	codegen[numLiterals+numOffsets] = badCode

	size := codegen[0]
	count := 1
	outIndex := 0
	for inIndex := 1; size != badCode; inIndex++ {
		// INVARIANT: We have seen "count" copies of size that have not yet
		// had output generated for them.
		nextSize := codegen[inIndex]
		if nextSize == size {
			count++
			continue
		}
		// We need to generate codegen indicating "count" of size.
		if size != 0 {
			codegen[outIndex] = size
			outIndex++
			w.codegenFreq[size]++
			count--
			// Code 16 repeats the previous nonzero size 3-6 times.
			for count >= 3 {
				n := 6
				if n > count {
					n = count
				}
				codegen[outIndex] = 16
				outIndex++
				codegen[outIndex] = uint8(n - 3)
				outIndex++
				w.codegenFreq[16]++
				count -= n
			}
		} else {
			// Code 18 encodes a run of 11-138 zero sizes.
			for count >= 11 {
				n := 138
				if n > count {
					n = count
				}
				codegen[outIndex] = 18
				outIndex++
				codegen[outIndex] = uint8(n - 11)
				outIndex++
				w.codegenFreq[18]++
				count -= n
			}
			// Code 17 encodes a run of 3-10 zero sizes.
			if count >= 3 {
				// count >= 3 && count <= 10
				codegen[outIndex] = 17
				outIndex++
				codegen[outIndex] = uint8(count - 3)
				outIndex++
				w.codegenFreq[17]++
				count = 0
			}
		}
		count--
		// Emit any remaining copies (fewer than 3) literally.
		for ; count >= 0; count-- {
			codegen[outIndex] = size
			outIndex++
			w.codegenFreq[size]++
		}
		// Set up invariant for next time through the loop.
		size = nextSize
		count = 1
	}
	// Marker indicating the end of the codegen.
	codegen[outIndex] = badCode
}
    357 
    358 func (w *huffmanBitWriter) codegens() int {
    359 	numCodegens := len(w.codegenFreq)
    360 	for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 {
    361 		numCodegens--
    362 	}
    363 	return numCodegens
    364 }
    365 
    366 func (w *huffmanBitWriter) headerSize() (size, numCodegens int) {
    367 	numCodegens = len(w.codegenFreq)
    368 	for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 {
    369 		numCodegens--
    370 	}
    371 	return 3 + 5 + 5 + 4 + (3 * numCodegens) +
    372 		w.codegenEncoding.bitLength(w.codegenFreq[:]) +
    373 		int(w.codegenFreq[16])*2 +
    374 		int(w.codegenFreq[17])*3 +
    375 		int(w.codegenFreq[18])*7, numCodegens
    376 }
    377 
    378 // dynamicSize returns the size of dynamically encoded data in bits.
    379 func (w *huffmanBitWriter) dynamicReuseSize(litEnc, offEnc *huffmanEncoder) (size int) {
    380 	size = litEnc.bitLength(w.literalFreq[:]) +
    381 		offEnc.bitLength(w.offsetFreq[:])
    382 	return size
    383 }
    384 
    385 // dynamicSize returns the size of dynamically encoded data in bits.
    386 func (w *huffmanBitWriter) dynamicSize(litEnc, offEnc *huffmanEncoder, extraBits int) (size, numCodegens int) {
    387 	header, numCodegens := w.headerSize()
    388 	size = header +
    389 		litEnc.bitLength(w.literalFreq[:]) +
    390 		offEnc.bitLength(w.offsetFreq[:]) +
    391 		extraBits
    392 	return size, numCodegens
    393 }
    394 
    395 // extraBitSize will return the number of bits that will be written
    396 // as "extra" bits on matches.
    397 func (w *huffmanBitWriter) extraBitSize() int {
    398 	total := 0
    399 	for i, n := range w.literalFreq[257:literalCount] {
    400 		total += int(n) * int(lengthExtraBits[i&31])
    401 	}
    402 	for i, n := range w.offsetFreq[:offsetCodeCount] {
    403 		total += int(n) * int(offsetExtraBits[i&31])
    404 	}
    405 	return total
    406 }
    407 
    408 // fixedSize returns the size of dynamically encoded data in bits.
    409 func (w *huffmanBitWriter) fixedSize(extraBits int) int {
    410 	return 3 +
    411 		fixedLiteralEncoding.bitLength(w.literalFreq[:]) +
    412 		fixedOffsetEncoding.bitLength(w.offsetFreq[:]) +
    413 		extraBits
    414 }
    415 
    416 // storedSize calculates the stored size, including header.
    417 // The function returns the size in bits and whether the block
    418 // fits inside a single block.
    419 func (w *huffmanBitWriter) storedSize(in []byte) (int, bool) {
    420 	if in == nil {
    421 		return 0, false
    422 	}
    423 	if len(in) <= maxStoreBlockSize {
    424 		return (len(in) + 5) * 8, true
    425 	}
    426 	return 0, false
    427 }
    428 
// writeCode appends the Huffman code c (bit pattern plus length) to the
// accumulator, flushing once at least 48 bits are pending.
func (w *huffmanBitWriter) writeCode(c hcode) {
	// The function does not get inlined if we "& 63" the shift.
	w.bits |= c.code64() << (w.nbits & 63)
	w.nbits += c.len()
	if w.nbits >= 48 {
		w.writeOutBits()
	}
}
    437 
    438 // writeOutBits will write bits to the buffer.
    439 func (w *huffmanBitWriter) writeOutBits() {
    440 	bits := w.bits
    441 	w.bits >>= 48
    442 	w.nbits -= 48
    443 	n := w.nbytes
    444 
    445 	// We over-write, but faster...
    446 	binary.LittleEndian.PutUint64(w.bytes[n:], bits)
    447 	n += 6
    448 
    449 	if n >= bufferFlushSize {
    450 		if w.err != nil {
    451 			n = 0
    452 			return
    453 		}
    454 		w.write(w.bytes[:n])
    455 		n = 0
    456 	}
    457 
    458 	w.nbytes = n
    459 }
    460 
// Write the header of a dynamic Huffman block to the output stream.
//
//	numLiterals  The number of literals specified in codegen
//	numOffsets   The number of offsets specified in codegen
//	numCodegens  The number of codegens used in codegen
func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, numCodegens int, isEof bool) {
	if w.err != nil {
		return
	}
	// 3 header bits: BFINAL in the low bit, BTYPE=10 (dynamic) above it.
	var firstBits int32 = 4
	if isEof {
		firstBits = 5
	}
	w.writeBits(firstBits, 3)
	// HLIT, HDIST and HCLEN counts, biased as specified in RFC 1951 3.2.7.
	w.writeBits(int32(numLiterals-257), 5)
	w.writeBits(int32(numOffsets-1), 5)
	w.writeBits(int32(numCodegens-4), 4)

	// Code-length code lengths, 3 bits each, in codegenOrder.
	for i := 0; i < numCodegens; i++ {
		value := uint(w.codegenEncoding.codes[codegenOrder[i]].len())
		w.writeBits(int32(value), 3)
	}

	// Emit the run-length-encoded code sizes produced by generateCodegen.
	i := 0
	for {
		var codeWord = uint32(w.codegen[i])
		i++
		if codeWord == badCode {
			break
		}
		w.writeCode(w.codegenEncoding.codes[codeWord])

		// Codes 16-18 are followed by extra repeat-count bits.
		switch codeWord {
		case 16:
			w.writeBits(int32(w.codegen[i]), 2)
			i++
		case 17:
			w.writeBits(int32(w.codegen[i]), 3)
			i++
		case 18:
			w.writeBits(int32(w.codegen[i]), 7)
			i++
		}
	}
}
    506 
// writeStoredHeader will write a stored header.
// If the stored block is only used for EOF,
// it is replaced with a fixed huffman block.
func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) {
	if w.err != nil {
		return
	}
	if w.lastHeader > 0 {
		// We owe an EOB
		w.writeCode(w.literalEncoding.codes[endBlockMarker])
		w.lastHeader = 0
	}

	// To write EOF, use a fixed encoding block. 10 bits instead of 5 bytes.
	if length == 0 && isEof {
		w.writeFixedHeader(isEof)
		// EOB: 7 bits, value: 0
		w.writeBits(0, 7)
		w.flush()
		return
	}

	// 3 header bits: BFINAL flag, BTYPE=00 (stored).
	var flag int32
	if isEof {
		flag = 1
	}
	w.writeBits(flag, 3)
	// Stored blocks are byte-aligned: flush, then LEN and its complement NLEN.
	w.flush()
	w.writeBits(int32(length), 16)
	w.writeBits(int32(^uint16(length)), 16)
}
    538 
    539 func (w *huffmanBitWriter) writeFixedHeader(isEof bool) {
    540 	if w.err != nil {
    541 		return
    542 	}
    543 	if w.lastHeader > 0 {
    544 		// We owe an EOB
    545 		w.writeCode(w.literalEncoding.codes[endBlockMarker])
    546 		w.lastHeader = 0
    547 	}
    548 
    549 	// Indicate that we are a fixed Huffman block
    550 	var value int32 = 2
    551 	if isEof {
    552 		value = 3
    553 	}
    554 	w.writeBits(value, 3)
    555 }
    556 
// writeBlock will write a block of tokens with the smallest encoding.
// The original input can be supplied, and if the huffman encoded data
// is larger than the original bytes, the data will be written as a
// stored block.
// If the input is nil, the tokens will always be Huffman encoded.
func (w *huffmanBitWriter) writeBlock(tokens *tokens, eof bool, input []byte) {
	if w.err != nil {
		return
	}

	tokens.AddEOB()
	if w.lastHeader > 0 {
		// We owe an EOB
		w.writeCode(w.literalEncoding.codes[endBlockMarker])
		w.lastHeader = 0
	}
	numLiterals, numOffsets := w.indexTokens(tokens, false)
	w.generate()
	var extraBits int
	storedSize, storable := w.storedSize(input)
	if storable {
		// Extra bits only matter when we can compare against storing.
		extraBits = w.extraBitSize()
	}

	// Figure out smallest code.
	// Fixed Huffman baseline.
	var literalEncoding = fixedLiteralEncoding
	var offsetEncoding = fixedOffsetEncoding
	var size = math.MaxInt32
	// Only small blocks are ever cheaper with the predefined tables.
	if tokens.n < maxPredefinedTokens {
		size = w.fixedSize(extraBits)
	}

	// Dynamic Huffman?
	var numCodegens int

	// Generate codegen and codegenFrequencies, which indicates how to encode
	// the literalEncoding and the offsetEncoding.
	w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding)
	w.codegenEncoding.generate(w.codegenFreq[:], 7)
	dynamicSize, numCodegens := w.dynamicSize(w.literalEncoding, w.offsetEncoding, extraBits)

	if dynamicSize < size {
		size = dynamicSize
		literalEncoding = w.literalEncoding
		offsetEncoding = w.offsetEncoding
	}

	// Stored bytes?
	if storable && storedSize <= size {
		w.writeStoredHeader(len(input), eof)
		w.writeBytes(input)
		return
	}

	// Huffman.
	if literalEncoding == fixedLiteralEncoding {
		w.writeFixedHeader(eof)
	} else {
		w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
	}

	// Write the tokens.
	w.writeTokens(tokens.Slice(), literalEncoding.codes, offsetEncoding.codes)
}
    622 
// writeBlockDynamic encodes a block using a dynamic Huffman table.
// This should be used if the symbols used have a disproportionate
// histogram distribution.
// If input is supplied and the compression savings are below 1/16th of the
// input size the block is stored.
func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []byte, sync bool) {
	if w.err != nil {
		return
	}

	sync = sync || eof
	if sync {
		tokens.AddEOB()
	}

	// We cannot reuse pure huffman table, and must mark as EOF.
	if (w.lastHuffMan || eof) && w.lastHeader > 0 {
		// We will not try to reuse.
		w.writeCode(w.literalEncoding.codes[endBlockMarker])
		w.lastHeader = 0
		w.lastHuffMan = false
	}

	// fillReuse enables filling of empty values.
	// This will make encodings always reusable without testing.
	// However, this does not appear to benefit on most cases.
	const fillReuse = false

	// Check if we can reuse...
	if !fillReuse && w.lastHeader > 0 && !w.canReuse(tokens) {
		w.writeCode(w.literalEncoding.codes[endBlockMarker])
		w.lastHeader = 0
	}

	numLiterals, numOffsets := w.indexTokens(tokens, !sync)
	extraBits := 0
	ssize, storable := w.storedSize(input)

	const usePrefs = true
	if storable || w.lastHeader > 0 {
		extraBits = w.extraBitSize()
	}

	var size int

	// Check if we should reuse.
	if w.lastHeader > 0 {
		// Estimate size for using a new table.
		// Use the previous header size as the best estimate.
		newSize := w.lastHeader + tokens.EstimatedBits()
		// Penalize the new-table estimate by newSize>>logNewTablePenalty,
		// since the estimate assumes an optimal table.
		newSize += int(w.literalEncoding.codes[endBlockMarker].len()) + newSize>>w.logNewTablePenalty

		// The estimated size is calculated as an optimal table.
		// We add a penalty to make it more realistic and re-use a bit more.
		reuseSize := w.dynamicReuseSize(w.literalEncoding, w.offsetEncoding) + extraBits

		// Check if a new table is better.
		if newSize < reuseSize {
			// Write the EOB we owe.
			w.writeCode(w.literalEncoding.codes[endBlockMarker])
			size = newSize
			w.lastHeader = 0
		} else {
			size = reuseSize
		}

		// For small blocks, also consider the predefined (fixed) tables.
		if tokens.n < maxPredefinedTokens {
			if preSize := w.fixedSize(extraBits) + 7; usePrefs && preSize < size {
				// Check if we get a reasonable size decrease.
				if storable && ssize <= size {
					w.writeStoredHeader(len(input), eof)
					w.writeBytes(input)
					return
				}
				w.writeFixedHeader(eof)
				if !sync {
					tokens.AddEOB()
				}
				w.writeTokens(tokens.Slice(), fixedLiteralEncoding.codes, fixedOffsetEncoding.codes)
				return
			}
		}
		// Check if we get a reasonable size decrease.
		if storable && ssize <= size {
			w.writeStoredHeader(len(input), eof)
			w.writeBytes(input)
			return
		}
	}

	// We want a new block/table
	if w.lastHeader == 0 {
		if fillReuse && !sync {
			w.fillTokens()
			numLiterals, numOffsets = maxNumLit, maxNumDist
		} else {
			// Ensure the EOB symbol gets a code.
			w.literalFreq[endBlockMarker] = 1
		}

		w.generate()
		// Generate codegen and codegenFrequencies, which indicates how to encode
		// the literalEncoding and the offsetEncoding.
		w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding)
		w.codegenEncoding.generate(w.codegenFreq[:], 7)

		var numCodegens int
		if fillReuse && !sync {
			// Reindex for accurate size...
			w.indexTokens(tokens, true)
		}
		size, numCodegens = w.dynamicSize(w.literalEncoding, w.offsetEncoding, extraBits)

		// Store predefined, if we don't get a reasonable improvement.
		if tokens.n < maxPredefinedTokens {
			if preSize := w.fixedSize(extraBits); usePrefs && preSize <= size {
				// Store bytes, if we don't get an improvement.
				if storable && ssize <= preSize {
					w.writeStoredHeader(len(input), eof)
					w.writeBytes(input)
					return
				}
				w.writeFixedHeader(eof)
				if !sync {
					tokens.AddEOB()
				}
				w.writeTokens(tokens.Slice(), fixedLiteralEncoding.codes, fixedOffsetEncoding.codes)
				return
			}
		}

		if storable && ssize <= size {
			// Store bytes, if we don't get an improvement.
			w.writeStoredHeader(len(input), eof)
			w.writeBytes(input)
			return
		}

		// Write Huffman table.
		w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
		if !sync {
			// Remember the header cost so the next block can weigh reuse.
			w.lastHeader, _ = w.headerSize()
		}
		w.lastHuffMan = false
	}

	if sync {
		w.lastHeader = 0
	}
	// Write the tokens.
	w.writeTokens(tokens.Slice(), w.literalEncoding.codes, w.offsetEncoding.codes)
}
    774 
    775 func (w *huffmanBitWriter) fillTokens() {
    776 	for i, v := range w.literalFreq[:literalCount] {
    777 		if v == 0 {
    778 			w.literalFreq[i] = 1
    779 		}
    780 	}
    781 	for i, v := range w.offsetFreq[:offsetCodeCount] {
    782 		if v == 0 {
    783 			w.offsetFreq[i] = 1
    784 		}
    785 	}
    786 }
    787 
// indexTokens copies the histograms of t into literalFreq and offsetFreq
// and returns the number of literal and offset codes actually used.
// (The encodings themselves are built separately by generate.)
func (w *huffmanBitWriter) indexTokens(t *tokens, filled bool) (numLiterals, numOffsets int) {
	// Slice-to-array-pointer conversions copy the histograms wholesale;
	// the commented copy calls are the readable equivalent.
	//copy(w.literalFreq[:], t.litHist[:])
	*(*[256]uint16)(w.literalFreq[:]) = t.litHist
	//copy(w.literalFreq[256:], t.extraHist[:])
	*(*[32]uint16)(w.literalFreq[256:]) = t.extraHist
	w.offsetFreq = t.offHist

	if t.n == 0 {
		return
	}
	if filled {
		return maxNumLit, maxNumDist
	}
	// get the number of literals
	numLiterals = len(w.literalFreq)
	for w.literalFreq[numLiterals-1] == 0 {
		numLiterals--
	}
	// get the number of offsets
	numOffsets = len(w.offsetFreq)
	for numOffsets > 0 && w.offsetFreq[numOffsets-1] == 0 {
		numOffsets--
	}
	if numOffsets == 0 {
		// We haven't found a single match. If we want to go with the dynamic encoding,
		// we should count at least one offset to be sure that the offset huffman tree could be encoded.
		w.offsetFreq[0] = 1
		numOffsets = 1
	}
	return
}
    823 
    824 func (w *huffmanBitWriter) generate() {
    825 	w.literalEncoding.generate(w.literalFreq[:literalCount], 15)
    826 	w.offsetEncoding.generate(w.offsetFreq[:offsetCodeCount], 15)
    827 }
    828 
// writeTokens writes a slice of tokens to the output.
// codes for literal and offset encoding must be supplied.
//
// The hot loop keeps bits/nbits/nbytes in locals and manually inlines
// writeCode/writeBits/writeOutBits; the `if false` branches preserve the
// original calls for reference.
func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) {
	if w.err != nil {
		return
	}
	if len(tokens) == 0 {
		return
	}

	// Only last token should be endBlockMarker.
	// It is stripped here and emitted after the loop via writeCode.
	var deferEOB bool
	if tokens[len(tokens)-1] == endBlockMarker {
		tokens = tokens[:len(tokens)-1]
		deferEOB = true
	}

	// Create slices up to the next power of two to avoid bounds checks.
	lits := leCodes[:256]
	offs := oeCodes[:32]
	lengths := leCodes[lengthCodesStart:]
	lengths = lengths[:32]

	// Go 1.16 LOVES having these on stack.
	bits, nbits, nbytes := w.bits, w.nbits, w.nbytes

	for _, t := range tokens {
		if t < 256 {
			// Plain literal byte.
			//w.writeCode(lits[t.literal()])
			c := lits[t]
			bits |= c.code64() << (nbits & 63)
			nbits += c.len()
			if nbits >= 48 {
				binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
				//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
				bits >>= 48
				nbits -= 48
				nbytes += 6
				if nbytes >= bufferFlushSize {
					if w.err != nil {
						nbytes = 0
						return
					}
					_, w.err = w.writer.Write(w.bytes[:nbytes])
					nbytes = 0
				}
			}
			continue
		}

		// Write the length
		length := t.length()
		lengthCode := lengthCode(length) & 31
		if false {
			w.writeCode(lengths[lengthCode])
		} else {
			// inlined
			c := lengths[lengthCode]
			bits |= c.code64() << (nbits & 63)
			nbits += c.len()
			if nbits >= 48 {
				binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
				//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
				bits >>= 48
				nbits -= 48
				nbytes += 6
				if nbytes >= bufferFlushSize {
					if w.err != nil {
						nbytes = 0
						return
					}
					_, w.err = w.writer.Write(w.bytes[:nbytes])
					nbytes = 0
				}
			}
		}

		// Length codes >= lengthExtraBitsMinCode carry extra bits.
		if lengthCode >= lengthExtraBitsMinCode {
			extraLengthBits := lengthExtraBits[lengthCode]
			//w.writeBits(extraLength, extraLengthBits)
			extraLength := int32(length - lengthBase[lengthCode])
			bits |= uint64(extraLength) << (nbits & 63)
			nbits += extraLengthBits
			if nbits >= 48 {
				binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
				//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
				bits >>= 48
				nbits -= 48
				nbytes += 6
				if nbytes >= bufferFlushSize {
					if w.err != nil {
						nbytes = 0
						return
					}
					_, w.err = w.writer.Write(w.bytes[:nbytes])
					nbytes = 0
				}
			}
		}
		// Write the offset
		offset := t.offset()
		offsetCode := (offset >> 16) & 31
		if false {
			w.writeCode(offs[offsetCode])
		} else {
			// inlined
			c := offs[offsetCode]
			bits |= c.code64() << (nbits & 63)
			nbits += c.len()
			if nbits >= 48 {
				binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
				//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
				bits >>= 48
				nbits -= 48
				nbytes += 6
				if nbytes >= bufferFlushSize {
					if w.err != nil {
						nbytes = 0
						return
					}
					_, w.err = w.writer.Write(w.bytes[:nbytes])
					nbytes = 0
				}
			}
		}

		// Offset codes >= offsetExtraBitsMinCode carry extra bits;
		// offsetCombined packs the bit count (low byte) and base (<<8).
		if offsetCode >= offsetExtraBitsMinCode {
			offsetComb := offsetCombined[offsetCode]
			//w.writeBits(extraOffset, extraOffsetBits)
			bits |= uint64((offset-(offsetComb>>8))&matchOffsetOnlyMask) << (nbits & 63)
			nbits += uint8(offsetComb)
			if nbits >= 48 {
				binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
				//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
				bits >>= 48
				nbits -= 48
				nbytes += 6
				if nbytes >= bufferFlushSize {
					if w.err != nil {
						nbytes = 0
						return
					}
					_, w.err = w.writer.Write(w.bytes[:nbytes])
					nbytes = 0
				}
			}
		}
	}
	// Restore...
	w.bits, w.nbits, w.nbytes = bits, nbits, nbytes

	if deferEOB {
		w.writeCode(leCodes[endBlockMarker])
	}
}
    984 
    985 // huffOffset is a static offset encoder used for huffman only encoding.
    986 // It can be reused since we will not be encoding offset values.
    987 var huffOffset *huffmanEncoder
    988 
    989 func init() {
    990 	w := newHuffmanBitWriter(nil)
    991 	w.offsetFreq[0] = 1
    992 	huffOffset = newHuffmanEncoder(offsetCodeCount)
    993 	huffOffset.generate(w.offsetFreq[:offsetCodeCount], 15)
    994 }
    995 
// writeBlockHuff encodes a block of bytes as either
// Huffman encoded literals or uncompressed bytes if the
// results only gains very little from compression.
//
// eof marks this as the final block, sync forces the block (and the
// pending EOB code) to be emitted so the stream is decodable up to here.
// The offset alphabet is never used: the whole input is coded as literals.
func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
	if w.err != nil {
		return
	}

	// Clear histogram
	for i := range w.literalFreq[:] {
		w.literalFreq[i] = 0
	}
	if !w.lastHuffMan {
		// Only needed when the previous block was not huffman-only;
		// a huffman-only predecessor left offsetFreq untouched (still zero).
		for i := range w.offsetFreq[:] {
			w.offsetFreq[i] = 0
		}
	}

	const numLiterals = endBlockMarker + 1
	const numOffsets = 1

	// Add everything as literals
	// We have to estimate the header size.
	// Assume header is around 70 bytes:
	// https://stackoverflow.com/a/25454430
	const guessHeaderSizeBits = 70 * 8
	histogram(input, w.literalFreq[:numLiterals])
	ssize, storable := w.storedSize(input)
	if storable && len(input) > 1024 {
		// Quick check for incompressible content.
		// Sum of squared deviations from a flat byte distribution;
		// staying below max (2 bits/byte of slack) suggests the data is
		// close to uniform and will not compress. NOTE(review): heuristic
		// threshold, presumably tuned empirically.
		abs := float64(0)
		avg := float64(len(input)) / 256
		max := float64(len(input) * 2)
		for _, v := range w.literalFreq[:256] {
			diff := float64(v) - avg
			abs += diff * diff
			if abs > max {
				// Enough skew detected; no need to finish the scan.
				break
			}
		}
		if abs < max {
			if debugDeflate {
				fmt.Println("stored", abs, "<", max)
			}
			// No chance we can compress this...
			w.writeStoredHeader(len(input), eof)
			w.writeBytes(input)
			return
		}
	}
	// Every block needs exactly one end-of-block marker.
	w.literalFreq[endBlockMarker] = 1
	w.tmpLitEncoding.generate(w.literalFreq[:numLiterals], 15)
	estBits := w.tmpLitEncoding.canReuseBits(w.literalFreq[:numLiterals])
	if estBits < math.MaxInt32 {
		// Charge the cost of (re)sending a table header, plus a penalty
		// that biases toward reusing the previous table.
		estBits += w.lastHeader
		if w.lastHeader == 0 {
			estBits += guessHeaderSizeBits
		}
		estBits += estBits >> w.logNewTablePenalty
	}

	// Store bytes, if we don't get a reasonable improvement.
	if storable && ssize <= estBits {
		if debugDeflate {
			fmt.Println("stored,", ssize, "<=", estBits)
		}
		w.writeStoredHeader(len(input), eof)
		w.writeBytes(input)
		return
	}

	if w.lastHeader > 0 {
		// A previous table is still open; keep it only if reusing it
		// beats the estimated cost of a fresh table.
		reuseSize := w.literalEncoding.canReuseBits(w.literalFreq[:256])

		if estBits < reuseSize {
			if debugDeflate {
				fmt.Println("NOT reusing, reuse:", reuseSize/8, "> new:", estBits/8, "header est:", w.lastHeader/8, "bytes")
			}
			// We owe an EOB
			w.writeCode(w.literalEncoding.codes[endBlockMarker])
			w.lastHeader = 0
		} else if debugDeflate {
			fmt.Println("reusing, reuse:", reuseSize/8, "> new:", estBits/8, "- header est:", w.lastHeader/8)
		}
	}

	count := 0
	if w.lastHeader == 0 {
		// Use the temp encoding, so swap.
		w.literalEncoding, w.tmpLitEncoding = w.tmpLitEncoding, w.literalEncoding
		// Generate codegen and codegenFrequencies, which indicates how to encode
		// the literalEncoding and the offsetEncoding.
		w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, huffOffset)
		w.codegenEncoding.generate(w.codegenFreq[:], 7)
		numCodegens := w.codegens()

		// Huffman.
		w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
		w.lastHuffMan = true
		w.lastHeader, _ = w.headerSize()
		if debugDeflate {
			count += w.lastHeader
			fmt.Println("header:", count/8)
		}
	}

	encoding := w.literalEncoding.codes[:256]
	// Go 1.16 LOVES having these on stack. At least 1.5x the speed.
	bits, nbits, nbytes := w.bits, w.nbits, w.nbytes

	if debugDeflate {
		// Bits already pending in the accumulator/buffer were counted
		// elsewhere; subtract them so count reflects this block only.
		count -= int(nbytes)*8 + int(nbits)
	}
	// Unroll, write 3 codes/loop.
	// Fastest number of unrolls.
	for len(input) > 3 {
		// We must have at least 48 bits free.
		// Flush whole bytes from the accumulator; with max 15-bit codes,
		// three codes (<=45 bits) then fit in the remaining space.
		if nbits >= 8 {
			n := nbits >> 3
			binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
			bits >>= (n * 8) & 63
			nbits -= n * 8
			nbytes += n
		}
		if nbytes >= bufferFlushSize {
			if w.err != nil {
				nbytes = 0
				return
			}
			if debugDeflate {
				count += int(nbytes) * 8
			}
			_, w.err = w.writer.Write(w.bytes[:nbytes])
			nbytes = 0
		}
		// Pack three literal codes; b is shifted past a's length before
		// nbits is updated, so the order of these statements matters.
		a, b := encoding[input[0]], encoding[input[1]]
		bits |= a.code64() << (nbits & 63)
		bits |= b.code64() << ((nbits + a.len()) & 63)
		c := encoding[input[2]]
		nbits += b.len() + a.len()
		bits |= c.code64() << (nbits & 63)
		nbits += c.len()
		input = input[3:]
	}

	// Remaining...
	for _, t := range input {
		if nbits >= 48 {
			binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
			//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
			bits >>= 48
			nbits -= 48
			nbytes += 6
			if nbytes >= bufferFlushSize {
				if w.err != nil {
					nbytes = 0
					return
				}
				if debugDeflate {
					count += int(nbytes) * 8
				}
				_, w.err = w.writer.Write(w.bytes[:nbytes])
				nbytes = 0
			}
		}
		// Bitwriting inlined, ~30% speedup
		c := encoding[t]
		bits |= c.code64() << (nbits & 63)

		nbits += c.len()
		if debugDeflate {
			count += int(c.len())
		}
	}
	// Restore...
	w.bits, w.nbits, w.nbytes = bits, nbits, nbytes

	if debugDeflate {
		nb := count + int(nbytes)*8 + int(nbits)
		fmt.Println("wrote", nb, "bits,", nb/8, "bytes.")
	}
	// Flush if needed to have space.
	if w.nbits >= 48 {
		w.writeOutBits()
	}

	if eof || sync {
		// Close the block now; a new table (and header) will be needed
		// for the next one.
		w.writeCode(w.literalEncoding.codes[endBlockMarker])
		w.lastHeader = 0
		w.lastHuffMan = false
	}
}