gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

huffman_code.go (11106B)


      1 // Copyright 2009 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package flate
      6 
      7 import (
      8 	"math"
      9 	"math/bits"
     10 )
     11 
     12 const (
     13 	maxBitsLimit = 16
     14 	// number of valid literals
     15 	literalCount = 286
     16 )
     17 
     18 // hcode is a huffman code with a bit code and bit length.
     19 type hcode uint32
     20 
     21 func (h hcode) len() uint8 {
     22 	return uint8(h)
     23 }
     24 
     25 func (h hcode) code64() uint64 {
     26 	return uint64(h >> 8)
     27 }
     28 
     29 func (h hcode) zero() bool {
     30 	return h == 0
     31 }
     32 
     33 type huffmanEncoder struct {
     34 	codes    []hcode
     35 	bitCount [17]int32
     36 
     37 	// Allocate a reusable buffer with the longest possible frequency table.
     38 	// Possible lengths are codegenCodeCount, offsetCodeCount and literalCount.
     39 	// The largest of these is literalCount, so we allocate for that case.
     40 	freqcache [literalCount + 1]literalNode
     41 }
     42 
     43 type literalNode struct {
     44 	literal uint16
     45 	freq    uint16
     46 }
     47 
     48 // A levelInfo describes the state of the constructed tree for a given depth.
     49 type levelInfo struct {
     50 	// Our level.  for better printing
     51 	level int32
     52 
     53 	// The frequency of the last node at this level
     54 	lastFreq int32
     55 
     56 	// The frequency of the next character to add to this level
     57 	nextCharFreq int32
     58 
     59 	// The frequency of the next pair (from level below) to add to this level.
     60 	// Only valid if the "needed" value of the next lower level is 0.
     61 	nextPairFreq int32
     62 
     63 	// The number of chains remaining to generate for this level before moving
     64 	// up to the next level
     65 	needed int32
     66 }
     67 
     68 // set sets the code and length of an hcode.
     69 func (h *hcode) set(code uint16, length uint8) {
     70 	*h = hcode(length) | (hcode(code) << 8)
     71 }
     72 
     73 func newhcode(code uint16, length uint8) hcode {
     74 	return hcode(length) | (hcode(code) << 8)
     75 }
     76 
     77 func reverseBits(number uint16, bitLength byte) uint16 {
     78 	return bits.Reverse16(number << ((16 - bitLength) & 15))
     79 }
     80 
     81 func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxUint16} }
     82 
     83 func newHuffmanEncoder(size int) *huffmanEncoder {
     84 	// Make capacity to next power of two.
     85 	c := uint(bits.Len32(uint32(size - 1)))
     86 	return &huffmanEncoder{codes: make([]hcode, size, 1<<c)}
     87 }
     88 
     89 // Generates a HuffmanCode corresponding to the fixed literal table
     90 func generateFixedLiteralEncoding() *huffmanEncoder {
     91 	h := newHuffmanEncoder(literalCount)
     92 	codes := h.codes
     93 	var ch uint16
     94 	for ch = 0; ch < literalCount; ch++ {
     95 		var bits uint16
     96 		var size uint8
     97 		switch {
     98 		case ch < 144:
     99 			// size 8, 000110000  .. 10111111
    100 			bits = ch + 48
    101 			size = 8
    102 		case ch < 256:
    103 			// size 9, 110010000 .. 111111111
    104 			bits = ch + 400 - 144
    105 			size = 9
    106 		case ch < 280:
    107 			// size 7, 0000000 .. 0010111
    108 			bits = ch - 256
    109 			size = 7
    110 		default:
    111 			// size 8, 11000000 .. 11000111
    112 			bits = ch + 192 - 280
    113 			size = 8
    114 		}
    115 		codes[ch] = newhcode(reverseBits(bits, size), size)
    116 	}
    117 	return h
    118 }
    119 
    120 func generateFixedOffsetEncoding() *huffmanEncoder {
    121 	h := newHuffmanEncoder(30)
    122 	codes := h.codes
    123 	for ch := range codes {
    124 		codes[ch] = newhcode(reverseBits(uint16(ch), 5), 5)
    125 	}
    126 	return h
    127 }
    128 
    129 var fixedLiteralEncoding = generateFixedLiteralEncoding()
    130 var fixedOffsetEncoding = generateFixedOffsetEncoding()
    131 
    132 func (h *huffmanEncoder) bitLength(freq []uint16) int {
    133 	var total int
    134 	for i, f := range freq {
    135 		if f != 0 {
    136 			total += int(f) * int(h.codes[i].len())
    137 		}
    138 	}
    139 	return total
    140 }
    141 
    142 func (h *huffmanEncoder) bitLengthRaw(b []byte) int {
    143 	var total int
    144 	for _, f := range b {
    145 		total += int(h.codes[f].len())
    146 	}
    147 	return total
    148 }
    149 
    150 // canReuseBits returns the number of bits or math.MaxInt32 if the encoder cannot be reused.
    151 func (h *huffmanEncoder) canReuseBits(freq []uint16) int {
    152 	var total int
    153 	for i, f := range freq {
    154 		if f != 0 {
    155 			code := h.codes[i]
    156 			if code.zero() {
    157 				return math.MaxInt32
    158 			}
    159 			total += int(f) * int(code.len())
    160 		}
    161 	}
    162 	return total
    163 }
    164 
    165 // Return the number of literals assigned to each bit size in the Huffman encoding
    166 //
    167 // This method is only called when list.length >= 3
    168 // The cases of 0, 1, and 2 literals are handled by special case code.
    169 //
    170 // list  An array of the literals with non-zero frequencies
    171 //
    172 //	and their associated frequencies. The array is in order of increasing
    173 //	frequency, and has as its last element a special element with frequency
    174 //	MaxInt32
    175 //
    176 // maxBits     The maximum number of bits that should be used to encode any literal.
    177 //
    178 //	Must be less than 16.
    179 //
    180 // return      An integer array in which array[i] indicates the number of literals
    181 //
    182 //	that should be encoded in i bits.
    183 func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
    184 	if maxBits >= maxBitsLimit {
    185 		panic("flate: maxBits too large")
    186 	}
    187 	n := int32(len(list))
    188 	list = list[0 : n+1]
    189 	list[n] = maxNode()
    190 
    191 	// The tree can't have greater depth than n - 1, no matter what. This
    192 	// saves a little bit of work in some small cases
    193 	if maxBits > n-1 {
    194 		maxBits = n - 1
    195 	}
    196 
    197 	// Create information about each of the levels.
    198 	// A bogus "Level 0" whose sole purpose is so that
    199 	// level1.prev.needed==0.  This makes level1.nextPairFreq
    200 	// be a legitimate value that never gets chosen.
    201 	var levels [maxBitsLimit]levelInfo
    202 	// leafCounts[i] counts the number of literals at the left
    203 	// of ancestors of the rightmost node at level i.
    204 	// leafCounts[i][j] is the number of literals at the left
    205 	// of the level j ancestor.
    206 	var leafCounts [maxBitsLimit][maxBitsLimit]int32
    207 
    208 	// Descending to only have 1 bounds check.
    209 	l2f := int32(list[2].freq)
    210 	l1f := int32(list[1].freq)
    211 	l0f := int32(list[0].freq) + int32(list[1].freq)
    212 
    213 	for level := int32(1); level <= maxBits; level++ {
    214 		// For every level, the first two items are the first two characters.
    215 		// We initialize the levels as if we had already figured this out.
    216 		levels[level] = levelInfo{
    217 			level:        level,
    218 			lastFreq:     l1f,
    219 			nextCharFreq: l2f,
    220 			nextPairFreq: l0f,
    221 		}
    222 		leafCounts[level][level] = 2
    223 		if level == 1 {
    224 			levels[level].nextPairFreq = math.MaxInt32
    225 		}
    226 	}
    227 
    228 	// We need a total of 2*n - 2 items at top level and have already generated 2.
    229 	levels[maxBits].needed = 2*n - 4
    230 
    231 	level := uint32(maxBits)
    232 	for level < 16 {
    233 		l := &levels[level]
    234 		if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 {
    235 			// We've run out of both leafs and pairs.
    236 			// End all calculations for this level.
    237 			// To make sure we never come back to this level or any lower level,
    238 			// set nextPairFreq impossibly large.
    239 			l.needed = 0
    240 			levels[level+1].nextPairFreq = math.MaxInt32
    241 			level++
    242 			continue
    243 		}
    244 
    245 		prevFreq := l.lastFreq
    246 		if l.nextCharFreq < l.nextPairFreq {
    247 			// The next item on this row is a leaf node.
    248 			n := leafCounts[level][level] + 1
    249 			l.lastFreq = l.nextCharFreq
    250 			// Lower leafCounts are the same of the previous node.
    251 			leafCounts[level][level] = n
    252 			e := list[n]
    253 			if e.literal < math.MaxUint16 {
    254 				l.nextCharFreq = int32(e.freq)
    255 			} else {
    256 				l.nextCharFreq = math.MaxInt32
    257 			}
    258 		} else {
    259 			// The next item on this row is a pair from the previous row.
    260 			// nextPairFreq isn't valid until we generate two
    261 			// more values in the level below
    262 			l.lastFreq = l.nextPairFreq
    263 			// Take leaf counts from the lower level, except counts[level] remains the same.
    264 			if true {
    265 				save := leafCounts[level][level]
    266 				leafCounts[level] = leafCounts[level-1]
    267 				leafCounts[level][level] = save
    268 			} else {
    269 				copy(leafCounts[level][:level], leafCounts[level-1][:level])
    270 			}
    271 			levels[l.level-1].needed = 2
    272 		}
    273 
    274 		if l.needed--; l.needed == 0 {
    275 			// We've done everything we need to do for this level.
    276 			// Continue calculating one level up. Fill in nextPairFreq
    277 			// of that level with the sum of the two nodes we've just calculated on
    278 			// this level.
    279 			if l.level == maxBits {
    280 				// All done!
    281 				break
    282 			}
    283 			levels[l.level+1].nextPairFreq = prevFreq + l.lastFreq
    284 			level++
    285 		} else {
    286 			// If we stole from below, move down temporarily to replenish it.
    287 			for levels[level-1].needed > 0 {
    288 				level--
    289 			}
    290 		}
    291 	}
    292 
    293 	// Somethings is wrong if at the end, the top level is null or hasn't used
    294 	// all of the leaves.
    295 	if leafCounts[maxBits][maxBits] != n {
    296 		panic("leafCounts[maxBits][maxBits] != n")
    297 	}
    298 
    299 	bitCount := h.bitCount[:maxBits+1]
    300 	bits := 1
    301 	counts := &leafCounts[maxBits]
    302 	for level := maxBits; level > 0; level-- {
    303 		// chain.leafCount gives the number of literals requiring at least "bits"
    304 		// bits to encode.
    305 		bitCount[bits] = counts[level] - counts[level-1]
    306 		bits++
    307 	}
    308 	return bitCount
    309 }
    310 
    311 // Look at the leaves and assign them a bit count and an encoding as specified
    312 // in RFC 1951 3.2.2
    313 func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalNode) {
    314 	code := uint16(0)
    315 	for n, bits := range bitCount {
    316 		code <<= 1
    317 		if n == 0 || bits == 0 {
    318 			continue
    319 		}
    320 		// The literals list[len(list)-bits] .. list[len(list)-bits]
    321 		// are encoded using "bits" bits, and get the values
    322 		// code, code + 1, ....  The code values are
    323 		// assigned in literal order (not frequency order).
    324 		chunk := list[len(list)-int(bits):]
    325 
    326 		sortByLiteral(chunk)
    327 		for _, node := range chunk {
    328 			h.codes[node.literal] = newhcode(reverseBits(code, uint8(n)), uint8(n))
    329 			code++
    330 		}
    331 		list = list[0 : len(list)-int(bits)]
    332 	}
    333 }
    334 
    335 // Update this Huffman Code object to be the minimum code for the specified frequency count.
    336 //
    337 // freq  An array of frequencies, in which frequency[i] gives the frequency of literal i.
    338 // maxBits  The maximum number of bits to use for any literal.
    339 func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) {
    340 	list := h.freqcache[:len(freq)+1]
    341 	codes := h.codes[:len(freq)]
    342 	// Number of non-zero literals
    343 	count := 0
    344 	// Set list to be the set of all non-zero literals and their frequencies
    345 	for i, f := range freq {
    346 		if f != 0 {
    347 			list[count] = literalNode{uint16(i), f}
    348 			count++
    349 		} else {
    350 			codes[i] = 0
    351 		}
    352 	}
    353 	list[count] = literalNode{}
    354 
    355 	list = list[:count]
    356 	if count <= 2 {
    357 		// Handle the small cases here, because they are awkward for the general case code. With
    358 		// two or fewer literals, everything has bit length 1.
    359 		for i, node := range list {
    360 			// "list" is in order of increasing literal value.
    361 			h.codes[node.literal].set(uint16(i), 1)
    362 		}
    363 		return
    364 	}
    365 	sortByFreq(list)
    366 
    367 	// Get the number of literals for each bit count
    368 	bitCount := h.bitCounts(list, maxBits)
    369 	// And do the assignment
    370 	h.assignEncodingAndSize(bitCount, list)
    371 }
    372 
    373 // atLeastOne clamps the result between 1 and 15.
    374 func atLeastOne(v float32) float32 {
    375 	if v < 1 {
    376 		return 1
    377 	}
    378 	if v > 15 {
    379 		return 15
    380 	}
    381 	return v
    382 }
    383 
    384 func histogram(b []byte, h []uint16) {
    385 	if true && len(b) >= 8<<10 {
    386 		// Split for bigger inputs
    387 		histogramSplit(b, h)
    388 	} else {
    389 		h = h[:256]
    390 		for _, t := range b {
    391 			h[t]++
    392 		}
    393 	}
    394 }
    395 
    396 func histogramSplit(b []byte, h []uint16) {
    397 	// Tested, and slightly faster than 2-way.
    398 	// Writing to separate arrays and combining is also slightly slower.
    399 	h = h[:256]
    400 	for len(b)&3 != 0 {
    401 		h[b[0]]++
    402 		b = b[1:]
    403 	}
    404 	n := len(b) / 4
    405 	x, y, z, w := b[:n], b[n:], b[n+n:], b[n+n+n:]
    406 	y, z, w = y[:len(x)], z[:len(x)], w[:len(x)]
    407 	for i, t := range x {
    408 		v0 := &h[t]
    409 		v1 := &h[y[i]]
    410 		v3 := &h[w[i]]
    411 		v2 := &h[z[i]]
    412 		*v0++
    413 		*v1++
    414 		*v2++
    415 		*v3++
    416 	}
    417 }