gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

token.go (11051B)


      1 // Copyright 2009 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package flate
      6 
      7 import (
      8 	"bytes"
      9 	"encoding/binary"
     10 	"fmt"
     11 	"io"
     12 	"math"
     13 )
     14 
     15 const (
     16 	// bits 0-16  	xoffset = offset - MIN_OFFSET_SIZE, or literal - 16 bits
     17 	// bits 16-22	offsetcode - 5 bits
     18 	// bits 22-30   xlength = length - MIN_MATCH_LENGTH - 8 bits
     19 	// bits 30-32   type   0 = literal  1=EOF  2=Match   3=Unused - 2 bits
     20 	lengthShift         = 22
     21 	offsetMask          = 1<<lengthShift - 1
     22 	typeMask            = 3 << 30
     23 	literalType         = 0 << 30
     24 	matchType           = 1 << 30
     25 	matchOffsetOnlyMask = 0xffff
     26 )
     27 
     28 // The length code for length X (MIN_MATCH_LENGTH <= X <= MAX_MATCH_LENGTH)
     29 // is lengthCodes[length - MIN_MATCH_LENGTH]
     30 var lengthCodes = [256]uint8{
     31 	0, 1, 2, 3, 4, 5, 6, 7, 8, 8,
     32 	9, 9, 10, 10, 11, 11, 12, 12, 12, 12,
     33 	13, 13, 13, 13, 14, 14, 14, 14, 15, 15,
     34 	15, 15, 16, 16, 16, 16, 16, 16, 16, 16,
     35 	17, 17, 17, 17, 17, 17, 17, 17, 18, 18,
     36 	18, 18, 18, 18, 18, 18, 19, 19, 19, 19,
     37 	19, 19, 19, 19, 20, 20, 20, 20, 20, 20,
     38 	20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
     39 	21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
     40 	21, 21, 21, 21, 21, 21, 22, 22, 22, 22,
     41 	22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
     42 	22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
     43 	23, 23, 23, 23, 23, 23, 23, 23, 24, 24,
     44 	24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
     45 	24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
     46 	24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
     47 	25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
     48 	25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
     49 	25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
     50 	25, 25, 26, 26, 26, 26, 26, 26, 26, 26,
     51 	26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
     52 	26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
     53 	26, 26, 26, 26, 27, 27, 27, 27, 27, 27,
     54 	27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
     55 	27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
     56 	27, 27, 27, 27, 27, 28,
     57 }
     58 
     59 // lengthCodes1 is length codes, but starting at 1.
     60 var lengthCodes1 = [256]uint8{
     61 	1, 2, 3, 4, 5, 6, 7, 8, 9, 9,
     62 	10, 10, 11, 11, 12, 12, 13, 13, 13, 13,
     63 	14, 14, 14, 14, 15, 15, 15, 15, 16, 16,
     64 	16, 16, 17, 17, 17, 17, 17, 17, 17, 17,
     65 	18, 18, 18, 18, 18, 18, 18, 18, 19, 19,
     66 	19, 19, 19, 19, 19, 19, 20, 20, 20, 20,
     67 	20, 20, 20, 20, 21, 21, 21, 21, 21, 21,
     68 	21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
     69 	22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
     70 	22, 22, 22, 22, 22, 22, 23, 23, 23, 23,
     71 	23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
     72 	23, 23, 24, 24, 24, 24, 24, 24, 24, 24,
     73 	24, 24, 24, 24, 24, 24, 24, 24, 25, 25,
     74 	25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
     75 	25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
     76 	25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
     77 	26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
     78 	26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
     79 	26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
     80 	26, 26, 27, 27, 27, 27, 27, 27, 27, 27,
     81 	27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
     82 	27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
     83 	27, 27, 27, 27, 28, 28, 28, 28, 28, 28,
     84 	28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
     85 	28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
     86 	28, 28, 28, 28, 28, 29,
     87 }
     88 
     89 var offsetCodes = [256]uint32{
     90 	0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
     91 	8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
     92 	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
     93 	11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
     94 	12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
     95 	12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
     96 	13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
     97 	13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
     98 	14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
     99 	14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
    100 	14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
    101 	14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
    102 	15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    103 	15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    104 	15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    105 	15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    106 }
    107 
    108 // offsetCodes14 are offsetCodes, but with 14 added.
    109 var offsetCodes14 = [256]uint32{
    110 	14, 15, 16, 17, 18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21,
    111 	22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
    112 	24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    113 	25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
    114 	26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
    115 	26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
    116 	27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
    117 	27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
    118 	28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
    119 	28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
    120 	28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
    121 	28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
    122 	29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
    123 	29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
    124 	29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
    125 	29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
    126 }
    127 
    128 type token uint32
    129 
    130 type tokens struct {
    131 	extraHist [32]uint16  // codes 256->maxnumlit
    132 	offHist   [32]uint16  // offset codes
    133 	litHist   [256]uint16 // codes 0->255
    134 	nFilled   int
    135 	n         uint16 // Must be able to contain maxStoreBlockSize
    136 	tokens    [maxStoreBlockSize + 1]token
    137 }
    138 
    139 func (t *tokens) Reset() {
    140 	if t.n == 0 {
    141 		return
    142 	}
    143 	t.n = 0
    144 	t.nFilled = 0
    145 	for i := range t.litHist[:] {
    146 		t.litHist[i] = 0
    147 	}
    148 	for i := range t.extraHist[:] {
    149 		t.extraHist[i] = 0
    150 	}
    151 	for i := range t.offHist[:] {
    152 		t.offHist[i] = 0
    153 	}
    154 }
    155 
    156 func (t *tokens) Fill() {
    157 	if t.n == 0 {
    158 		return
    159 	}
    160 	for i, v := range t.litHist[:] {
    161 		if v == 0 {
    162 			t.litHist[i] = 1
    163 			t.nFilled++
    164 		}
    165 	}
    166 	for i, v := range t.extraHist[:literalCount-256] {
    167 		if v == 0 {
    168 			t.nFilled++
    169 			t.extraHist[i] = 1
    170 		}
    171 	}
    172 	for i, v := range t.offHist[:offsetCodeCount] {
    173 		if v == 0 {
    174 			t.offHist[i] = 1
    175 		}
    176 	}
    177 }
    178 
    179 func indexTokens(in []token) tokens {
    180 	var t tokens
    181 	t.indexTokens(in)
    182 	return t
    183 }
    184 
    185 func (t *tokens) indexTokens(in []token) {
    186 	t.Reset()
    187 	for _, tok := range in {
    188 		if tok < matchType {
    189 			t.AddLiteral(tok.literal())
    190 			continue
    191 		}
    192 		t.AddMatch(uint32(tok.length()), tok.offset()&matchOffsetOnlyMask)
    193 	}
    194 }
    195 
    196 // emitLiteral writes a literal chunk and returns the number of bytes written.
    197 func emitLiteral(dst *tokens, lit []byte) {
    198 	for _, v := range lit {
    199 		dst.tokens[dst.n] = token(v)
    200 		dst.litHist[v]++
    201 		dst.n++
    202 	}
    203 }
    204 
    205 func (t *tokens) AddLiteral(lit byte) {
    206 	t.tokens[t.n] = token(lit)
    207 	t.litHist[lit]++
    208 	t.n++
    209 }
    210 
    211 // from https://stackoverflow.com/a/28730362
    212 func mFastLog2(val float32) float32 {
    213 	ux := int32(math.Float32bits(val))
    214 	log2 := (float32)(((ux >> 23) & 255) - 128)
    215 	ux &= -0x7f800001
    216 	ux += 127 << 23
    217 	uval := math.Float32frombits(uint32(ux))
    218 	log2 += ((-0.34484843)*uval+2.02466578)*uval - 0.67487759
    219 	return log2
    220 }
    221 
    222 // EstimatedBits will return an minimum size estimated by an *optimal*
    223 // compression of the block.
    224 // The size of the block
    225 func (t *tokens) EstimatedBits() int {
    226 	shannon := float32(0)
    227 	bits := int(0)
    228 	nMatches := 0
    229 	total := int(t.n) + t.nFilled
    230 	if total > 0 {
    231 		invTotal := 1.0 / float32(total)
    232 		for _, v := range t.litHist[:] {
    233 			if v > 0 {
    234 				n := float32(v)
    235 				shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
    236 			}
    237 		}
    238 		// Just add 15 for EOB
    239 		shannon += 15
    240 		for i, v := range t.extraHist[1 : literalCount-256] {
    241 			if v > 0 {
    242 				n := float32(v)
    243 				shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
    244 				bits += int(lengthExtraBits[i&31]) * int(v)
    245 				nMatches += int(v)
    246 			}
    247 		}
    248 	}
    249 	if nMatches > 0 {
    250 		invTotal := 1.0 / float32(nMatches)
    251 		for i, v := range t.offHist[:offsetCodeCount] {
    252 			if v > 0 {
    253 				n := float32(v)
    254 				shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
    255 				bits += int(offsetExtraBits[i&31]) * int(v)
    256 			}
    257 		}
    258 	}
    259 	return int(shannon) + bits
    260 }
    261 
    262 // AddMatch adds a match to the tokens.
    263 // This function is very sensitive to inlining and right on the border.
    264 func (t *tokens) AddMatch(xlength uint32, xoffset uint32) {
    265 	if debugDeflate {
    266 		if xlength >= maxMatchLength+baseMatchLength {
    267 			panic(fmt.Errorf("invalid length: %v", xlength))
    268 		}
    269 		if xoffset >= maxMatchOffset+baseMatchOffset {
    270 			panic(fmt.Errorf("invalid offset: %v", xoffset))
    271 		}
    272 	}
    273 	oCode := offsetCode(xoffset)
    274 	xoffset |= oCode << 16
    275 
    276 	t.extraHist[lengthCodes1[uint8(xlength)]]++
    277 	t.offHist[oCode&31]++
    278 	t.tokens[t.n] = token(matchType | xlength<<lengthShift | xoffset)
    279 	t.n++
    280 }
    281 
    282 // AddMatchLong adds a match to the tokens, potentially longer than max match length.
    283 // Length should NOT have the base subtracted, only offset should.
    284 func (t *tokens) AddMatchLong(xlength int32, xoffset uint32) {
    285 	if debugDeflate {
    286 		if xoffset >= maxMatchOffset+baseMatchOffset {
    287 			panic(fmt.Errorf("invalid offset: %v", xoffset))
    288 		}
    289 	}
    290 	oc := offsetCode(xoffset)
    291 	xoffset |= oc << 16
    292 	for xlength > 0 {
    293 		xl := xlength
    294 		if xl > 258 {
    295 			// We need to have at least baseMatchLength left over for next loop.
    296 			if xl > 258+baseMatchLength {
    297 				xl = 258
    298 			} else {
    299 				xl = 258 - baseMatchLength
    300 			}
    301 		}
    302 		xlength -= xl
    303 		xl -= baseMatchLength
    304 		t.extraHist[lengthCodes1[uint8(xl)]]++
    305 		t.offHist[oc&31]++
    306 		t.tokens[t.n] = token(matchType | uint32(xl)<<lengthShift | xoffset)
    307 		t.n++
    308 	}
    309 }
    310 
    311 func (t *tokens) AddEOB() {
    312 	t.tokens[t.n] = token(endBlockMarker)
    313 	t.extraHist[0]++
    314 	t.n++
    315 }
    316 
    317 func (t *tokens) Slice() []token {
    318 	return t.tokens[:t.n]
    319 }
    320 
    321 // VarInt returns the tokens as varint encoded bytes.
    322 func (t *tokens) VarInt() []byte {
    323 	var b = make([]byte, binary.MaxVarintLen32*int(t.n))
    324 	var off int
    325 	for _, v := range t.tokens[:t.n] {
    326 		off += binary.PutUvarint(b[off:], uint64(v))
    327 	}
    328 	return b[:off]
    329 }
    330 
    331 // FromVarInt restores t to the varint encoded tokens provided.
    332 // Any data in t is removed.
    333 func (t *tokens) FromVarInt(b []byte) error {
    334 	var buf = bytes.NewReader(b)
    335 	var toks []token
    336 	for {
    337 		r, err := binary.ReadUvarint(buf)
    338 		if err == io.EOF {
    339 			break
    340 		}
    341 		if err != nil {
    342 			return err
    343 		}
    344 		toks = append(toks, token(r))
    345 	}
    346 	t.indexTokens(toks)
    347 	return nil
    348 }
    349 
    350 // Returns the type of a token
    351 func (t token) typ() uint32 { return uint32(t) & typeMask }
    352 
    353 // Returns the literal of a literal token
    354 func (t token) literal() uint8 { return uint8(t) }
    355 
    356 // Returns the extra offset of a match token
    357 func (t token) offset() uint32 { return uint32(t) & offsetMask }
    358 
    359 func (t token) length() uint8 { return uint8(t >> lengthShift) }
    360 
    361 // Convert length to code.
    362 func lengthCode(len uint8) uint8 { return lengthCodes[len] }
    363 
    364 // Returns the offset code corresponding to a specific offset
    365 func offsetCode(off uint32) uint32 {
    366 	if false {
    367 		if off < uint32(len(offsetCodes)) {
    368 			return offsetCodes[off&255]
    369 		} else if off>>7 < uint32(len(offsetCodes)) {
    370 			return offsetCodes[(off>>7)&255] + 14
    371 		} else {
    372 			return offsetCodes[(off>>14)&255] + 28
    373 		}
    374 	}
    375 	if off < uint32(len(offsetCodes)) {
    376 		return offsetCodes[uint8(off)]
    377 	}
    378 	return offsetCodes14[uint8(off>>7)]
    379 }