gtsocial-umbx


level2.go (6015B)


package flate

import "fmt"

// fastEncL2 maintains the table for matches,
// and the previous byte block for level 2.
// This is the generic implementation.
type fastEncL2 struct {
	fastGen
	table [bTableSize]tableEntry
}

// Encode uses a similar algorithm to level 1, but is capable
// of matching across blocks, giving better compression at a small slowdown.
func (e *fastEncL2) Encode(dst *tokens, src []byte) {
	const (
		inputMargin            = 12 - 1
		minNonLiteralBlockSize = 1 + 1 + inputMargin
		hashBytes              = 5
	)
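	// Explanatory note: inputMargin keeps the unchecked 8-byte loads
	// (load6432) in the main loop within bounds near the end of src, and
	// hashBytes is how many low bytes of each loaded word feed hashLen.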

	if debugDeflate && e.cur < 0 {
		panic(fmt.Sprint("e.cur < 0: ", e.cur))
	}

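	// Table entries store position + e.cur, so offsets stay comparable
	// across blocks while e.cur keeps growing. The rebasing below keeps
	// that sum bounded: entries older than maxMatchOffset are dropped and
	// the rest are shifted so e.cur can restart at maxMatchOffset.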
	// Protect against e.cur wraparound.
	for e.cur >= bufferReset {
		if len(e.hist) == 0 {
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			e.cur = maxMatchOffset
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
		for i := range e.table[:] {
			v := e.table[i].offset
			if v <= minOff {
				v = 0
			} else {
				v = v - e.cur + maxMatchOffset
			}
			e.table[i].offset = v
		}
		e.cur = maxMatchOffset
	}

	s := e.addBlock(src)

	// This check isn't in the Snappy implementation, but there, the caller
	// instead of the callee handles this case.
	if len(src) < minNonLiteralBlockSize {
		// We do not fill the token table.
		// This will be picked up by the caller.
		dst.n = uint16(len(src))
		return
	}

	// Override src with the full history so matches can reach back across
	// block boundaries.
	src = e.hist

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := int32(len(src) - inputMargin)

	cv := load6432(src, s)
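	// cv caches the 8 bytes at s; hashLen hashes its low hashBytes bytes
	// to pick the table slot for that position.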
	for {
		// skipLog controls how quickly we start skipping ahead when no
		// matches have been found for a while.
		const skipLog = 5
		const doEvery = 2
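		// With doEvery = 2 and skipLog = 5, each pass probes two positions
		// and the stride grows by one byte per 32 unmatched bytes, e.g.
		// after 320 bytes without a match nextS already advances by 12.
		// The search thus accelerates on incompressible input.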

		nextS := s
		var candidate tableEntry
		for {
			nextHash := hashLen(cv, bTableBits, hashBytes)
			s = nextS
			nextS = s + doEvery + (s-nextEmit)>>skipLog
			if nextS > sLimit {
				goto emitRemainder
			}
			candidate = e.table[nextHash]
			now := load6432(src, nextS)
			e.table[nextHash] = tableEntry{offset: s + e.cur}
			nextHash = hashLen(now, bTableBits, hashBytes)

			offset := s - (candidate.offset - e.cur)
			if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
				e.table[nextHash] = tableEntry{offset: nextS + e.cur}
				break
			}

			// Do one right away...
			cv = now
			s = nextS
			nextS++
			candidate = e.table[nextHash]
			now >>= 8
			e.table[nextHash] = tableEntry{offset: s + e.cur}

			offset = s - (candidate.offset - e.cur)
			if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
				break
			}
			cv = now
		}

		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.

		// Call emitCopy, and then see if another emitCopy could be our next
		// move. Repeat until we find no match for the input immediately after
		// what was consumed by the last emitCopy call.
		//
		// If we exit this loop normally then we need to call emitLiteral next,
		// though we don't yet know how big the literal will be. We handle that
		// by proceeding to the next iteration of the main loop. We also can
		// exit this loop via goto if we get close to exhausting the input.
		for {
			// Invariant: we have a 4-byte match at s, and no need to emit any
			// literal bytes prior to s.

			// Extend the 4-byte match as long as possible.
			t := candidate.offset - e.cur
			l := e.matchlenLong(s+4, t+4, src) + 4
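			// The first 4 bytes are already known to match, so comparison
			// starts at s+4/t+4 and the 4 is added back to the length.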

			// Extend backwards
			for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
				s--
				t--
				l++
			}
			if nextEmit < s {
				// Inlined emitLiteral(dst, src[nextEmit:s]):
				for _, v := range src[nextEmit:s] {
					dst.tokens[dst.n] = token(v)
					dst.litHist[v]++
					dst.n++
				}
			}

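			// Emit the match: length l at distance s-t. Distances are
			// stored biased by baseMatchOffset, hence the subtraction.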
			dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
			s += l
			nextEmit = s
			if nextS >= s {
				// nextS was already indexed in the search loop above;
				// continue past it.
				s = nextS + 1
			}

			if s >= sLimit {
				// Index first pair after match end.
				if int(s+l+8) < len(src) {
					cv := load6432(src, s)
					e.table[hashLen(cv, bTableBits, hashBytes)] = tableEntry{offset: s + e.cur}
				}
				goto emitRemainder
			}

			// Store every second hash in-between, but offset by 1.
			for i := s - l + 2; i < s-5; i += 7 {
				x := load6432(src, i)
				nextHash := hashLen(x, bTableBits, hashBytes)
				e.table[nextHash] = tableEntry{offset: e.cur + i}
				// Skip one
				x >>= 16
				nextHash = hashLen(x, bTableBits, hashBytes)
				e.table[nextHash] = tableEntry{offset: e.cur + i + 2}
				// Skip one
				x >>= 16
				nextHash = hashLen(x, bTableBits, hashBytes)
				e.table[nextHash] = tableEntry{offset: e.cur + i + 4}
			}
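			// Each 8-byte load above indexes positions i, i+2 and i+4 out
			// of every 7, so roughly 3/7 of the match interior ends up in
			// the table at the cost of one load per stride.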

			// We could immediately start working at s now, but to improve
			// compression we first update the hash table at s-2 to s. If
			// another emitCopy is not our next move, also calculate nextHash
			// at s+1. At least on GOARCH=amd64, these three hash calculations
			// are faster as one load64 call (with some shifts) instead of
			// three load32 calls.
			x := load6432(src, s-2)
			o := e.cur + s - 2
			prevHash := hashLen(x, bTableBits, hashBytes)
			prevHash2 := hashLen(x>>8, bTableBits, hashBytes)
			e.table[prevHash] = tableEntry{offset: o}
			e.table[prevHash2] = tableEntry{offset: o + 1}
			currHash := hashLen(x>>16, bTableBits, hashBytes)
			candidate = e.table[currHash]
			e.table[currHash] = tableEntry{offset: o + 2}

			offset := s - (candidate.offset - e.cur)
			if offset > maxMatchOffset || uint32(x>>16) != load3232(src, candidate.offset-e.cur) {
				cv = x >> 24
				s++
				break
			}
		}
	}

emitRemainder:
	if int(nextEmit) < len(src) {
		// If nothing was added, don't encode literals; presumably the
		// caller then handles the whole block as incompressible.
		if dst.n == 0 {
			return
		}
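		// Emit everything between the last match and the end of the
		// block as literal bytes.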

		emitLiteral(dst, src[nextEmit:])
	}
}