level3.go (6322B)
1 package flate 2 3 import "fmt" 4 5 // fastEncL3 6 type fastEncL3 struct { 7 fastGen 8 table [1 << 16]tableEntryPrev 9 } 10 11 // Encode uses a similar algorithm to level 2, will check up to two candidates. 12 func (e *fastEncL3) Encode(dst *tokens, src []byte) { 13 const ( 14 inputMargin = 12 - 1 15 minNonLiteralBlockSize = 1 + 1 + inputMargin 16 tableBits = 16 17 tableSize = 1 << tableBits 18 hashBytes = 5 19 ) 20 21 if debugDeflate && e.cur < 0 { 22 panic(fmt.Sprint("e.cur < 0: ", e.cur)) 23 } 24 25 // Protect against e.cur wraparound. 26 for e.cur >= bufferReset { 27 if len(e.hist) == 0 { 28 for i := range e.table[:] { 29 e.table[i] = tableEntryPrev{} 30 } 31 e.cur = maxMatchOffset 32 break 33 } 34 // Shift down everything in the table that isn't already too far away. 35 minOff := e.cur + int32(len(e.hist)) - maxMatchOffset 36 for i := range e.table[:] { 37 v := e.table[i] 38 if v.Cur.offset <= minOff { 39 v.Cur.offset = 0 40 } else { 41 v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset 42 } 43 if v.Prev.offset <= minOff { 44 v.Prev.offset = 0 45 } else { 46 v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset 47 } 48 e.table[i] = v 49 } 50 e.cur = maxMatchOffset 51 } 52 53 s := e.addBlock(src) 54 55 // Skip if too small. 56 if len(src) < minNonLiteralBlockSize { 57 // We do not fill the token table. 58 // This will be picked up by caller. 59 dst.n = uint16(len(src)) 60 return 61 } 62 63 // Override src 64 src = e.hist 65 nextEmit := s 66 67 // sLimit is when to stop looking for offset/length copies. The inputMargin 68 // lets us use a fast path for emitLiteral in the main loop, while we are 69 // looking for copies. 70 sLimit := int32(len(src) - inputMargin) 71 72 // nextEmit is where in src the next emitLiteral should start from. 73 cv := load6432(src, s) 74 for { 75 const skipLog = 7 76 nextS := s 77 var candidate tableEntry 78 for { 79 nextHash := hashLen(cv, tableBits, hashBytes) 80 s = nextS 81 nextS = s + 1 + (s-nextEmit)>>skipLog 82 if nextS > sLimit { 83 goto emitRemainder 84 } 85 candidates := e.table[nextHash] 86 now := load6432(src, nextS) 87 88 // Safe offset distance until s + 4... 89 minOffset := e.cur + s - (maxMatchOffset - 4) 90 e.table[nextHash] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur}} 91 92 // Check both candidates 93 candidate = candidates.Cur 94 if candidate.offset < minOffset { 95 cv = now 96 // Previous will also be invalid, we have nothing. 97 continue 98 } 99 100 if uint32(cv) == load3232(src, candidate.offset-e.cur) { 101 if candidates.Prev.offset < minOffset || uint32(cv) != load3232(src, candidates.Prev.offset-e.cur) { 102 break 103 } 104 // Both match and are valid, pick longest. 105 offset := s - (candidate.offset - e.cur) 106 o2 := s - (candidates.Prev.offset - e.cur) 107 l1, l2 := matchLen(src[s+4:], src[s-offset+4:]), matchLen(src[s+4:], src[s-o2+4:]) 108 if l2 > l1 { 109 candidate = candidates.Prev 110 } 111 break 112 } else { 113 // We only check if value mismatches. 114 // Offset will always be invalid in other cases. 115 candidate = candidates.Prev 116 if candidate.offset > minOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) { 117 break 118 } 119 } 120 cv = now 121 } 122 123 // Call emitCopy, and then see if another emitCopy could be our next 124 // move. Repeat until we find no match for the input immediately after 125 // what was consumed by the last emitCopy call. 126 // 127 // If we exit this loop normally then we need to call emitLiteral next, 128 // though we don't yet know how big the literal will be. We handle that 129 // by proceeding to the next iteration of the main loop. We also can 130 // exit this loop via goto if we get close to exhausting the input. 131 for { 132 // Invariant: we have a 4-byte match at s, and no need to emit any 133 // literal bytes prior to s. 134 135 // Extend the 4-byte match as long as possible. 136 // 137 t := candidate.offset - e.cur 138 l := e.matchlenLong(s+4, t+4, src) + 4 139 140 // Extend backwards 141 for t > 0 && s > nextEmit && src[t-1] == src[s-1] { 142 s-- 143 t-- 144 l++ 145 } 146 if nextEmit < s { 147 if false { 148 emitLiteral(dst, src[nextEmit:s]) 149 } else { 150 for _, v := range src[nextEmit:s] { 151 dst.tokens[dst.n] = token(v) 152 dst.litHist[v]++ 153 dst.n++ 154 } 155 } 156 } 157 158 dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) 159 s += l 160 nextEmit = s 161 if nextS >= s { 162 s = nextS + 1 163 } 164 165 if s >= sLimit { 166 t += l 167 // Index first pair after match end. 168 if int(t+8) < len(src) && t > 0 { 169 cv = load6432(src, t) 170 nextHash := hashLen(cv, tableBits, hashBytes) 171 e.table[nextHash] = tableEntryPrev{ 172 Prev: e.table[nextHash].Cur, 173 Cur: tableEntry{offset: e.cur + t}, 174 } 175 } 176 goto emitRemainder 177 } 178 179 // Store every 5th hash in-between. 180 for i := s - l + 2; i < s-5; i += 6 { 181 nextHash := hashLen(load6432(src, i), tableBits, hashBytes) 182 e.table[nextHash] = tableEntryPrev{ 183 Prev: e.table[nextHash].Cur, 184 Cur: tableEntry{offset: e.cur + i}} 185 } 186 // We could immediately start working at s now, but to improve 187 // compression we first update the hash table at s-2 to s. 188 x := load6432(src, s-2) 189 prevHash := hashLen(x, tableBits, hashBytes) 190 191 e.table[prevHash] = tableEntryPrev{ 192 Prev: e.table[prevHash].Cur, 193 Cur: tableEntry{offset: e.cur + s - 2}, 194 } 195 x >>= 8 196 prevHash = hashLen(x, tableBits, hashBytes) 197 198 e.table[prevHash] = tableEntryPrev{ 199 Prev: e.table[prevHash].Cur, 200 Cur: tableEntry{offset: e.cur + s - 1}, 201 } 202 x >>= 8 203 currHash := hashLen(x, tableBits, hashBytes) 204 candidates := e.table[currHash] 205 cv = x 206 e.table[currHash] = tableEntryPrev{ 207 Prev: candidates.Cur, 208 Cur: tableEntry{offset: s + e.cur}, 209 } 210 211 // Check both candidates 212 candidate = candidates.Cur 213 minOffset := e.cur + s - (maxMatchOffset - 4) 214 215 if candidate.offset > minOffset { 216 if uint32(cv) == load3232(src, candidate.offset-e.cur) { 217 // Found a match... 218 continue 219 } 220 candidate = candidates.Prev 221 if candidate.offset > minOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) { 222 // Match at prev... 223 continue 224 } 225 } 226 cv = x >> 8 227 s++ 228 break 229 } 230 } 231 232 emitRemainder: 233 if int(nextEmit) < len(src) { 234 // If nothing was added, don't encode literals. 235 if dst.n == 0 { 236 return 237 } 238 239 emitLiteral(dst, src[nextEmit:]) 240 } 241 }