dict_decoder.go (6221B)
1 // Copyright 2016 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package flate 6 7 // dictDecoder implements the LZ77 sliding dictionary as used in decompression. 8 // LZ77 decompresses data through sequences of two forms of commands: 9 // 10 // - Literal insertions: Runs of one or more symbols are inserted into the data 11 // stream as is. This is accomplished through the writeByte method for a 12 // single symbol, or combinations of writeSlice/writeMark for multiple symbols. 13 // Any valid stream must start with a literal insertion if no preset dictionary 14 // is used. 15 // 16 // - Backward copies: Runs of one or more symbols are copied from previously 17 // emitted data. Backward copies come as the tuple (dist, length) where dist 18 // determines how far back in the stream to copy from and length determines how 19 // many bytes to copy. Note that it is valid for the length to be greater than 20 // the distance. Since LZ77 uses forward copies, that situation is used to 21 // perform a form of run-length encoding on repeated runs of symbols. 22 // The writeCopy and tryWriteCopy are used to implement this command. 23 // 24 // For performance reasons, this implementation performs little to no sanity 25 // checks about the arguments. As such, the invariants documented for each 26 // method call must be respected. 27 type dictDecoder struct { 28 hist []byte // Sliding window history 29 30 // Invariant: 0 <= rdPos <= wrPos <= len(hist) 31 wrPos int // Current output position in buffer 32 rdPos int // Have emitted hist[:rdPos] already 33 full bool // Has a full window length been written yet? 34 } 35 36 // init initializes dictDecoder to have a sliding window dictionary of the given 37 // size. If a preset dict is provided, it will initialize the dictionary with 38 // the contents of dict. 39 func (dd *dictDecoder) init(size int, dict []byte) { 40 *dd = dictDecoder{hist: dd.hist} 41 42 if cap(dd.hist) < size { 43 dd.hist = make([]byte, size) 44 } 45 dd.hist = dd.hist[:size] 46 47 if len(dict) > len(dd.hist) { 48 dict = dict[len(dict)-len(dd.hist):] 49 } 50 dd.wrPos = copy(dd.hist, dict) 51 if dd.wrPos == len(dd.hist) { 52 dd.wrPos = 0 53 dd.full = true 54 } 55 dd.rdPos = dd.wrPos 56 } 57 58 // histSize reports the total amount of historical data in the dictionary. 59 func (dd *dictDecoder) histSize() int { 60 if dd.full { 61 return len(dd.hist) 62 } 63 return dd.wrPos 64 } 65 66 // availRead reports the number of bytes that can be flushed by readFlush. 67 func (dd *dictDecoder) availRead() int { 68 return dd.wrPos - dd.rdPos 69 } 70 71 // availWrite reports the available amount of output buffer space. 72 func (dd *dictDecoder) availWrite() int { 73 return len(dd.hist) - dd.wrPos 74 } 75 76 // writeSlice returns a slice of the available buffer to write data to. 77 // 78 // This invariant will be kept: len(s) <= availWrite() 79 func (dd *dictDecoder) writeSlice() []byte { 80 return dd.hist[dd.wrPos:] 81 } 82 83 // writeMark advances the writer pointer by cnt. 84 // 85 // This invariant must be kept: 0 <= cnt <= availWrite() 86 func (dd *dictDecoder) writeMark(cnt int) { 87 dd.wrPos += cnt 88 } 89 90 // writeByte writes a single byte to the dictionary. 91 // 92 // This invariant must be kept: 0 < availWrite() 93 func (dd *dictDecoder) writeByte(c byte) { 94 dd.hist[dd.wrPos] = c 95 dd.wrPos++ 96 } 97 98 // writeCopy copies a string at a given (dist, length) to the output. 99 // This returns the number of bytes copied and may be less than the requested 100 // length if the available space in the output buffer is too small. 101 // 102 // This invariant must be kept: 0 < dist <= histSize() 103 func (dd *dictDecoder) writeCopy(dist, length int) int { 104 dstBase := dd.wrPos 105 dstPos := dstBase 106 srcPos := dstPos - dist 107 endPos := dstPos + length 108 if endPos > len(dd.hist) { 109 endPos = len(dd.hist) 110 } 111 112 // Copy non-overlapping section after destination position. 113 // 114 // This section is non-overlapping in that the copy length for this section 115 // is always less than or equal to the backwards distance. This can occur 116 // if a distance refers to data that wraps-around in the buffer. 117 // Thus, a backwards copy is performed here; that is, the exact bytes in 118 // the source prior to the copy is placed in the destination. 119 if srcPos < 0 { 120 srcPos += len(dd.hist) 121 dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:]) 122 srcPos = 0 123 } 124 125 // Copy possibly overlapping section before destination position. 126 // 127 // This section can overlap if the copy length for this section is larger 128 // than the backwards distance. This is allowed by LZ77 so that repeated 129 // strings can be succinctly represented using (dist, length) pairs. 130 // Thus, a forwards copy is performed here; that is, the bytes copied is 131 // possibly dependent on the resulting bytes in the destination as the copy 132 // progresses along. This is functionally equivalent to the following: 133 // 134 // for i := 0; i < endPos-dstPos; i++ { 135 // dd.hist[dstPos+i] = dd.hist[srcPos+i] 136 // } 137 // dstPos = endPos 138 // 139 for dstPos < endPos { 140 dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos]) 141 } 142 143 dd.wrPos = dstPos 144 return dstPos - dstBase 145 } 146 147 // tryWriteCopy tries to copy a string at a given (distance, length) to the 148 // output. This specialized version is optimized for short distances. 149 // 150 // This method is designed to be inlined for performance reasons. 151 // 152 // This invariant must be kept: 0 < dist <= histSize() 153 func (dd *dictDecoder) tryWriteCopy(dist, length int) int { 154 dstPos := dd.wrPos 155 endPos := dstPos + length 156 if dstPos < dist || endPos > len(dd.hist) { 157 return 0 158 } 159 dstBase := dstPos 160 srcPos := dstPos - dist 161 162 // Copy possibly overlapping section before destination position. 163 loop: 164 dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos]) 165 if dstPos < endPos { 166 goto loop // Avoid for-loop so that this function can be inlined 167 } 168 169 dd.wrPos = dstPos 170 return dstPos - dstBase 171 } 172 173 // readFlush returns a slice of the historical buffer that is ready to be 174 // emitted to the user. The data returned by readFlush must be fully consumed 175 // before calling any other dictDecoder methods. 176 func (dd *dictDecoder) readFlush() []byte { 177 toRead := dd.hist[dd.rdPos:dd.wrPos] 178 dd.rdPos = dd.wrPos 179 if dd.wrPos == len(dd.hist) { 180 dd.wrPos, dd.rdPos = 0, 0 181 dd.full = true 182 } 183 return toRead 184 }