gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

reader.go (7893B)


      1 // Copyright 2011 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // Package lzw implements the Lempel-Ziv-Welch compressed data format,
      6 // described in T. A. Welch, “A Technique for High-Performance Data
      7 // Compression”, Computer, 17(6) (June 1984), pp 8-19.
      8 //
      9 // In particular, it implements LZW as used by the TIFF file format, including
     10 // an "off by one" algorithmic difference when compared to standard LZW.
     11 package lzw // import "golang.org/x/image/tiff/lzw"
     12 
     13 /*
     14 This file was branched from src/pkg/compress/lzw/reader.go in the
     15 standard library. Differences from the original are marked with "NOTE".
     16 
     17 The tif_lzw.c file in the libtiff C library has this comment:
     18 
     19 ----
     20 The 5.0 spec describes a different algorithm than Aldus
     21 implements. Specifically, Aldus does code length transitions
     22 one code earlier than should be done (for real LZW).
     23 Earlier versions of this library implemented the correct
     24 LZW algorithm, but emitted codes in a bit order opposite
     25 to the TIFF spec. Thus, to maintain compatibility w/ Aldus
     26 we interpret MSB-LSB ordered codes to be images written w/
     27 old versions of this library, but otherwise adhere to the
     28 Aldus "off by one" algorithm.
     29 ----
     30 
     31 The Go code doesn't read (invalid) TIFF files written by old versions of
     32 libtiff, but the LZW algorithm in this package still differs from the one in
     33 Go's standard package library to accommodate this "off by one" in valid TIFFs.
     34 */
     35 
     36 import (
     37 	"bufio"
     38 	"errors"
     39 	"fmt"
     40 	"io"
     41 )
     42 
     43 // Order specifies the bit ordering in an LZW data stream.
     44 type Order int
     45 
     46 const (
     47 	// LSB means Least Significant Bits first, as used in the GIF file format.
     48 	LSB Order = iota
     49 	// MSB means Most Significant Bits first, as used in the TIFF and PDF
     50 	// file formats.
     51 	MSB
     52 )
     53 
     54 const (
     55 	maxWidth           = 12
     56 	decoderInvalidCode = 0xffff
     57 	flushBuffer        = 1 << maxWidth
     58 )
     59 
     60 // decoder is the state from which the readXxx method converts a byte
     61 // stream into a code stream.
     62 type decoder struct {
     63 	r        io.ByteReader
     64 	bits     uint32
     65 	nBits    uint
     66 	width    uint
     67 	read     func(*decoder) (uint16, error) // readLSB or readMSB
     68 	litWidth int                            // width in bits of literal codes
     69 	err      error
     70 
     71 	// The first 1<<litWidth codes are literal codes.
     72 	// The next two codes mean clear and EOF.
     73 	// Other valid codes are in the range [lo, hi] where lo := clear + 2,
     74 	// with the upper bound incrementing on each code seen.
     75 	// overflow is the code at which hi overflows the code width. NOTE: TIFF's LZW is "off by one".
     76 	// last is the most recently seen code, or decoderInvalidCode.
     77 	clear, eof, hi, overflow, last uint16
     78 
     79 	// Each code c in [lo, hi] expands to two or more bytes. For c != hi:
     80 	//   suffix[c] is the last of these bytes.
     81 	//   prefix[c] is the code for all but the last byte.
     82 	//   This code can either be a literal code or another code in [lo, c).
     83 	// The c == hi case is a special case.
     84 	suffix [1 << maxWidth]uint8
     85 	prefix [1 << maxWidth]uint16
     86 
     87 	// output is the temporary output buffer.
     88 	// Literal codes are accumulated from the start of the buffer.
     89 	// Non-literal codes decode to a sequence of suffixes that are first
     90 	// written right-to-left from the end of the buffer before being copied
     91 	// to the start of the buffer.
     92 	// It is flushed when it contains >= 1<<maxWidth bytes,
     93 	// so that there is always room to decode an entire code.
     94 	output [2 * 1 << maxWidth]byte
     95 	o      int    // write index into output
     96 	toRead []byte // bytes to return from Read
     97 }
     98 
     99 // readLSB returns the next code for "Least Significant Bits first" data.
    100 func (d *decoder) readLSB() (uint16, error) {
    101 	for d.nBits < d.width {
    102 		x, err := d.r.ReadByte()
    103 		if err != nil {
    104 			return 0, err
    105 		}
    106 		d.bits |= uint32(x) << d.nBits
    107 		d.nBits += 8
    108 	}
    109 	code := uint16(d.bits & (1<<d.width - 1))
    110 	d.bits >>= d.width
    111 	d.nBits -= d.width
    112 	return code, nil
    113 }
    114 
    115 // readMSB returns the next code for "Most Significant Bits first" data.
    116 func (d *decoder) readMSB() (uint16, error) {
    117 	for d.nBits < d.width {
    118 		x, err := d.r.ReadByte()
    119 		if err != nil {
    120 			return 0, err
    121 		}
    122 		d.bits |= uint32(x) << (24 - d.nBits)
    123 		d.nBits += 8
    124 	}
    125 	code := uint16(d.bits >> (32 - d.width))
    126 	d.bits <<= d.width
    127 	d.nBits -= d.width
    128 	return code, nil
    129 }
    130 
    131 func (d *decoder) Read(b []byte) (int, error) {
    132 	for {
    133 		if len(d.toRead) > 0 {
    134 			n := copy(b, d.toRead)
    135 			d.toRead = d.toRead[n:]
    136 			return n, nil
    137 		}
    138 		if d.err != nil {
    139 			return 0, d.err
    140 		}
    141 		d.decode()
    142 	}
    143 }
    144 
    145 // decode decompresses bytes from r and leaves them in d.toRead.
    146 // read specifies how to decode bytes into codes.
    147 // litWidth is the width in bits of literal codes.
    148 func (d *decoder) decode() {
    149 	// Loop over the code stream, converting codes into decompressed bytes.
    150 loop:
    151 	for {
    152 		code, err := d.read(d)
    153 		if err != nil {
    154 			if err == io.EOF {
    155 				err = io.ErrUnexpectedEOF
    156 			}
    157 			d.err = err
    158 			break
    159 		}
    160 		switch {
    161 		case code < d.clear:
    162 			// We have a literal code.
    163 			d.output[d.o] = uint8(code)
    164 			d.o++
    165 			if d.last != decoderInvalidCode {
    166 				// Save what the hi code expands to.
    167 				d.suffix[d.hi] = uint8(code)
    168 				d.prefix[d.hi] = d.last
    169 			}
    170 		case code == d.clear:
    171 			d.width = 1 + uint(d.litWidth)
    172 			d.hi = d.eof
    173 			d.overflow = 1 << d.width
    174 			d.last = decoderInvalidCode
    175 			continue
    176 		case code == d.eof:
    177 			d.err = io.EOF
    178 			break loop
    179 		case code <= d.hi:
    180 			c, i := code, len(d.output)-1
    181 			if code == d.hi && d.last != decoderInvalidCode {
    182 				// code == hi is a special case which expands to the last expansion
    183 				// followed by the head of the last expansion. To find the head, we walk
    184 				// the prefix chain until we find a literal code.
    185 				c = d.last
    186 				for c >= d.clear {
    187 					c = d.prefix[c]
    188 				}
    189 				d.output[i] = uint8(c)
    190 				i--
    191 				c = d.last
    192 			}
    193 			// Copy the suffix chain into output and then write that to w.
    194 			for c >= d.clear {
    195 				d.output[i] = d.suffix[c]
    196 				i--
    197 				c = d.prefix[c]
    198 			}
    199 			d.output[i] = uint8(c)
    200 			d.o += copy(d.output[d.o:], d.output[i:])
    201 			if d.last != decoderInvalidCode {
    202 				// Save what the hi code expands to.
    203 				d.suffix[d.hi] = uint8(c)
    204 				d.prefix[d.hi] = d.last
    205 			}
    206 		default:
    207 			d.err = errors.New("lzw: invalid code")
    208 			break loop
    209 		}
    210 		d.last, d.hi = code, d.hi+1
    211 		if d.hi+1 >= d.overflow { // NOTE: the "+1" is where TIFF's LZW differs from the standard algorithm.
    212 			if d.width == maxWidth {
    213 				d.last = decoderInvalidCode
    214 			} else {
    215 				d.width++
    216 				d.overflow <<= 1
    217 			}
    218 		}
    219 		if d.o >= flushBuffer {
    220 			break
    221 		}
    222 	}
    223 	// Flush pending output.
    224 	d.toRead = d.output[:d.o]
    225 	d.o = 0
    226 }
    227 
    228 var errClosed = errors.New("lzw: reader/writer is closed")
    229 
    230 func (d *decoder) Close() error {
    231 	d.err = errClosed // in case any Reads come along
    232 	return nil
    233 }
    234 
    235 // NewReader creates a new io.ReadCloser.
    236 // Reads from the returned io.ReadCloser read and decompress data from r.
    237 // If r does not also implement io.ByteReader,
    238 // the decompressor may read more data than necessary from r.
    239 // It is the caller's responsibility to call Close on the ReadCloser when
    240 // finished reading.
    241 // The number of bits to use for literal codes, litWidth, must be in the
    242 // range [2,8] and is typically 8. It must equal the litWidth
    243 // used during compression.
    244 func NewReader(r io.Reader, order Order, litWidth int) io.ReadCloser {
    245 	d := new(decoder)
    246 	switch order {
    247 	case LSB:
    248 		d.read = (*decoder).readLSB
    249 	case MSB:
    250 		d.read = (*decoder).readMSB
    251 	default:
    252 		d.err = errors.New("lzw: unknown order")
    253 		return d
    254 	}
    255 	if litWidth < 2 || 8 < litWidth {
    256 		d.err = fmt.Errorf("lzw: litWidth %d out of range", litWidth)
    257 		return d
    258 	}
    259 	if br, ok := r.(io.ByteReader); ok {
    260 		d.r = br
    261 	} else {
    262 		d.r = bufio.NewReader(r)
    263 	}
    264 	d.litWidth = litWidth
    265 	d.width = 1 + uint(litWidth)
    266 	d.clear = uint16(1) << uint(litWidth)
    267 	d.eof, d.hi = d.clear+1, d.clear+1
    268 	d.overflow = uint16(1) << d.width
    269 	d.last = decoderInvalidCode
    270 
    271 	return d
    272 }