reader.go (4831B)
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

/*
Package zlib implements reading and writing of zlib format compressed data,
as specified in RFC 1950.

The implementation provides filters that uncompress during reading
and compress during writing. For example, to write compressed data
to a buffer:

	var b bytes.Buffer
	w := zlib.NewWriter(&b)
	w.Write([]byte("hello, world\n"))
	w.Close()

and to read that data back:

	r, err := zlib.NewReader(&b)
	io.Copy(os.Stdout, r)
	r.Close()
*/
package zlib

import (
	"bufio"
	"compress/zlib"
	"hash"
	"hash/adler32"
	"io"

	"github.com/klauspost/compress/flate"
)

// zlibDeflate is the value Reset requires in the low four bits of the first
// header byte; any other value is rejected with ErrHeader.
const zlibDeflate = 8

var (
	// ErrChecksum is returned when reading ZLIB data that has an invalid checksum.
	ErrChecksum = zlib.ErrChecksum
	// ErrDictionary is returned when reading ZLIB data that has an invalid dictionary.
	ErrDictionary = zlib.ErrDictionary
	// ErrHeader is returned when reading ZLIB data that has an invalid header.
	ErrHeader = zlib.ErrHeader
)

// reader is the ReadCloser (and Resetter) handed out by NewReader and
// NewReaderDict.
type reader struct {
	// r supplies the compressed bytes; the header and the trailing checksum
	// are read from it directly.
	r flate.Reader
	// decompressor is the flate stream layered on top of r.
	decompressor io.ReadCloser
	// digest accumulates the Adler-32 of the decompressed bytes returned by Read.
	digest hash.Hash32
	// err is sticky: once non-nil, Read returns it without reading further.
	err error
	// scratch holds header bytes and checksums while they are being parsed.
	scratch [4]byte
}

// Resetter resets a ReadCloser returned by NewReader or NewReaderDict
// to switch to a new underlying Reader. This permits reusing a ReadCloser
// instead of allocating a new one.
type Resetter interface {
	// Reset discards any buffered data and resets the Resetter as if it was
	// newly initialized with the given reader.
	Reset(r io.Reader, dict []byte) error
}

// NewReader creates a new ReadCloser.
// Reads from the returned ReadCloser read and decompress data from r.
// If r does not implement io.ByteReader, the decompressor may read more
// data than necessary from r.
68 // It is the caller's responsibility to call Close on the ReadCloser when done. 69 // 70 // The ReadCloser returned by NewReader also implements Resetter. 71 func NewReader(r io.Reader) (io.ReadCloser, error) { 72 return NewReaderDict(r, nil) 73 } 74 75 // NewReaderDict is like NewReader but uses a preset dictionary. 76 // NewReaderDict ignores the dictionary if the compressed data does not refer to it. 77 // If the compressed data refers to a different dictionary, NewReaderDict returns ErrDictionary. 78 // 79 // The ReadCloser returned by NewReaderDict also implements Resetter. 80 func NewReaderDict(r io.Reader, dict []byte) (io.ReadCloser, error) { 81 z := new(reader) 82 err := z.Reset(r, dict) 83 if err != nil { 84 return nil, err 85 } 86 return z, nil 87 } 88 89 func (z *reader) Read(p []byte) (int, error) { 90 if z.err != nil { 91 return 0, z.err 92 } 93 94 var n int 95 n, z.err = z.decompressor.Read(p) 96 z.digest.Write(p[0:n]) 97 if z.err != io.EOF { 98 // In the normal case we return here. 99 return n, z.err 100 } 101 102 // Finished file; check checksum. 103 if _, err := io.ReadFull(z.r, z.scratch[0:4]); err != nil { 104 if err == io.EOF { 105 err = io.ErrUnexpectedEOF 106 } 107 z.err = err 108 return n, z.err 109 } 110 // ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952). 111 checksum := uint32(z.scratch[0])<<24 | uint32(z.scratch[1])<<16 | uint32(z.scratch[2])<<8 | uint32(z.scratch[3]) 112 if checksum != z.digest.Sum32() { 113 z.err = ErrChecksum 114 return n, z.err 115 } 116 return n, io.EOF 117 } 118 119 // Calling Close does not close the wrapped io.Reader originally passed to NewReader. 120 // In order for the ZLIB checksum to be verified, the reader must be 121 // fully consumed until the io.EOF. 
122 func (z *reader) Close() error { 123 if z.err != nil && z.err != io.EOF { 124 return z.err 125 } 126 z.err = z.decompressor.Close() 127 return z.err 128 } 129 130 func (z *reader) Reset(r io.Reader, dict []byte) error { 131 *z = reader{decompressor: z.decompressor, digest: z.digest} 132 if fr, ok := r.(flate.Reader); ok { 133 z.r = fr 134 } else { 135 z.r = bufio.NewReader(r) 136 } 137 138 // Read the header (RFC 1950 section 2.2.). 139 _, z.err = io.ReadFull(z.r, z.scratch[0:2]) 140 if z.err != nil { 141 if z.err == io.EOF { 142 z.err = io.ErrUnexpectedEOF 143 } 144 return z.err 145 } 146 h := uint(z.scratch[0])<<8 | uint(z.scratch[1]) 147 if (z.scratch[0]&0x0f != zlibDeflate) || (h%31 != 0) { 148 z.err = ErrHeader 149 return z.err 150 } 151 haveDict := z.scratch[1]&0x20 != 0 152 if haveDict { 153 _, z.err = io.ReadFull(z.r, z.scratch[0:4]) 154 if z.err != nil { 155 if z.err == io.EOF { 156 z.err = io.ErrUnexpectedEOF 157 } 158 return z.err 159 } 160 checksum := uint32(z.scratch[0])<<24 | uint32(z.scratch[1])<<16 | uint32(z.scratch[2])<<8 | uint32(z.scratch[3]) 161 if checksum != adler32.Checksum(dict) { 162 z.err = ErrDictionary 163 return z.err 164 } 165 } 166 167 if z.decompressor == nil { 168 if haveDict { 169 z.decompressor = flate.NewReaderDict(z.r, dict) 170 } else { 171 z.decompressor = flate.NewReader(z.r) 172 } 173 } else { 174 z.decompressor.(flate.Resetter).Reset(z.r, dict) 175 } 176 177 if z.digest != nil { 178 z.digest.Reset() 179 } else { 180 z.digest = adler32.New() 181 } 182 return nil 183 }