decode.go (12251B)
1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package vp8 implements a decoder for the VP8 lossy image format. 6 // 7 // The VP8 specification is RFC 6386. 8 package vp8 // import "golang.org/x/image/vp8" 9 10 // This file implements the top-level decoding algorithm. 11 12 import ( 13 "errors" 14 "image" 15 "io" 16 ) 17 18 // limitReader wraps an io.Reader to read at most n bytes from it. 19 type limitReader struct { 20 r io.Reader 21 n int 22 } 23 24 // ReadFull reads exactly len(p) bytes into p. 25 func (r *limitReader) ReadFull(p []byte) error { 26 if len(p) > r.n { 27 return io.ErrUnexpectedEOF 28 } 29 n, err := io.ReadFull(r.r, p) 30 r.n -= n 31 return err 32 } 33 34 // FrameHeader is a frame header, as specified in section 9.1. 35 type FrameHeader struct { 36 KeyFrame bool 37 VersionNumber uint8 38 ShowFrame bool 39 FirstPartitionLen uint32 40 Width int 41 Height int 42 XScale uint8 43 YScale uint8 44 } 45 46 const ( 47 nSegment = 4 48 nSegmentProb = 3 49 ) 50 51 // segmentHeader holds segment-related header information. 52 type segmentHeader struct { 53 useSegment bool 54 updateMap bool 55 relativeDelta bool 56 quantizer [nSegment]int8 57 filterStrength [nSegment]int8 58 prob [nSegmentProb]uint8 59 } 60 61 const ( 62 nRefLFDelta = 4 63 nModeLFDelta = 4 64 ) 65 66 // filterHeader holds filter-related header information. 67 type filterHeader struct { 68 simple bool 69 level int8 70 sharpness uint8 71 useLFDelta bool 72 refLFDelta [nRefLFDelta]int8 73 modeLFDelta [nModeLFDelta]int8 74 perSegmentLevel [nSegment]int8 75 } 76 77 // mb is the per-macroblock decode state. A decoder maintains mbw+1 of these 78 // as it is decoding macroblocks left-to-right and top-to-bottom: mbw for the 79 // macroblocks in the row above, and one for the macroblock to the left. 80 type mb struct { 81 // pred is the predictor mode for the 4 bottom or right 4x4 luma regions. 82 pred [4]uint8 83 // nzMask is a mask of 8 bits: 4 for the bottom or right 4x4 luma regions, 84 // and 2 + 2 for the bottom or right 4x4 chroma regions. A 1 bit indicates 85 // that region has non-zero coefficients. 86 nzMask uint8 87 // nzY16 is a 0/1 value that is 1 if the macroblock used Y16 prediction and 88 // had non-zero coefficients. 89 nzY16 uint8 90 } 91 92 // Decoder decodes VP8 bitstreams into frames. Decoding one frame consists of 93 // calling Init, DecodeFrameHeader and then DecodeFrame in that order. 94 // A Decoder can be re-used to decode multiple frames. 95 type Decoder struct { 96 // r is the input bitsream. 97 r limitReader 98 // scratch is a scratch buffer. 99 scratch [8]byte 100 // img is the YCbCr image to decode into. 101 img *image.YCbCr 102 // mbw and mbh are the number of 16x16 macroblocks wide and high the image is. 103 mbw, mbh int 104 // frameHeader is the frame header. When decoding multiple frames, 105 // frames that aren't key frames will inherit the Width, Height, 106 // XScale and YScale of the most recent key frame. 107 frameHeader FrameHeader 108 // Other headers. 109 segmentHeader segmentHeader 110 filterHeader filterHeader 111 // The image data is divided into a number of independent partitions. 112 // There is 1 "first partition" and between 1 and 8 "other partitions" 113 // for coefficient data. 114 fp partition 115 op [8]partition 116 nOP int 117 // Quantization factors. 118 quant [nSegment]quant 119 // DCT/WHT coefficient decoding probabilities. 120 tokenProb [nPlane][nBand][nContext][nProb]uint8 121 useSkipProb bool 122 skipProb uint8 123 // Loop filter parameters. 124 filterParams [nSegment][2]filterParam 125 perMBFilterParams []filterParam 126 127 // The eight fields below relate to the current macroblock being decoded. 128 // 129 // Segment-based adjustments. 130 segment int 131 // Per-macroblock state for the macroblock immediately left of and those 132 // macroblocks immediately above the current macroblock. 133 leftMB mb 134 upMB []mb 135 // Bitmasks for which 4x4 regions of coeff contain non-zero coefficients. 136 nzDCMask, nzACMask uint32 137 // Predictor modes. 138 usePredY16 bool // The libwebp C code calls this !is_i4x4_. 139 predY16 uint8 140 predC8 uint8 141 predY4 [4][4]uint8 142 143 // The two fields below form a workspace for reconstructing a macroblock. 144 // Their specific sizes are documented in reconstruct.go. 145 coeff [1*16*16 + 2*8*8 + 1*4*4]int16 146 ybr [1 + 16 + 1 + 8][32]uint8 147 } 148 149 // NewDecoder returns a new Decoder. 150 func NewDecoder() *Decoder { 151 return &Decoder{} 152 } 153 154 // Init initializes the decoder to read at most n bytes from r. 155 func (d *Decoder) Init(r io.Reader, n int) { 156 d.r = limitReader{r, n} 157 } 158 159 // DecodeFrameHeader decodes the frame header. 160 func (d *Decoder) DecodeFrameHeader() (fh FrameHeader, err error) { 161 // All frame headers are at least 3 bytes long. 162 b := d.scratch[:3] 163 if err = d.r.ReadFull(b); err != nil { 164 return 165 } 166 d.frameHeader.KeyFrame = (b[0] & 1) == 0 167 d.frameHeader.VersionNumber = (b[0] >> 1) & 7 168 d.frameHeader.ShowFrame = (b[0]>>4)&1 == 1 169 d.frameHeader.FirstPartitionLen = uint32(b[0])>>5 | uint32(b[1])<<3 | uint32(b[2])<<11 170 if !d.frameHeader.KeyFrame { 171 return d.frameHeader, nil 172 } 173 // Frame headers for key frames are an additional 7 bytes long. 174 b = d.scratch[:7] 175 if err = d.r.ReadFull(b); err != nil { 176 return 177 } 178 // Check the magic sync code. 179 if b[0] != 0x9d || b[1] != 0x01 || b[2] != 0x2a { 180 err = errors.New("vp8: invalid format") 181 return 182 } 183 d.frameHeader.Width = int(b[4]&0x3f)<<8 | int(b[3]) 184 d.frameHeader.Height = int(b[6]&0x3f)<<8 | int(b[5]) 185 d.frameHeader.XScale = b[4] >> 6 186 d.frameHeader.YScale = b[6] >> 6 187 d.mbw = (d.frameHeader.Width + 0x0f) >> 4 188 d.mbh = (d.frameHeader.Height + 0x0f) >> 4 189 d.segmentHeader = segmentHeader{ 190 prob: [3]uint8{0xff, 0xff, 0xff}, 191 } 192 d.tokenProb = defaultTokenProb 193 d.segment = 0 194 return d.frameHeader, nil 195 } 196 197 // ensureImg ensures that d.img is large enough to hold the decoded frame. 198 func (d *Decoder) ensureImg() { 199 if d.img != nil { 200 p0, p1 := d.img.Rect.Min, d.img.Rect.Max 201 if p0.X == 0 && p0.Y == 0 && p1.X >= 16*d.mbw && p1.Y >= 16*d.mbh { 202 return 203 } 204 } 205 m := image.NewYCbCr(image.Rect(0, 0, 16*d.mbw, 16*d.mbh), image.YCbCrSubsampleRatio420) 206 d.img = m.SubImage(image.Rect(0, 0, d.frameHeader.Width, d.frameHeader.Height)).(*image.YCbCr) 207 d.perMBFilterParams = make([]filterParam, d.mbw*d.mbh) 208 d.upMB = make([]mb, d.mbw) 209 } 210 211 // parseSegmentHeader parses the segment header, as specified in section 9.3. 212 func (d *Decoder) parseSegmentHeader() { 213 d.segmentHeader.useSegment = d.fp.readBit(uniformProb) 214 if !d.segmentHeader.useSegment { 215 d.segmentHeader.updateMap = false 216 return 217 } 218 d.segmentHeader.updateMap = d.fp.readBit(uniformProb) 219 if d.fp.readBit(uniformProb) { 220 d.segmentHeader.relativeDelta = !d.fp.readBit(uniformProb) 221 for i := range d.segmentHeader.quantizer { 222 d.segmentHeader.quantizer[i] = int8(d.fp.readOptionalInt(uniformProb, 7)) 223 } 224 for i := range d.segmentHeader.filterStrength { 225 d.segmentHeader.filterStrength[i] = int8(d.fp.readOptionalInt(uniformProb, 6)) 226 } 227 } 228 if !d.segmentHeader.updateMap { 229 return 230 } 231 for i := range d.segmentHeader.prob { 232 if d.fp.readBit(uniformProb) { 233 d.segmentHeader.prob[i] = uint8(d.fp.readUint(uniformProb, 8)) 234 } else { 235 d.segmentHeader.prob[i] = 0xff 236 } 237 } 238 } 239 240 // parseFilterHeader parses the filter header, as specified in section 9.4. 241 func (d *Decoder) parseFilterHeader() { 242 d.filterHeader.simple = d.fp.readBit(uniformProb) 243 d.filterHeader.level = int8(d.fp.readUint(uniformProb, 6)) 244 d.filterHeader.sharpness = uint8(d.fp.readUint(uniformProb, 3)) 245 d.filterHeader.useLFDelta = d.fp.readBit(uniformProb) 246 if d.filterHeader.useLFDelta && d.fp.readBit(uniformProb) { 247 for i := range d.filterHeader.refLFDelta { 248 d.filterHeader.refLFDelta[i] = int8(d.fp.readOptionalInt(uniformProb, 6)) 249 } 250 for i := range d.filterHeader.modeLFDelta { 251 d.filterHeader.modeLFDelta[i] = int8(d.fp.readOptionalInt(uniformProb, 6)) 252 } 253 } 254 if d.filterHeader.level == 0 { 255 return 256 } 257 if d.segmentHeader.useSegment { 258 for i := range d.filterHeader.perSegmentLevel { 259 strength := d.segmentHeader.filterStrength[i] 260 if d.segmentHeader.relativeDelta { 261 strength += d.filterHeader.level 262 } 263 d.filterHeader.perSegmentLevel[i] = strength 264 } 265 } else { 266 d.filterHeader.perSegmentLevel[0] = d.filterHeader.level 267 } 268 d.computeFilterParams() 269 } 270 271 // parseOtherPartitions parses the other partitions, as specified in section 9.5. 272 func (d *Decoder) parseOtherPartitions() error { 273 const maxNOP = 1 << 3 274 var partLens [maxNOP]int 275 d.nOP = 1 << d.fp.readUint(uniformProb, 2) 276 277 // The final partition length is implied by the remaining chunk data 278 // (d.r.n) and the other d.nOP-1 partition lengths. Those d.nOP-1 partition 279 // lengths are stored as 24-bit uints, i.e. up to 16 MiB per partition. 280 n := 3 * (d.nOP - 1) 281 partLens[d.nOP-1] = d.r.n - n 282 if partLens[d.nOP-1] < 0 { 283 return io.ErrUnexpectedEOF 284 } 285 if n > 0 { 286 buf := make([]byte, n) 287 if err := d.r.ReadFull(buf); err != nil { 288 return err 289 } 290 for i := 0; i < d.nOP-1; i++ { 291 pl := int(buf[3*i+0]) | int(buf[3*i+1])<<8 | int(buf[3*i+2])<<16 292 if pl > partLens[d.nOP-1] { 293 return io.ErrUnexpectedEOF 294 } 295 partLens[i] = pl 296 partLens[d.nOP-1] -= pl 297 } 298 } 299 300 // We check if the final partition length can also fit into a 24-bit uint. 301 // Strictly speaking, this isn't part of the spec, but it guards against a 302 // malicious WEBP image that is too large to ReadFull the encoded DCT 303 // coefficients into memory, whether that's because the actual WEBP file is 304 // too large, or whether its RIFF metadata lists too large a chunk. 305 if 1<<24 <= partLens[d.nOP-1] { 306 return errors.New("vp8: too much data to decode") 307 } 308 309 buf := make([]byte, d.r.n) 310 if err := d.r.ReadFull(buf); err != nil { 311 return err 312 } 313 for i, pl := range partLens { 314 if i == d.nOP { 315 break 316 } 317 d.op[i].init(buf[:pl]) 318 buf = buf[pl:] 319 } 320 return nil 321 } 322 323 // parseOtherHeaders parses header information other than the frame header. 324 func (d *Decoder) parseOtherHeaders() error { 325 // Initialize and parse the first partition. 326 firstPartition := make([]byte, d.frameHeader.FirstPartitionLen) 327 if err := d.r.ReadFull(firstPartition); err != nil { 328 return err 329 } 330 d.fp.init(firstPartition) 331 if d.frameHeader.KeyFrame { 332 // Read and ignore the color space and pixel clamp values. They are 333 // specified in section 9.2, but are unimplemented. 334 d.fp.readBit(uniformProb) 335 d.fp.readBit(uniformProb) 336 } 337 d.parseSegmentHeader() 338 d.parseFilterHeader() 339 if err := d.parseOtherPartitions(); err != nil { 340 return err 341 } 342 d.parseQuant() 343 if !d.frameHeader.KeyFrame { 344 // Golden and AltRef frames are specified in section 9.7. 345 // TODO(nigeltao): implement. Note that they are only used for video, not still images. 346 return errors.New("vp8: Golden / AltRef frames are not implemented") 347 } 348 // Read and ignore the refreshLastFrameBuffer bit, specified in section 9.8. 349 // It applies only to video, and not still images. 350 d.fp.readBit(uniformProb) 351 d.parseTokenProb() 352 d.useSkipProb = d.fp.readBit(uniformProb) 353 if d.useSkipProb { 354 d.skipProb = uint8(d.fp.readUint(uniformProb, 8)) 355 } 356 if d.fp.unexpectedEOF { 357 return io.ErrUnexpectedEOF 358 } 359 return nil 360 } 361 362 // DecodeFrame decodes the frame and returns it as an YCbCr image. 363 // The image's contents are valid up until the next call to Decoder.Init. 364 func (d *Decoder) DecodeFrame() (*image.YCbCr, error) { 365 d.ensureImg() 366 if err := d.parseOtherHeaders(); err != nil { 367 return nil, err 368 } 369 // Reconstruct the rows. 370 for mbx := 0; mbx < d.mbw; mbx++ { 371 d.upMB[mbx] = mb{} 372 } 373 for mby := 0; mby < d.mbh; mby++ { 374 d.leftMB = mb{} 375 for mbx := 0; mbx < d.mbw; mbx++ { 376 skip := d.reconstruct(mbx, mby) 377 fs := d.filterParams[d.segment][btou(!d.usePredY16)] 378 fs.inner = fs.inner || !skip 379 d.perMBFilterParams[d.mbw*mby+mbx] = fs 380 } 381 } 382 if d.fp.unexpectedEOF { 383 return nil, io.ErrUnexpectedEOF 384 } 385 for i := 0; i < d.nOP; i++ { 386 if d.op[i].unexpectedEOF { 387 return nil, io.ErrUnexpectedEOF 388 } 389 } 390 // Apply the loop filter. 391 // 392 // Even if we are using per-segment levels, section 15 says that "loop 393 // filtering must be skipped entirely if loop_filter_level at either the 394 // frame header level or macroblock override level is 0". 395 if d.filterHeader.level != 0 { 396 if d.filterHeader.simple { 397 d.simpleFilter() 398 } else { 399 d.normalFilter() 400 } 401 } 402 return d.img, nil 403 }