gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

decode.go (12251B)


      1 // Copyright 2011 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // Package vp8 implements a decoder for the VP8 lossy image format.
      6 //
      7 // The VP8 specification is RFC 6386.
      8 package vp8 // import "golang.org/x/image/vp8"
      9 
     10 // This file implements the top-level decoding algorithm.
     11 
     12 import (
     13 	"errors"
     14 	"image"
     15 	"io"
     16 )
     17 
     18 // limitReader wraps an io.Reader to read at most n bytes from it.
     19 type limitReader struct {
     20 	r io.Reader
     21 	n int
     22 }
     23 
     24 // ReadFull reads exactly len(p) bytes into p.
     25 func (r *limitReader) ReadFull(p []byte) error {
     26 	if len(p) > r.n {
     27 		return io.ErrUnexpectedEOF
     28 	}
     29 	n, err := io.ReadFull(r.r, p)
     30 	r.n -= n
     31 	return err
     32 }
     33 
     34 // FrameHeader is a frame header, as specified in section 9.1.
     35 type FrameHeader struct {
     36 	KeyFrame          bool
     37 	VersionNumber     uint8
     38 	ShowFrame         bool
     39 	FirstPartitionLen uint32
     40 	Width             int
     41 	Height            int
     42 	XScale            uint8
     43 	YScale            uint8
     44 }
     45 
     46 const (
     47 	nSegment     = 4
     48 	nSegmentProb = 3
     49 )
     50 
     51 // segmentHeader holds segment-related header information.
     52 type segmentHeader struct {
     53 	useSegment     bool
     54 	updateMap      bool
     55 	relativeDelta  bool
     56 	quantizer      [nSegment]int8
     57 	filterStrength [nSegment]int8
     58 	prob           [nSegmentProb]uint8
     59 }
     60 
     61 const (
     62 	nRefLFDelta  = 4
     63 	nModeLFDelta = 4
     64 )
     65 
     66 // filterHeader holds filter-related header information.
     67 type filterHeader struct {
     68 	simple          bool
     69 	level           int8
     70 	sharpness       uint8
     71 	useLFDelta      bool
     72 	refLFDelta      [nRefLFDelta]int8
     73 	modeLFDelta     [nModeLFDelta]int8
     74 	perSegmentLevel [nSegment]int8
     75 }
     76 
     77 // mb is the per-macroblock decode state. A decoder maintains mbw+1 of these
     78 // as it is decoding macroblocks left-to-right and top-to-bottom: mbw for the
     79 // macroblocks in the row above, and one for the macroblock to the left.
     80 type mb struct {
     81 	// pred is the predictor mode for the 4 bottom or right 4x4 luma regions.
     82 	pred [4]uint8
     83 	// nzMask is a mask of 8 bits: 4 for the bottom or right 4x4 luma regions,
     84 	// and 2 + 2 for the bottom or right 4x4 chroma regions. A 1 bit indicates
     85 	// that region has non-zero coefficients.
     86 	nzMask uint8
     87 	// nzY16 is a 0/1 value that is 1 if the macroblock used Y16 prediction and
     88 	// had non-zero coefficients.
     89 	nzY16 uint8
     90 }
     91 
     92 // Decoder decodes VP8 bitstreams into frames. Decoding one frame consists of
     93 // calling Init, DecodeFrameHeader and then DecodeFrame in that order.
     94 // A Decoder can be re-used to decode multiple frames.
     95 type Decoder struct {
     96 	// r is the input bitsream.
     97 	r limitReader
     98 	// scratch is a scratch buffer.
     99 	scratch [8]byte
    100 	// img is the YCbCr image to decode into.
    101 	img *image.YCbCr
    102 	// mbw and mbh are the number of 16x16 macroblocks wide and high the image is.
    103 	mbw, mbh int
    104 	// frameHeader is the frame header. When decoding multiple frames,
    105 	// frames that aren't key frames will inherit the Width, Height,
    106 	// XScale and YScale of the most recent key frame.
    107 	frameHeader FrameHeader
    108 	// Other headers.
    109 	segmentHeader segmentHeader
    110 	filterHeader  filterHeader
    111 	// The image data is divided into a number of independent partitions.
    112 	// There is 1 "first partition" and between 1 and 8 "other partitions"
    113 	// for coefficient data.
    114 	fp  partition
    115 	op  [8]partition
    116 	nOP int
    117 	// Quantization factors.
    118 	quant [nSegment]quant
    119 	// DCT/WHT coefficient decoding probabilities.
    120 	tokenProb   [nPlane][nBand][nContext][nProb]uint8
    121 	useSkipProb bool
    122 	skipProb    uint8
    123 	// Loop filter parameters.
    124 	filterParams      [nSegment][2]filterParam
    125 	perMBFilterParams []filterParam
    126 
    127 	// The eight fields below relate to the current macroblock being decoded.
    128 	//
    129 	// Segment-based adjustments.
    130 	segment int
    131 	// Per-macroblock state for the macroblock immediately left of and those
    132 	// macroblocks immediately above the current macroblock.
    133 	leftMB mb
    134 	upMB   []mb
    135 	// Bitmasks for which 4x4 regions of coeff contain non-zero coefficients.
    136 	nzDCMask, nzACMask uint32
    137 	// Predictor modes.
    138 	usePredY16 bool // The libwebp C code calls this !is_i4x4_.
    139 	predY16    uint8
    140 	predC8     uint8
    141 	predY4     [4][4]uint8
    142 
    143 	// The two fields below form a workspace for reconstructing a macroblock.
    144 	// Their specific sizes are documented in reconstruct.go.
    145 	coeff [1*16*16 + 2*8*8 + 1*4*4]int16
    146 	ybr   [1 + 16 + 1 + 8][32]uint8
    147 }
    148 
    149 // NewDecoder returns a new Decoder.
    150 func NewDecoder() *Decoder {
    151 	return &Decoder{}
    152 }
    153 
    154 // Init initializes the decoder to read at most n bytes from r.
    155 func (d *Decoder) Init(r io.Reader, n int) {
    156 	d.r = limitReader{r, n}
    157 }
    158 
    159 // DecodeFrameHeader decodes the frame header.
    160 func (d *Decoder) DecodeFrameHeader() (fh FrameHeader, err error) {
    161 	// All frame headers are at least 3 bytes long.
    162 	b := d.scratch[:3]
    163 	if err = d.r.ReadFull(b); err != nil {
    164 		return
    165 	}
    166 	d.frameHeader.KeyFrame = (b[0] & 1) == 0
    167 	d.frameHeader.VersionNumber = (b[0] >> 1) & 7
    168 	d.frameHeader.ShowFrame = (b[0]>>4)&1 == 1
    169 	d.frameHeader.FirstPartitionLen = uint32(b[0])>>5 | uint32(b[1])<<3 | uint32(b[2])<<11
    170 	if !d.frameHeader.KeyFrame {
    171 		return d.frameHeader, nil
    172 	}
    173 	// Frame headers for key frames are an additional 7 bytes long.
    174 	b = d.scratch[:7]
    175 	if err = d.r.ReadFull(b); err != nil {
    176 		return
    177 	}
    178 	// Check the magic sync code.
    179 	if b[0] != 0x9d || b[1] != 0x01 || b[2] != 0x2a {
    180 		err = errors.New("vp8: invalid format")
    181 		return
    182 	}
    183 	d.frameHeader.Width = int(b[4]&0x3f)<<8 | int(b[3])
    184 	d.frameHeader.Height = int(b[6]&0x3f)<<8 | int(b[5])
    185 	d.frameHeader.XScale = b[4] >> 6
    186 	d.frameHeader.YScale = b[6] >> 6
    187 	d.mbw = (d.frameHeader.Width + 0x0f) >> 4
    188 	d.mbh = (d.frameHeader.Height + 0x0f) >> 4
    189 	d.segmentHeader = segmentHeader{
    190 		prob: [3]uint8{0xff, 0xff, 0xff},
    191 	}
    192 	d.tokenProb = defaultTokenProb
    193 	d.segment = 0
    194 	return d.frameHeader, nil
    195 }
    196 
    197 // ensureImg ensures that d.img is large enough to hold the decoded frame.
    198 func (d *Decoder) ensureImg() {
    199 	if d.img != nil {
    200 		p0, p1 := d.img.Rect.Min, d.img.Rect.Max
    201 		if p0.X == 0 && p0.Y == 0 && p1.X >= 16*d.mbw && p1.Y >= 16*d.mbh {
    202 			return
    203 		}
    204 	}
    205 	m := image.NewYCbCr(image.Rect(0, 0, 16*d.mbw, 16*d.mbh), image.YCbCrSubsampleRatio420)
    206 	d.img = m.SubImage(image.Rect(0, 0, d.frameHeader.Width, d.frameHeader.Height)).(*image.YCbCr)
    207 	d.perMBFilterParams = make([]filterParam, d.mbw*d.mbh)
    208 	d.upMB = make([]mb, d.mbw)
    209 }
    210 
    211 // parseSegmentHeader parses the segment header, as specified in section 9.3.
    212 func (d *Decoder) parseSegmentHeader() {
    213 	d.segmentHeader.useSegment = d.fp.readBit(uniformProb)
    214 	if !d.segmentHeader.useSegment {
    215 		d.segmentHeader.updateMap = false
    216 		return
    217 	}
    218 	d.segmentHeader.updateMap = d.fp.readBit(uniformProb)
    219 	if d.fp.readBit(uniformProb) {
    220 		d.segmentHeader.relativeDelta = !d.fp.readBit(uniformProb)
    221 		for i := range d.segmentHeader.quantizer {
    222 			d.segmentHeader.quantizer[i] = int8(d.fp.readOptionalInt(uniformProb, 7))
    223 		}
    224 		for i := range d.segmentHeader.filterStrength {
    225 			d.segmentHeader.filterStrength[i] = int8(d.fp.readOptionalInt(uniformProb, 6))
    226 		}
    227 	}
    228 	if !d.segmentHeader.updateMap {
    229 		return
    230 	}
    231 	for i := range d.segmentHeader.prob {
    232 		if d.fp.readBit(uniformProb) {
    233 			d.segmentHeader.prob[i] = uint8(d.fp.readUint(uniformProb, 8))
    234 		} else {
    235 			d.segmentHeader.prob[i] = 0xff
    236 		}
    237 	}
    238 }
    239 
    240 // parseFilterHeader parses the filter header, as specified in section 9.4.
    241 func (d *Decoder) parseFilterHeader() {
    242 	d.filterHeader.simple = d.fp.readBit(uniformProb)
    243 	d.filterHeader.level = int8(d.fp.readUint(uniformProb, 6))
    244 	d.filterHeader.sharpness = uint8(d.fp.readUint(uniformProb, 3))
    245 	d.filterHeader.useLFDelta = d.fp.readBit(uniformProb)
    246 	if d.filterHeader.useLFDelta && d.fp.readBit(uniformProb) {
    247 		for i := range d.filterHeader.refLFDelta {
    248 			d.filterHeader.refLFDelta[i] = int8(d.fp.readOptionalInt(uniformProb, 6))
    249 		}
    250 		for i := range d.filterHeader.modeLFDelta {
    251 			d.filterHeader.modeLFDelta[i] = int8(d.fp.readOptionalInt(uniformProb, 6))
    252 		}
    253 	}
    254 	if d.filterHeader.level == 0 {
    255 		return
    256 	}
    257 	if d.segmentHeader.useSegment {
    258 		for i := range d.filterHeader.perSegmentLevel {
    259 			strength := d.segmentHeader.filterStrength[i]
    260 			if d.segmentHeader.relativeDelta {
    261 				strength += d.filterHeader.level
    262 			}
    263 			d.filterHeader.perSegmentLevel[i] = strength
    264 		}
    265 	} else {
    266 		d.filterHeader.perSegmentLevel[0] = d.filterHeader.level
    267 	}
    268 	d.computeFilterParams()
    269 }
    270 
    271 // parseOtherPartitions parses the other partitions, as specified in section 9.5.
    272 func (d *Decoder) parseOtherPartitions() error {
    273 	const maxNOP = 1 << 3
    274 	var partLens [maxNOP]int
    275 	d.nOP = 1 << d.fp.readUint(uniformProb, 2)
    276 
    277 	// The final partition length is implied by the remaining chunk data
    278 	// (d.r.n) and the other d.nOP-1 partition lengths. Those d.nOP-1 partition
    279 	// lengths are stored as 24-bit uints, i.e. up to 16 MiB per partition.
    280 	n := 3 * (d.nOP - 1)
    281 	partLens[d.nOP-1] = d.r.n - n
    282 	if partLens[d.nOP-1] < 0 {
    283 		return io.ErrUnexpectedEOF
    284 	}
    285 	if n > 0 {
    286 		buf := make([]byte, n)
    287 		if err := d.r.ReadFull(buf); err != nil {
    288 			return err
    289 		}
    290 		for i := 0; i < d.nOP-1; i++ {
    291 			pl := int(buf[3*i+0]) | int(buf[3*i+1])<<8 | int(buf[3*i+2])<<16
    292 			if pl > partLens[d.nOP-1] {
    293 				return io.ErrUnexpectedEOF
    294 			}
    295 			partLens[i] = pl
    296 			partLens[d.nOP-1] -= pl
    297 		}
    298 	}
    299 
    300 	// We check if the final partition length can also fit into a 24-bit uint.
    301 	// Strictly speaking, this isn't part of the spec, but it guards against a
    302 	// malicious WEBP image that is too large to ReadFull the encoded DCT
    303 	// coefficients into memory, whether that's because the actual WEBP file is
    304 	// too large, or whether its RIFF metadata lists too large a chunk.
    305 	if 1<<24 <= partLens[d.nOP-1] {
    306 		return errors.New("vp8: too much data to decode")
    307 	}
    308 
    309 	buf := make([]byte, d.r.n)
    310 	if err := d.r.ReadFull(buf); err != nil {
    311 		return err
    312 	}
    313 	for i, pl := range partLens {
    314 		if i == d.nOP {
    315 			break
    316 		}
    317 		d.op[i].init(buf[:pl])
    318 		buf = buf[pl:]
    319 	}
    320 	return nil
    321 }
    322 
    323 // parseOtherHeaders parses header information other than the frame header.
    324 func (d *Decoder) parseOtherHeaders() error {
    325 	// Initialize and parse the first partition.
    326 	firstPartition := make([]byte, d.frameHeader.FirstPartitionLen)
    327 	if err := d.r.ReadFull(firstPartition); err != nil {
    328 		return err
    329 	}
    330 	d.fp.init(firstPartition)
    331 	if d.frameHeader.KeyFrame {
    332 		// Read and ignore the color space and pixel clamp values. They are
    333 		// specified in section 9.2, but are unimplemented.
    334 		d.fp.readBit(uniformProb)
    335 		d.fp.readBit(uniformProb)
    336 	}
    337 	d.parseSegmentHeader()
    338 	d.parseFilterHeader()
    339 	if err := d.parseOtherPartitions(); err != nil {
    340 		return err
    341 	}
    342 	d.parseQuant()
    343 	if !d.frameHeader.KeyFrame {
    344 		// Golden and AltRef frames are specified in section 9.7.
    345 		// TODO(nigeltao): implement. Note that they are only used for video, not still images.
    346 		return errors.New("vp8: Golden / AltRef frames are not implemented")
    347 	}
    348 	// Read and ignore the refreshLastFrameBuffer bit, specified in section 9.8.
    349 	// It applies only to video, and not still images.
    350 	d.fp.readBit(uniformProb)
    351 	d.parseTokenProb()
    352 	d.useSkipProb = d.fp.readBit(uniformProb)
    353 	if d.useSkipProb {
    354 		d.skipProb = uint8(d.fp.readUint(uniformProb, 8))
    355 	}
    356 	if d.fp.unexpectedEOF {
    357 		return io.ErrUnexpectedEOF
    358 	}
    359 	return nil
    360 }
    361 
    362 // DecodeFrame decodes the frame and returns it as an YCbCr image.
    363 // The image's contents are valid up until the next call to Decoder.Init.
    364 func (d *Decoder) DecodeFrame() (*image.YCbCr, error) {
    365 	d.ensureImg()
    366 	if err := d.parseOtherHeaders(); err != nil {
    367 		return nil, err
    368 	}
    369 	// Reconstruct the rows.
    370 	for mbx := 0; mbx < d.mbw; mbx++ {
    371 		d.upMB[mbx] = mb{}
    372 	}
    373 	for mby := 0; mby < d.mbh; mby++ {
    374 		d.leftMB = mb{}
    375 		for mbx := 0; mbx < d.mbw; mbx++ {
    376 			skip := d.reconstruct(mbx, mby)
    377 			fs := d.filterParams[d.segment][btou(!d.usePredY16)]
    378 			fs.inner = fs.inner || !skip
    379 			d.perMBFilterParams[d.mbw*mby+mbx] = fs
    380 		}
    381 	}
    382 	if d.fp.unexpectedEOF {
    383 		return nil, io.ErrUnexpectedEOF
    384 	}
    385 	for i := 0; i < d.nOP; i++ {
    386 		if d.op[i].unexpectedEOF {
    387 			return nil, io.ErrUnexpectedEOF
    388 		}
    389 	}
    390 	// Apply the loop filter.
    391 	//
    392 	// Even if we are using per-segment levels, section 15 says that "loop
    393 	// filtering must be skipped entirely if loop_filter_level at either the
    394 	// frame header level or macroblock override level is 0".
    395 	if d.filterHeader.level != 0 {
    396 		if d.filterHeader.simple {
    397 			d.simpleFilter()
    398 		} else {
    399 			d.normalFilter()
    400 		}
    401 	}
    402 	return d.img, nil
    403 }