gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

splitter.go (10255B)


      1 package jpegstructure
      2 
      3 import (
      4 	"bufio"
      5 	"bytes"
      6 	"io"
      7 
      8 	"encoding/binary"
      9 
     10 	"github.com/dsoprea/go-logging"
     11 )
     12 
// JpegSplitter uses the Go stream splitter to divide the JPEG stream into
// segments. Its Split method satisfies `bufio.SplitFunc`; every segment that
// is recognized is appended to an internal list that is available through
// Segments().
type JpegSplitter struct {
	// lastMarkerId is the marker byte of the most recently completed segment.
	// It is reset to zero when a scan-data block is emitted.
	lastMarkerId   byte
	// lastMarkerName is the name looked up for the most recently read marker.
	// It is reset to the empty string when a scan-data block is emitted.
	lastMarkerName string
	// counter is the number of marker segments processed. Scan-data blocks do
	// not increment it.
	counter        int
	// lastIsScanData is true when the most recently emitted segment was the
	// scan-data block.
	lastIsScanData bool
	// visitor is an optional callback object. It is notified only if it
	// implements SegmentVisitor and/or SofSegmentVisitor.
	visitor        interface{}

	// currentOffset is the running offset assigned to the next segment. It is
	// advanced by header size plus payload length for every segment handled.
	currentOffset int
	// segments collects every segment handled so far.
	segments      *SegmentList

	// scandataOffset is the index at which the search for the end of the scan
	// data resumes when more data has been buffered.
	scandataOffset int
}
     27 
     28 // NewJpegSplitter returns a new JpegSplitter.
     29 func NewJpegSplitter(visitor interface{}) *JpegSplitter {
     30 	return &JpegSplitter{
     31 		segments: NewSegmentList(nil),
     32 		visitor:  visitor,
     33 	}
     34 }
     35 
// Segments returns all segments found so far. The returned list is the
// splitter's own list, not a copy.
func (js *JpegSplitter) Segments() *SegmentList {
	return js.segments
}
     40 
// MarkerId returns the ID of the last processed marker. It is zero before any
// segment has been processed and after a scan-data block has been emitted.
func (js *JpegSplitter) MarkerId() byte {
	return js.lastMarkerId
}
     45 
// MarkerName returns the name recorded for the last-processed marker. It is
// the empty string after a scan-data block has been emitted.
func (js *JpegSplitter) MarkerName() string {
	return js.lastMarkerName
}
     50 
// Counter returns the number of processed marker segments. Scan-data blocks
// are not counted.
func (js *JpegSplitter) Counter() int {
	return js.counter
}
     55 
// IsScanData reports whether the last processed segment was the scan-data
// block rather than a regular marker segment.
func (js *JpegSplitter) IsScanData() bool {
	return js.lastIsScanData
}
     60 
     61 func (js *JpegSplitter) processScanData(data []byte) (advanceBytes int, err error) {
     62 	defer func() {
     63 		if state := recover(); state != nil {
     64 			err = log.Wrap(state.(error))
     65 		}
     66 	}()
     67 
     68 	// Search through the segment, past all 0xff's therein, until we encounter
     69 	// the EOI segment.
     70 
     71 	dataLength := -1
     72 	for i := js.scandataOffset; i < len(data); i++ {
     73 		thisByte := data[i]
     74 
     75 		if i == 0 {
     76 			continue
     77 		}
     78 
     79 		lastByte := data[i-1]
     80 		if lastByte != 0xff {
     81 			continue
     82 		}
     83 
     84 		if thisByte == 0x00 || thisByte >= 0xd0 && thisByte <= 0xd8 {
     85 			continue
     86 		}
     87 
     88 		// After all of the other checks, this means that we're on the EOF
     89 		// segment.
     90 		if thisByte != MARKER_EOI {
     91 			continue
     92 		}
     93 
     94 		dataLength = i - 1
     95 		break
     96 	}
     97 
     98 	if dataLength == -1 {
     99 		// On the next pass, start on the last byte of this pass, just in case
    100 		// the first byte of the two-byte sequence is here.
    101 		js.scandataOffset = len(data) - 1
    102 
    103 		jpegLogger.Debugf(nil, "Scan-data not fully available (%d).", len(data))
    104 		return 0, nil
    105 	}
    106 
    107 	js.lastIsScanData = true
    108 	js.lastMarkerId = 0
    109 	js.lastMarkerName = ""
    110 
    111 	// Note that we don't increment the counter since this isn't an actual
    112 	// segment.
    113 
    114 	jpegLogger.Debugf(nil, "End of scan-data.")
    115 
    116 	err = js.handleSegment(0x0, "!SCANDATA", 0x0, data[:dataLength])
    117 	log.PanicIf(err)
    118 
    119 	return dataLength, nil
    120 }
    121 
    122 func (js *JpegSplitter) readSegment(data []byte) (count int, err error) {
    123 	defer func() {
    124 		if state := recover(); state != nil {
    125 			err = log.Wrap(state.(error))
    126 		}
    127 	}()
    128 
    129 	if js.counter == 0 {
    130 		// Verify magic bytes.
    131 
    132 		if len(data) < 3 {
    133 			jpegLogger.Debugf(nil, "Not enough (1)")
    134 			return 0, nil
    135 		}
    136 
    137 		if data[0] == jpegMagic2000[0] && data[1] == jpegMagic2000[1] && data[2] == jpegMagic2000[2] {
    138 			// TODO(dustin): Revisit JPEG2000 support.
    139 			log.Panicf("JPEG2000 not supported")
    140 		}
    141 
    142 		if data[0] != jpegMagicStandard[0] || data[1] != jpegMagicStandard[1] || data[2] != jpegMagicStandard[2] {
    143 			log.Panicf("file does not look like a JPEG: (%02x) (%02x) (%02x)", data[0], data[1], data[2])
    144 		}
    145 	}
    146 
    147 	chunkLength := len(data)
    148 
    149 	jpegLogger.Debugf(nil, "SPLIT: LEN=(%d) COUNTER=(%d)", chunkLength, js.counter)
    150 
    151 	if js.scanDataIsNext() == true {
    152 		// If the last segment was the SOS, we're currently sitting on scan data.
    153 		// Search for the EOI marker afterward in order to know how much data
    154 		// there is. Return this as its own token.
    155 		//
    156 		// REF: https://stackoverflow.com/questions/26715684/parsing-jpeg-sos-marker
    157 
    158 		advanceBytes, err := js.processScanData(data)
    159 		log.PanicIf(err)
    160 
    161 		// This will either return 0 and implicitly request that we need more
    162 		// data and then need to run again or will return an actual byte count
    163 		// to progress by.
    164 
    165 		return advanceBytes, nil
    166 	} else if js.lastMarkerId == MARKER_EOI {
    167 		// We have more data following the EOI, which is unexpected. There
    168 		// might be non-standard cruft at the end of the file. Terminate the
    169 		// parse because the file-structure is, technically, complete at this
    170 		// point.
    171 
    172 		return 0, io.EOF
    173 	} else {
    174 		js.lastIsScanData = false
    175 	}
    176 
    177 	// If we're here, we're supposed to be sitting on the 0xff bytes at the
    178 	// beginning of a segment (just before the marker).
    179 
    180 	if data[0] != 0xff {
    181 		log.Panicf("not on new segment marker @ (%d): (%02X)", js.currentOffset, data[0])
    182 	}
    183 
    184 	i := 0
    185 	found := false
    186 	for ; i < chunkLength; i++ {
    187 		jpegLogger.Debugf(nil, "Prefix check: (%d) %02X", i, data[i])
    188 
    189 		if data[i] != 0xff {
    190 			found = true
    191 			break
    192 		}
    193 	}
    194 
    195 	jpegLogger.Debugf(nil, "Skipped over leading 0xFF bytes: (%d)", i)
    196 
    197 	if found == false || i >= chunkLength {
    198 		jpegLogger.Debugf(nil, "Not enough (3)")
    199 		return 0, nil
    200 	}
    201 
    202 	markerId := data[i]
    203 
    204 	js.lastMarkerName = markerNames[markerId]
    205 
    206 	sizeLen, found := markerLen[markerId]
    207 	jpegLogger.Debugf(nil, "MARKER-ID=%x SIZELEN=%v FOUND=%v", markerId, sizeLen, found)
    208 
    209 	i++
    210 
    211 	b := bytes.NewBuffer(data[i:])
    212 	payloadLength := 0
    213 
    214 	// marker-ID + size => 2 + <dynamic>
    215 	headerSize := 2 + sizeLen
    216 
    217 	if found == false {
    218 
    219 		// It's not one of the static-length markers. Read the length.
    220 		//
    221 		// The length is an unsigned 16-bit network/big-endian.
    222 
    223 		// marker-ID + size => 2 + 2
    224 		headerSize = 2 + 2
    225 
    226 		if i+2 >= chunkLength {
    227 			jpegLogger.Debugf(nil, "Not enough (4)")
    228 			return 0, nil
    229 		}
    230 
    231 		l := uint16(0)
    232 		err = binary.Read(b, binary.BigEndian, &l)
    233 		log.PanicIf(err)
    234 
    235 		if l < 2 {
    236 			log.Panicf("length of size read for non-special marker (%02x) is unexpectedly less than two.", markerId)
    237 		}
    238 
    239 		// (l includes the bytes of the length itself.)
    240 		payloadLength = int(l) - 2
    241 		jpegLogger.Debugf(nil, "DataLength (dynamically-sized segment): (%d)", payloadLength)
    242 
    243 		i += 2
    244 	} else if sizeLen > 0 {
    245 
    246 		// Accommodates the non-zero markers in our marker index, which only
    247 		// represent J2C extensions.
    248 		//
    249 		// The length is an unsigned 32-bit network/big-endian.
    250 
    251 		// TODO(dustin): !! This needs to be tested, but we need an image.
    252 
    253 		if sizeLen != 4 {
    254 			log.Panicf("known non-zero marker is not four bytes, which is not currently handled: M=(%x)", markerId)
    255 		}
    256 
    257 		if i+4 >= chunkLength {
    258 			jpegLogger.Debugf(nil, "Not enough (5)")
    259 			return 0, nil
    260 		}
    261 
    262 		l := uint32(0)
    263 		err = binary.Read(b, binary.BigEndian, &l)
    264 		log.PanicIf(err)
    265 
    266 		payloadLength = int(l) - 4
    267 		jpegLogger.Debugf(nil, "DataLength (four-byte-length segment): (%u)", l)
    268 
    269 		i += 4
    270 	}
    271 
    272 	jpegLogger.Debugf(nil, "PAYLOAD-LENGTH: %d", payloadLength)
    273 
    274 	payload := data[i:]
    275 
    276 	if payloadLength < 0 {
    277 		log.Panicf("payload length less than zero: (%d)", payloadLength)
    278 	}
    279 
    280 	i += int(payloadLength)
    281 
    282 	if i > chunkLength {
    283 		jpegLogger.Debugf(nil, "Not enough (6)")
    284 		return 0, nil
    285 	}
    286 
    287 	jpegLogger.Debugf(nil, "Found whole segment.")
    288 
    289 	js.lastMarkerId = markerId
    290 
    291 	payloadWindow := payload[:payloadLength]
    292 	err = js.handleSegment(markerId, js.lastMarkerName, headerSize, payloadWindow)
    293 	log.PanicIf(err)
    294 
    295 	js.counter++
    296 
    297 	jpegLogger.Debugf(nil, "Returning advance of (%d)", i)
    298 
    299 	return i, nil
    300 }
    301 
// scanDataIsNext reports whether the last marker processed was SOS, in which
// case the bytes that follow are treated as scan data rather than as a regular
// marker segment.
func (js *JpegSplitter) scanDataIsNext() bool {
	return js.lastMarkerId == MARKER_SOS
}
    305 
    306 // Split is the base splitting function that satisfies `bufio.SplitFunc`.
    307 func (js *JpegSplitter) Split(data []byte, atEOF bool) (advance int, token []byte, err error) {
    308 	defer func() {
    309 		if state := recover(); state != nil {
    310 			err = log.Wrap(state.(error))
    311 		}
    312 	}()
    313 
    314 	for len(data) > 0 {
    315 		currentAdvance, err := js.readSegment(data)
    316 		if err != nil {
    317 			if err == io.EOF {
    318 				// We've encountered an EOI marker.
    319 				return 0, nil, err
    320 			}
    321 
    322 			log.Panic(err)
    323 		}
    324 
    325 		if currentAdvance == 0 {
    326 			if len(data) > 0 && atEOF == true {
    327 				// Provide a little context in the error message.
    328 
    329 				if js.scanDataIsNext() == true {
    330 					// Yes, we've ran into this.
    331 
    332 					log.Panicf("scan-data is unbounded; EOI not encountered before EOF")
    333 				} else {
    334 					log.Panicf("partial segment data encountered before scan-data")
    335 				}
    336 			}
    337 
    338 			// We don't have enough data for another segment.
    339 			break
    340 		}
    341 
    342 		data = data[currentAdvance:]
    343 		advance += currentAdvance
    344 	}
    345 
    346 	return advance, nil, nil
    347 }
    348 
    349 func (js *JpegSplitter) parseSof(data []byte) (sof *SofSegment, err error) {
    350 	defer func() {
    351 		if state := recover(); state != nil {
    352 			err = log.Wrap(state.(error))
    353 		}
    354 	}()
    355 
    356 	stream := bytes.NewBuffer(data)
    357 	buffer := bufio.NewReader(stream)
    358 
    359 	bitsPerSample, err := buffer.ReadByte()
    360 	log.PanicIf(err)
    361 
    362 	height := uint16(0)
    363 	err = binary.Read(buffer, binary.BigEndian, &height)
    364 	log.PanicIf(err)
    365 
    366 	width := uint16(0)
    367 	err = binary.Read(buffer, binary.BigEndian, &width)
    368 	log.PanicIf(err)
    369 
    370 	componentCount, err := buffer.ReadByte()
    371 	log.PanicIf(err)
    372 
    373 	sof = &SofSegment{
    374 		BitsPerSample:  bitsPerSample,
    375 		Width:          width,
    376 		Height:         height,
    377 		ComponentCount: componentCount,
    378 	}
    379 
    380 	return sof, nil
    381 }
    382 
    383 func (js *JpegSplitter) parseAppData(markerId byte, data []byte) (err error) {
    384 	defer func() {
    385 		if state := recover(); state != nil {
    386 			err = log.Wrap(state.(error))
    387 		}
    388 	}()
    389 
    390 	return nil
    391 }
    392 
    393 func (js *JpegSplitter) handleSegment(markerId byte, markerName string, headerSize int, payload []byte) (err error) {
    394 	defer func() {
    395 		if state := recover(); state != nil {
    396 			err = log.Wrap(state.(error))
    397 		}
    398 	}()
    399 
    400 	cloned := make([]byte, len(payload))
    401 	copy(cloned, payload)
    402 
    403 	s := &Segment{
    404 		MarkerId:   markerId,
    405 		MarkerName: markerName,
    406 		Offset:     js.currentOffset,
    407 		Data:       cloned,
    408 	}
    409 
    410 	jpegLogger.Debugf(nil, "Encountered marker (0x%02x) [%s] at offset (%d)", markerId, markerName, js.currentOffset)
    411 
    412 	js.currentOffset += headerSize + len(payload)
    413 
    414 	js.segments.Add(s)
    415 
    416 	sv, ok := js.visitor.(SegmentVisitor)
    417 	if ok == true {
    418 		err = sv.HandleSegment(js.lastMarkerId, js.lastMarkerName, js.counter, js.lastIsScanData)
    419 		log.PanicIf(err)
    420 	}
    421 
    422 	if markerId >= MARKER_SOF0 && markerId <= MARKER_SOF15 {
    423 		ssv, ok := js.visitor.(SofSegmentVisitor)
    424 		if ok == true {
    425 			sof, err := js.parseSof(payload)
    426 			log.PanicIf(err)
    427 
    428 			err = ssv.HandleSof(sof)
    429 			log.PanicIf(err)
    430 		}
    431 	} else if markerId >= MARKER_APP0 && markerId <= MARKER_APP15 {
    432 		err := js.parseAppData(markerId, payload)
    433 		log.PanicIf(err)
    434 	}
    435 
    436 	return nil
    437 }