splitter.go (10255B)
1 package jpegstructure 2 3 import ( 4 "bufio" 5 "bytes" 6 "io" 7 8 "encoding/binary" 9 10 "github.com/dsoprea/go-logging" 11 ) 12 13 // JpegSplitter uses the Go stream splitter to divide the JPEG stream into 14 // segments. 15 type JpegSplitter struct { 16 lastMarkerId byte 17 lastMarkerName string 18 counter int 19 lastIsScanData bool 20 visitor interface{} 21 22 currentOffset int 23 segments *SegmentList 24 25 scandataOffset int 26 } 27 28 // NewJpegSplitter returns a new JpegSplitter. 29 func NewJpegSplitter(visitor interface{}) *JpegSplitter { 30 return &JpegSplitter{ 31 segments: NewSegmentList(nil), 32 visitor: visitor, 33 } 34 } 35 36 // Segments returns all found segments. 37 func (js *JpegSplitter) Segments() *SegmentList { 38 return js.segments 39 } 40 41 // MarkerId returns the ID of the last processed marker. 42 func (js *JpegSplitter) MarkerId() byte { 43 return js.lastMarkerId 44 } 45 46 // MarkerName returns the name of the last-processed marker. 47 func (js *JpegSplitter) MarkerName() string { 48 return js.lastMarkerName 49 } 50 51 // Counter returns the number of processed segments. 52 func (js *JpegSplitter) Counter() int { 53 return js.counter 54 } 55 56 // IsScanData returns whether the last processed segment was scan-data. 57 func (js *JpegSplitter) IsScanData() bool { 58 return js.lastIsScanData 59 } 60 61 func (js *JpegSplitter) processScanData(data []byte) (advanceBytes int, err error) { 62 defer func() { 63 if state := recover(); state != nil { 64 err = log.Wrap(state.(error)) 65 } 66 }() 67 68 // Search through the segment, past all 0xff's therein, until we encounter 69 // the EOI segment. 70 71 dataLength := -1 72 for i := js.scandataOffset; i < len(data); i++ { 73 thisByte := data[i] 74 75 if i == 0 { 76 continue 77 } 78 79 lastByte := data[i-1] 80 if lastByte != 0xff { 81 continue 82 } 83 84 if thisByte == 0x00 || thisByte >= 0xd0 && thisByte <= 0xd8 { 85 continue 86 } 87 88 // After all of the other checks, this means that we're on the EOF 89 // segment. 90 if thisByte != MARKER_EOI { 91 continue 92 } 93 94 dataLength = i - 1 95 break 96 } 97 98 if dataLength == -1 { 99 // On the next pass, start on the last byte of this pass, just in case 100 // the first byte of the two-byte sequence is here. 101 js.scandataOffset = len(data) - 1 102 103 jpegLogger.Debugf(nil, "Scan-data not fully available (%d).", len(data)) 104 return 0, nil 105 } 106 107 js.lastIsScanData = true 108 js.lastMarkerId = 0 109 js.lastMarkerName = "" 110 111 // Note that we don't increment the counter since this isn't an actual 112 // segment. 113 114 jpegLogger.Debugf(nil, "End of scan-data.") 115 116 err = js.handleSegment(0x0, "!SCANDATA", 0x0, data[:dataLength]) 117 log.PanicIf(err) 118 119 return dataLength, nil 120 } 121 122 func (js *JpegSplitter) readSegment(data []byte) (count int, err error) { 123 defer func() { 124 if state := recover(); state != nil { 125 err = log.Wrap(state.(error)) 126 } 127 }() 128 129 if js.counter == 0 { 130 // Verify magic bytes. 131 132 if len(data) < 3 { 133 jpegLogger.Debugf(nil, "Not enough (1)") 134 return 0, nil 135 } 136 137 if data[0] == jpegMagic2000[0] && data[1] == jpegMagic2000[1] && data[2] == jpegMagic2000[2] { 138 // TODO(dustin): Revisit JPEG2000 support. 139 log.Panicf("JPEG2000 not supported") 140 } 141 142 if data[0] != jpegMagicStandard[0] || data[1] != jpegMagicStandard[1] || data[2] != jpegMagicStandard[2] { 143 log.Panicf("file does not look like a JPEG: (%02x) (%02x) (%02x)", data[0], data[1], data[2]) 144 } 145 } 146 147 chunkLength := len(data) 148 149 jpegLogger.Debugf(nil, "SPLIT: LEN=(%d) COUNTER=(%d)", chunkLength, js.counter) 150 151 if js.scanDataIsNext() == true { 152 // If the last segment was the SOS, we're currently sitting on scan data. 153 // Search for the EOI marker afterward in order to know how much data 154 // there is. Return this as its own token. 155 // 156 // REF: https://stackoverflow.com/questions/26715684/parsing-jpeg-sos-marker 157 158 advanceBytes, err := js.processScanData(data) 159 log.PanicIf(err) 160 161 // This will either return 0 and implicitly request that we need more 162 // data and then need to run again or will return an actual byte count 163 // to progress by. 164 165 return advanceBytes, nil 166 } else if js.lastMarkerId == MARKER_EOI { 167 // We have more data following the EOI, which is unexpected. There 168 // might be non-standard cruft at the end of the file. Terminate the 169 // parse because the file-structure is, technically, complete at this 170 // point. 171 172 return 0, io.EOF 173 } else { 174 js.lastIsScanData = false 175 } 176 177 // If we're here, we're supposed to be sitting on the 0xff bytes at the 178 // beginning of a segment (just before the marker). 179 180 if data[0] != 0xff { 181 log.Panicf("not on new segment marker @ (%d): (%02X)", js.currentOffset, data[0]) 182 } 183 184 i := 0 185 found := false 186 for ; i < chunkLength; i++ { 187 jpegLogger.Debugf(nil, "Prefix check: (%d) %02X", i, data[i]) 188 189 if data[i] != 0xff { 190 found = true 191 break 192 } 193 } 194 195 jpegLogger.Debugf(nil, "Skipped over leading 0xFF bytes: (%d)", i) 196 197 if found == false || i >= chunkLength { 198 jpegLogger.Debugf(nil, "Not enough (3)") 199 return 0, nil 200 } 201 202 markerId := data[i] 203 204 js.lastMarkerName = markerNames[markerId] 205 206 sizeLen, found := markerLen[markerId] 207 jpegLogger.Debugf(nil, "MARKER-ID=%x SIZELEN=%v FOUND=%v", markerId, sizeLen, found) 208 209 i++ 210 211 b := bytes.NewBuffer(data[i:]) 212 payloadLength := 0 213 214 // marker-ID + size => 2 + <dynamic> 215 headerSize := 2 + sizeLen 216 217 if found == false { 218 219 // It's not one of the static-length markers. Read the length. 220 // 221 // The length is an unsigned 16-bit network/big-endian. 222 223 // marker-ID + size => 2 + 2 224 headerSize = 2 + 2 225 226 if i+2 >= chunkLength { 227 jpegLogger.Debugf(nil, "Not enough (4)") 228 return 0, nil 229 } 230 231 l := uint16(0) 232 err = binary.Read(b, binary.BigEndian, &l) 233 log.PanicIf(err) 234 235 if l < 2 { 236 log.Panicf("length of size read for non-special marker (%02x) is unexpectedly less than two.", markerId) 237 } 238 239 // (l includes the bytes of the length itself.) 240 payloadLength = int(l) - 2 241 jpegLogger.Debugf(nil, "DataLength (dynamically-sized segment): (%d)", payloadLength) 242 243 i += 2 244 } else if sizeLen > 0 { 245 246 // Accommodates the non-zero markers in our marker index, which only 247 // represent J2C extensions. 248 // 249 // The length is an unsigned 32-bit network/big-endian. 250 251 // TODO(dustin): !! This needs to be tested, but we need an image. 252 253 if sizeLen != 4 { 254 log.Panicf("known non-zero marker is not four bytes, which is not currently handled: M=(%x)", markerId) 255 } 256 257 if i+4 >= chunkLength { 258 jpegLogger.Debugf(nil, "Not enough (5)") 259 return 0, nil 260 } 261 262 l := uint32(0) 263 err = binary.Read(b, binary.BigEndian, &l) 264 log.PanicIf(err) 265 266 payloadLength = int(l) - 4 267 jpegLogger.Debugf(nil, "DataLength (four-byte-length segment): (%u)", l) 268 269 i += 4 270 } 271 272 jpegLogger.Debugf(nil, "PAYLOAD-LENGTH: %d", payloadLength) 273 274 payload := data[i:] 275 276 if payloadLength < 0 { 277 log.Panicf("payload length less than zero: (%d)", payloadLength) 278 } 279 280 i += int(payloadLength) 281 282 if i > chunkLength { 283 jpegLogger.Debugf(nil, "Not enough (6)") 284 return 0, nil 285 } 286 287 jpegLogger.Debugf(nil, "Found whole segment.") 288 289 js.lastMarkerId = markerId 290 291 payloadWindow := payload[:payloadLength] 292 err = js.handleSegment(markerId, js.lastMarkerName, headerSize, payloadWindow) 293 log.PanicIf(err) 294 295 js.counter++ 296 297 jpegLogger.Debugf(nil, "Returning advance of (%d)", i) 298 299 return i, nil 300 } 301 302 func (js *JpegSplitter) scanDataIsNext() bool { 303 return js.lastMarkerId == MARKER_SOS 304 } 305 306 // Split is the base splitting function that satisfies `bufio.SplitFunc`. 307 func (js *JpegSplitter) Split(data []byte, atEOF bool) (advance int, token []byte, err error) { 308 defer func() { 309 if state := recover(); state != nil { 310 err = log.Wrap(state.(error)) 311 } 312 }() 313 314 for len(data) > 0 { 315 currentAdvance, err := js.readSegment(data) 316 if err != nil { 317 if err == io.EOF { 318 // We've encountered an EOI marker. 319 return 0, nil, err 320 } 321 322 log.Panic(err) 323 } 324 325 if currentAdvance == 0 { 326 if len(data) > 0 && atEOF == true { 327 // Provide a little context in the error message. 328 329 if js.scanDataIsNext() == true { 330 // Yes, we've ran into this. 331 332 log.Panicf("scan-data is unbounded; EOI not encountered before EOF") 333 } else { 334 log.Panicf("partial segment data encountered before scan-data") 335 } 336 } 337 338 // We don't have enough data for another segment. 339 break 340 } 341 342 data = data[currentAdvance:] 343 advance += currentAdvance 344 } 345 346 return advance, nil, nil 347 } 348 349 func (js *JpegSplitter) parseSof(data []byte) (sof *SofSegment, err error) { 350 defer func() { 351 if state := recover(); state != nil { 352 err = log.Wrap(state.(error)) 353 } 354 }() 355 356 stream := bytes.NewBuffer(data) 357 buffer := bufio.NewReader(stream) 358 359 bitsPerSample, err := buffer.ReadByte() 360 log.PanicIf(err) 361 362 height := uint16(0) 363 err = binary.Read(buffer, binary.BigEndian, &height) 364 log.PanicIf(err) 365 366 width := uint16(0) 367 err = binary.Read(buffer, binary.BigEndian, &width) 368 log.PanicIf(err) 369 370 componentCount, err := buffer.ReadByte() 371 log.PanicIf(err) 372 373 sof = &SofSegment{ 374 BitsPerSample: bitsPerSample, 375 Width: width, 376 Height: height, 377 ComponentCount: componentCount, 378 } 379 380 return sof, nil 381 } 382 383 func (js *JpegSplitter) parseAppData(markerId byte, data []byte) (err error) { 384 defer func() { 385 if state := recover(); state != nil { 386 err = log.Wrap(state.(error)) 387 } 388 }() 389 390 return nil 391 } 392 393 func (js *JpegSplitter) handleSegment(markerId byte, markerName string, headerSize int, payload []byte) (err error) { 394 defer func() { 395 if state := recover(); state != nil { 396 err = log.Wrap(state.(error)) 397 } 398 }() 399 400 cloned := make([]byte, len(payload)) 401 copy(cloned, payload) 402 403 s := &Segment{ 404 MarkerId: markerId, 405 MarkerName: markerName, 406 Offset: js.currentOffset, 407 Data: cloned, 408 } 409 410 jpegLogger.Debugf(nil, "Encountered marker (0x%02x) [%s] at offset (%d)", markerId, markerName, js.currentOffset) 411 412 js.currentOffset += headerSize + len(payload) 413 414 js.segments.Add(s) 415 416 sv, ok := js.visitor.(SegmentVisitor) 417 if ok == true { 418 err = sv.HandleSegment(js.lastMarkerId, js.lastMarkerName, js.counter, js.lastIsScanData) 419 log.PanicIf(err) 420 } 421 422 if markerId >= MARKER_SOF0 && markerId <= MARKER_SOF15 { 423 ssv, ok := js.visitor.(SofSegmentVisitor) 424 if ok == true { 425 sof, err := js.parseSof(payload) 426 log.PanicIf(err) 427 428 err = ssv.HandleSof(sof) 429 log.PanicIf(err) 430 } 431 } else if markerId >= MARKER_APP0 && markerId <= MARKER_APP15 { 432 err := js.parseAppData(markerId, payload) 433 log.PanicIf(err) 434 } 435 436 return nil 437 }