exif.go (8533B)
1 package exif 2 3 import ( 4 "bufio" 5 "bytes" 6 "errors" 7 "fmt" 8 "io" 9 "os" 10 11 "encoding/binary" 12 "io/ioutil" 13 14 "github.com/dsoprea/go-logging" 15 16 "github.com/dsoprea/go-exif/v3/common" 17 ) 18 19 const ( 20 // ExifAddressableAreaStart is the absolute offset in the file that all 21 // offsets are relative to. 22 ExifAddressableAreaStart = uint32(0x0) 23 24 // ExifDefaultFirstIfdOffset is essentially the number of bytes in addition 25 // to `ExifAddressableAreaStart` that you have to move in order to escape 26 // the rest of the header and get to the earliest point where we can put 27 // stuff (which has to be the first IFD). This is the size of the header 28 // sequence containing the two-character byte-order, two-character fixed- 29 // bytes, and the four bytes describing the first-IFD offset. 30 ExifDefaultFirstIfdOffset = uint32(2 + 2 + 4) 31 ) 32 33 const ( 34 // ExifSignatureLength is the number of bytes in the EXIF signature (which 35 // customarily includes the first IFD offset). 36 ExifSignatureLength = 8 37 ) 38 39 var ( 40 exifLogger = log.NewLogger("exif.exif") 41 42 ExifBigEndianSignature = [4]byte{'M', 'M', 0x00, 0x2a} 43 ExifLittleEndianSignature = [4]byte{'I', 'I', 0x2a, 0x00} 44 ) 45 46 var ( 47 ErrNoExif = errors.New("no exif data") 48 ErrExifHeaderError = errors.New("exif header error") 49 ) 50 51 // SearchAndExtractExif searches for an EXIF blob in the byte-slice. 52 func SearchAndExtractExif(data []byte) (rawExif []byte, err error) { 53 defer func() { 54 if state := recover(); state != nil { 55 err = log.Wrap(state.(error)) 56 } 57 }() 58 59 b := bytes.NewBuffer(data) 60 61 rawExif, err = SearchAndExtractExifWithReader(b) 62 if err != nil { 63 if err == ErrNoExif { 64 return nil, err 65 } 66 67 log.Panic(err) 68 } 69 70 return rawExif, nil 71 } 72 73 // SearchAndExtractExifN searches for an EXIF blob in the byte-slice, but skips 74 // the given number of EXIF blocks first. This is a forensics tool that helps 75 // identify multiple EXIF blocks in a file. 76 func SearchAndExtractExifN(data []byte, n int) (rawExif []byte, err error) { 77 defer func() { 78 if state := recover(); state != nil { 79 err = log.Wrap(state.(error)) 80 } 81 }() 82 83 skips := 0 84 totalDiscarded := 0 85 for { 86 b := bytes.NewBuffer(data) 87 88 var discarded int 89 90 rawExif, discarded, err = searchAndExtractExifWithReaderWithDiscarded(b) 91 if err != nil { 92 if err == ErrNoExif { 93 return nil, err 94 } 95 96 log.Panic(err) 97 } 98 99 exifLogger.Debugf(nil, "Read EXIF block (%d).", skips) 100 101 totalDiscarded += discarded 102 103 if skips >= n { 104 exifLogger.Debugf(nil, "Reached requested EXIF block (%d).", n) 105 break 106 } 107 108 nextOffset := discarded + 1 109 exifLogger.Debugf(nil, "Skipping EXIF block (%d) by seeking to position (%d).", skips, nextOffset) 110 111 data = data[nextOffset:] 112 skips++ 113 } 114 115 exifLogger.Debugf(nil, "Found EXIF blob (%d) bytes from initial position.", totalDiscarded) 116 return rawExif, nil 117 } 118 119 // searchAndExtractExifWithReaderWithDiscarded searches for an EXIF blob using 120 // an `io.Reader`. We can't know how much long the EXIF data is without parsing 121 // it, so this will likely grab up a lot of the image-data, too. 122 // 123 // This function returned the count of preceding bytes. 124 func searchAndExtractExifWithReaderWithDiscarded(r io.Reader) (rawExif []byte, discarded int, err error) { 125 defer func() { 126 if state := recover(); state != nil { 127 err = log.Wrap(state.(error)) 128 } 129 }() 130 131 // Search for the beginning of the EXIF information. The EXIF is near the 132 // beginning of most JPEGs, so this likely doesn't have a high cost (at 133 // least, again, with JPEGs). 134 135 br := bufio.NewReader(r) 136 137 for { 138 window, err := br.Peek(ExifSignatureLength) 139 if err != nil { 140 if err == io.EOF { 141 return nil, 0, ErrNoExif 142 } 143 144 log.Panic(err) 145 } 146 147 _, err = ParseExifHeader(window) 148 if err != nil { 149 if log.Is(err, ErrNoExif) == true { 150 // No EXIF. Move forward by one byte. 151 152 _, err := br.Discard(1) 153 log.PanicIf(err) 154 155 discarded++ 156 157 continue 158 } 159 160 // Some other error. 161 log.Panic(err) 162 } 163 164 break 165 } 166 167 exifLogger.Debugf(nil, "Found EXIF blob (%d) bytes from initial position.", discarded) 168 169 rawExif, err = ioutil.ReadAll(br) 170 log.PanicIf(err) 171 172 return rawExif, discarded, nil 173 } 174 175 // RELEASE(dustin): We should replace the implementation of SearchAndExtractExifWithReader with searchAndExtractExifWithReaderWithDiscarded and drop the latter. 176 177 // SearchAndExtractExifWithReader searches for an EXIF blob using an 178 // `io.Reader`. We can't know how much long the EXIF data is without parsing it, 179 // so this will likely grab up a lot of the image-data, too. 180 func SearchAndExtractExifWithReader(r io.Reader) (rawExif []byte, err error) { 181 defer func() { 182 if state := recover(); state != nil { 183 err = log.Wrap(state.(error)) 184 } 185 }() 186 187 rawExif, _, err = searchAndExtractExifWithReaderWithDiscarded(r) 188 if err != nil { 189 if err == ErrNoExif { 190 return nil, err 191 } 192 193 log.Panic(err) 194 } 195 196 return rawExif, nil 197 } 198 199 // SearchFileAndExtractExif returns a slice from the beginning of the EXIF data 200 // to the end of the file (it's not practical to try and calculate where the 201 // data actually ends). 202 func SearchFileAndExtractExif(filepath string) (rawExif []byte, err error) { 203 defer func() { 204 if state := recover(); state != nil { 205 err = log.Wrap(state.(error)) 206 } 207 }() 208 209 // Open the file. 210 211 f, err := os.Open(filepath) 212 log.PanicIf(err) 213 214 defer f.Close() 215 216 rawExif, err = SearchAndExtractExifWithReader(f) 217 log.PanicIf(err) 218 219 return rawExif, nil 220 } 221 222 type ExifHeader struct { 223 ByteOrder binary.ByteOrder 224 FirstIfdOffset uint32 225 } 226 227 func (eh ExifHeader) String() string { 228 return fmt.Sprintf("ExifHeader<BYTE-ORDER=[%v] FIRST-IFD-OFFSET=(0x%02x)>", eh.ByteOrder, eh.FirstIfdOffset) 229 } 230 231 // ParseExifHeader parses the bytes at the very top of the header. 232 // 233 // This will panic with ErrNoExif on any data errors so that we can double as 234 // an EXIF-detection routine. 235 func ParseExifHeader(data []byte) (eh ExifHeader, err error) { 236 defer func() { 237 if state := recover(); state != nil { 238 err = log.Wrap(state.(error)) 239 } 240 }() 241 242 // Good reference: 243 // 244 // CIPA DC-008-2016; JEITA CP-3451D 245 // -> http://www.cipa.jp/std/documents/e/DC-008-Translation-2016-E.pdf 246 247 if len(data) < ExifSignatureLength { 248 exifLogger.Warningf(nil, "Not enough data for EXIF header: (%d)", len(data)) 249 return eh, ErrNoExif 250 } 251 252 if bytes.Equal(data[:4], ExifBigEndianSignature[:]) == true { 253 exifLogger.Debugf(nil, "Byte-order is big-endian.") 254 eh.ByteOrder = binary.BigEndian 255 } else if bytes.Equal(data[:4], ExifLittleEndianSignature[:]) == true { 256 eh.ByteOrder = binary.LittleEndian 257 exifLogger.Debugf(nil, "Byte-order is little-endian.") 258 } else { 259 return eh, ErrNoExif 260 } 261 262 eh.FirstIfdOffset = eh.ByteOrder.Uint32(data[4:8]) 263 264 return eh, nil 265 } 266 267 // Visit recursively invokes a callback for every tag. 268 func Visit(rootIfdIdentity *exifcommon.IfdIdentity, ifdMapping *exifcommon.IfdMapping, tagIndex *TagIndex, exifData []byte, visitor TagVisitorFn, so *ScanOptions) (eh ExifHeader, furthestOffset uint32, err error) { 269 defer func() { 270 if state := recover(); state != nil { 271 err = log.Wrap(state.(error)) 272 } 273 }() 274 275 eh, err = ParseExifHeader(exifData) 276 log.PanicIf(err) 277 278 ebs := NewExifReadSeekerWithBytes(exifData) 279 ie := NewIfdEnumerate(ifdMapping, tagIndex, ebs, eh.ByteOrder) 280 281 _, err = ie.Scan(rootIfdIdentity, eh.FirstIfdOffset, visitor, so) 282 log.PanicIf(err) 283 284 furthestOffset = ie.FurthestOffset() 285 286 return eh, furthestOffset, nil 287 } 288 289 // Collect recursively builds a static structure of all IFDs and tags. 290 func Collect(ifdMapping *exifcommon.IfdMapping, tagIndex *TagIndex, exifData []byte) (eh ExifHeader, index IfdIndex, err error) { 291 defer func() { 292 if state := recover(); state != nil { 293 err = log.Wrap(state.(error)) 294 } 295 }() 296 297 eh, err = ParseExifHeader(exifData) 298 log.PanicIf(err) 299 300 ebs := NewExifReadSeekerWithBytes(exifData) 301 ie := NewIfdEnumerate(ifdMapping, tagIndex, ebs, eh.ByteOrder) 302 303 index, err = ie.Collect(eh.FirstIfdOffset) 304 log.PanicIf(err) 305 306 return eh, index, nil 307 } 308 309 // BuildExifHeader constructs the bytes that go at the front of the stream. 310 func BuildExifHeader(byteOrder binary.ByteOrder, firstIfdOffset uint32) (headerBytes []byte, err error) { 311 defer func() { 312 if state := recover(); state != nil { 313 err = log.Wrap(state.(error)) 314 } 315 }() 316 317 b := new(bytes.Buffer) 318 319 var signatureBytes []byte 320 if byteOrder == binary.BigEndian { 321 signatureBytes = ExifBigEndianSignature[:] 322 } else { 323 signatureBytes = ExifLittleEndianSignature[:] 324 } 325 326 _, err = b.Write(signatureBytes) 327 log.PanicIf(err) 328 329 err = binary.Write(b, byteOrder, firstIfdOffset) 330 log.PanicIf(err) 331 332 return b.Bytes(), nil 333 }