probe.go (16600B)
1 package mp4 2 3 import ( 4 "bytes" 5 "errors" 6 "io" 7 8 "github.com/abema/go-mp4/bitio" 9 ) 10 11 type ProbeInfo struct { 12 MajorBrand [4]byte 13 MinorVersion uint32 14 CompatibleBrands [][4]byte 15 FastStart bool 16 Timescale uint32 17 Duration uint64 18 Tracks Tracks 19 Segments Segments 20 } 21 22 // Deprecated: replace with ProbeInfo 23 type FraProbeInfo = ProbeInfo 24 25 type Tracks []*Track 26 27 // Deprecated: replace with Track 28 type TrackInfo = Track 29 30 type Track struct { 31 TrackID uint32 32 Timescale uint32 33 Duration uint64 34 Codec Codec 35 Encrypted bool 36 EditList EditList 37 Samples Samples 38 Chunks Chunks 39 AVC *AVCDecConfigInfo 40 MP4A *MP4AInfo 41 } 42 43 type Codec int 44 45 const ( 46 CodecUnknown Codec = iota 47 CodecAVC1 48 CodecMP4A 49 ) 50 51 type EditList []*EditListEntry 52 53 type EditListEntry struct { 54 MediaTime int64 55 SegmentDuration uint64 56 } 57 58 type Samples []*Sample 59 60 type Sample struct { 61 Size uint32 62 TimeDelta uint32 63 CompositionTimeOffset int64 64 } 65 66 type Chunks []*Chunk 67 68 type Chunk struct { 69 DataOffset uint32 70 SamplesPerChunk uint32 71 } 72 73 type AVCDecConfigInfo struct { 74 ConfigurationVersion uint8 75 Profile uint8 76 ProfileCompatibility uint8 77 Level uint8 78 LengthSize uint16 79 Width uint16 80 Height uint16 81 } 82 83 type MP4AInfo struct { 84 OTI uint8 85 AudOTI uint8 86 ChannelCount uint16 87 } 88 89 type Segments []*Segment 90 91 // Deprecated: replace with Segment 92 type SegmentInfo = Segment 93 94 type Segment struct { 95 TrackID uint32 96 MoofOffset uint64 97 BaseMediaDecodeTime uint64 98 DefaultSampleDuration uint32 99 SampleCount uint32 100 Duration uint32 101 CompositionTimeOffset int32 102 Size uint32 103 } 104 105 // Probe probes MP4 file 106 func Probe(r io.ReadSeeker) (*ProbeInfo, error) { 107 probeInfo := &ProbeInfo{ 108 Tracks: make([]*Track, 0, 8), 109 Segments: make([]*Segment, 0, 8), 110 } 111 bis, err := ExtractBoxes(r, nil, []BoxPath{ 112 {BoxTypeFtyp()}, 113 {BoxTypeMoov()}, 114 {BoxTypeMoov(), BoxTypeMvhd()}, 115 {BoxTypeMoov(), BoxTypeTrak()}, 116 {BoxTypeMoof()}, 117 {BoxTypeMdat()}, 118 }) 119 if err != nil { 120 return nil, err 121 } 122 var mdatAppeared bool 123 for _, bi := range bis { 124 switch bi.Type { 125 case BoxTypeFtyp(): 126 var ftyp Ftyp 127 if _, err := bi.SeekToPayload(r); err != nil { 128 return nil, err 129 } 130 if _, err := Unmarshal(r, bi.Size-bi.HeaderSize, &ftyp, bi.Context); err != nil { 131 return nil, err 132 } 133 probeInfo.MajorBrand = ftyp.MajorBrand 134 probeInfo.MinorVersion = ftyp.MinorVersion 135 probeInfo.CompatibleBrands = make([][4]byte, 0, len(ftyp.CompatibleBrands)) 136 for _, entry := range ftyp.CompatibleBrands { 137 probeInfo.CompatibleBrands = append(probeInfo.CompatibleBrands, entry.CompatibleBrand) 138 } 139 case BoxTypeMoov(): 140 probeInfo.FastStart = !mdatAppeared 141 case BoxTypeMvhd(): 142 var mvhd Mvhd 143 if _, err := bi.SeekToPayload(r); err != nil { 144 return nil, err 145 } 146 if _, err := Unmarshal(r, bi.Size-bi.HeaderSize, &mvhd, bi.Context); err != nil { 147 return nil, err 148 } 149 probeInfo.Timescale = mvhd.Timescale 150 if mvhd.GetVersion() == 0 { 151 probeInfo.Duration = uint64(mvhd.DurationV0) 152 } else { 153 probeInfo.Duration = mvhd.DurationV1 154 } 155 case BoxTypeTrak(): 156 track, err := probeTrak(r, bi) 157 if err != nil { 158 return nil, err 159 } 160 probeInfo.Tracks = append(probeInfo.Tracks, track) 161 case BoxTypeMoof(): 162 segment, err := probeMoof(r, bi) 163 if err != nil { 164 return nil, err 165 } 166 probeInfo.Segments = append(probeInfo.Segments, segment) 167 case BoxTypeMdat(): 168 mdatAppeared = true 169 } 170 } 171 return probeInfo, nil 172 } 173 174 // ProbeFra probes fragmented MP4 file 175 // Deprecated: replace with Probe 176 func ProbeFra(r io.ReadSeeker) (*FraProbeInfo, error) { 177 probeInfo, err := Probe(r) 178 return (*FraProbeInfo)(probeInfo), err 179 } 180 181 func probeTrak(r io.ReadSeeker, bi *BoxInfo) (*Track, error) { 182 track := new(Track) 183 184 bips, err := ExtractBoxesWithPayload(r, bi, []BoxPath{ 185 {BoxTypeTkhd()}, 186 {BoxTypeEdts(), BoxTypeElst()}, 187 {BoxTypeMdia(), BoxTypeMdhd()}, 188 {BoxTypeMdia(), BoxTypeMinf(), BoxTypeStbl(), BoxTypeStsd(), BoxTypeAvc1()}, 189 {BoxTypeMdia(), BoxTypeMinf(), BoxTypeStbl(), BoxTypeStsd(), BoxTypeAvc1(), BoxTypeAvcC()}, 190 {BoxTypeMdia(), BoxTypeMinf(), BoxTypeStbl(), BoxTypeStsd(), BoxTypeEncv()}, 191 {BoxTypeMdia(), BoxTypeMinf(), BoxTypeStbl(), BoxTypeStsd(), BoxTypeEncv(), BoxTypeAvcC()}, 192 {BoxTypeMdia(), BoxTypeMinf(), BoxTypeStbl(), BoxTypeStsd(), BoxTypeMp4a()}, 193 {BoxTypeMdia(), BoxTypeMinf(), BoxTypeStbl(), BoxTypeStsd(), BoxTypeMp4a(), BoxTypeEsds()}, 194 {BoxTypeMdia(), BoxTypeMinf(), BoxTypeStbl(), BoxTypeStsd(), BoxTypeMp4a(), BoxTypeWave(), BoxTypeEsds()}, 195 {BoxTypeMdia(), BoxTypeMinf(), BoxTypeStbl(), BoxTypeStsd(), BoxTypeEnca()}, 196 {BoxTypeMdia(), BoxTypeMinf(), BoxTypeStbl(), BoxTypeStsd(), BoxTypeEnca(), BoxTypeEsds()}, 197 {BoxTypeMdia(), BoxTypeMinf(), BoxTypeStbl(), BoxTypeStco()}, 198 {BoxTypeMdia(), BoxTypeMinf(), BoxTypeStbl(), BoxTypeStts()}, 199 {BoxTypeMdia(), BoxTypeMinf(), BoxTypeStbl(), BoxTypeCtts()}, 200 {BoxTypeMdia(), BoxTypeMinf(), BoxTypeStbl(), BoxTypeStsc()}, 201 {BoxTypeMdia(), BoxTypeMinf(), BoxTypeStbl(), BoxTypeStsz()}, 202 }) 203 if err != nil { 204 return nil, err 205 } 206 var tkhd *Tkhd 207 var elst *Elst 208 var mdhd *Mdhd 209 var avc1 *VisualSampleEntry 210 var avcC *AVCDecoderConfiguration 211 var audioSampleEntry *AudioSampleEntry 212 var esds *Esds 213 var stco *Stco 214 var stts *Stts 215 var stsc *Stsc 216 var ctts *Ctts 217 var stsz *Stsz 218 for _, bip := range bips { 219 switch bip.Info.Type { 220 case BoxTypeTkhd(): 221 tkhd = bip.Payload.(*Tkhd) 222 case BoxTypeElst(): 223 elst = bip.Payload.(*Elst) 224 case BoxTypeMdhd(): 225 mdhd = bip.Payload.(*Mdhd) 226 case BoxTypeAvc1(): 227 track.Codec = CodecAVC1 228 avc1 = bip.Payload.(*VisualSampleEntry) 229 case BoxTypeAvcC(): 230 avcC = bip.Payload.(*AVCDecoderConfiguration) 231 case BoxTypeEncv(): 232 track.Codec = CodecAVC1 233 track.Encrypted = true 234 case BoxTypeMp4a(): 235 track.Codec = CodecMP4A 236 audioSampleEntry = bip.Payload.(*AudioSampleEntry) 237 case BoxTypeEnca(): 238 track.Codec = CodecMP4A 239 track.Encrypted = true 240 audioSampleEntry = bip.Payload.(*AudioSampleEntry) 241 case BoxTypeEsds(): 242 esds = bip.Payload.(*Esds) 243 case BoxTypeStco(): 244 stco = bip.Payload.(*Stco) 245 case BoxTypeStts(): 246 stts = bip.Payload.(*Stts) 247 case BoxTypeStsc(): 248 stsc = bip.Payload.(*Stsc) 249 case BoxTypeCtts(): 250 ctts = bip.Payload.(*Ctts) 251 case BoxTypeStsz(): 252 stsz = bip.Payload.(*Stsz) 253 } 254 } 255 256 if tkhd == nil { 257 return nil, errors.New("tkhd box not found") 258 } 259 track.TrackID = tkhd.TrackID 260 261 if elst != nil { 262 editList := make([]*EditListEntry, 0, len(elst.Entries)) 263 for i := range elst.Entries { 264 editList = append(editList, &EditListEntry{ 265 MediaTime: elst.GetMediaTime(i), 266 SegmentDuration: elst.GetSegmentDuration(i), 267 }) 268 } 269 track.EditList = editList 270 } 271 272 if mdhd == nil { 273 return nil, errors.New("mdhd box not found") 274 } 275 track.Timescale = mdhd.Timescale 276 track.Duration = mdhd.GetDuration() 277 278 if avc1 != nil && avcC != nil { 279 track.AVC = &AVCDecConfigInfo{ 280 ConfigurationVersion: avcC.ConfigurationVersion, 281 Profile: avcC.Profile, 282 ProfileCompatibility: avcC.ProfileCompatibility, 283 Level: avcC.Level, 284 LengthSize: uint16(avcC.LengthSizeMinusOne) + 1, 285 Width: avc1.Width, 286 Height: avc1.Height, 287 } 288 } 289 290 if audioSampleEntry != nil && esds != nil { 291 oti, audOTI, err := detectAACProfile(esds) 292 if err != nil { 293 return nil, err 294 } 295 track.MP4A = &MP4AInfo{ 296 OTI: oti, 297 AudOTI: audOTI, 298 ChannelCount: audioSampleEntry.ChannelCount, 299 } 300 } 301 302 if stco == nil { 303 return nil, errors.New("stco box not found") 304 } 305 track.Chunks = make([]*Chunk, 0) 306 for _, offset := range stco.ChunkOffset { 307 track.Chunks = append(track.Chunks, &Chunk{ 308 DataOffset: offset, 309 }) 310 } 311 312 if stts == nil { 313 return nil, errors.New("stts box not found") 314 } 315 track.Samples = make([]*Sample, 0) 316 for _, entry := range stts.Entries { 317 for i := uint32(0); i < entry.SampleCount; i++ { 318 track.Samples = append(track.Samples, &Sample{ 319 TimeDelta: entry.SampleDelta, 320 }) 321 } 322 } 323 324 if stsc == nil { 325 return nil, errors.New("stsc box not found") 326 } 327 for si, entry := range stsc.Entries { 328 end := uint32(len(track.Chunks)) 329 if si != len(stsc.Entries)-1 && stsc.Entries[si+1].FirstChunk-1 < end { 330 end = stsc.Entries[si+1].FirstChunk - 1 331 } 332 for ci := entry.FirstChunk - 1; ci < end; ci++ { 333 track.Chunks[ci].SamplesPerChunk = entry.SamplesPerChunk 334 } 335 } 336 337 if ctts != nil { 338 var si uint32 339 for ci, entry := range ctts.Entries { 340 for i := uint32(0); i < entry.SampleCount; i++ { 341 if si >= uint32(len(track.Samples)) { 342 break 343 } 344 track.Samples[si].CompositionTimeOffset = ctts.GetSampleOffset(ci) 345 si++ 346 } 347 } 348 } 349 350 if stsz != nil { 351 for i := 0; i < len(stsz.EntrySize) && i < len(track.Samples); i++ { 352 track.Samples[i].Size = stsz.EntrySize[i] 353 } 354 } 355 356 return track, nil 357 } 358 359 func detectAACProfile(esds *Esds) (oti, audOTI uint8, err error) { 360 configDscr := findDescriptorByTag(esds.Descriptors, DecoderConfigDescrTag) 361 if configDscr == nil || configDscr.DecoderConfigDescriptor == nil { 362 return 0, 0, nil 363 } 364 if configDscr.DecoderConfigDescriptor.ObjectTypeIndication != 0x40 { 365 return configDscr.DecoderConfigDescriptor.ObjectTypeIndication, 0, nil 366 } 367 368 specificDscr := findDescriptorByTag(esds.Descriptors, DecSpecificInfoTag) 369 if specificDscr == nil { 370 return 0, 0, errors.New("DecoderSpecificationInfoDescriptor not found") 371 } 372 373 r := bitio.NewReader(bytes.NewReader(specificDscr.Data)) 374 remaining := len(specificDscr.Data) * 8 375 376 // audio object type 377 audioObjectType, read, err := getAudioObjectType(r) 378 if err != nil { 379 return 0, 0, err 380 } 381 remaining -= read 382 383 // sampling frequency index 384 samplingFrequencyIndex, err := r.ReadBits(4) 385 if err != nil { 386 return 0, 0, err 387 } 388 remaining -= 4 389 if samplingFrequencyIndex[0] == 0x0f { 390 if _, err = r.ReadBits(24); err != nil { 391 return 0, 0, err 392 } 393 remaining -= 24 394 } 395 396 if audioObjectType == 2 && remaining >= 20 { 397 if _, err = r.ReadBits(4); err != nil { 398 return 0, 0, err 399 } 400 remaining -= 4 401 syncExtensionType, err := r.ReadBits(11) 402 if err != nil { 403 return 0, 0, err 404 } 405 remaining -= 11 406 if syncExtensionType[0] == 0x2 && syncExtensionType[1] == 0xb7 { 407 extAudioObjectType, _, err := getAudioObjectType(r) 408 if err != nil { 409 return 0, 0, err 410 } 411 if extAudioObjectType == 5 || extAudioObjectType == 22 { 412 sbr, err := r.ReadBits(1) 413 if err != nil { 414 return 0, 0, err 415 } 416 remaining-- 417 if sbr[0] != 0 { 418 if extAudioObjectType == 5 { 419 sfi, err := r.ReadBits(4) 420 if err != nil { 421 return 0, 0, err 422 } 423 remaining -= 4 424 if sfi[0] == 0xf { 425 if _, err := r.ReadBits(24); err != nil { 426 return 0, 0, err 427 } 428 remaining -= 24 429 } 430 if remaining >= 12 { 431 syncExtensionType, err := r.ReadBits(11) 432 if err != nil { 433 return 0, 0, err 434 } 435 if syncExtensionType[0] == 0x5 && syncExtensionType[1] == 0x48 { 436 ps, err := r.ReadBits(1) 437 if err != nil { 438 return 0, 0, err 439 } 440 if ps[0] != 0 { 441 return 0x40, 29, nil 442 } 443 } 444 } 445 } 446 return 0x40, 5, nil 447 } 448 } 449 } 450 } 451 return 0x40, audioObjectType, nil 452 } 453 454 func findDescriptorByTag(dscrs []Descriptor, tag int8) *Descriptor { 455 for _, dscr := range dscrs { 456 if dscr.Tag == tag { 457 return &dscr 458 } 459 } 460 return nil 461 } 462 463 func getAudioObjectType(r bitio.Reader) (byte, int, error) { 464 audioObjectType, err := r.ReadBits(5) 465 if err != nil { 466 return 0, 0, err 467 } 468 if audioObjectType[0] != 0x1f { 469 return audioObjectType[0], 5, nil 470 } 471 audioObjectType, err = r.ReadBits(6) 472 if err != nil { 473 return 0, 0, err 474 } 475 return audioObjectType[0] + 32, 11, nil 476 } 477 478 func probeMoof(r io.ReadSeeker, bi *BoxInfo) (*Segment, error) { 479 bips, err := ExtractBoxesWithPayload(r, bi, []BoxPath{ 480 {BoxTypeTraf(), BoxTypeTfhd()}, 481 {BoxTypeTraf(), BoxTypeTfdt()}, 482 {BoxTypeTraf(), BoxTypeTrun()}, 483 }) 484 if err != nil { 485 return nil, err 486 } 487 488 var tfhd *Tfhd 489 var tfdt *Tfdt 490 var trun *Trun 491 492 segment := &Segment{ 493 MoofOffset: bi.Offset, 494 } 495 for _, bip := range bips { 496 switch bip.Info.Type { 497 case BoxTypeTfhd(): 498 tfhd = bip.Payload.(*Tfhd) 499 case BoxTypeTfdt(): 500 tfdt = bip.Payload.(*Tfdt) 501 case BoxTypeTrun(): 502 trun = bip.Payload.(*Trun) 503 } 504 } 505 506 if tfhd == nil { 507 return nil, errors.New("tfhd not found") 508 } 509 segment.TrackID = tfhd.TrackID 510 segment.DefaultSampleDuration = tfhd.DefaultSampleDuration 511 512 if tfdt != nil { 513 if tfdt.Version == 0 { 514 segment.BaseMediaDecodeTime = uint64(tfdt.BaseMediaDecodeTimeV0) 515 } else { 516 segment.BaseMediaDecodeTime = tfdt.BaseMediaDecodeTimeV1 517 } 518 } 519 520 if trun != nil { 521 segment.SampleCount = trun.SampleCount 522 523 if trun.CheckFlag(0x000100) { 524 segment.Duration = 0 525 for ei := range trun.Entries { 526 segment.Duration += trun.Entries[ei].SampleDuration 527 } 528 } else { 529 segment.Duration = tfhd.DefaultSampleDuration * segment.SampleCount 530 } 531 532 if trun.CheckFlag(0x000200) { 533 segment.Size = 0 534 for ei := range trun.Entries { 535 segment.Size += trun.Entries[ei].SampleSize 536 } 537 } else { 538 segment.Size = tfhd.DefaultSampleSize * segment.SampleCount 539 } 540 541 var duration uint32 542 for ei := range trun.Entries { 543 offset := int32(duration) + int32(trun.GetSampleCompositionTimeOffset(ei)) 544 if ei == 0 || offset < segment.CompositionTimeOffset { 545 segment.CompositionTimeOffset = offset 546 } 547 if trun.CheckFlag(0x000100) { 548 duration += trun.Entries[ei].SampleDuration 549 } else { 550 duration += tfhd.DefaultSampleDuration 551 } 552 } 553 } 554 555 return segment, nil 556 } 557 558 func FindIDRFrames(r io.ReadSeeker, trackInfo *TrackInfo) ([]int, error) { 559 if trackInfo.AVC == nil { 560 return nil, nil 561 } 562 lengthSize := uint32(trackInfo.AVC.LengthSize) 563 564 var si int 565 idxs := make([]int, 0, 8) 566 for _, chunk := range trackInfo.Chunks { 567 end := si + int(chunk.SamplesPerChunk) 568 dataOffset := chunk.DataOffset 569 for ; si < end && si < len(trackInfo.Samples); si++ { 570 sample := trackInfo.Samples[si] 571 if sample.Size == 0 { 572 continue 573 } 574 for nalOffset := uint32(0); nalOffset+lengthSize+1 <= sample.Size; { 575 if _, err := r.Seek(int64(dataOffset+nalOffset), io.SeekStart); err != nil { 576 return nil, err 577 } 578 data := make([]byte, lengthSize+1) 579 if _, err := io.ReadFull(r, data); err != nil { 580 return nil, err 581 } 582 var length uint32 583 for i := 0; i < int(lengthSize); i++ { 584 length = (length << 8) + uint32(data[i]) 585 } 586 nalHeader := data[lengthSize] 587 nalType := nalHeader & 0x1f 588 if nalType == 5 { 589 idxs = append(idxs, si) 590 break 591 } 592 nalOffset += lengthSize + length 593 } 594 dataOffset += sample.Size 595 } 596 } 597 return idxs, nil 598 } 599 600 func (samples Samples) GetBitrate(timescale uint32) uint64 { 601 var totalSize uint64 602 var totalDuration uint64 603 for _, sample := range samples { 604 totalSize += uint64(sample.Size) 605 totalDuration += uint64(sample.TimeDelta) 606 } 607 if totalDuration == 0 { 608 return 0 609 } 610 return 8 * totalSize * uint64(timescale) / totalDuration 611 } 612 613 func (samples Samples) GetMaxBitrate(timescale uint32, timeDelta uint64) uint64 { 614 if timeDelta == 0 { 615 return 0 616 } 617 var maxBitrate uint64 618 var size uint64 619 var duration uint64 620 var begin int 621 var end int 622 for end < len(samples) { 623 for { 624 size += uint64(samples[end].Size) 625 duration += uint64(samples[end].TimeDelta) 626 end++ 627 if duration >= timeDelta || end == len(samples) { 628 break 629 } 630 } 631 bitrate := 8 * size * uint64(timescale) / duration 632 if bitrate > maxBitrate { 633 maxBitrate = bitrate 634 } 635 for { 636 size -= uint64(samples[begin].Size) 637 duration -= uint64(samples[begin].TimeDelta) 638 begin++ 639 if duration < timeDelta { 640 break 641 } 642 } 643 } 644 return maxBitrate 645 } 646 647 func (segments Segments) GetBitrate(trackID uint32, timescale uint32) uint64 { 648 var totalSize uint64 649 var totalDuration uint64 650 for _, segment := range segments { 651 if segment.TrackID == trackID { 652 totalSize += uint64(segment.Size) 653 totalDuration += uint64(segment.Duration) 654 } 655 } 656 if totalDuration == 0 { 657 return 0 658 } 659 return 8 * totalSize * uint64(timescale) / totalDuration 660 } 661 662 func (segments Segments) GetMaxBitrate(trackID uint32, timescale uint32) uint64 { 663 var maxBitrate uint64 664 for _, segment := range segments { 665 if segment.TrackID == trackID && segment.Duration != 0 { 666 bitrate := 8 * uint64(segment.Size) * uint64(timescale) / uint64(segment.Duration) 667 if bitrate > maxBitrate { 668 maxBitrate = bitrate 669 } 670 } 671 } 672 return maxBitrate 673 }