489 lines
15 KiB
C#
489 lines
15 KiB
C#
using System.IO;
|
|
|
|
namespace Vav1Player.Container;
|
|
|
|
/// <summary>
/// Metadata and sample index gathered for one track while parsing an MP4 file.
/// </summary>
public class Mp4TrackInfo
{
    /// <summary>Track identifier read from the track header ("tkhd") box.</summary>
    public uint TrackId { get; set; }

    /// <summary>FourCC of the first sample description entry (for example "av01"); null until an "stsd" box is parsed.</summary>
    public string? CodecType { get; set; }

    /// <summary>Integer part of the 16.16 fixed-point width stored in the track header.</summary>
    public uint Width { get; set; }

    /// <summary>Integer part of the 16.16 fixed-point height stored in the track header.</summary>
    public uint Height { get; set; }

    /// <summary>Media header duration divided by <see cref="TimeScale"/>.</summary>
    public double Duration { get; set; }

    /// <summary>Time scale read from the media header ("mdhd") box.</summary>
    public uint TimeScale { get; set; }

    /// <summary>Samples of the track in the order they were built from the sample tables.</summary>
    public List<Mp4Sample> Samples { get; set; } = new();

    /// <summary>Payload of the "av1C" box found in the AV1 sample entry; null when none was found.</summary>
    public byte[]? Av1ConfigurationRecord { get; set; }
}
|
|
|
|
/// <summary>
/// Location and basic properties of a single media sample inside the file.
/// </summary>
public struct Mp4Sample
{
    /// <summary>Absolute byte offset of the sample within the file data.</summary>
    public long Offset { get; set; }

    /// <summary>Length of the sample in bytes.</summary>
    public uint Size { get; set; }

    /// <summary>Sample duration; the parser in this file currently stores a placeholder value.</summary>
    public uint Duration { get; set; }

    /// <summary>Whether the sample is flagged as a key frame.</summary>
    public bool IsKeyFrame { get; set; }
}
|
|
|
|
public class Mp4Parser
|
|
{
|
|
// Whole MP4 file held in memory; sample offsets are used as indices into this buffer.
private readonly byte[] _fileData;

// Read cursor used while walking the top-level boxes.
private int _position;

/// <summary>
/// Creates a parser over an MP4 file that has been fully loaded into memory.
/// </summary>
/// <param name="fileData">Complete contents of the MP4 file.</param>
/// <exception cref="ArgumentNullException"><paramref name="fileData"/> is null.</exception>
public Mp4Parser(byte[] fileData)
{
    // Fail at construction instead of with a NullReferenceException on first use.
    _fileData = fileData ?? throw new ArgumentNullException(nameof(fileData));
    _position = 0;
}
|
|
|
|
/// <summary>
/// Walks the top-level boxes of the file and collects the tracks that
/// <c>ParseMovieBox</c> extracts from every "moov" box.
/// </summary>
/// <returns>The tracks found; empty when the file contains none.</returns>
/// <exception cref="EndOfStreamException">
/// Raised by <c>ReadBox</c> when fewer than 8 bytes remain for a box header.
/// </exception>
public List<Mp4TrackInfo> Parse()
{
    var tracks = new List<Mp4TrackInfo>();

    // Always start from the top so a second call does not silently return an empty list.
    _position = 0;

    while (_position < _fileData.Length)
    {
        int boxStart = _position;
        var box = ReadBox();

        if (box.Type == "moov")
        {
            ParseMovieBox(box, tracks);
        }

        // Every box ("moov", "mdat" or unknown) is skipped as a whole, measured from its
        // start. This is independent of whether the header was 8 or 16 bytes long; media
        // data is read later through absolute sample offsets.
        long boxEnd = boxStart + (long)box.Size;
        if (boxEnd < _position)
        {
            // The box claims to be smaller than the header that was just consumed: malformed.
            break;
        }

        // Clamp so that a truncated final box ends the loop instead of overflowing the cursor.
        _position = (int)Math.Min(boxEnd, (long)_fileData.Length);
    }

    return tracks;
}
|
|
|
|
/// <summary>
/// Reads the box header at the current position, advances the cursor to the first payload
/// byte and returns the box together with a copy of its payload.
/// </summary>
/// <remarks>
/// Handles the two special 32-bit size values: 1 (a 64-bit size follows the type) and
/// 0 (the box extends to the end of the file). A box whose declared payload runs past the
/// end of the file is returned with an empty payload.
/// NOTE(review): the payload of every box, including "mdat", is copied; consider skipping
/// the copy for boxes whose payload is never inspected.
/// </remarks>
/// <returns>The parsed box.</returns>
/// <exception cref="EndOfStreamException">Not enough bytes remain for the box header.</exception>
/// <exception cref="InvalidDataException">The declared box size is smaller than the box header.</exception>
private Mp4Box ReadBox()
{
    const int CompactHeaderSize = 8;
    const int ExtendedHeaderSize = 16;

    int boxStart = _position;

    if (_fileData.Length - boxStart < CompactHeaderSize)
    {
        throw new EndOfStreamException("Insufficient data for box header");
    }

    var span = _fileData.AsSpan(boxStart);
    uint size = Mp4Reader.ReadUInt32BigEndian(span, 0);
    string type = Mp4Reader.ReadFourCC(span, 4);
    int headerSize = CompactHeaderSize;

    if (size == 1)
    {
        // Extended size: the real size is a 64-bit value following the type.
        if (_fileData.Length - boxStart < ExtendedHeaderSize)
        {
            throw new EndOfStreamException("Insufficient data for extended box header");
        }

        ulong extendedSize = Mp4Reader.ReadUInt64BigEndian(span, 8);
        size = (uint)Math.Min(extendedSize, uint.MaxValue);
        headerSize = ExtendedHeaderSize;
    }
    else if (size == 0)
    {
        // Size 0 means "this box runs to the end of the file".
        size = (uint)(_fileData.Length - boxStart);
    }

    if (size < headerSize)
    {
        throw new InvalidDataException(
            $"Box '{type}' declares size {size}, which is smaller than its {headerSize}-byte header");
    }

    _position = boxStart + headerSize;

    // The payload excludes whichever header form was used (8 or 16 bytes).
    long payloadSize = (long)size - headerSize;
    long available = _fileData.Length - _position;

    byte[] data;
    if (payloadSize <= available)
    {
        data = new byte[(int)payloadSize];
        Array.Copy(_fileData, _position, data, 0, data.Length);
    }
    else
    {
        // Truncated box: do not allocate a buffer for bytes that are not in the file.
        data = Array.Empty<byte>();
    }

    // Last argument: offset at which the box starts (presumably what Mp4Box expects;
    // it matches the value previously passed for compact headers).
    return new Mp4Box(size, type, data, boxStart);
}
|
|
|
|
/// <summary>
/// Scans the direct children of a "moov" box and adds every "trak" child that describes
/// an "av01" track to <paramref name="tracks"/>.
/// </summary>
/// <param name="movieBox">The "moov" box; its payload is walked child by child.</param>
/// <param name="tracks">List that receives the accepted tracks.</param>
private void ParseMovieBox(Mp4Box movieBox, List<Mp4TrackInfo> tracks)
{
    const int HeaderSize = 8;

    var data = movieBox.Data;
    int pos = 0;

    while (data.Length - pos >= HeaderSize)
    {
        var span = data.AsSpan(pos);
        uint size = Mp4Reader.ReadUInt32BigEndian(span, 0);
        string type = Mp4Reader.ReadFourCC(span, 4);

        // Stop on a child that is smaller than its own header (this also covers the special
        // sizes 0 and 1, which are not supported here) or that claims more bytes than the
        // parent holds. Without the upper bound, slicing throws on malformed files.
        if (size < HeaderSize || size > (uint)(data.Length - pos))
        {
            break;
        }

        int boxSize = (int)size; // Safe: bounded by data.Length above.

        if (type == "trak")
        {
            var track = ParseTrackBox(data.AsSpan(pos + HeaderSize, boxSize - HeaderSize));
            if (track is { CodecType: "av01" })
            {
                tracks.Add(track);
            }
        }

        pos += boxSize;
    }
}
|
|
|
|
/// <summary>
/// Parses the payload of a "trak" box, reading its "tkhd" and "mdia" children.
/// </summary>
/// <param name="trackData">Payload of the "trak" box (without the 8-byte box header).</param>
/// <returns>The populated track when its codec is "av01"; otherwise null.</returns>
private Mp4TrackInfo? ParseTrackBox(ReadOnlySpan<byte> trackData)
{
    const int HeaderSize = 8;

    var track = new Mp4TrackInfo();
    int pos = 0;

    while (trackData.Length - pos >= HeaderSize)
    {
        uint size = Mp4Reader.ReadUInt32BigEndian(trackData, pos);
        string type = Mp4Reader.ReadFourCC(trackData, pos + 4);

        // Reject children that are shorter than a header or that overrun the parent;
        // slicing such a child would throw on malformed input.
        if (size < HeaderSize || size > (uint)(trackData.Length - pos))
        {
            break;
        }

        int boxSize = (int)size; // Safe: bounded by trackData.Length above.
        var payload = trackData.Slice(pos + HeaderSize, boxSize - HeaderSize);

        if (type == "mdia")
        {
            ParseMediaBox(payload, track);
        }
        else if (type == "tkhd")
        {
            ParseTrackHeaderBox(payload, track);
        }

        pos += boxSize;
    }

    return track.CodecType == "av01" ? track : null;
}
|
|
|
|
/// <summary>
/// Parses the payload of an "mdia" box, reading its "mdhd" and "minf" children.
/// </summary>
/// <param name="mediaData">Payload of the "mdia" box (without the 8-byte box header).</param>
/// <param name="track">Track that receives the parsed values.</param>
private void ParseMediaBox(ReadOnlySpan<byte> mediaData, Mp4TrackInfo track)
{
    const int HeaderSize = 8;

    int pos = 0;

    while (mediaData.Length - pos >= HeaderSize)
    {
        uint size = Mp4Reader.ReadUInt32BigEndian(mediaData, pos);
        string type = Mp4Reader.ReadFourCC(mediaData, pos + 4);

        // Reject children that are shorter than a header or that overrun the parent;
        // slicing such a child would throw on malformed input.
        if (size < HeaderSize || size > (uint)(mediaData.Length - pos))
        {
            break;
        }

        int boxSize = (int)size; // Safe: bounded by mediaData.Length above.
        var payload = mediaData.Slice(pos + HeaderSize, boxSize - HeaderSize);

        if (type == "mdhd")
        {
            ParseMediaHeaderBox(payload, track);
        }
        else if (type == "minf")
        {
            ParseMediaInfoBox(payload, track);
        }

        pos += boxSize;
    }
}
|
|
|
|
/// <summary>
/// Reads the time scale and duration from an "mdhd" payload and stores them on the track.
/// </summary>
/// <param name="mdhdData">Payload of the "mdhd" box, starting at the version byte.</param>
/// <param name="track">Track whose TimeScale and Duration are set.</param>
private void ParseMediaHeaderBox(ReadOnlySpan<byte> mdhdData, Mp4TrackInfo track)
{
    if (mdhdData.Length < 20)
    {
        return;
    }

    ulong duration;

    if (mdhdData[0] == 1)
    {
        // Version 1: 64-bit creation/modification times push the fields further back.
        if (mdhdData.Length < 32)
        {
            return;
        }

        track.TimeScale = Mp4Reader.ReadUInt32BigEndian(mdhdData, 20);
        duration = Mp4Reader.ReadUInt64BigEndian(mdhdData, 24);
    }
    else
    {
        // Version 0: 32-bit times and a 32-bit duration.
        track.TimeScale = Mp4Reader.ReadUInt32BigEndian(mdhdData, 12);
        duration = Mp4Reader.ReadUInt32BigEndian(mdhdData, 16);
    }

    // A zero time scale would make the division yield Infinity or NaN; report 0 instead.
    track.Duration = track.TimeScale == 0 ? 0.0 : (double)duration / track.TimeScale;
}
|
|
|
|
/// <summary>
/// Reads the track id and the presentation width/height from a "tkhd" payload.
/// </summary>
/// <remarks>
/// Offsets are relative to the version byte and include the 4 bytes of version/flags.
/// Version 0: creation(4) modification(4) track_ID(4) at offset 12, width at 76, height at 80.
/// Version 1: creation(8) modification(8) track_ID(4) at offset 20, width at 88, height at 92.
/// </remarks>
/// <param name="tkhdData">Payload of the "tkhd" box, starting at the version byte.</param>
/// <param name="track">Track whose TrackId, Width and Height are set.</param>
private void ParseTrackHeaderBox(ReadOnlySpan<byte> tkhdData, Mp4TrackInfo track)
{
    if (tkhdData.IsEmpty)
    {
        return;
    }

    bool isVersion1 = tkhdData[0] == 1;
    int trackIdOffset = isVersion1 ? 20 : 12;
    int widthOffset = isVersion1 ? 88 : 76;
    int heightOffset = widthOffset + 4;

    // The height field is the last one read, so it determines the minimum payload length.
    if (tkhdData.Length < heightOffset + 4)
    {
        return;
    }

    track.TrackId = Mp4Reader.ReadUInt32BigEndian(tkhdData, trackIdOffset);

    // Width and height are 16.16 fixed point; keep the integer part.
    track.Width = Mp4Reader.ReadUInt32BigEndian(tkhdData, widthOffset) >> 16;
    track.Height = Mp4Reader.ReadUInt32BigEndian(tkhdData, heightOffset) >> 16;
}
|
|
|
|
/// <summary>
/// Parses the payload of a "minf" box and hands its "stbl" child to the sample table parser.
/// </summary>
/// <param name="minfData">Payload of the "minf" box (without the 8-byte box header).</param>
/// <param name="track">Track that receives the parsed values.</param>
private void ParseMediaInfoBox(ReadOnlySpan<byte> minfData, Mp4TrackInfo track)
{
    const int HeaderSize = 8;

    int pos = 0;

    while (minfData.Length - pos >= HeaderSize)
    {
        uint size = Mp4Reader.ReadUInt32BigEndian(minfData, pos);
        string type = Mp4Reader.ReadFourCC(minfData, pos + 4);

        // Reject children that are shorter than a header or that overrun the parent;
        // slicing such a child would throw on malformed input.
        if (size < HeaderSize || size > (uint)(minfData.Length - pos))
        {
            break;
        }

        int boxSize = (int)size; // Safe: bounded by minfData.Length above.

        if (type == "stbl")
        {
            ParseSampleTableBox(minfData.Slice(pos + HeaderSize, boxSize - HeaderSize), track);
        }

        pos += boxSize;
    }
}
|
|
|
|
/// <summary>
/// Parses the payload of an "stbl" box: reads the sample description, chunk offsets,
/// sample sizes and sample-to-chunk tables, then builds the track's sample list from them.
/// </summary>
/// <param name="stblData">Payload of the "stbl" box (without the 8-byte box header).</param>
/// <param name="track">Track that receives the codec information and the samples.</param>
private void ParseSampleTableBox(ReadOnlySpan<byte> stblData, Mp4TrackInfo track)
{
    const int HeaderSize = 8;

    var chunkOffsets = new List<long>();
    var sampleSizes = new List<uint>();
    var samplesPerChunk = new List<(uint firstChunk, uint samplesPerChunk)>();

    int pos = 0;

    while (stblData.Length - pos >= HeaderSize)
    {
        uint size = Mp4Reader.ReadUInt32BigEndian(stblData, pos);
        string type = Mp4Reader.ReadFourCC(stblData, pos + 4);

        // Reject children that are shorter than a header or that overrun the parent;
        // slicing such a child would throw on malformed input.
        if (size < HeaderSize || size > (uint)(stblData.Length - pos))
        {
            break;
        }

        int boxSize = (int)size; // Safe: bounded by stblData.Length above.
        var boxData = stblData.Slice(pos + HeaderSize, boxSize - HeaderSize);

        switch (type)
        {
            case "stsd":
                ParseSampleDescriptionBox(boxData, track);
                break;
            case "stco":
                chunkOffsets.AddRange(ParseChunkOffsetBox(boxData));
                break;
            case "co64":
                chunkOffsets.AddRange(ParseChunkOffset64Box(boxData));
                break;
            case "stsz":
                sampleSizes.AddRange(ParseSampleSizeBox(boxData));
                break;
            case "stsc":
                samplesPerChunk.AddRange(ParseSampleToChunkBox(boxData));
                break;
        }

        pos += boxSize;
    }

    // Build sample list from parsed tables
    System.Diagnostics.Debug.WriteLine($"[MP4_PARSER] Building sample list: {chunkOffsets.Count} chunks, {sampleSizes.Count} sizes, {samplesPerChunk.Count} sample-to-chunk entries");
    BuildSampleList(track, chunkOffsets, sampleSizes, samplesPerChunk);
}
|
|
|
|
/// <summary>
/// Reads the first entry of an "stsd" payload, records its FourCC as the track's codec
/// type and, for "av01", parses the sample entry for its "av1C" configuration.
/// </summary>
/// <param name="stsdData">Payload of the "stsd" box, starting at the version byte.</param>
/// <param name="track">Track whose CodecType (and AV1 configuration) is set.</param>
private void ParseSampleDescriptionBox(ReadOnlySpan<byte> stsdData, Mp4TrackInfo track)
{
    // Version/flags (4 bytes) plus entry count (4 bytes).
    const int EntriesStart = 8;
    const int HeaderSize = 8;

    if (stsdData.Length < EntriesStart)
    {
        return;
    }

    uint entryCount = Mp4Reader.ReadUInt32BigEndian(stsdData, 4);
    if (entryCount == 0)
    {
        return;
    }

    int remaining = stsdData.Length - EntriesStart;
    if (remaining < HeaderSize)
    {
        return;
    }

    uint entrySize = Mp4Reader.ReadUInt32BigEndian(stsdData, EntriesStart);
    string codecType = Mp4Reader.ReadFourCC(stsdData, EntriesStart + 4);

    track.CodecType = codecType;
    System.Diagnostics.Debug.WriteLine($"[MP4_PARSER] Found codec: {codecType}, entry count: {entryCount}, entry size: {entrySize}");

    // For AV1, look for av1C configuration box within the sample entry
    if (codecType == "av01")
    {
        // Clamp the declared entry size to the bytes actually present; slicing with an
        // oversized or overflowing entry size would throw on malformed input.
        int entryLength = (int)Math.Min(entrySize, (uint)remaining);
        ParseAv1SampleEntry(stsdData.Slice(EntriesStart, entryLength), track);
    }
}
|
|
|
|
/// <summary>
/// Searches the child boxes of an "av01" sample entry for "av1C" and stores its payload
/// as the track's AV1 configuration record.
/// </summary>
/// <param name="av01Data">The sample entry including its own 8-byte box header.</param>
/// <param name="track">Track whose Av1ConfigurationRecord is set when "av1C" is found.</param>
private void ParseAv1SampleEntry(ReadOnlySpan<byte> av01Data, Mp4TrackInfo track)
{
    const int HeaderSize = 8;

    // Fixed part of a VisualSampleEntry, counted from the start of the entry:
    // 8 box header + 6 reserved + 2 data_reference_index + 16 pre_defined/reserved
    // + 2 width + 2 height + 4 + 4 resolution + 4 reserved + 2 frame_count
    // + 32 compressorname + 2 depth + 2 pre_defined = 86 bytes.
    const int VisualSampleEntrySize = 86;

    int pos = VisualSampleEntrySize;

    // Look for av1C box
    while (av01Data.Length - pos >= HeaderSize)
    {
        uint boxSize = Mp4Reader.ReadUInt32BigEndian(av01Data, pos);
        string boxType = Mp4Reader.ReadFourCC(av01Data, pos + 4);

        // A child shorter than its header or running past the entry ends the search;
        // this also keeps the casts below from producing negative lengths.
        if (boxSize < HeaderSize || boxSize > (uint)(av01Data.Length - pos))
        {
            break;
        }

        if (boxType == "av1C")
        {
            // Extract av1C configuration data (skip 8-byte header)
            int configSize = (int)boxSize - HeaderSize;
            track.Av1ConfigurationRecord = av01Data.Slice(pos + HeaderSize, configSize).ToArray();
            System.Diagnostics.Debug.WriteLine($"[MP4_PARSER] Extracted av1C configuration: {configSize} bytes");
            break;
        }

        pos += (int)boxSize;
    }
}
|
|
|
|
/// <summary>
/// Reads the 32-bit chunk offsets of an "stco" payload.
/// </summary>
/// <param name="stcoData">Payload of the "stco" box, starting at the version byte.</param>
/// <returns>
/// The offsets in table order. Fewer than the declared count are returned when the payload
/// ends early; the list is empty when the payload is shorter than 8 bytes.
/// </returns>
private List<long> ParseChunkOffsetBox(ReadOnlySpan<byte> stcoData)
{
    const int TableStart = 8;
    const int EntrySize = 4;

    var result = new List<long>();

    if (stcoData.Length >= TableStart)
    {
        uint remaining = Mp4Reader.ReadUInt32BigEndian(stcoData, 4);
        int cursor = TableStart;

        // Stop at the declared count or at the end of the payload, whichever comes first.
        while (remaining > 0 && cursor + EntrySize <= stcoData.Length)
        {
            result.Add(Mp4Reader.ReadUInt32BigEndian(stcoData, cursor));
            cursor += EntrySize;
            remaining--;
        }
    }

    return result;
}
|
|
|
|
/// <summary>
/// Reads the 64-bit chunk offsets of a "co64" payload.
/// </summary>
/// <param name="co64Data">Payload of the "co64" box, starting at the version byte.</param>
/// <returns>
/// The offsets in table order. Fewer than the declared count are returned when the payload
/// ends early; the list is empty when the payload is shorter than 8 bytes.
/// </returns>
private List<long> ParseChunkOffset64Box(ReadOnlySpan<byte> co64Data)
{
    const int TableStart = 8;
    const int EntrySize = 8;

    var result = new List<long>();

    if (co64Data.Length >= TableStart)
    {
        uint remaining = Mp4Reader.ReadUInt32BigEndian(co64Data, 4);
        int cursor = TableStart;

        // Stop at the declared count or at the end of the payload, whichever comes first.
        while (remaining > 0 && cursor + EntrySize <= co64Data.Length)
        {
            ulong raw = Mp4Reader.ReadUInt64BigEndian(co64Data, cursor);
            result.Add((long)raw);
            cursor += EntrySize;
            remaining--;
        }
    }

    return result;
}
|
|
|
|
/// <summary>
/// Reads the sample sizes of an "stsz" payload.
/// </summary>
/// <param name="stszData">Payload of the "stsz" box, starting at the version byte.</param>
/// <returns>
/// One size per sample. When the box declares a uniform non-zero size, that size is repeated
/// for the declared sample count; otherwise the per-sample table is read until the declared
/// count or the end of the payload. Empty when the payload is shorter than 12 bytes.
/// </returns>
private List<uint> ParseSampleSizeBox(ReadOnlySpan<byte> stszData)
{
    const int TableStart = 12;
    const int EntrySize = 4;

    var result = new List<uint>();
    if (stszData.Length < TableStart)
    {
        return result;
    }

    uint uniformSize = Mp4Reader.ReadUInt32BigEndian(stszData, 4);
    uint declaredCount = Mp4Reader.ReadUInt32BigEndian(stszData, 8);

    if (uniformSize == 0)
    {
        // Each sample has individual size
        int cursor = TableStart;
        for (uint read = 0; read < declaredCount && cursor + EntrySize <= stszData.Length; read++)
        {
            result.Add(Mp4Reader.ReadUInt32BigEndian(stszData, cursor));
            cursor += EntrySize;
        }

        return result;
    }

    // All samples have the same size
    // NOTE(review): the declared count is taken from the file without an upper bound.
    for (uint left = declaredCount; left > 0; left--)
    {
        result.Add(uniformSize);
    }

    return result;
}
|
|
|
|
/// <summary>
/// Reads the entries of an "stsc" payload.
/// </summary>
/// <param name="stscData">Payload of the "stsc" box, starting at the version byte.</param>
/// <returns>
/// One (firstChunk, samplesPerChunk) pair per entry in table order; the sample description
/// index of each entry is ignored. Empty when the payload is shorter than 8 bytes.
/// </returns>
private List<(uint firstChunk, uint samplesPerChunk)> ParseSampleToChunkBox(ReadOnlySpan<byte> stscData)
{
    const int TableStart = 8;
    const int EntrySize = 12;

    var result = new List<(uint firstChunk, uint samplesPerChunk)>();

    if (stscData.Length >= TableStart)
    {
        uint remaining = Mp4Reader.ReadUInt32BigEndian(stscData, 4);
        int cursor = TableStart;

        // Stop at the declared count or at the end of the payload, whichever comes first.
        while (remaining > 0 && cursor + EntrySize <= stscData.Length)
        {
            uint first = Mp4Reader.ReadUInt32BigEndian(stscData, cursor);
            uint perChunk = Mp4Reader.ReadUInt32BigEndian(stscData, cursor + 4);

            // The third field (sample description index, at cursor + 8) is not used.
            result.Add((first, perChunk));

            cursor += EntrySize;
            remaining--;
        }
    }

    return result;
}
|
|
|
|
/// <summary>
/// Combines the chunk offset, sample size and sample-to-chunk tables into concrete samples
/// and appends them to the track.
/// </summary>
/// <param name="track">Track whose sample list is extended.</param>
/// <param name="chunkOffsets">File offset of each chunk, in chunk order.</param>
/// <param name="sampleSizes">Size of each sample, in sample order.</param>
/// <param name="samplesPerChunk">Sample-to-chunk entries; firstChunk is a 1-based chunk number.</param>
private void BuildSampleList(Mp4TrackInfo track, List<long> chunkOffsets, List<uint> sampleSizes,
    List<(uint firstChunk, uint samplesPerChunk)> samplesPerChunk)
{
    bool anyTableEmpty = chunkOffsets.Count == 0 || sampleSizes.Count == 0 || samplesPerChunk.Count == 0;
    if (anyTableEmpty)
    {
        return;
    }

    int nextSample = 0;
    uint chunkNumber = 0;

    foreach (long chunkOffset in chunkOffsets)
    {
        // Chunk numbers in the sample-to-chunk table start at 1.
        chunkNumber++;
        uint currentChunk = chunkNumber;

        // Use the last entry whose first chunk is not after this chunk; fall back to the
        // first entry when no entry qualifies.
        int entryIndex = samplesPerChunk.FindLastIndex(entry => currentChunk >= entry.firstChunk);
        uint samplesInChunk = entryIndex >= 0
            ? samplesPerChunk[entryIndex].samplesPerChunk
            : samplesPerChunk[0].samplesPerChunk;

        // Samples of a chunk are stored back to back starting at the chunk offset.
        long cursor = chunkOffset;
        uint emitted = 0;

        while (emitted < samplesInChunk && nextSample < sampleSizes.Count)
        {
            uint size = sampleSizes[nextSample];

            track.Samples.Add(new Mp4Sample
            {
                Offset = cursor,
                Size = size,
                Duration = 1, // Placeholder; real durations would come from stts
                IsKeyFrame = true // Placeholder; real key frame flags would come from stss
            });

            cursor += size;
            nextSample++;
            emitted++;
        }
    }
}
|
|
|
|
/// <summary>
/// Copies the bytes of a sample out of the file data.
/// </summary>
/// <param name="sample">Sample whose Offset and Size select the bytes to copy.</param>
/// <returns>
/// A new array holding the sample bytes, or an empty array when the sample does not lie
/// entirely inside the file data.
/// </returns>
public byte[] GetSampleData(Mp4Sample sample)
{
    long offset = sample.Offset;
    long size = sample.Size;

    // Compare against the remaining length instead of computing offset + size, which
    // could overflow for very large 64-bit offsets and slip past the range check.
    if (offset < 0 || offset > _fileData.Length || size > _fileData.Length - offset)
    {
        return Array.Empty<byte>();
    }

    byte[] data = new byte[size];
    Array.Copy(_fileData, offset, data, 0, size);
    return data;
}
|
|
} |