Files
video-v1/vav1/Vav1Player/Container/Mp4Parser.cs
2025-09-17 04:16:34 +09:00

489 lines
15 KiB
C#

using System.IO;
namespace Vav1Player.Container;
/// <summary>
/// Metadata and sample table collected for a single track of an MP4 file.
/// </summary>
public class Mp4TrackInfo
{
    /// <summary>Track identifier taken from the track header ("tkhd") box.</summary>
    public uint TrackId { get; set; }

    /// <summary>Four-character code of the track's first sample description entry (for example "av01").</summary>
    public string? CodecType { get; set; }

    /// <summary>Integer part of the 16.16 fixed-point width stored in the track header.</summary>
    public uint Width { get; set; }

    /// <summary>Integer part of the 16.16 fixed-point height stored in the track header.</summary>
    public uint Height { get; set; }

    /// <summary>Media duration from the media header ("mdhd") box divided by <see cref="TimeScale"/>.</summary>
    public double Duration { get; set; }

    /// <summary>Time units per second declared by the media header ("mdhd") box.</summary>
    public uint TimeScale { get; set; }

    /// <summary>Samples of the track in file order, built from the sample table boxes.</summary>
    public List<Mp4Sample> Samples { get; set; } = new();

    /// <summary>Payload of the "av1C" box (without its 8-byte box header), or <c>null</c> when none was found.</summary>
    public byte[]? Av1ConfigurationRecord { get; set; }
}
/// <summary>
/// Location and basic attributes of one media sample inside the file.
/// </summary>
public struct Mp4Sample
{
    /// <summary>Absolute byte offset of the sample from the start of the file.</summary>
    public long Offset { get; set; }

    /// <summary>Length of the sample in bytes.</summary>
    public uint Size { get; set; }

    /// <summary>Sample duration value as assigned by the parser.</summary>
    public uint Duration { get; set; }

    /// <summary>Whether the parser flagged the sample as a key frame.</summary>
    public bool IsKeyFrame { get; set; }
}
public class Mp4Parser
{
/// <summary>Complete contents of the MP4 file being parsed.</summary>
private readonly byte[] _fileData;

/// <summary>Read cursor into <see cref="_fileData"/> used while walking the top-level boxes.</summary>
private int _position;

/// <summary>
/// Creates a parser over an MP4 file that has been loaded into memory.
/// </summary>
/// <param name="fileData">Raw bytes of the MP4 file.</param>
/// <exception cref="ArgumentNullException"><paramref name="fileData"/> is <c>null</c>.</exception>
public Mp4Parser(byte[] fileData)
{
    // Fail at construction time instead of with a NullReferenceException on first use.
    _fileData = fileData ?? throw new ArgumentNullException(nameof(fileData));
    _position = 0;
}
/// <summary>
/// Walks the top-level boxes starting at the current cursor and collects the
/// AV1 ("av01") tracks described by the movie ("moov") box.
/// </summary>
/// <returns>The AV1 tracks that were found; empty when there are none.</returns>
/// <exception cref="EndOfStreamException">Fewer bytes remain than a box header requires.</exception>
/// <exception cref="InvalidDataException">A top-level box declares a size smaller than its own header.</exception>
public List<Mp4TrackInfo> Parse()
{
    var tracks = new List<Mp4TrackInfo>();
    while (_position < _fileData.Length)
    {
        // ReadBox leaves the cursor at the end of the box, so every box type
        // (including "mdat", whose samples are read later via their offsets)
        // is skipped without any per-type size arithmetic here.
        var box = ReadBox();
        if (box.Type == "moov")
        {
            ParseMovieBox(box, tracks);
        }
    }
    return tracks;
}

/// <summary>
/// Reads the box that starts at the cursor and moves the cursor to the end of that box.
/// </summary>
/// <remarks>
/// Handles the three size encodings of a box header: a plain 32-bit size, the value 1
/// (a 64-bit size follows the type, giving a 16-byte header) and the value 0 (the box
/// extends to the end of the file). The payload is copied into the returned box except
/// for "mdat", which is never inspected through the box, and for boxes that claim to
/// extend past the end of the file; both get an empty payload.
/// </remarks>
/// <returns>The box header information together with its payload.</returns>
/// <exception cref="EndOfStreamException">Fewer bytes remain than the header requires.</exception>
/// <exception cref="InvalidDataException">The declared size is smaller than the header itself.</exception>
private Mp4Box ReadBox()
{
    const int CompactHeaderSize = 8;
    const int ExtendedHeaderSize = 16;

    int boxStart = _position;
    int remaining = _fileData.Length - boxStart;
    if (remaining < CompactHeaderSize)
    {
        throw new EndOfStreamException("Insufficient data for box header");
    }

    var span = _fileData.AsSpan(boxStart);
    uint size32 = Mp4Reader.ReadUInt32BigEndian(span, 0);
    string type = Mp4Reader.ReadFourCC(span, 4);

    int headerSize = CompactHeaderSize;
    ulong boxSize = size32;
    if (size32 == 1)
    {
        // Extended size: the real size is the 64-bit field after the type.
        if (remaining < ExtendedHeaderSize)
        {
            throw new EndOfStreamException("Insufficient data for extended box header");
        }
        headerSize = ExtendedHeaderSize;
        boxSize = Mp4Reader.ReadUInt64BigEndian(span, 8);
    }
    else if (size32 == 0)
    {
        // Size 0 means "this box runs to the end of the file".
        boxSize = (ulong)remaining;
    }

    if (boxSize < (ulong)headerSize)
    {
        // A box cannot be smaller than its header; continuing would never advance the cursor.
        throw new InvalidDataException($"Invalid size {boxSize} for box '{type}' at offset {boxStart}");
    }

    bool truncated = boxSize > (ulong)remaining;
    int boxEnd = truncated ? _fileData.Length : boxStart + (int)boxSize;

    byte[] data = Array.Empty<byte>();
    if (!truncated && type != "mdat")
    {
        data = _fileData.AsSpan(boxStart + headerSize, (int)boxSize - headerSize).ToArray();
    }

    _position = boxEnd;
    return new Mp4Box((uint)Math.Min(boxSize, uint.MaxValue), type, data, boxStart);
}
/// <summary>
/// Scans the children of a "moov" box and appends every AV1 track found to <paramref name="tracks"/>.
/// </summary>
/// <param name="movieBox">The movie box whose payload is scanned.</param>
/// <param name="tracks">Destination list for the AV1 tracks.</param>
private void ParseMovieBox(Mp4Box movieBox, List<Mp4TrackInfo> tracks)
{
    const int BoxHeaderSize = 8;

    var data = movieBox.Data;
    int pos = 0;
    while (pos < data.Length)
    {
        int remaining = data.Length - pos;
        if (remaining < BoxHeaderSize)
        {
            break;
        }

        var span = data.AsSpan(pos);
        uint size = Mp4Reader.ReadUInt32BigEndian(span, 0);
        string type = Mp4Reader.ReadFourCC(span, 4);

        // Stop on a child that is smaller than its header or claims more bytes than
        // the parent holds; slicing it would throw and casting it could wrap negative.
        if (size < BoxHeaderSize || size > (uint)remaining)
        {
            break;
        }

        if (type == "trak")
        {
            var trackData = data.AsSpan(pos + BoxHeaderSize, (int)size - BoxHeaderSize);
            if (ParseTrackBox(trackData) is { CodecType: "av01" } track)
            {
                tracks.Add(track);
            }
        }

        pos += (int)size;
    }
}
/// <summary>
/// Parses the payload of a "trak" box by dispatching its "tkhd" and "mdia" children.
/// </summary>
/// <param name="trackData">Payload of the track box (without the 8-byte box header).</param>
/// <returns>The populated track when its codec type is "av01"; otherwise <c>null</c>.</returns>
private Mp4TrackInfo? ParseTrackBox(ReadOnlySpan<byte> trackData)
{
    const int BoxHeaderSize = 8;

    var track = new Mp4TrackInfo();
    int pos = 0;
    while (pos < trackData.Length)
    {
        int remaining = trackData.Length - pos;
        if (remaining < BoxHeaderSize)
        {
            break;
        }

        uint size = Mp4Reader.ReadUInt32BigEndian(trackData, pos);
        string type = Mp4Reader.ReadFourCC(trackData, pos + 4);

        // Reject children that are too small or overrun the parent instead of letting Slice throw.
        if (size < BoxHeaderSize || size > (uint)remaining)
        {
            break;
        }

        var childData = trackData.Slice(pos + BoxHeaderSize, (int)size - BoxHeaderSize);
        if (type == "mdia")
        {
            ParseMediaBox(childData, track);
        }
        else if (type == "tkhd")
        {
            ParseTrackHeaderBox(childData, track);
        }

        pos += (int)size;
    }

    return track.CodecType == "av01" ? track : null;
}
/// <summary>
/// Parses the payload of an "mdia" box by dispatching its "mdhd" and "minf" children.
/// </summary>
/// <param name="mediaData">Payload of the media box (without the 8-byte box header).</param>
/// <param name="track">Track that receives the parsed values.</param>
private void ParseMediaBox(ReadOnlySpan<byte> mediaData, Mp4TrackInfo track)
{
    const int BoxHeaderSize = 8;

    int pos = 0;
    while (pos < mediaData.Length)
    {
        int remaining = mediaData.Length - pos;
        if (remaining < BoxHeaderSize)
        {
            break;
        }

        uint size = Mp4Reader.ReadUInt32BigEndian(mediaData, pos);
        string type = Mp4Reader.ReadFourCC(mediaData, pos + 4);

        // Reject children that are too small or overrun the parent instead of letting Slice throw.
        if (size < BoxHeaderSize || size > (uint)remaining)
        {
            break;
        }

        var childData = mediaData.Slice(pos + BoxHeaderSize, (int)size - BoxHeaderSize);
        if (type == "mdhd")
        {
            ParseMediaHeaderBox(childData, track);
        }
        else if (type == "minf")
        {
            ParseMediaInfoBox(childData, track);
        }

        pos += (int)size;
    }
}
/// <summary>
/// Reads the timescale and duration from the payload of an "mdhd" box.
/// </summary>
/// <remarks>
/// The payload starts with a 1-byte version and 3 bytes of flags. Version 1 uses 64-bit
/// creation/modification times and duration; any other version is read with the 32-bit layout.
/// </remarks>
/// <param name="mdhdData">Payload of the media header box (without the 8-byte box header).</param>
/// <param name="track">Track whose <c>TimeScale</c> and <c>Duration</c> are set.</param>
private void ParseMediaHeaderBox(ReadOnlySpan<byte> mdhdData, Mp4TrackInfo track)
{
    if (mdhdData.Length < 20)
    {
        return;
    }

    uint timeScale;
    ulong durationTicks;
    if (mdhdData[0] == 1)
    {
        if (mdhdData.Length < 32)
        {
            return;
        }
        timeScale = Mp4Reader.ReadUInt32BigEndian(mdhdData, 20);
        durationTicks = Mp4Reader.ReadUInt64BigEndian(mdhdData, 24);
    }
    else
    {
        timeScale = Mp4Reader.ReadUInt32BigEndian(mdhdData, 12);
        durationTicks = Mp4Reader.ReadUInt32BigEndian(mdhdData, 16);
    }

    track.TimeScale = timeScale;
    // A zero timescale would turn the division into Infinity or NaN; report 0 instead,
    // which is also the value Duration has when no media header was seen.
    track.Duration = timeScale == 0 ? 0.0 : (double)durationTicks / timeScale;
}
/// <summary>
/// Reads the track id and the presentation width/height from the payload of a "tkhd" box.
/// </summary>
/// <remarks>
/// Offsets are relative to the payload, which begins with the 4-byte version/flags word:
/// <list type="bullet">
/// <item>version 0: creation(4) modification(4) track_ID@12 reserved(4) duration(4)</item>
/// <item>version 1: creation(8) modification(8) track_ID@20 reserved(4) duration(8)</item>
/// </list>
/// followed in both cases by reserved(8), layer(2), alternate_group(2), volume(2),
/// reserved(2), matrix(36), width(4), height(4). That puts width/height at 76/80 for
/// version 0 and 88/92 for version 1.
/// </remarks>
/// <param name="tkhdData">Payload of the track header box (without the 8-byte box header).</param>
/// <param name="track">Track whose <c>TrackId</c>, <c>Width</c> and <c>Height</c> are set.</param>
private void ParseTrackHeaderBox(ReadOnlySpan<byte> tkhdData, Mp4TrackInfo track)
{
    if (tkhdData.IsEmpty)
    {
        return;
    }

    int trackIdOffset;
    int widthOffset;
    if (tkhdData[0] == 1)
    {
        trackIdOffset = 20;
        widthOffset = 88;
    }
    else
    {
        trackIdOffset = 12;
        widthOffset = 76;
    }
    int heightOffset = widthOffset + 4;

    // Height is the last field of the box, so it determines the minimum payload length.
    if (tkhdData.Length < heightOffset + 4)
    {
        return;
    }

    track.TrackId = Mp4Reader.ReadUInt32BigEndian(tkhdData, trackIdOffset);
    // Width and height are 16.16 fixed point; keep the integer part.
    track.Width = Mp4Reader.ReadUInt32BigEndian(tkhdData, widthOffset) >> 16;
    track.Height = Mp4Reader.ReadUInt32BigEndian(tkhdData, heightOffset) >> 16;
}
/// <summary>
/// Parses the payload of a "minf" box and forwards its "stbl" child to the sample table parser.
/// </summary>
/// <param name="minfData">Payload of the media information box (without the 8-byte box header).</param>
/// <param name="track">Track that receives the parsed values.</param>
private void ParseMediaInfoBox(ReadOnlySpan<byte> minfData, Mp4TrackInfo track)
{
    const int BoxHeaderSize = 8;

    int pos = 0;
    while (pos < minfData.Length)
    {
        int remaining = minfData.Length - pos;
        if (remaining < BoxHeaderSize)
        {
            break;
        }

        uint size = Mp4Reader.ReadUInt32BigEndian(minfData, pos);
        string type = Mp4Reader.ReadFourCC(minfData, pos + 4);

        // Reject children that are too small or overrun the parent instead of letting Slice throw.
        if (size < BoxHeaderSize || size > (uint)remaining)
        {
            break;
        }

        if (type == "stbl")
        {
            var stblData = minfData.Slice(pos + BoxHeaderSize, (int)size - BoxHeaderSize);
            ParseSampleTableBox(stblData, track);
        }

        pos += (int)size;
    }
}
/// <summary>
/// Parses the payload of an "stbl" box: reads the sample description and the chunk offset,
/// sample size and sample-to-chunk tables, then builds the track's sample list from them.
/// </summary>
/// <param name="stblData">Payload of the sample table box (without the 8-byte box header).</param>
/// <param name="track">Track that receives the codec information and samples.</param>
private void ParseSampleTableBox(ReadOnlySpan<byte> stblData, Mp4TrackInfo track)
{
    const int BoxHeaderSize = 8;

    var chunkOffsets = new List<long>();
    var sampleSizes = new List<uint>();
    var samplesPerChunk = new List<(uint firstChunk, uint samplesPerChunk)>();

    int pos = 0;
    while (pos < stblData.Length)
    {
        int remaining = stblData.Length - pos;
        if (remaining < BoxHeaderSize)
        {
            break;
        }

        uint size = Mp4Reader.ReadUInt32BigEndian(stblData, pos);
        string type = Mp4Reader.ReadFourCC(stblData, pos + 4);

        // Reject children that are too small or overrun the parent instead of letting Slice throw.
        if (size < BoxHeaderSize || size > (uint)remaining)
        {
            break;
        }

        var boxData = stblData.Slice(pos + BoxHeaderSize, (int)size - BoxHeaderSize);
        switch (type)
        {
            case "stsd":
                ParseSampleDescriptionBox(boxData, track);
                break;
            case "stco":
                chunkOffsets.AddRange(ParseChunkOffsetBox(boxData));
                break;
            case "co64":
                chunkOffsets.AddRange(ParseChunkOffset64Box(boxData));
                break;
            case "stsz":
                sampleSizes.AddRange(ParseSampleSizeBox(boxData));
                break;
            case "stsc":
                samplesPerChunk.AddRange(ParseSampleToChunkBox(boxData));
                break;
        }

        pos += (int)size;
    }

    // Build sample list from parsed tables
    System.Diagnostics.Debug.WriteLine($"[MP4_PARSER] Building sample list: {chunkOffsets.Count} chunks, {sampleSizes.Count} sizes, {samplesPerChunk.Count} sample-to-chunk entries");
    BuildSampleList(track, chunkOffsets, sampleSizes, samplesPerChunk);
}
/// <summary>
/// Reads the first entry of an "stsd" box, records its four-character code as the track's
/// codec type and, for "av01" entries, extracts the AV1 configuration.
/// </summary>
/// <param name="stsdData">Payload of the sample description box (without the 8-byte box header).</param>
/// <param name="track">Track whose <c>CodecType</c> (and possibly AV1 configuration) is set.</param>
private void ParseSampleDescriptionBox(ReadOnlySpan<byte> stsdData, Mp4TrackInfo track)
{
    // Payload layout: version/flags (4 bytes), entry_count (4 bytes), then the entries.
    const int FirstEntryOffset = 8;
    const int BoxHeaderSize = 8;

    if (stsdData.Length < FirstEntryOffset)
    {
        return;
    }

    uint entryCount = Mp4Reader.ReadUInt32BigEndian(stsdData, 4);
    if (entryCount == 0)
    {
        return;
    }

    int available = stsdData.Length - FirstEntryOffset;
    if (available < BoxHeaderSize)
    {
        return;
    }

    uint entrySize = Mp4Reader.ReadUInt32BigEndian(stsdData, FirstEntryOffset);
    string codecType = Mp4Reader.ReadFourCC(stsdData, FirstEntryOffset + 4);
    track.CodecType = codecType;
    System.Diagnostics.Debug.WriteLine($"[MP4_PARSER] Found codec: {codecType}, entry count: {entryCount}, entry size: {entrySize}");

    // For AV1, look for av1C configuration box within the sample entry
    if (codecType == "av01")
    {
        // Clamp a declared size that overruns the box so Slice cannot throw; the
        // sample-entry parser does its own bounds checks on whatever is available.
        int entryLength = (int)Math.Min(entrySize, (uint)available);
        ParseAv1SampleEntry(stsdData.Slice(FirstEntryOffset, entryLength), track);
    }
}
/// <summary>
/// Searches the child boxes of an "av01" sample entry for the "av1C" box and stores its
/// payload as the track's AV1 configuration record.
/// </summary>
/// <param name="av01Data">The complete sample entry, starting at its own 8-byte box header.</param>
/// <param name="track">Track whose <c>Av1ConfigurationRecord</c> is set when "av1C" is found.</param>
private void ParseAv1SampleEntry(ReadOnlySpan<byte> av01Data, Mp4TrackInfo track)
{
    const int BoxHeaderSize = 8;

    // Child boxes start after the fixed part of the visual sample entry (86 bytes):
    //   8  box header (size + type)
    //   8  reserved(6) + data_reference_index(2)
    //  16  pre_defined(2) + reserved(2) + pre_defined(12)
    //   4  width(2) + height(2)
    //   8  horizresolution(4) + vertresolution(4)
    //   4  reserved
    //   2  frame_count
    //  32  compressorname
    //   4  depth(2) + pre_defined(2)
    const int ChildBoxesOffset = 86;

    int pos = ChildBoxesOffset;
    while (pos + BoxHeaderSize <= av01Data.Length)
    {
        uint boxSize = Mp4Reader.ReadUInt32BigEndian(av01Data, pos);
        string boxType = Mp4Reader.ReadFourCC(av01Data, pos + 4);

        // Stop on a child that is too small or overruns the entry; casting such a size
        // to int could wrap negative and move the cursor backwards.
        if (boxSize < BoxHeaderSize || boxSize > (uint)(av01Data.Length - pos))
        {
            break;
        }

        if (boxType == "av1C")
        {
            // Extract av1C configuration data (skip 8-byte header)
            int configSize = (int)boxSize - BoxHeaderSize;
            track.Av1ConfigurationRecord = av01Data.Slice(pos + BoxHeaderSize, configSize).ToArray();
            System.Diagnostics.Debug.WriteLine($"[MP4_PARSER] Extracted av1C configuration: {configSize} bytes");
            break;
        }

        pos += (int)boxSize;
    }
}
/// <summary>
/// Reads the 32-bit chunk offsets stored in the payload of an "stco" box.
/// </summary>
/// <param name="stcoData">Payload of the box (without the 8-byte box header).</param>
/// <returns>
/// The offsets in table order. Reading stops at the declared entry count or at the end
/// of the payload, whichever comes first.
/// </returns>
private List<long> ParseChunkOffsetBox(ReadOnlySpan<byte> stcoData)
{
    // version/flags (4 bytes) + entry_count (4 bytes) precede the table.
    const int TableStart = 8;
    const int EntrySize = 4;

    var result = new List<long>();
    if (stcoData.Length >= TableStart)
    {
        uint declaredCount = Mp4Reader.ReadUInt32BigEndian(stcoData, 4);
        uint readCount = 0;
        int cursor = TableStart;
        while (readCount < declaredCount && cursor + EntrySize <= stcoData.Length)
        {
            uint chunkOffset = Mp4Reader.ReadUInt32BigEndian(stcoData, cursor);
            result.Add(chunkOffset);
            cursor += EntrySize;
            readCount++;
        }
    }
    return result;
}
/// <summary>
/// Reads the 64-bit chunk offsets stored in the payload of a "co64" box.
/// </summary>
/// <param name="co64Data">Payload of the box (without the 8-byte box header).</param>
/// <returns>
/// The offsets in table order. Reading stops at the declared entry count or at the end
/// of the payload, whichever comes first.
/// </returns>
private List<long> ParseChunkOffset64Box(ReadOnlySpan<byte> co64Data)
{
    // version/flags (4 bytes) + entry_count (4 bytes) precede the table.
    const int TableStart = 8;
    const int EntrySize = 8;

    var result = new List<long>();
    if (co64Data.Length >= TableStart)
    {
        uint declaredCount = Mp4Reader.ReadUInt32BigEndian(co64Data, 4);
        uint readCount = 0;
        int cursor = TableStart;
        while (readCount < declaredCount && cursor + EntrySize <= co64Data.Length)
        {
            ulong chunkOffset = Mp4Reader.ReadUInt64BigEndian(co64Data, cursor);
            result.Add((long)chunkOffset);
            cursor += EntrySize;
            readCount++;
        }
    }
    return result;
}
/// <summary>
/// Reads the per-sample sizes described by the payload of an "stsz" box.
/// </summary>
/// <remarks>
/// When the box declares a non-zero constant sample size, that size is repeated for every
/// sample; otherwise one 32-bit size per sample is read from the table that follows.
/// </remarks>
/// <param name="stszData">Payload of the box (without the 8-byte box header).</param>
/// <returns>One size per sample, in sample order.</returns>
private List<uint> ParseSampleSizeBox(ReadOnlySpan<byte> stszData)
{
    // version/flags (4 bytes) + sample_size (4 bytes) + sample_count (4 bytes) precede the table.
    const int TableStart = 12;

    if (stszData.Length < TableStart)
    {
        return new List<uint>();
    }

    uint sampleSize = Mp4Reader.ReadUInt32BigEndian(stszData, 4);
    uint sampleCount = Mp4Reader.ReadUInt32BigEndian(stszData, 8);

    if (sampleSize != 0)
    {
        // All samples have the same size. The count comes straight from the file, so cap
        // it at the number of such samples the file could physically contain; otherwise a
        // corrupt count of up to 2^32-1 would drive an enormous allocation.
        uint maxPlausible = (uint)_fileData.Length / sampleSize;
        uint count = Math.Min(sampleCount, maxPlausible);

        var uniform = new List<uint>((int)count);
        for (uint i = 0; i < count; i++)
        {
            uniform.Add(sampleSize);
        }
        return uniform;
    }

    // Each sample has individual size; the table length bounds how many can be read.
    var sizes = new List<uint>();
    int pos = TableStart;
    for (uint i = 0; i < sampleCount && pos + 4 <= stszData.Length; i++)
    {
        sizes.Add(Mp4Reader.ReadUInt32BigEndian(stszData, pos));
        pos += 4;
    }
    return sizes;
}
/// <summary>
/// Reads the sample-to-chunk entries stored in the payload of an "stsc" box.
/// </summary>
/// <param name="stscData">Payload of the box (without the 8-byte box header).</param>
/// <returns>
/// The (first chunk, samples per chunk) pairs in table order. The sample description index
/// of each entry is not returned.
/// </returns>
private List<(uint firstChunk, uint samplesPerChunk)> ParseSampleToChunkBox(ReadOnlySpan<byte> stscData)
{
    // version/flags (4 bytes) + entry_count (4 bytes) precede the table.
    const int TableStart = 8;
    // first_chunk (4) + samples_per_chunk (4) + sample_description_index (4)
    const int EntrySize = 12;

    var result = new List<(uint firstChunk, uint samplesPerChunk)>();
    if (stscData.Length >= TableStart)
    {
        uint declaredCount = Mp4Reader.ReadUInt32BigEndian(stscData, 4);
        uint readCount = 0;
        int cursor = TableStart;
        while (readCount < declaredCount && cursor + EntrySize <= stscData.Length)
        {
            uint first = Mp4Reader.ReadUInt32BigEndian(stscData, cursor);
            uint perChunk = Mp4Reader.ReadUInt32BigEndian(stscData, cursor + 4);
            // The sample description index at cursor + 8 is intentionally not read.
            result.Add((first, perChunk));
            cursor += EntrySize;
            readCount++;
        }
    }
    return result;
}
/// <summary>
/// Combines the chunk offset, sample size and sample-to-chunk tables into the track's sample list.
/// </summary>
/// <remarks>
/// Samples inside a chunk are laid out back to back starting at the chunk offset. Every sample
/// gets a duration of 1 and is flagged as a key frame; the "stts" and "stss" tables are not consulted.
/// Nothing is added when any of the three tables is empty.
/// </remarks>
/// <param name="track">Track whose <c>Samples</c> list is appended to.</param>
/// <param name="chunkOffsets">File offset of each chunk, in chunk order.</param>
/// <param name="sampleSizes">Size of each sample, in sample order.</param>
/// <param name="samplesPerChunk">Sample-to-chunk entries (1-based first chunk, samples per chunk).</param>
private void BuildSampleList(Mp4TrackInfo track, List<long> chunkOffsets, List<uint> sampleSizes,
    List<(uint firstChunk, uint samplesPerChunk)> samplesPerChunk)
{
    bool anyTableEmpty = chunkOffsets.Count == 0 || sampleSizes.Count == 0 || samplesPerChunk.Count == 0;
    if (anyTableEmpty)
    {
        return;
    }

    int nextSample = 0;
    for (int chunk = 0; chunk < chunkOffsets.Count; chunk++)
    {
        // Chunk numbers in the sample-to-chunk table are 1-based.
        uint samplesInChunk = ResolveSamplesPerChunk((uint)(chunk + 1));
        long cursor = chunkOffsets[chunk];

        uint emitted = 0;
        while (emitted < samplesInChunk && nextSample < sampleSizes.Count)
        {
            uint size = sampleSizes[nextSample];
            track.Samples.Add(new Mp4Sample
            {
                Offset = cursor,
                Size = size,
                Duration = 1, // Default duration, should be parsed from stts
                IsKeyFrame = true // Simplified - should parse from stss
            });
            cursor += size;
            nextSample++;
            emitted++;
        }
    }

    // Returns the samples-per-chunk value of the last entry whose first chunk is not
    // after the given chunk, falling back to the first entry when none qualifies.
    uint ResolveSamplesPerChunk(uint chunkNumber)
    {
        int entry = samplesPerChunk.Count - 1;
        while (entry >= 0 && chunkNumber < samplesPerChunk[entry].firstChunk)
        {
            entry--;
        }
        return samplesPerChunk[entry >= 0 ? entry : 0].samplesPerChunk;
    }
}
/// <summary>
/// Copies the bytes of a sample out of the file buffer.
/// </summary>
/// <param name="sample">Sample whose <c>Offset</c> and <c>Size</c> locate the data.</param>
/// <returns>
/// A new array holding the sample bytes, or an empty array when the sample does not lie
/// entirely inside the file.
/// </returns>
public byte[] GetSampleData(Mp4Sample sample)
{
    // Compare against the space left after Offset rather than computing Offset + Size,
    // which can overflow for offsets near long.MaxValue and slip past the check.
    bool offsetInFile = sample.Offset >= 0 && sample.Offset <= _fileData.Length;
    if (!offsetInFile || sample.Size > _fileData.Length - sample.Offset)
    {
        return Array.Empty<byte>();
    }

    // Both values are now known to fit inside the buffer, so the int casts are safe.
    return _fileData.AsSpan((int)sample.Offset, (int)sample.Size).ToArray();
}
}