// Files
// video-orchestra/godot-project/scripts/Utils/WebMParser.cs
// 2025-09-15 00:17:01 +09:00
//
// 592 lines
// 14 KiB
// C#

using Godot;
using System;
using System.Collections.Generic;
using System.IO;
namespace VideoOrchestra.Utils
{
/// <summary>
/// Enhanced WebM container parser to extract VP9 bitstream frames
/// Attempts to locate actual VP9 packets within the WebM/Matroska container
/// </summary>
public static class WebMParser
{
// EBML/Matroska element IDs
// Each value is the element ID written as one big-endian number, including its length-marker bits.
// Compared by FindEBMLHeader against every 4-byte big-endian window of the input.
private const uint EBML_HEADER = 0x1A45DFA3;
// Searched for with FindElement in ExtractFramesEnhanced before clusters are scanned.
private const uint SEGMENT = 0x18538067;
// Searched for with FindElement in ExtractFramesEnhanced; each hit is handed to ParseCluster.
private const uint CLUSTER = 0x1F43B675;
// One-byte ID; ParseCluster searches for it and it is matched on its single leading byte.
private const uint SIMPLE_BLOCK = 0xA3;
// The next three IDs are declared but not referenced anywhere in the visible part of this file.
private const uint BLOCK_GROUP = 0xA0;
private const uint BLOCK = 0xA1;
private const uint TRACK_NUMBER = 0xD7;
// VP9 frame markers
// Not referenced in the visible part of this file; the same four bytes are repeated as literals
// in ExtractFramesPatternBased and ContainsVP9Patterns.
// NOTE(review): presumably 0x82 is a keyframe header byte followed by the 0x49 0x83 0x42 sync code — confirm against the VP9 spec.
private static readonly byte[] VP9_FRAME_MARKER = { 0x82, 0x49, 0x83, 0x42 }; // VP9 sync pattern
/// <summary>
/// Extract VP9 frames from WebM file data, trying progressively cruder strategies:
/// container parsing first, then byte-pattern scanning, and finally simulated frames
/// sliced out of the raw bytes.
/// </summary>
/// <param name="webmData">Raw WebM file data. A null buffer is logged and yields an empty list.</param>
/// <returns>
/// List of frame payloads; never null. When neither real strategy finds anything the list
/// holds simulated frames, so a non-empty result does not guarantee decodable VP9 data.
/// </returns>
public static List<byte[]> ExtractVP9Frames(byte[] webmData)
{
    var frames = new List<byte[]>();

    // Guard up front: every strategy below dereferences the buffer, and the catch-block
    // fallback would otherwise rethrow a NullReferenceException to the caller.
    if (webmData == null)
    {
        GD.PrintErr("Error parsing WebM data: input buffer is null");
        return frames;
    }

    try
    {
        // Try enhanced WebM parsing first
        frames.AddRange(ExtractFramesEnhanced(webmData));

        if (frames.Count == 0)
        {
            // Fallback to pattern-based extraction
            frames.AddRange(ExtractFramesPatternBased(webmData));
        }

        if (frames.Count == 0)
        {
            // Final fallback to simulation
            frames.AddRange(ExtractFramesSimple(webmData));
        }

        GD.Print($"WebM parsing: {frames.Count} frames extracted from {webmData.Length} bytes");
    }
    catch (Exception ex)
    {
        GD.PrintErr($"Error parsing WebM data: {ex.Message}");

        // Only simulate when nothing was extracted, so real frames are never mixed with
        // simulated ones. The fallback itself is guarded so this method never throws.
        if (frames.Count == 0)
        {
            try
            {
                frames.AddRange(ExtractFramesSimple(webmData));
            }
            catch (Exception fallbackEx)
            {
                GD.PrintErr($"Error parsing WebM data: fallback extraction failed: {fallbackEx.Message}");
            }
        }
    }

    return frames;
}
/// <summary>
/// Enhanced WebM container parsing to extract VP9 bitstream packets.
/// Locates the EBML header and the Segment, then repeatedly finds Cluster elements and
/// collects the frames ParseCluster returns for each.
/// </summary>
/// <param name="data">Raw WebM file data.</param>
/// <returns>
/// Frames found so far; empty when the EBML header or Segment is missing. Exceptions are
/// logged and swallowed, so a partial list may be returned.
/// </returns>
private static List<byte[]> ExtractFramesEnhanced(byte[] data)
{
    var frames = new List<byte[]>();

    // The signature counter is static and only ever grows; remember its value so the
    // summary below reports this call's signatures rather than the running total.
    int signaturesBefore = _vp9SignatureFrames;

    try
    {
        using var stream = new MemoryStream(data);
        using var reader = new BinaryReader(stream);

        // Look for EBML header, then for the Segment
        if (!FindEBMLHeader(reader) || !FindElement(reader, SEGMENT))
        {
            return frames;
        }

        // Parse clusters to find blocks with VP9 data
        while (stream.Position < stream.Length - 8)
        {
            if (FindElement(reader, CLUSTER))
            {
                frames.AddRange(ParseCluster(reader));

                // Soft cap: checked after each cluster, so the total may exceed 100.
                if (frames.Count > 100)
                {
                    break;
                }
            }
            else if (stream.Position + 1024 < stream.Length)
            {
                // Skip ahead
                stream.Position += 1024;
            }
            else
            {
                break;
            }
        }

        // Essential summary only
        if (frames.Count > 0)
        {
            // long accumulator: many frames of several hundred KB can exceed int range.
            long totalSize = 0;
            foreach (var frame in frames)
            {
                totalSize += frame.Length;
            }

            long avgSize = totalSize / frames.Count;
            int signatureCount = _vp9SignatureFrames - signaturesBefore;
            GD.Print($"Enhanced: {frames.Count} frames, avg {avgSize} bytes, {signatureCount} VP9 signatures");
        }
    }
    catch (Exception ex)
    {
        GD.PrintErr($"Enhanced WebM parsing failed: {ex.Message}");
    }

    return frames;
}
/// <summary>
/// Scans the raw bytes for a fixed set of byte signatures and cuts a candidate frame from
/// each hit up to the next signature (or end of data). Candidates that pass
/// IsValidVP9Frame are kept and then run through RemoveDuplicateFrames.
/// </summary>
/// <param name="data">Raw file bytes to scan.</param>
/// <returns>Accepted frames; on an exception the error is logged and whatever was collected is returned.</returns>
private static List<byte[]> ExtractFramesPatternBased(byte[] data)
{
    var collected = new List<byte[]>();

    try
    {
        // Signatures treated as possible frame starts
        var signatures = new List<byte[]>
        {
            new byte[] { 0x82, 0x49, 0x83, 0x42 }, // VP9 sync pattern
            new byte[] { 0x49, 0x83, 0x42 },       // Alternative pattern
            new byte[] { 0x30, 0x00, 0x00 },       // Common VP9 frame start
            new byte[] { 0x10, 0x00, 0x00 },       // Another VP9 pattern
        };

        foreach (var signature in signatures)
        {
            int signatureLength = signature.Length;

            for (int cursor = 0; cursor < data.Length - signatureLength; )
            {
                int hit = FindPattern(data, signature, cursor);
                if (hit < 0)
                {
                    break;
                }

                // Resume the next search just past this hit, whatever happens to the candidate.
                cursor = hit + signatureLength;

                int boundary = FindNextFrameStart(data, cursor, signatures);
                int frameLength = boundary - hit;

                // Needs at least one byte after the signature and must stay under 100000 bytes.
                bool plausible = boundary > cursor && frameLength < 100000;
                if (!plausible)
                {
                    continue;
                }

                byte[] candidate = new byte[frameLength];
                Array.Copy(data, hit, candidate, 0, frameLength);

                if (IsValidVP9Frame(candidate))
                {
                    collected.Add(candidate);
                }
            }
        }

        // Several signatures can produce overlapping candidates; drop the repeats.
        collected = RemoveDuplicateFrames(collected);
    }
    catch (Exception ex)
    {
        GD.PrintErr($"Pattern-based VP9 extraction failed: {ex.Message}");
    }

    return collected;
}
/// <summary>
/// Slides a 4-byte window forward one byte at a time from the reader's current position,
/// looking for the EBML magic number 0x1A45DFA3.
/// </summary>
/// <param name="reader">Reader over a seekable stream.</param>
/// <returns>
/// True when found, leaving the stream just after the four magic bytes; false when the
/// end is reached or any exception occurs.
/// </returns>
private static bool FindEBMLHeader(BinaryReader reader)
{
    try
    {
        Stream source = reader.BaseStream;
        var window = new byte[4];

        while (source.Position <= source.Length - 4)
        {
            reader.Read(window, 0, 4);

            // Fold the window into one big-endian value.
            uint candidate = 0;
            foreach (byte b in window)
            {
                candidate = (candidate << 8) | b;
            }

            if (candidate == EBML_HEADER)
            {
                return true;
            }

            // Step back three bytes so the next window starts one byte later.
            source.Seek(-3, SeekOrigin.Current);
        }

        return false;
    }
    catch (Exception)
    {
        return false;
    }
}
/// <summary>
/// Scans forward from the reader's current position, one byte at a time, for an element
/// ID of one to four bytes.
/// </summary>
/// <remarks>
/// The ID width is derived from the magnitude of <paramref name="elementId"/>, and only
/// that many leading bytes of each 4-byte window are compared. This prevents a one-byte ID
/// such as 0xA3 from matching the sequence 00 00 00 A3 three bytes early, and makes every
/// short ID searchable rather than SimpleBlock alone. This is a raw byte scan, so payload
/// bytes that happen to equal the ID still match.
/// </remarks>
/// <param name="reader">Reader over a seekable stream.</param>
/// <param name="elementId">Element ID as a big-endian number, e.g. 0x1F43B675 or 0xA3.</param>
/// <returns>
/// True with the stream positioned on the first byte of the ID; false when the ID is not
/// found in any 4-byte window before end of stream, or when an exception occurs.
/// </returns>
private static bool FindElement(BinaryReader reader, uint elementId)
{
    // Number of bytes the ID occupies.
    int idLength = elementId > 0xFFFFFF ? 4
        : elementId > 0xFFFF ? 3
        : elementId > 0xFF ? 2
        : 1;

    // Shift that leaves only the leading idLength bytes of a 4-byte window.
    int shift = 8 * (4 - idLength);

    try
    {
        byte[] buffer = new byte[4];

        while (reader.BaseStream.Position <= reader.BaseStream.Length - 4)
        {
            reader.Read(buffer, 0, 4);
            uint window = (uint)((buffer[0] << 24) | (buffer[1] << 16) | (buffer[2] << 8) | buffer[3]);

            if ((window >> shift) == elementId)
            {
                reader.BaseStream.Position -= 4; // Reset to element start
                return true;
            }

            reader.BaseStream.Position -= 3; // Overlap search
        }

        return false;
    }
    catch (Exception)
    {
        // Best-effort scan: callers treat any failure as "not found".
        return false;
    }
}
/// <summary>
/// Collects frames from SimpleBlock candidates, starting at the reader's current position.
/// The loop runs while the position is more than 8 bytes short of the smaller of
/// (start + 1 MB) and the stream length.
/// </summary>
/// <param name="reader">Reader positioned at the start of a cluster.</param>
/// <returns>Payloads that passed IsValidVP9Frame; exceptions are logged and the partial list is returned.</returns>
private static List<byte[]> ParseCluster(BinaryReader reader)
{
    var extracted = new List<byte[]>();

    try
    {
        Stream source = reader.BaseStream;

        // Fixed once on entry: at most 1 MB past the start, never past the end of the stream.
        long scanLimit = Math.Min(source.Position + 1024 * 1024, source.Length) - 8;

        while (source.Position < scanLimit)
        {
            // Look for the next SimpleBlock candidate
            if (!FindElement(reader, SIMPLE_BLOCK))
            {
                source.Position += 16; // Skip ahead
                continue;
            }

            byte[] payload = ExtractBlockData(reader);
            if (payload == null)
            {
                continue;
            }

            if (IsValidVP9Frame(payload))
            {
                extracted.Add(payload);
            }
        }
    }
    catch (Exception ex)
    {
        GD.PrintErr($"Error parsing cluster: {ex.Message}");
    }

    return extracted;
}
/// <summary>
/// Reads one SimpleBlock at the reader's current position and returns its payload with
/// the block header removed.
/// </summary>
/// <remarks>
/// The header is the track number (an EBML variable-length integer of 1-8 bytes), a
/// 2-byte timecode and a 1-byte flags field. Its length is therefore computed from the
/// track-number width rather than assumed to be 4. Blocks whose declared size runs past
/// the end of the stream are rejected instead of being returned truncated.
/// Lacing (several frames in one block) is not handled; the whole payload is returned as-is.
/// </remarks>
/// <param name="reader">Reader positioned on the one-byte SimpleBlock element ID.</param>
/// <returns>The frame payload, or null when the block is implausible, truncated or malformed.</returns>
private static byte[] ExtractBlockData(BinaryReader reader)
{
    const int MaxBlockSize = 500000;   // Reasonable frame size
    const int FixedHeaderBytes = 3;    // 2-byte timecode + 1-byte flags

    try
    {
        reader.BaseStream.Position += 1; // Skip element ID

        // Read VINT size (simplified)
        int size = ReadVINT(reader);
        if (size <= 0 || size >= MaxBlockSize)
        {
            return null;
        }

        byte[] blockData = reader.ReadBytes(size);
        if (blockData.Length < size)
        {
            // ReadBytes returns fewer bytes at end of stream: the block is cut off.
            return null;
        }

        // Width of the track-number VINT = leading zero bits of its first byte + 1.
        int trackNumberLength = 1;
        for (int mask = 0x80; mask != 0 && (blockData[0] & mask) == 0; mask >>= 1)
        {
            trackNumberLength++;
        }

        if (trackNumberLength > 8)
        {
            return null; // First byte 0x00 is not a valid VINT
        }

        int headerSize = trackNumberLength + FixedHeaderBytes;
        if (blockData.Length <= headerSize)
        {
            return null; // Header only, no payload
        }

        byte[] frameData = new byte[blockData.Length - headerSize];
        Array.Copy(blockData, headerSize, frameData, 0, frameData.Length);
        return frameData;
    }
    catch (Exception ex)
    {
        GD.PrintErr($"Error extracting block data: {ex.Message}");
    }

    return null;
}
/// <summary>
/// Reads an EBML variable-length integer at the reader's current position.
/// </summary>
/// <remarks>
/// The number of leading zero bits in the first byte, plus one, is the total byte length
/// (1-8); the remaining bits of the first byte and all following bytes form the value.
/// The value is accumulated in a long because an 8-byte VINT carries up to 56 bits; with
/// an int accumulator the high bits were shifted out and a huge size could wrap to a
/// small, plausible-looking one. Values above int.MaxValue saturate to int.MaxValue.
/// </remarks>
/// <param name="reader">Reader positioned on the first byte of the VINT.</param>
/// <returns>
/// The decoded value, int.MaxValue when it does not fit in an int, or 0 when the first
/// byte is 0x00 (no length marker) or the stream ends early.
/// </returns>
private static int ReadVINT(BinaryReader reader)
{
    try
    {
        byte firstByte = reader.ReadByte();

        // Count leading zeros to determine VINT length
        int length = 1;
        for (int mask = 0x80; mask != 0 && (firstByte & mask) == 0; mask >>= 1)
        {
            length++;
        }

        if (length > 8)
        {
            return 0; // Invalid VINT
        }

        // Strip the length marker bit from the first byte.
        long value = firstByte & ((1 << (8 - length)) - 1);
        for (int i = 1; i < length; i++)
        {
            value = (value << 8) | reader.ReadByte();
        }

        return value > int.MaxValue ? int.MaxValue : (int)value;
    }
    catch (Exception)
    {
        // Best-effort: callers treat 0 as "no usable size".
        return 0;
    }
}
/// <summary>
/// Finds the first occurrence of <paramref name="pattern"/> in <paramref name="data"/>
/// at or after <paramref name="startPos"/>.
/// </summary>
/// <param name="data">Bytes to search.</param>
/// <param name="pattern">Byte sequence to look for.</param>
/// <param name="startPos">Index at which the search begins.</param>
/// <returns>Index of the first byte of the match, or -1 when there is none.</returns>
private static int FindPattern(byte[] data, byte[] pattern, int startPos)
{
    // Last index at which a full pattern still fits.
    int lastStart = data.Length - pattern.Length;

    for (int offset = startPos; offset <= lastStart; offset++)
    {
        int matched = 0;
        while (matched < pattern.Length && data[offset + matched] == pattern[matched])
        {
            matched++;
        }

        if (matched == pattern.Length)
        {
            return offset;
        }
    }

    return -1;
}
/// <summary>
/// Returns the smallest index at or after <paramref name="startPos"/> where any of the
/// given patterns occurs.
/// </summary>
/// <param name="data">Bytes to search.</param>
/// <param name="startPos">Index at which each pattern search begins.</param>
/// <param name="patterns">Byte sequences that mark a possible frame start.</param>
/// <returns>The nearest match index, or data.Length when no pattern is found.</returns>
private static int FindNextFrameStart(byte[] data, int startPos, List<byte[]> patterns)
{
    int nearestPos = data.Length;

    foreach (var pattern in patterns)
    {
        int pos = FindPattern(data, pattern, startPos);

        // FindPattern signals "not found" with -1; index 0 is a legitimate match and
        // must not be discarded (the previous test was pos > 0).
        if (pos >= 0 && pos < nearestPos)
        {
            nearestPos = pos;
        }
    }

    return nearestPos;
}
// Running counters shared by all calls; they are never reset in the visible code.
private static int _loggedFrames = 0;       // not written or read in the visible code
private static int _validFrames = 0;        // incremented for every accepted frame
private static int _vp9SignatureFrames = 0; // incremented for frames starting with a sync signature

/// <summary>
/// Heuristic check that a byte buffer could be a VP9 frame.
/// </summary>
/// <remarks>
/// A buffer of at least 4 bytes is accepted when any of the following holds:
/// it starts with 0x82 0x49 or 0x49 0x83 (counted as a VP9 signature);
/// its first byte is 0x30 or lies in 0x00-0x1F;
/// its length is between 100 and 100000 bytes inclusive.
/// NOTE(review): a real VP9 frame header presumably starts with the 2-bit frame marker
/// 0b10, so these rules look far looser than the bitstream format — confirm against the
/// VP9 spec before relying on this as a validator.
/// </remarks>
/// <param name="frameData">Candidate frame bytes; may be null.</param>
/// <returns>True when the buffer passes one of the heuristics; false otherwise, including for null or short input.</returns>
private static bool IsValidVP9Frame(byte[] frameData)
{
    if (frameData == null || frameData.Length < 4)
    {
        return false;
    }

    byte first = frameData[0];
    byte second = frameData[1];

    // VP9 sync pattern at the very start of the buffer
    bool hasSyncSignature =
        (first == 0x82 && second == 0x49) ||
        (first == 0x49 && second == 0x83);
    if (hasSyncSignature)
    {
        _vp9SignatureFrames++;
    }

    // 0x30, 0x10 and the 0x0_/0x1_ ranges collapse to: 0x30 or anything up to 0x1F.
    bool hasKnownLeadByte = first == 0x30 || first <= 0x1F;

    // Frame size should be reasonable
    bool hasReasonableSize = frameData.Length >= 100 && frameData.Length <= 100000;

    bool isValid = hasSyncSignature || hasKnownLeadByte || hasReasonableSize;
    if (isValid)
    {
        _validFrames++;
    }

    return isValid;
}
// Removed detailed frame content analysis to reduce logging
/// <summary>
/// Computes the Shannon entropy, in bits per byte, of the first 1024 bytes of
/// <paramref name="data"/> (or of all of it when shorter).
/// </summary>
/// <param name="data">Bytes to sample.</param>
/// <returns>Entropy of the sampled prefix; 0.0 for an empty buffer.</returns>
private static double CalculateEntropy(byte[] data)
{
    // Only a prefix is sampled, for performance.
    int sampled = data.Length < 1024 ? data.Length : 1024;

    var histogram = new int[256];
    for (int offset = 0; offset < sampled; offset++)
    {
        histogram[data[offset]] += 1;
    }

    double bits = 0.0;
    foreach (int count in histogram)
    {
        if (count <= 0)
        {
            continue; // Byte value never seen: contributes nothing
        }

        double p = (double)count / sampled;
        bits -= p * Math.Log2(p);
    }

    return bits;
}
/// <summary>
/// Reports whether any of a fixed set of byte sequences occurs anywhere in
/// <paramref name="frameData"/>.
/// </summary>
/// <param name="frameData">Bytes to search.</param>
/// <returns>True as soon as one sequence is found; false when none occurs.</returns>
private static bool ContainsVP9Patterns(byte[] frameData)
{
    // Byte sequences treated as VP9 indicators
    byte[][] indicators =
    {
        new byte[] { 0x82, 0x49, 0x83, 0x42 }, // VP9 signature
        new byte[] { 0x30, 0x00 },             // Common VP9 pattern
        new byte[] { 0x10, 0x00 },             // Another VP9 pattern
        new byte[] { 0x00, 0x00, 0x01 },       // Start code
    };

    // Array.Exists checks in order and stops at the first hit.
    return Array.Exists(indicators, indicator => FindPattern(frameData, indicator, 0) >= 0);
}
/// <summary>
/// Removes frames whose first 64 bytes (or whole content, when shorter) exactly repeat
/// those of an earlier frame, keeping the first occurrence and the original order.
/// </summary>
/// <remarks>
/// Frames are keyed on the prefix bytes themselves. The previous key was the arithmetic
/// sum of those bytes, which collides for any reordering or any two prefixes with the
/// same total, so distinct frames were silently dropped.
/// </remarks>
/// <param name="frames">Frames to filter; null entries are skipped.</param>
/// <returns>A new list of the retained frames; empty when <paramref name="frames"/> is null.</returns>
private static List<byte[]> RemoveDuplicateFrames(List<byte[]> frames)
{
    var uniqueFrames = new List<byte[]>();
    if (frames == null)
    {
        return uniqueFrames;
    }

    var seenPrefixes = new HashSet<string>();

    foreach (var frame in frames)
    {
        if (frame == null)
        {
            continue;
        }

        // Base64 of the prefix is a collision-free key, and prefixes of different
        // lengths always encode to different strings.
        int sampleSize = Math.Min(64, frame.Length);
        string prefixKey = Convert.ToBase64String(frame, 0, sampleSize);

        if (seenPrefixes.Add(prefixKey))
        {
            uniqueFrames.Add(frame);
        }
    }

    return uniqueFrames;
}
/// <summary>
/// Last-resort simulation: slices the raw input into 10-30 pseudo "frames" of varying
/// size and perturbs some of their bytes so the slices differ from each other.
/// The result is NOT real VP9 data; no container parsing happens here.
/// </summary>
/// <param name="data">Raw file bytes to slice; null or empty yields an empty list.</param>
/// <returns>Simulated frames; may be empty when the input is too small to produce a non-empty slice.</returns>
private static List<byte[]> ExtractFramesSimple(byte[] data)
{
    var frames = new List<byte[]>();
    if (data == null || data.Length == 0)
    {
        return frames;
    }

    // One frame per 2 KB of input, clamped to the range 10-30.
    int frameCount = Math.Min(30, Math.Max(10, data.Length / 2048));
    int baseFrameSize = data.Length / frameCount;

    for (int i = 0; i < frameCount; i++)
    {
        // Vary frame sizes with a sine wave: base size scaled by 0.8 + 0.4 * sin(i / 2).
        float sizeVariation = (float)(0.8 + 0.4 * Math.Sin(i * 0.5));
        int actualFrameSize = (int)(baseFrameSize * sizeVariation);
        actualFrameSize = Math.Min(actualFrameSize, data.Length - (i * baseFrameSize / 2));
        if (actualFrameSize <= 0)
        {
            continue;
        }

        byte[] frame = new byte[actualFrameSize];

        // i * data.Length exceeds int range once the input is larger than ~74 MB, which
        // produced a negative offset and made Array.Copy throw; compute it in 64 bits.
        // The modulo keeps the slice inside the buffer, so the result fits an int.
        int sourcePos = (int)((long)i * data.Length / frameCount % (data.Length - actualFrameSize));
        Array.Copy(data, sourcePos, frame, 0, actualFrameSize);

        // Perturb every 10th byte of the first 1000 so frames are more distinct.
        for (int j = 0; j < Math.Min(frame.Length, 1000); j += 10)
        {
            frame[j] = (byte)((frame[j] + i * 7 + j) % 256);
        }

        frames.Add(frame);
    }

    return frames;
}
/// <summary>
/// Get video information for a WebM file.
/// Placeholder: the input is not inspected and fixed default values are returned.
/// </summary>
/// <param name="webmData">Raw WebM file data (currently unused).</param>
/// <returns>A new WebMInfo describing 1920x1080 at 30 fps, 10 seconds long, with VP9 present.</returns>
public static WebMInfo GetVideoInfo(byte[] webmData)
{
    // A real implementation would read these from the WebM headers.
    var info = new WebMInfo();
    info.Width = 1920;
    info.Height = 1080;
    info.FrameRate = 30.0f;
    info.Duration = 10.0f; // seconds
    info.HasVP9 = true;
    return info;
}
}
/// <summary>
/// WebM video information: a plain data holder with publicly settable properties.
/// In this file it is created by WebMParser.GetVideoInfo, which fills in fixed default
/// values rather than values read from the file.
/// </summary>
public class WebMInfo
{
/// <summary>Frame width (presumably in pixels — TODO confirm).</summary>
public int Width { get; set; }
/// <summary>Frame height (presumably in pixels — TODO confirm).</summary>
public int Height { get; set; }
/// <summary>Frame rate (presumably frames per second — TODO confirm).</summary>
public float FrameRate { get; set; }
/// <summary>Duration in seconds, per the comment on the value assigned in GetVideoInfo.</summary>
public float Duration { get; set; }
/// <summary>Whether the file is reported to contain VP9 video.</summary>
public bool HasVP9 { get; set; }
}
}