using Godot;
using System;
using System.Collections.Concurrent;
using System.Runtime.InteropServices;

namespace VideoOrchestra.Platform
{
    /// <summary>
    /// macOS VP9 decoder. Tries to use VideoToolbox for hardware acceleration first,
    /// and falls back to libvpx for software decoding if hardware is not available.
    /// </summary>
    public unsafe class macOSVP9Decoder : IVP9PlatformDecoder
    {
        private const int MAX_STREAMS = 3;

        private ImageTexture[] _godotTextures = new ImageTexture[MAX_STREAMS];
        private bool _initialized = false;
        private int _width = 0;
        private int _height = 0;
        private VP9DecoderStatus _status = VP9DecoderStatus.Uninitialized;

        // Decoder mode: true = libvpx software path, false = VideoToolbox hardware path.
        private bool _useLibvpx = false;

        // --- VideoToolbox state ---
        private IntPtr[] _decompressionSessions = new IntPtr[MAX_STREAMS];
        // Handle to `this`; its IntPtr form is the decompression callback's refcon.
        private GCHandle _selfHandle;
        // Per-stream queues of CVPixelBufferRefs produced by the async decode callback,
        // drained on the main thread by UpdateTextures(). Each queued buffer is CFRetained.
        private ConcurrentQueue<IntPtr>[] _decodedImageBuffers = new ConcurrentQueue<IntPtr>[MAX_STREAMS];
        private IntPtr _formatDesc;

        // --- libvpx state ---
        private vpx_codec_ctx_t[] _libvpxContexts = new vpx_codec_ctx_t[MAX_STREAMS];

        public string PlatformName => "macOS";
        public bool IsHardwareDecodingSupported => CheckHardwareSupport();

        #region Native Interop

        private const string CoreFoundationLib = "/System/Library/Frameworks/CoreFoundation.framework/CoreFoundation";
        private const string VideoToolboxLib = "/System/Library/Frameworks/VideoToolbox.framework/VideoToolbox";
        private const string CoreMediaLib = "/System/Library/Frameworks/CoreMedia.framework/CoreMedia";
        private const string CoreVideoLib = "/System/Library/Frameworks/CoreVideo.framework/CoreVideo";

        #region Native Library Loading

        private static class NativeLibrary
        {
            [DllImport("libSystem.dylib")]
            internal static extern IntPtr dlopen(string path, int mode);

            [DllImport("libSystem.dylib")]
            internal static extern IntPtr dlsym(IntPtr handle, string symbol);

            [DllImport("libSystem.dylib")]
            internal static extern int dlclose(IntPtr handle);

            private static IntPtr _coreVideoHandle = IntPtr.Zero;

            /// <summary>
            /// Resolves an exported CoreVideo symbol (e.g. a CFString dictionary key),
            /// loading the framework on first use. Returns IntPtr.Zero on failure.
            /// </summary>
            internal static IntPtr GetCoreVideoSymbol(string symbol)
            {
                if (_coreVideoHandle == IntPtr.Zero)
                {
                    _coreVideoHandle = dlopen("/System/Library/Frameworks/CoreVideo.framework/CoreVideo", 0);
                    if (_coreVideoHandle == IntPtr.Zero)
                    {
                        GD.PrintErr("Failed to load CoreVideo framework.");
                        return IntPtr.Zero;
                    }
                }
                return dlsym(_coreVideoHandle, symbol);
            }

            internal static void CloseCoreVideo()
            {
                if (_coreVideoHandle != IntPtr.Zero)
                {
                    dlclose(_coreVideoHandle);
                    _coreVideoHandle = IntPtr.Zero;
                }
            }
        }

        #endregion

        #region VideoToolbox P/Invoke

        [DllImport(CoreFoundationLib)]
        private static extern void CFRelease(IntPtr cf);

        [DllImport(CoreFoundationLib)]
        private static extern void CFRetain(IntPtr cf);

        [DllImport(CoreFoundationLib)]
        private static extern IntPtr CFDictionaryCreateMutable(IntPtr allocator, nint capacity, IntPtr keyCallbacks, IntPtr valueCallbacks);

        [DllImport(CoreFoundationLib)]
        private static extern void CFDictionarySetValue(IntPtr theDict, IntPtr key, IntPtr value);

        [DllImport(CoreFoundationLib)]
        private static extern IntPtr CFNumberCreate(IntPtr allocator, int theType, ref int valuePtr);

        /// <summary>
        /// Mirrors the native VTDecompressionOutputCallbackRecord: the output callback
        /// function pointer plus the refcon handed back as the callback's first argument.
        /// </summary>
        [StructLayout(LayoutKind.Sequential)]
        private struct VTDecompressionOutputCallbackRecord
        {
            public IntPtr decompressionOutputCallback;
            public IntPtr decompressionOutputRefCon;
        }

        /// <summary>
        /// Mirrors the native CMTime struct (24 bytes, passed by value to the
        /// decompression output callback).
        /// </summary>
        [StructLayout(LayoutKind.Sequential)]
        private struct CMTime
        {
            public long value;
            public int timescale;
            public uint flags;
            public long epoch;
        }

        // Real session creation: VideoToolbox expects a pointer to a callback RECORD
        // (callback + refcon), not a bare function pointer.
        [DllImport(VideoToolboxLib)]
        private static extern int VTDecompressionSessionCreate(IntPtr allocator, IntPtr formatDescription, IntPtr videoDecoderSpecification, IntPtr destinationImageBufferAttributes, ref VTDecompressionOutputCallbackRecord outputCallback, out IntPtr decompressionSessionOut);

        // Overload used only for the capability probe, where no callback record is supplied.
        [DllImport(VideoToolboxLib)]
        private static extern int VTDecompressionSessionCreate(IntPtr allocator, IntPtr formatDescription, IntPtr videoDecoderSpecification, IntPtr destinationImageBufferAttributes, IntPtr outputCallback, out IntPtr decompressionSessionOut);

        [DllImport(VideoToolboxLib)]
        private static extern int VTDecompressionSessionDecodeFrame(IntPtr session, IntPtr sampleBuffer, uint decodeFlags, IntPtr sourceFrameRefCon, out uint infoFlagsOut);

        [DllImport(VideoToolboxLib)]
        private static extern void VTDecompressionSessionInvalidate(IntPtr session);

        [DllImport(CoreMediaLib)]
        private static extern int CMVideoFormatDescriptionCreate(IntPtr allocator, uint codecType, int width, int height, IntPtr extensions, out IntPtr formatDescriptionOut);

        [DllImport(CoreMediaLib)]
        private static extern int CMSampleBufferCreate(IntPtr allocator, IntPtr dataBuffer, [MarshalAs(UnmanagedType.I1)] bool dataReady, IntPtr makeDataReadyCallback, IntPtr makeDataReadyRefcon, IntPtr formatDescription, nint numSamples, nint numSampleTimingEntries, IntPtr sampleTimingArray, nint numSampleSizeEntries, IntPtr sampleSizeArray, out IntPtr sampleBufferOut);

        [DllImport(CoreMediaLib)]
        private static extern int CMBlockBufferCreateWithMemoryBlock(IntPtr structureAllocator, IntPtr memoryBlock, nint blockLength, IntPtr blockAllocator, IntPtr customBlockSource, nint offsetToData, nint dataLength, uint flags, out IntPtr blockBufferOut);

        [DllImport(CoreVideoLib)]
        private static extern int CVPixelBufferLockBaseAddress(IntPtr pixelBuffer, uint lockFlags);

        [DllImport(CoreVideoLib)]
        private static extern int CVPixelBufferUnlockBaseAddress(IntPtr pixelBuffer, uint lockFlags);

        [DllImport(CoreVideoLib)]
        private static extern IntPtr CVPixelBufferGetBaseAddress(IntPtr pixelBuffer);

        [DllImport(CoreVideoLib)]
        private static extern nint CVPixelBufferGetWidth(IntPtr pixelBuffer);

        [DllImport(CoreVideoLib)]
        private static extern nint CVPixelBufferGetHeight(IntPtr pixelBuffer);

        [DllImport(CoreVideoLib)]
        private static extern nint CVPixelBufferGetBytesPerRow(IntPtr pixelBuffer);

        private const uint kCMVideoCodecType_VP9 = 0x76703039; // 'vp09'
        private const int kCFNumberSInt32Type = 3;
        private const uint kCVPixelFormatType_32BGRA = 0x42475241; // 'BGRA'

        // Cached value of the exported kCFAllocatorNull CFAllocatorRef.
        private static IntPtr _kCFAllocatorNull = IntPtr.Zero;

        /// <summary>
        /// Resolves kCFAllocatorNull via dlsym. The symbol is the ADDRESS of the
        /// CFAllocatorRef variable, so it is dereferenced once. Required when wrapping
        /// memory CoreFoundation must not deallocate (e.g. GC-pinned managed arrays).
        /// </summary>
        private static IntPtr GetKCFAllocatorNull()
        {
            if (_kCFAllocatorNull == IntPtr.Zero)
            {
                IntPtr handle = NativeLibrary.dlopen(CoreFoundationLib, 0);
                if (handle != IntPtr.Zero)
                {
                    IntPtr sym = NativeLibrary.dlsym(handle, "kCFAllocatorNull");
                    if (sym != IntPtr.Zero)
                        _kCFAllocatorNull = *(IntPtr*)sym;
                }
            }
            return _kCFAllocatorNull;
        }

        #endregion

        #region libvpx P/Invoke

        private const int VPX_DECODER_ABI_VERSION = 4;

        [DllImport("libvpx")]
        private static extern IntPtr vpx_codec_vp9_dx();

        [DllImport("libvpx")]
        private static extern int vpx_codec_dec_init_ver(ref vpx_codec_ctx_t ctx, IntPtr iface, IntPtr cfg, long flags, int ver);

        [DllImport("libvpx")]
        private static extern int vpx_codec_decode(ref vpx_codec_ctx_t ctx, byte* data, uint data_sz, IntPtr user_priv, long deadline);

        [DllImport("libvpx")]
        private static extern IntPtr vpx_codec_get_frame(ref vpx_codec_ctx_t ctx, ref IntPtr iter);

        [DllImport("libvpx")]
        private static extern int vpx_codec_destroy(ref vpx_codec_ctx_t ctx);

        [StructLayout(LayoutKind.Sequential)]
        private struct vpx_codec_ctx_t
        {
            public IntPtr priv;
        }

        // Partial mirror of the native vpx_image struct: only the fields read by this
        // class (dimensions, plane pointers, strides) are mapped. Field offsets match
        // the native layout up to stride_3; trailing native fields are never accessed.
        [StructLayout(LayoutKind.Sequential, Pack = 1)]
        private struct vpx_image_t
        {
            public uint fmt;
            public uint cs;
            public uint range;
            public uint w;
            public uint h;
            public uint bit_depth;
            public uint d_w;
            public uint d_h;
            public uint r_w;
            public uint r_h;
            public uint x_chroma_shift;
            public uint y_chroma_shift;
            public IntPtr planes_0;
            public IntPtr planes_1;
            public IntPtr planes_2;
            public IntPtr planes_3;
            public int stride_0;
            public int stride_1;
            public int stride_2;
            public int stride_3;
        }

        #endregion

        #endregion

        public macOSVP9Decoder()
        {
            for (int i = 0; i < MAX_STREAMS; i++)
            {
                _godotTextures[i] = new ImageTexture();
                _libvpxContexts[i] = new vpx_codec_ctx_t();
                _decompressionSessions[i] = IntPtr.Zero;
            }
            // _decodedImageBuffers array is allocated at the field initializer;
            // the per-stream queues are created lazily in InitializeVideoToolbox().
        }

        /// <summary>
        /// Initializes the decoder for the given frame dimensions. Prefers VideoToolbox
        /// when <paramref name="enableHardware"/> is true and hardware support is present;
        /// otherwise (or on hardware init failure) falls back to libvpx.
        /// </summary>
        /// <returns>True on success; false if no decoder backend could be initialized.</returns>
        public bool Initialize(int width, int height, bool enableHardware = true)
        {
            _width = width;
            _height = height;
            string mode = "Unknown";

            if (enableHardware && IsHardwareDecodingSupported)
            {
                _useLibvpx = false;
                mode = "Hardware (VideoToolbox)";
                GD.Print("[macOS] Attempting to initialize with VideoToolbox...");
                if (!InitializeVideoToolbox())
                {
                    GD.PushWarning("[macOS] VideoToolbox initialization failed. Falling back to libvpx.");
                    _useLibvpx = true;
                }
            }
            else
            {
                GD.Print("[macOS] Hardware support not available or disabled. Using libvpx.");
                _useLibvpx = true;
            }

            if (_useLibvpx)
            {
                mode = "Software (libvpx)";
                GD.Print("[macOS] Attempting to initialize with libvpx...");
                if (!InitializeLibvpx())
                {
                    GD.PrintErr("[macOS] Failed to initialize libvpx software decoder. Initialization failed.");
                    _status = VP9DecoderStatus.Error;
                    return false;
                }
            }

            _initialized = true;
            _status = VP9DecoderStatus.Initialized;
            GD.Print($"[macOS] VP9 decoder initialized: {width}x{height}, Mode: {mode}");
            return true;
        }

        /// <summary>Creates one VideoToolbox decompression session per stream. Releases all VT state on failure.</summary>
        private bool InitializeVideoToolbox()
        {
            try
            {
                _selfHandle = GCHandle.Alloc(this);
                for (int i = 0; i < MAX_STREAMS; i++)
                {
                    _decodedImageBuffers[i] = new ConcurrentQueue<IntPtr>();
                    if (!InitializeVideoToolboxStream(i))
                    {
                        throw new Exception($"Failed to initialize VideoToolbox decoder for stream {i}");
                    }
                }
                return true;
            }
            catch (Exception ex)
            {
                GD.PrintErr($"[macOS] Error initializing VideoToolbox: {ex.Message}");
                ReleaseVideoToolbox();
                return false;
            }
        }

        /// <summary>Initializes one libvpx VP9 decoder context per stream.</summary>
        private bool InitializeLibvpx()
        {
            try
            {
                IntPtr iface = vpx_codec_vp9_dx();
                GD.Print("[libvpx] Interface obtained.");
                for (int i = 0; i < MAX_STREAMS; i++)
                {
                    int result = vpx_codec_dec_init_ver(ref _libvpxContexts[i], iface, IntPtr.Zero, 0, VPX_DECODER_ABI_VERSION);
                    if (result != 0)
                    {
                        throw new Exception($"libvpx: Failed to initialize decoder for stream {i}. Error code: {result}");
                    }
                    GD.Print($"[libvpx] Stream {i} initialized.");
                }
                return true;
            }
            catch (DllNotFoundException)
            {
                GD.PrintErr("[libvpx] DllNotFoundException: libvpx.dylib not found. Please check the .csproj configuration and ensure the dynamic library is being copied to the output directory.");
                return false;
            }
            catch (Exception ex)
            {
                GD.PrintErr($"[libvpx] Error initializing libvpx: {ex.Message}");
                ReleaseLibvpx();
                return false;
            }
        }

        /// <summary>
        /// Decodes one VP9 frame for the given stream. With libvpx the texture is updated
        /// synchronously; with VideoToolbox the decoded buffer arrives via callback and is
        /// applied on the next UpdateTextures() call.
        /// </summary>
        public bool DecodeFrame(byte[] frameData, int streamId)
        {
            if (!_initialized || streamId < 0 || streamId >= MAX_STREAMS || frameData == null || frameData.Length == 0)
                return false;

            try
            {
                _status = VP9DecoderStatus.Decoding;
                if (_useLibvpx)
                {
                    return DecodeFrameWithLibvpx(frameData, streamId);
                }
                else
                {
                    return DecodeFrameWithVideoToolbox(frameData, streamId);
                }
            }
            catch (Exception ex)
            {
                GD.PrintErr($"[macOS] Error decoding frame for stream {streamId}: {ex.Message}");
                _status = VP9DecoderStatus.Error;
                return false;
            }
        }

        /// <summary>
        /// Drains queued VideoToolbox pixel buffers and uploads them to the Godot textures.
        /// Must run on the thread that owns the textures. No-op in libvpx mode.
        /// </summary>
        public void UpdateTextures()
        {
            if (_useLibvpx)
            {
                // libvpx is synchronous, no separate update needed
                return;
            }

            // VideoToolbox path
            for (int i = 0; i < MAX_STREAMS; i++)
            {
                if (_decodedImageBuffers[i] != null && _decodedImageBuffers[i].TryDequeue(out IntPtr imageBuffer))
                {
                    GD.Print($"[VideoToolbox] Dequeued image buffer for stream {i}.");
                    using (var image = GetImageFromPixelBuffer(imageBuffer, i))
                    {
                        if (image != null)
                        {
                            _godotTextures[i].SetImage(image);
                        }
                    }
                    // Balances the CFRetain performed in the decompression callback.
                    CFRelease(imageBuffer);
                }
            }
        }

        #region VideoToolbox Implementation

        /// <summary>
        /// Probes for VP9 hardware decode by attempting to create a throwaway session.
        /// NOTE(review): this probe passes a NULL callback record; on some macOS versions
        /// session creation without a callback may fail even when hardware decode is
        /// available — confirm against the target OS versions.
        /// </summary>
        private bool CheckHardwareSupport()
        {
            IntPtr formatDesc = IntPtr.Zero;
            IntPtr testSession = IntPtr.Zero;
            try
            {
                int result = CMVideoFormatDescriptionCreate(IntPtr.Zero, kCMVideoCodecType_VP9, 1920, 1080, IntPtr.Zero, out formatDesc);
                if (result != 0)
                    return false;

                int sessionResult = VTDecompressionSessionCreate(IntPtr.Zero, formatDesc, IntPtr.Zero, IntPtr.Zero, IntPtr.Zero, out testSession);
                if (sessionResult == 0)
                {
                    if (testSession != IntPtr.Zero)
                    {
                        VTDecompressionSessionInvalidate(testSession);
                        CFRelease(testSession);
                    }
                    return true;
                }
                return false;
            }
            finally
            {
                if (formatDesc != IntPtr.Zero)
                    CFRelease(formatDesc);
            }
        }

        /// <summary>
        /// Creates the decompression session for one stream, wiring the unmanaged
        /// output callback and passing this instance's GCHandle as the refcon so the
        /// static callback can route buffers back to this object.
        /// </summary>
        private bool InitializeVideoToolboxStream(int streamId)
        {
            IntPtr pixelBufferAttributes = IntPtr.Zero;
            try
            {
                if (_formatDesc == IntPtr.Zero)
                {
                    int result = CMVideoFormatDescriptionCreate(IntPtr.Zero, kCMVideoCodecType_VP9, _width, _height, IntPtr.Zero, out _formatDesc);
                    if (result != 0)
                        throw new Exception($"Failed to create format description: {result}");
                }

                pixelBufferAttributes = CreatePixelBufferAttributes();
                if (pixelBufferAttributes == IntPtr.Zero)
                    return false;

                var callbackRecord = new VTDecompressionOutputCallbackRecord
                {
                    decompressionOutputCallback = (IntPtr)(delegate* unmanaged<IntPtr, IntPtr, int, uint, IntPtr, CMTime, CMTime, void>)&DecompressionCallback,
                    decompressionOutputRefCon = GCHandle.ToIntPtr(_selfHandle)
                };

                int sessionResult = VTDecompressionSessionCreate(IntPtr.Zero, _formatDesc, IntPtr.Zero, pixelBufferAttributes, ref callbackRecord, out _decompressionSessions[streamId]);
                if (sessionResult != 0)
                    throw new Exception($"Failed to create decompression session: {sessionResult}");
                return true;
            }
            finally
            {
                if (pixelBufferAttributes != IntPtr.Zero)
                    CFRelease(pixelBufferAttributes);
            }
        }

        /// <summary>
        /// Builds the CFDictionary of destination pixel-buffer attributes (BGRA format,
        /// width, height). Caller owns the returned dictionary and must CFRelease it.
        /// </summary>
        private IntPtr CreatePixelBufferAttributes()
        {
            IntPtr attributes = CFDictionaryCreateMutable(IntPtr.Zero, 3, IntPtr.Zero, IntPtr.Zero);
            IntPtr pixelFormatNumber = IntPtr.Zero;
            IntPtr widthNumber = IntPtr.Zero;
            IntPtr heightNumber = IntPtr.Zero;
            try
            {
                if (attributes == IntPtr.Zero)
                    throw new Exception("Failed to create mutable dictionary.");

                IntPtr kCVPixelBufferPixelFormatTypeKey = NativeLibrary.GetCoreVideoSymbol("kCVPixelBufferPixelFormatTypeKey");
                IntPtr kCVPixelBufferWidthKey = NativeLibrary.GetCoreVideoSymbol("kCVPixelBufferWidthKey");
                IntPtr kCVPixelBufferHeightKey = NativeLibrary.GetCoreVideoSymbol("kCVPixelBufferHeightKey");
                if (kCVPixelBufferPixelFormatTypeKey == IntPtr.Zero || kCVPixelBufferWidthKey == IntPtr.Zero || kCVPixelBufferHeightKey == IntPtr.Zero)
                    throw new Exception("Failed to load CoreVideo keys.");

                int pixelFormat = (int)kCVPixelFormatType_32BGRA;
                pixelFormatNumber = CFNumberCreate(IntPtr.Zero, kCFNumberSInt32Type, ref pixelFormat);
                CFDictionarySetValue(attributes, kCVPixelBufferPixelFormatTypeKey, pixelFormatNumber);

                int w = _width;
                widthNumber = CFNumberCreate(IntPtr.Zero, kCFNumberSInt32Type, ref w);
                CFDictionarySetValue(attributes, kCVPixelBufferWidthKey, widthNumber);

                int h = _height;
                heightNumber = CFNumberCreate(IntPtr.Zero, kCFNumberSInt32Type, ref h);
                CFDictionarySetValue(attributes, kCVPixelBufferHeightKey, heightNumber);

                return attributes;
            }
            catch (Exception ex)
            {
                GD.PrintErr($"Failed to create pixel buffer attributes: {ex.Message}");
                if (attributes != IntPtr.Zero)
                    CFRelease(attributes);
                return IntPtr.Zero;
            }
            finally
            {
                // The dictionary retains the values it holds; release our local references.
                if (pixelFormatNumber != IntPtr.Zero) CFRelease(pixelFormatNumber);
                if (widthNumber != IntPtr.Zero) CFRelease(widthNumber);
                if (heightNumber != IntPtr.Zero) CFRelease(heightNumber);
            }
        }

        /// <summary>
        /// Wraps the frame bytes in a CMSampleBuffer and submits it to the stream's
        /// decompression session. Decode flags are 0 (synchronous), so the pinned
        /// managed buffer may safely be unpinned in the finally block.
        /// </summary>
        private bool DecodeFrameWithVideoToolbox(byte[] frameData, int streamId)
        {
            IntPtr blockBuffer = IntPtr.Zero;
            IntPtr sampleBuffer = IntPtr.Zero;
            GCHandle pinnedArray = GCHandle.Alloc(frameData, GCHandleType.Pinned);
            try
            {
                IntPtr memoryBlock = pinnedArray.AddrOfPinnedObject();

                // blockAllocator must be kCFAllocatorNull: the memory belongs to the GC-pinned
                // managed array and CoreFoundation must never attempt to deallocate it.
                IntPtr allocatorNull = GetKCFAllocatorNull();
                if (allocatorNull == IntPtr.Zero)
                    throw new VP9DecoderException(PlatformName, streamId, "Failed to resolve kCFAllocatorNull.");

                int result = CMBlockBufferCreateWithMemoryBlock(IntPtr.Zero, memoryBlock, frameData.Length, allocatorNull, IntPtr.Zero, 0, frameData.Length, 0, out blockBuffer);
                if (result != 0)
                    throw new VP9DecoderException(PlatformName, streamId, $"Failed to create block buffer: {result}");

                result = CMSampleBufferCreate(IntPtr.Zero, blockBuffer, true, IntPtr.Zero, IntPtr.Zero, _formatDesc, 1, 0, IntPtr.Zero, 0, IntPtr.Zero, out sampleBuffer);
                if (result != 0)
                    throw new VP9DecoderException(PlatformName, streamId, $"Failed to create sample buffer: {result}");

                uint infoFlags;
                // The stream id travels through VideoToolbox as the per-frame refcon.
                result = VTDecompressionSessionDecodeFrame(_decompressionSessions[streamId], sampleBuffer, 0, (IntPtr)streamId, out infoFlags);
                if (result != 0)
                    throw new VP9DecoderException(PlatformName, streamId, $"VideoToolbox decode failed: {result}");

                return true;
            }
            finally
            {
                if (pinnedArray.IsAllocated) pinnedArray.Free();
                if (blockBuffer != IntPtr.Zero) CFRelease(blockBuffer);
                if (sampleBuffer != IntPtr.Zero) CFRelease(sampleBuffer);
            }
        }

        /// <summary>
        /// Copies a locked BGRA CVPixelBuffer into a Godot RGBA8 Image, stripping the
        /// per-row stride padding and swapping the B/R channels.
        /// </summary>
        private Image GetImageFromPixelBuffer(IntPtr pixelBuffer, int streamId)
        {
            if (CVPixelBufferLockBaseAddress(pixelBuffer, 0) != 0)
            {
                GD.PrintErr($"[VideoToolbox] Failed to lock pixel buffer for stream {streamId}");
                return null;
            }
            try
            {
                IntPtr baseAddress = CVPixelBufferGetBaseAddress(pixelBuffer);
                int width = (int)CVPixelBufferGetWidth(pixelBuffer);
                int height = (int)CVPixelBufferGetHeight(pixelBuffer);
                int bytesPerRow = (int)CVPixelBufferGetBytesPerRow(pixelBuffer);

                // Row-wise copy: bytesPerRow may exceed width*4 (alignment padding),
                // and the source is BGRA while Godot expects RGBA.
                byte[] rgba = new byte[width * height * 4];
                byte* srcBase = (byte*)baseAddress;
                for (int row = 0; row < height; row++)
                {
                    byte* src = srcBase + row * bytesPerRow;
                    int dst = row * width * 4;
                    for (int col = 0; col < width; col++)
                    {
                        int s = col * 4;
                        rgba[dst + 0] = src[s + 2]; // R
                        rgba[dst + 1] = src[s + 1]; // G
                        rgba[dst + 2] = src[s + 0]; // B
                        rgba[dst + 3] = src[s + 3]; // A
                        dst += 4;
                    }
                }

                var image = Image.CreateFromData(width, height, false, Image.Format.Rgba8, rgba);
                if (image == null || image.IsEmpty())
                {
                    GD.PrintErr($"[VideoToolbox] Failed to create image from BGRA data for stream {streamId}.");
                    return null;
                }
                return image;
            }
            finally
            {
                CVPixelBufferUnlockBaseAddress(pixelBuffer, 0);
            }
        }

        /// <summary>
        /// VideoToolbox decompression output callback. Runs on a VideoToolbox thread:
        /// retains the image buffer and enqueues it for the stream identified by the
        /// per-frame refcon; UpdateTextures() consumes and releases it later.
        /// </summary>
        [UnmanagedCallersOnly]
        private static void DecompressionCallback(IntPtr decompressionOutputRefCon, IntPtr sourceFrameRefCon, int status, uint infoFlags, IntPtr imageBuffer, CMTime presentationTimeStamp, CMTime presentationDuration)
        {
            if (status != 0)
            {
                GD.PrintErr($"[VideoToolbox] Decode callback error: {status}");
                return;
            }
            if (imageBuffer == IntPtr.Zero)
            {
                GD.PrintErr("[VideoToolbox] Callback received a null imageBuffer.");
                return;
            }

            // Keep the buffer alive past this callback; released in UpdateTextures().
            CFRetain(imageBuffer);

            GCHandle selfHandle = GCHandle.FromIntPtr(decompressionOutputRefCon);
            if (selfHandle.Target is macOSVP9Decoder decoder)
            {
                int streamId = (int)sourceFrameRefCon;
                decoder._decodedImageBuffers[streamId].Enqueue(imageBuffer);
            }
        }

        #endregion

        #region libvpx Implementation

        /// <summary>
        /// Decodes one frame with libvpx and, if a picture is available, converts it
        /// to RGBA and updates the stream's texture synchronously.
        /// </summary>
        private bool DecodeFrameWithLibvpx(byte[] frameData, int streamId)
        {
            fixed (byte* pFrameData = frameData)
            {
                int result = vpx_codec_decode(ref _libvpxContexts[streamId], pFrameData, (uint)frameData.Length, IntPtr.Zero, 0);
                if (result != 0)
                {
                    GD.PrintErr($"[libvpx] Decode failed for stream {streamId}. Error code: {result}");
                    return false;
                }
            }

            IntPtr iter = IntPtr.Zero;
            IntPtr imgPtr = vpx_codec_get_frame(ref _libvpxContexts[streamId], ref iter);
            if (imgPtr != IntPtr.Zero)
            {
                GD.Print($"[libvpx] Frame decoded for stream {streamId}. Updating texture.");
                vpx_image_t* img = (vpx_image_t*)imgPtr;
                UpdateGodotTextureFromYUV(img, streamId);
            }
            else
            {
                GD.Print($"[libvpx] No frame decoded yet for stream {streamId}.");
            }
            return true;
        }

        /// <summary>
        /// Converts an I420 (4:2:0) vpx image to RGBA using BT.601 integer math and
        /// uploads it to the stream's texture. Builds the whole pixel buffer in one
        /// managed array instead of per-pixel SetPixel calls.
        /// </summary>
        private void UpdateGodotTextureFromYUV(vpx_image_t* img, int streamId)
        {
            GD.Print($"[libvpx] Updating texture for stream {streamId} from YUV. Dims: {img->d_w}x{img->d_h}, Strides: Y={img->stride_0}, U={img->stride_1}, V={img->stride_2}");

            byte* yPlane = (byte*)img->planes_0;
            byte* uPlane = (byte*)img->planes_1;
            byte* vPlane = (byte*)img->planes_2;
            if (yPlane == null || uPlane == null || vPlane == null)
            {
                GD.PrintErr("[libvpx] YUV plane pointers are null!");
                return;
            }

            int width = (int)img->d_w;
            int height = (int)img->d_h;
            int yStride = img->stride_0;
            int uStride = img->stride_1;
            int vStride = img->stride_2;

            byte[] rgba = new byte[width * height * 4];
            int dst = 0;
            for (int y = 0; y < height; y++)
            {
                for (int x = 0; x < width; x++)
                {
                    // 4:2:0 subsampling: one chroma sample per 2x2 luma block.
                    int c = yPlane[y * yStride + x] - 16;
                    int d = uPlane[(y / 2) * uStride + (x / 2)] - 128;
                    int e = vPlane[(y / 2) * vStride + (x / 2)] - 128;

                    // BT.601 fixed-point YUV -> RGB.
                    int r = (298 * c + 409 * e + 128) >> 8;
                    int g = (298 * c - 100 * d - 208 * e + 128) >> 8;
                    int b = (298 * c + 516 * d + 128) >> 8;

                    rgba[dst + 0] = (byte)Math.Clamp(r, 0, 255);
                    rgba[dst + 1] = (byte)Math.Clamp(g, 0, 255);
                    rgba[dst + 2] = (byte)Math.Clamp(b, 0, 255);
                    rgba[dst + 3] = 255;
                    dst += 4;
                }
            }

            var image = Image.CreateFromData(width, height, false, Image.Format.Rgba8, rgba);
            GD.Print($"[libvpx] YUV to RGB conversion complete for stream {streamId}. Setting image on texture.");
            _godotTextures[streamId].SetImage(image);
        }

        #endregion

        /// <summary>Returns the texture that receives decoded frames for the given stream, or null.</summary>
        public ImageTexture GetDecodedTexture(int streamId)
        {
            if (!_initialized || streamId < 0 || streamId >= MAX_STREAMS)
                return null;
            return _godotTextures[streamId];
        }

        // No native GL/Metal texture sharing is implemented; frames go through CPU copies.
        public uint GetNativeTextureId(int streamId) => 0;

        public VP9DecoderStatus GetStatus() => _status;

        /// <summary>Releases whichever backend is active and marks the decoder uninitialized.</summary>
        public void Release()
        {
            if (_useLibvpx)
            {
                ReleaseLibvpx();
            }
            else
            {
                ReleaseVideoToolbox();
            }
            _initialized = false;
            GD.Print("[macOS] VP9 decoder released");
        }

        private void ReleaseVideoToolbox()
        {
            for (int i = 0; i < MAX_STREAMS; i++)
            {
                if (_decompressionSessions[i] != IntPtr.Zero)
                {
                    VTDecompressionSessionInvalidate(_decompressionSessions[i]);
                    CFRelease(_decompressionSessions[i]);
                    _decompressionSessions[i] = IntPtr.Zero;
                }
                if (_decodedImageBuffers[i] != null)
                {
                    // Drop any frames decoded but never consumed by UpdateTextures().
                    while (_decodedImageBuffers[i].TryDequeue(out IntPtr imageBuffer))
                    {
                        CFRelease(imageBuffer);
                    }
                }
            }
            if (_formatDesc != IntPtr.Zero)
            {
                CFRelease(_formatDesc);
                _formatDesc = IntPtr.Zero;
            }
            if (_selfHandle.IsAllocated)
            {
                _selfHandle.Free();
            }
            NativeLibrary.CloseCoreVideo();
        }

        private void ReleaseLibvpx()
        {
            for (int i = 0; i < MAX_STREAMS; i++)
            {
                if (_libvpxContexts[i].priv != IntPtr.Zero)
                {
                    vpx_codec_destroy(ref _libvpxContexts[i]);
                    _libvpxContexts[i].priv = IntPtr.Zero;
                }
            }
        }

        public void Dispose()
        {
            Release();
        }
    }
}