Refactoring NVDEC decoder & VideoPlayerControl2

This commit is contained in:
2025-10-02 00:40:17 +09:00
parent 40c36c4c9c
commit 04f92fc848
45 changed files with 4868 additions and 439 deletions

View File

@@ -14,6 +14,11 @@
#include <cstring>
#include <cassert>
// D3D11 for GPU surface decoding
#include <d3d11.h>
#include <wrl/client.h>
using Microsoft::WRL::ComPtr;
// Include log manager for logging
#include "src/Logger/LogManager.h"
@@ -61,6 +66,9 @@ namespace winrt::Vav2Player::implementation
m_vavCorePlayer = nullptr;
}
// Release D3D11 device
ReleaseD3D11Device();
// GPU renderer cleanup re-enabled
if (m_gpuRenderer) {
m_gpuRenderer->Shutdown();
@@ -352,6 +360,36 @@ namespace winrt::Vav2Player::implementation
}
LogMgr::GetInstance().LogDecoderInfo(decoderName, L"Decoder type selected");
// Initialize GPU renderer and set D3D12 device BEFORE opening file
// This ensures the decoder is created with D3D12 interop from the start
if (m_useHardwareRendering) {
// Create GPU renderer early
if (!m_gpuRenderer) {
m_gpuRenderer = std::make_unique<SimpleGPURenderer>();
}
// Get container dimensions
auto container = VideoDisplayArea();
uint32_t width = static_cast<uint32_t>(container.ActualWidth());
uint32_t height = static_cast<uint32_t>(container.ActualHeight());
// If container has valid dimensions, initialize GPU renderer now
if (width > 0 && height > 0) {
HRESULT hr = m_gpuRenderer->InitializeWithSwapChain(VideoSwapChainPanel(), width, height);
if (SUCCEEDED(hr)) {
// Pass D3D12 device to VavCore BEFORE decoder initialization
auto* gpuRenderer = dynamic_cast<SimpleGPURenderer*>(m_gpuRenderer.get());
if (gpuRenderer) {
ID3D12Device* d3d12Device = gpuRenderer->GetD3D12Device();
if (d3d12Device) {
vavcore_set_d3d_device(m_vavCorePlayer, d3d12Device, VAVCORE_SURFACE_D3D12_RESOURCE);
LogMgr::GetInstance().LogInfo(L"D3D12 device set before decoder initialization", L"VideoPlayerControl");
}
}
}
}
}
// Open video file using VavCore API
VavCoreResult result = vavcore_open_file(m_vavCorePlayer, filePathStr.c_str());
if (result != VAVCORE_SUCCESS) {
@@ -361,6 +399,13 @@ namespace winrt::Vav2Player::implementation
return;
}
// Log actual codec name after decoder is initialized
const char* codecName = vavcore_get_codec_name(m_vavCorePlayer);
if (codecName) {
std::wstring codecNameW = std::wstring(codecName, codecName + strlen(codecName));
LogMgr::GetInstance().LogDecoderInfo(codecNameW, L"Actual decoder initialized");
}
// Get video metadata from VavCore
VavCoreVideoMetadata metadata;
result = vavcore_get_metadata(m_vavCorePlayer, &metadata);
@@ -391,6 +436,21 @@ namespace winrt::Vav2Player::implementation
LogMgr::GetInstance().LogInfo(videoInfo, L"VideoPlayerControl");
InitializeVideoRenderer();
// Create NV12 texture for zero-copy decode AFTER we know video dimensions
if (m_gpuRenderer && m_useHardwareRendering) {
auto* gpuRenderer = dynamic_cast<SimpleGPURenderer*>(m_gpuRenderer.get());
if (gpuRenderer) {
// Create NV12 texture for CUDA-D3D12 interop
HRESULT hr = gpuRenderer->CreateNV12TextureR8Layout(m_videoWidth, m_videoHeight);
if (SUCCEEDED(hr)) {
LogMgr::GetInstance().LogInfo(L"NV12 texture created for NVDEC zero-copy decode", L"VideoPlayerControl");
} else {
LogMgr::GetInstance().LogError(L"Failed to create NV12 texture", L"VideoPlayerControl");
}
}
}
m_hasValidVideoSize = true;
m_isLoaded = true;
@@ -448,12 +508,69 @@ namespace winrt::Vav2Player::implementation
break;
}
// Process frame on UI thread
strongThis->DispatcherQueue().TryEnqueue([strongThis]() {
if (strongThis->m_isPlaying && strongThis->m_isLoaded) {
strongThis->ProcessSingleFrame();
// CRITICAL: Decode on background thread, but Present on UI thread
// This prevents UI blocking while maintaining D3D12 thread safety
bool expected = false;
if (strongThis->m_frameProcessing.compare_exchange_strong(expected, true)) {
// Decode on current background thread (heavy CUDA/NVDEC work)
if (strongThis->m_isPlaying && strongThis->m_isLoaded && strongThis->m_gpuRenderer) {
auto* gpuRenderer = dynamic_cast<SimpleGPURenderer*>(strongThis->m_gpuRenderer.get());
if (gpuRenderer) {
ID3D12Resource* nv12Texture = gpuRenderer->GetNV12TextureForCUDAInterop();
if (nv12Texture) {
VavCoreVideoFrame vavFrame;
VavCoreResult result = vavcore_decode_to_surface(
strongThis->m_vavCorePlayer,
VAVCORE_SURFACE_D3D12_RESOURCE,
nv12Texture,
&vavFrame
);
if (result == VAVCORE_SUCCESS) {
OutputDebugStringA("[VideoPlayerControl] Decode SUCCESS, enqueuing render...\n");
// Render + Present on UI thread (lightweight, thread-safe)
// CRITICAL: Keep m_frameProcessing = true until render completes
// to prevent NVDEC surface queue overflow
auto enqueued = strongThis->DispatcherQueue().TryEnqueue([strongThis, gpuRenderer]() {
OutputDebugStringA("[VideoPlayerControl] Render callback executing...\n");
if (strongThis->m_isPlaying) {
HRESULT hr = gpuRenderer->RenderNV12TextureToBackBuffer();
if (SUCCEEDED(hr)) {
OutputDebugStringA("[VideoPlayerControl] Render SUCCESS\n");
} else {
char buf[256];
sprintf_s(buf, "[VideoPlayerControl] Render FAILED: 0x%08X\n", hr);
OutputDebugStringA(buf);
}
}
// Mark frame processing complete AFTER render
strongThis->m_frameProcessing.store(false);
});
if (!enqueued) {
OutputDebugStringA("[VideoPlayerControl] WARNING: Failed to enqueue render!\n");
// If enqueue failed, release flag immediately
strongThis->m_frameProcessing.store(false);
}
} else if (result == VAVCORE_END_OF_STREAM) {
strongThis->m_isPlaying = false;
strongThis->m_frameProcessing.store(false);
OutputDebugStringA("[VideoPlayerControl] End of stream\n");
} else {
char buf[256];
sprintf_s(buf, "[VideoPlayerControl] Decode failed: %d\n", result);
OutputDebugStringA(buf);
strongThis->m_frameProcessing.store(false);
}
}
}
}
});
} else {
// Previous frame still processing, skip this frame
}
// High-precision sleep until next frame
auto nextFrame = start + std::chrono::microseconds(
@@ -530,19 +647,73 @@ namespace winrt::Vav2Player::implementation
return;
}
// Choose decode path based on D3D surface support
if (m_useD3DSurfaces) {
ProcessSingleFrameWithSurfaces();
return;
}
// Phase 2 Optimization: Start frame timing
m_performanceMonitor->RecordFrameStart();
// Phase 2 Optimization: Start decode timing
m_performanceMonitor->RecordDecodeStart();
// Decode next frame using VavCore
// GPU zero-copy path: Decode directly to D3D12 NV12 texture (R8 layout for CUDA interop)
if (m_gpuRenderer && m_useHardwareRendering) {
auto* gpuRenderer = dynamic_cast<SimpleGPURenderer*>(m_gpuRenderer.get());
if (gpuRenderer) {
// Get NV12 texture for CUDA interop
ID3D12Resource* nv12Texture = gpuRenderer->GetNV12TextureForCUDAInterop();
if (nv12Texture) {
VavCoreVideoFrame vavFrame;
VavCoreResult result = vavcore_decode_to_surface(
m_vavCorePlayer,
VAVCORE_SURFACE_D3D12_RESOURCE,
nv12Texture,
&vavFrame
);
m_performanceMonitor->RecordDecodeEnd();
if (result == VAVCORE_END_OF_STREAM) {
m_isPlaying = false;
if (m_playbackTimer) m_playbackTimer.Stop();
UpdateStatus(L"Playback completed");
LogMgr::GetInstance().LogInfo(L"Playback completed - End of stream reached", L"VideoPlayerControl");
return;
}
if (result == VAVCORE_SUCCESS) {
// NV12 texture updated by NVDEC, render to back buffer
m_performanceMonitor->RecordRenderStart();
// CRITICAL: Add small sleep to ensure GPU-GPU synchronization
// cudaDeviceSynchronize() ensures CUDA completion on CPU side,
// but D3D12 GPU queue may still need time to see the writes
// This is a temporary workaround until proper D3D12-CUDA sync is implemented
std::this_thread::sleep_for(std::chrono::milliseconds(1));
// Render NV12 texture to back buffer (YUV to RGB conversion)
// NOTE: RenderNV12TextureToBackBuffer() internally executes command list,
// signals fence, and advances frame index - no separate Present() needed
HRESULT renderHr = gpuRenderer->RenderNV12TextureToBackBuffer();
if (FAILED(renderHr)) {
LogMgr::GetInstance().LogError(L"Failed to render NV12 texture to back buffer", L"VideoPlayerControl");
}
m_performanceMonitor->RecordRenderEnd();
m_currentFrame++;
m_currentTime = m_currentFrame / m_frameRate;
m_performanceMonitor->RecordFrameEnd();
// Note: No need to call vavcore_free_frame for DecodeToSurface
// The frame data is written directly to the D3D12 surface
return;
} else {
// GPU decode failed, fall through to CPU path
m_framesDecodeErrors++;
}
}
}
}
// CPU fallback path: Use traditional CPU decode
VavCoreVideoFrame vavFrame;
VavCoreResult result = vavcore_decode_next_frame(m_vavCorePlayer, &vavFrame);
@@ -803,7 +974,23 @@ namespace winrt::Vav2Player::implementation
// Initialize GPU renderer
HRESULT hr = m_gpuRenderer->InitializeWithSwapChain(VideoSwapChainPanel(), width, height);
return SUCCEEDED(hr);
if (FAILED(hr)) {
return false;
}
// Pass D3D12 device to VavCore for zero-copy GPU pipeline
if (m_vavCorePlayer) {
auto* gpuRenderer = dynamic_cast<SimpleGPURenderer*>(m_gpuRenderer.get());
if (gpuRenderer) {
ID3D12Device* d3d12Device = gpuRenderer->GetD3D12Device();
if (d3d12Device) {
vavcore_set_d3d_device(m_vavCorePlayer, d3d12Device, VAVCORE_SURFACE_D3D12_RESOURCE);
OutputDebugStringW(L"[VideoPlayerControl] D3D12 device passed to VavCore\n");
}
}
}
return true;
}
void VideoPlayerControl::SetRenderingMode(bool useGPU)
@@ -1000,12 +1187,13 @@ namespace winrt::Vav2Player::implementation
bool VideoPlayerControl::InitializeD3DSurfaceSupport()
{
try {
// Check if decoder supports any D3D surface types
// Check if decoder supports GPU surface types for zero-copy pipeline
// Priority: CUDA (NVIDIA) > D3D12 > AMF (AMD) > D3D11 (fallback)
VavCoreSurfaceType supportedTypes[] = {
VAVCORE_SURFACE_D3D11_TEXTURE,
VAVCORE_SURFACE_D3D12_RESOURCE,
VAVCORE_SURFACE_CUDA_DEVICE,
VAVCORE_SURFACE_AMF_SURFACE
VAVCORE_SURFACE_CUDA_DEVICE, // CUDA device memory (NVIDIA NVDEC)
VAVCORE_SURFACE_D3D12_RESOURCE, // D3D12 resource
VAVCORE_SURFACE_AMF_SURFACE, // AMD AMF surface
VAVCORE_SURFACE_D3D11_TEXTURE // D3D11 texture (fallback)
};
for (auto surfaceType : supportedTypes) {
@@ -1020,21 +1208,47 @@ namespace winrt::Vav2Player::implementation
return false;
}
// For now, prioritize D3D11 texture support for SwapChainPanel compatibility
if (m_supportedSurfaceType == VAVCORE_SURFACE_D3D11_TEXTURE) {
// TODO: Get D3D11 device from SwapChainPanel or create one
// m_d3dDevice = GetD3D11DeviceFromSwapChainPanel();
// Try to initialize D3D device for GPU surface decoding
std::wstring surfaceTypeName;
switch (m_supportedSurfaceType) {
case VAVCORE_SURFACE_D3D11_TEXTURE: surfaceTypeName = L"D3D11"; break;
case VAVCORE_SURFACE_D3D12_RESOURCE: surfaceTypeName = L"D3D12"; break;
case VAVCORE_SURFACE_CUDA_DEVICE: surfaceTypeName = L"CUDA"; break;
case VAVCORE_SURFACE_AMF_SURFACE: surfaceTypeName = L"AMF"; break;
default: surfaceTypeName = L"Unknown"; break;
}
// For now, set to nullptr - will be initialized when needed
VavCoreResult result = vavcore_set_d3d_device(m_vavCorePlayer, m_d3dDevice, m_supportedSurfaceType);
if (result == VAVCORE_SUCCESS) {
m_useD3DSurfaces = true;
LogMgr::GetInstance().LogInfo(L"D3D11 surface decoding enabled", L"VideoPlayerControl");
return true;
LogMgr::GetInstance().LogInfo(
L"Initializing D3D surface support (" + surfaceTypeName + L")...",
L"VideoPlayerControl"
);
// Create D3D11 device for NVDEC/VPL/AMF hardware decoding
if (m_supportedSurfaceType == VAVCORE_SURFACE_D3D11_TEXTURE) {
if (CreateD3D11Device()) {
VavCoreResult result = vavcore_set_d3d_device(m_vavCorePlayer, m_d3dDevice, m_supportedSurfaceType);
if (result == VAVCORE_SUCCESS) {
m_useD3DSurfaces = true;
LogMgr::GetInstance().LogInfo(L"D3D11 surface decoding enabled successfully", L"VideoPlayerControl");
return true;
} else {
LogMgr::GetInstance().LogWarning(L"Failed to set D3D11 device to VavCore", L"VideoPlayerControl");
}
} else {
LogMgr::GetInstance().LogWarning(L"Failed to create D3D11 device", L"VideoPlayerControl");
}
}
LogMgr::GetInstance().LogWarning(L"Failed to initialize D3D surface support", L"VideoPlayerControl");
// Fallback to CPU decode path
LogMgr::GetInstance().LogInfo(
L"D3D surface support not initialized - using CPU decode path",
L"VideoPlayerControl"
);
LogMgr::GetInstance().LogInfo(
L"Note: CPU decode path still provides full hardware acceleration (NVDEC/VPL/AMF), only final output uses CPU memory",
L"VideoPlayerControl"
);
return false;
}
catch (...) {
@@ -1102,19 +1316,134 @@ namespace winrt::Vav2Player::implementation
}
}
bool VideoPlayerControl::CreateD3D11Device()
{
try {
// Create D3D11 device with hardware acceleration
ComPtr<ID3D11Device> d3d11Device;
ComPtr<ID3D11DeviceContext> d3d11Context;
D3D_FEATURE_LEVEL featureLevel;
D3D_FEATURE_LEVEL featureLevels[] = {
D3D_FEATURE_LEVEL_11_1,
D3D_FEATURE_LEVEL_11_0,
D3D_FEATURE_LEVEL_10_1,
D3D_FEATURE_LEVEL_10_0
};
UINT createDeviceFlags = 0;
#ifdef _DEBUG
createDeviceFlags |= D3D11_CREATE_DEVICE_DEBUG;
#endif
HRESULT hr = D3D11CreateDevice(
nullptr, // Default adapter
D3D_DRIVER_TYPE_HARDWARE, // Hardware acceleration
nullptr, // No software rasterizer
createDeviceFlags,
featureLevels,
ARRAYSIZE(featureLevels),
D3D11_SDK_VERSION,
&d3d11Device,
&featureLevel,
&d3d11Context
);
if (FAILED(hr)) {
LogMgr::GetInstance().LogError(
L"D3D11CreateDevice failed with HRESULT: 0x" + std::to_wstring(hr),
L"VideoPlayerControl"
);
return false;
}
// Store raw pointer for VavCore
m_d3dDevice = d3d11Device.Get();
// Keep ComPtr alive by AddRef
d3d11Device->AddRef();
LogMgr::GetInstance().LogInfo(
L"D3D11 device created successfully with feature level: " + std::to_wstring(featureLevel),
L"VideoPlayerControl"
);
return true;
}
catch (...) {
LogMgr::GetInstance().LogError(L"Exception during D3D11 device creation", L"VideoPlayerControl");
return false;
}
}
void VideoPlayerControl::ReleaseD3D11Device()
{
    // Drop the explicit reference taken in CreateD3D11Device(); safe to call
    // repeatedly — subsequent calls are no-ops once the pointer is cleared.
    if (!m_d3dDevice) {
        return;
    }
    static_cast<ID3D11Device*>(m_d3dDevice)->Release();
    m_d3dDevice = nullptr;
    LogMgr::GetInstance().LogInfo(L"D3D11 device released", L"VideoPlayerControl");
}
bool VideoPlayerControl::CreateD3DTexture(uint32_t width, uint32_t height, void** texture)
{
// TODO: Implement D3D11 texture creation
// For now, return nullptr to indicate fallback to CPU decoding
*texture = nullptr;
return false;
if (!m_d3dDevice || !texture) {
return false;
}
try {
auto* d3d11Device = static_cast<ID3D11Device*>(m_d3dDevice);
// Create D3D11 texture for NVDEC output (NV12 format for YUV420)
// NV12 requires height * 1.5 to accommodate Y plane (height) + UV plane (height/2)
D3D11_TEXTURE2D_DESC texDesc = {};
texDesc.Width = width;
texDesc.Height = height + (height / 2); // Y plane + UV plane space
texDesc.MipLevels = 1;
texDesc.ArraySize = 1;
texDesc.Format = DXGI_FORMAT_NV12; // NV12 is standard for video decoding
texDesc.SampleDesc.Count = 1;
texDesc.SampleDesc.Quality = 0;
texDesc.Usage = D3D11_USAGE_DEFAULT;
texDesc.BindFlags = D3D11_BIND_DECODER | D3D11_BIND_SHADER_RESOURCE;
texDesc.CPUAccessFlags = 0;
texDesc.MiscFlags = 0;
ComPtr<ID3D11Texture2D> d3d11Texture;
HRESULT hr = d3d11Device->CreateTexture2D(&texDesc, nullptr, &d3d11Texture);
if (FAILED(hr)) {
LogMgr::GetInstance().LogError(
L"Failed to create D3D11 texture: HRESULT 0x" + std::to_wstring(hr),
L"VideoPlayerControl"
);
return false;
}
// Return raw pointer and keep ComPtr alive
*texture = d3d11Texture.Get();
d3d11Texture->AddRef();
return true;
}
catch (...) {
LogMgr::GetInstance().LogError(L"Exception during D3D11 texture creation", L"VideoPlayerControl");
return false;
}
}
void VideoPlayerControl::RenderD3DSurfaceToScreen(void* d3dTexture, const VavCoreVideoFrame& frame)
{
    // Renders a decoded frame while the zero-copy GPU path is unfinished.
    //
    // TODO: Implement the zero-copy CUDA -> D3D12 pipeline:
    //   1. NVDEC decodes to CUDA device memory
    //   2. cuGraphicsD3D12RegisterResource registers the D3D12 texture with CUDA
    //   3. Direct CUDA to D3D12 copy (no CPU involvement)
    //   4. SimpleGPURenderer renders the NV12 texture
    //
    // Until then the surface handle is ignored and we fall back to the CPU
    // rendering path. The previous body both performed this fallback AND logged
    // an unconditional error every frame ("pipeline not yet implemented"),
    // which was misleading and flooded the log on a per-frame hot path — the
    // fallback is expected behavior, so no error is logged here.
    (void)d3dTexture; // unused until the zero-copy pipeline lands
    RenderFrameSoftware(frame);
}
// ===============================