Fix bug
This commit is contained in:
@@ -319,6 +319,11 @@ namespace winrt::Vav2Player::implementation
|
||||
LogMgr::GetInstance().LogInfo(L"VideoPlayerControl2",
|
||||
L"Video loaded: " + std::to_wstring(videoWidth) + L"x" + std::to_wstring(videoHeight));
|
||||
|
||||
// Prepare video texture before decoding (via FrameProcessor)
|
||||
if (m_frameProcessor) {
|
||||
m_frameProcessor->PrepareVideoTexture(videoWidth, videoHeight);
|
||||
}
|
||||
|
||||
// Update AspectFit
|
||||
UpdateVideoImageAspectFit(videoWidth, videoHeight);
|
||||
|
||||
|
||||
@@ -28,6 +28,13 @@ void FrameProcessor::SetDispatcherQueue(winrt::Microsoft::UI::Dispatching::Dispa
|
||||
m_dispatcherQueue = queue;
|
||||
}
|
||||
|
||||
void FrameProcessor::PrepareVideoTexture(uint32_t width, uint32_t height)
|
||||
{
|
||||
if (m_renderer) {
|
||||
m_renderer->PrepareVideoTexture(width, height);
|
||||
}
|
||||
}
|
||||
|
||||
bool FrameProcessor::ProcessFrame(VavCorePlayer* player,
|
||||
std::function<void(bool success)> onComplete)
|
||||
{
|
||||
|
||||
@@ -26,6 +26,9 @@ public:
|
||||
// Set dispatcher queue for UI thread callbacks
|
||||
void SetDispatcherQueue(winrt::Microsoft::UI::Dispatching::DispatcherQueue const& queue);
|
||||
|
||||
// Prepare video texture before first decode
|
||||
void PrepareVideoTexture(uint32_t width, uint32_t height);
|
||||
|
||||
// Process single frame (called from PlaybackController timing thread)
|
||||
// Returns: true if frame processing started, false if skipped (previous frame still rendering)
|
||||
// onComplete: Callback invoked on UI thread after render completes (success flag)
|
||||
|
||||
@@ -62,7 +62,10 @@ HRESULT D3D12VideoRenderer::InitializeWithSwapChain(
|
||||
}
|
||||
|
||||
void D3D12VideoRenderer::Shutdown() {
|
||||
WaitForGPU();
|
||||
// Only wait for GPU if we have a valid command queue
|
||||
if (m_commandQueue && m_fence && m_fenceEvent) {
|
||||
WaitForGPU();
|
||||
}
|
||||
|
||||
// Shutdown backends
|
||||
if (m_rgbaSurfaceBackend) {
|
||||
@@ -224,6 +227,24 @@ void D3D12VideoRenderer::SetSwapChainPanel(winrt::Microsoft::UI::Xaml::Controls:
|
||||
m_swapChainPanel = panel;
|
||||
}
|
||||
|
||||
// Creates the RGBA video texture in advance, before any frame is decoded.
//
// width, height: video frame size handed to the RGBA surface backend.
//
// Returns:
//   E_NOT_VALID_STATE - the renderer has not been initialized.
//   failing HRESULT   - propagated from the backend's CreateVideoTexture.
//   S_OK              - texture created, or no RGBA backend exists (nothing to do).
//
// m_videoWidth / m_videoHeight are updated only after the backend succeeds.
HRESULT D3D12VideoRenderer::PrepareVideoTexture(uint32_t width, uint32_t height) {
    if (!m_initialized) {
        return E_NOT_VALID_STATE;
    }

    // Without an RGBA backend there is no texture to prepare.
    if (!m_rgbaSurfaceBackend) {
        return S_OK;
    }

    const HRESULT createResult = m_rgbaSurfaceBackend->CreateVideoTexture(width, height);
    if (FAILED(createResult)) {
        return createResult;
    }

    // Remember the dimensions the texture was created with.
    m_videoWidth = width;
    m_videoHeight = height;
    return S_OK;
}
|
||||
|
||||
ID3D12Resource* D3D12VideoRenderer::GetRGBATextureForCUDAInterop() const {
|
||||
if (m_rgbaSurfaceBackend) {
|
||||
return m_rgbaSurfaceBackend->GetVideoTexture();
|
||||
@@ -383,13 +404,24 @@ HRESULT D3D12VideoRenderer::CreateCommandObjects() {
|
||||
}
|
||||
}
|
||||
|
||||
return m_device->CreateCommandList(
|
||||
HRESULT hr = m_device->CreateCommandList(
|
||||
0,
|
||||
D3D12_COMMAND_LIST_TYPE_DIRECT,
|
||||
m_commandAllocators[0].Get(),
|
||||
nullptr,
|
||||
IID_PPV_ARGS(&m_commandList)
|
||||
);
|
||||
if (FAILED(hr)) {
|
||||
return hr;
|
||||
}
|
||||
|
||||
// Close command list after creation (it starts in recording state)
|
||||
hr = m_commandList->Close();
|
||||
if (FAILED(hr)) {
|
||||
return hr;
|
||||
}
|
||||
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
HRESULT D3D12VideoRenderer::CreateSynchronizationObjects() {
|
||||
|
||||
@@ -55,6 +55,9 @@ public:
|
||||
// Accessor for the D3D12 device: returns the pointer obtained from m_device.Get().
ID3D12Device* GetD3D12Device() const { return m_device.Get(); }
|
||||
// Accessor for the command queue: returns the pointer obtained from m_commandQueue.Get().
ID3D12CommandQueue* GetCommandQueue() const { return m_commandQueue.Get(); }
|
||||
|
||||
// Prepare video texture before decoding
|
||||
HRESULT PrepareVideoTexture(uint32_t width, uint32_t height);
|
||||
|
||||
// Backend-specific texture access for CUDA interop
|
||||
ID3D12Resource* GetRGBATextureForCUDAInterop() const;
|
||||
// Placeholder: always returns nullptr, so callers must handle a null NV12 texture.
ID3D12Resource* GetNV12TextureForCUDAInterop() const { return nullptr; } // Future: NV12DirectBackend
|
||||
|
||||
@@ -904,34 +904,41 @@ int CUDAAPI NVDECAV1Decoder::HandlePictureDecode(void* user_data, CUVIDPICPARAMS
|
||||
|
||||
DecodeSlot& slot = decoder->m_ringBuffer[slot_idx];
|
||||
|
||||
// Find pending submission context using most recent submission_id
|
||||
// cuvidParseVideoData is SYNCHRONOUS - the callback is for the packet we just submitted
|
||||
// Therefore, m_submissionCounter - 1 is the submission_id for THIS packet
|
||||
// Find pending submission context for this decode slot
|
||||
// CRITICAL: Search for the HIGHEST submission_id (most recent packet being processed)
|
||||
// This handles the case where cuvidParseVideoData callbacks may execute asynchronously
|
||||
uint64_t submission_id = 0;
|
||||
size_t pending_idx = 0;
|
||||
bool found = false;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(decoder->m_submissionMutex);
|
||||
|
||||
// Get the most recent submission (the one that triggered this callback)
|
||||
uint64_t current_submission_id = decoder->m_submissionCounter.load() - 1;
|
||||
pending_idx = current_submission_id % RING_BUFFER_SIZE;
|
||||
// Search backwards from current submission counter to find most recent active pending
|
||||
uint64_t current_counter = decoder->m_submissionCounter.load();
|
||||
uint64_t max_submission_id = 0;
|
||||
bool found_any = false;
|
||||
|
||||
auto& pending = decoder->m_pendingSubmissions[pending_idx];
|
||||
for (size_t i = 0; i < RING_BUFFER_SIZE; i++) {
|
||||
auto& pending = decoder->m_pendingSubmissions[i];
|
||||
|
||||
// Verify this pending submission is in use and matches the slot
|
||||
if (pending.in_use.load()) {
|
||||
// Copy pending submission context to decode slot
|
||||
slot.target_surface = pending.target_surface;
|
||||
slot.surface_type = pending.surface_type;
|
||||
slot.submission_id = pending.submission_id;
|
||||
submission_id = pending.submission_id;
|
||||
if (pending.in_use.load()) {
|
||||
// Found an active pending submission
|
||||
if (!found_any || pending.submission_id > max_submission_id) {
|
||||
// This is the newest one so far
|
||||
slot.target_surface = pending.target_surface;
|
||||
slot.surface_type = pending.surface_type;
|
||||
slot.submission_id = pending.submission_id;
|
||||
submission_id = pending.submission_id;
|
||||
pending_idx = i;
|
||||
max_submission_id = pending.submission_id;
|
||||
found_any = true;
|
||||
found = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Release pending slot for reuse
|
||||
pending.in_use.store(false);
|
||||
found = true;
|
||||
} else {
|
||||
LOGF_ERROR("[HandlePictureDecode] Pending submission slot %zu not in use!", pending_idx);
|
||||
if (!found) {
|
||||
LOGF_ERROR("[HandlePictureDecode] No active pending submission found for slot %d", slot_idx);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1181,12 +1188,11 @@ bool NVDECAV1Decoder::DecodeToSurface(const uint8_t* packet_data, size_t packet_
|
||||
LOGF_DEBUG("[DecodeToSurface] Allocated submission_id=%llu, pending_idx=%zu",
|
||||
my_submission_id, pending_idx);
|
||||
|
||||
// 2. Wait if pending slot is still in use (overflow protection)
|
||||
while (m_pendingSubmissions[pending_idx].in_use.load()) {
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(1));
|
||||
}
|
||||
|
||||
// 3. Store submission context in ring buffer slot
|
||||
// 2. Store submission context in ring buffer slot (overwrite old data)
|
||||
// No need to wait - ring buffer naturally cycles after 16 submissions
|
||||
// Old pending submissions will be overwritten, which is safe because:
|
||||
// - Decode slots already have their copy of pending data
|
||||
// - 16 slots is enough buffer for B-frame reordering
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_submissionMutex);
|
||||
auto& pending = m_pendingSubmissions[pending_idx];
|
||||
@@ -1194,7 +1200,7 @@ bool NVDECAV1Decoder::DecodeToSurface(const uint8_t* packet_data, size_t packet_
|
||||
pending.target_surface = target_surface;
|
||||
pending.surface_type = target_type;
|
||||
pending.submission_id = my_submission_id;
|
||||
pending.in_use.store(true);
|
||||
pending.in_use.store(true); // Mark as active for HandlePictureDecode search
|
||||
}
|
||||
|
||||
LOGF_DEBUG("[DecodeToSurface] Prepared submission_id=%llu, pending_idx=%zu",
|
||||
@@ -1212,7 +1218,7 @@ bool NVDECAV1Decoder::DecodeToSurface(const uint8_t* packet_data, size_t packet_
|
||||
my_submission_id);
|
||||
|
||||
CUresult result = cuvidParseVideoData(m_parser, &packet);
|
||||
// cuvidParseVideoData is SYNCHRONOUS - HandlePictureDecode called before return
|
||||
// cuvidParseVideoData is SYNCHRONOUS - all callbacks execute before return
|
||||
|
||||
if (result != CUDA_SUCCESS) {
|
||||
LOGF_ERROR("[DecodeToSurface] cuvidParseVideoData failed with code %d", result);
|
||||
@@ -1227,7 +1233,14 @@ bool NVDECAV1Decoder::DecodeToSurface(const uint8_t* packet_data, size_t packet_
|
||||
return false;
|
||||
}
|
||||
|
||||
LOGF_DEBUG("[DecodeToSurface] Packet submitted, callback completed");
|
||||
LOGF_DEBUG("[DecodeToSurface] Packet submitted, synchronous callbacks completed");
|
||||
|
||||
// IMPORTANT: Do NOT release pending submission here!
|
||||
// Even though cuvidParseVideoData is documented as synchronous, NVDEC's B-frame
|
||||
// reordering means callbacks from THIS packet may execute during FUTURE packets.
|
||||
// Pending submissions will naturally be overwritten when ring buffer wraps (16 slots).
|
||||
LOGF_DEBUG("[DecodeToSurface] Keeping pending_idx=%zu active (will be reused after %d submissions)",
|
||||
pending_idx, RING_BUFFER_SIZE);
|
||||
|
||||
// ===== Component 4: Wait and Retrieve =====
|
||||
// 5. Find which slot NVDEC used (check all slots for our submission_id)
|
||||
@@ -1240,8 +1253,13 @@ bool NVDECAV1Decoder::DecodeToSurface(const uint8_t* packet_data, size_t packet_
|
||||
}
|
||||
|
||||
if (my_slot_idx == -1) {
|
||||
LOGF_ERROR("[DecodeToSurface] Failed to find slot for submission_id=%llu", my_submission_id);
|
||||
return false;
|
||||
// Display-only packet: HandlePictureDisplay was called without HandlePictureDecode
|
||||
// This happens when a packet only triggers display of a previously decoded frame
|
||||
// No new frame was decoded, so we return false to indicate no frame is available
|
||||
LOGF_DEBUG("[DecodeToSurface] Display-only packet (no decode) for submission_id=%llu - returning false", my_submission_id);
|
||||
|
||||
m_returnCounter.fetch_add(1); // Advance counter to unblock FIFO queue
|
||||
return false; // No frame decoded - caller should use previous frame
|
||||
}
|
||||
|
||||
DecodeSlot& my_slot = m_ringBuffer[my_slot_idx];
|
||||
@@ -1254,14 +1272,27 @@ bool NVDECAV1Decoder::DecodeToSurface(const uint8_t* packet_data, size_t packet_
|
||||
|
||||
LOGF_DEBUG("[DecodeToSurface] My turn! submission_id=%llu", my_submission_id);
|
||||
|
||||
// 7. Wait for decode to complete
|
||||
// 7. Wait for decode to complete with adaptive timeout based on resolution
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(my_slot.slot_mutex);
|
||||
|
||||
if (!my_slot.frame_ready.wait_for(lock, std::chrono::milliseconds(500),
|
||||
// Adaptive timeout: base 500ms for 720p, scale by pixel count
|
||||
// 720p (1280x720 = 921,600 pixels) -> 500ms
|
||||
// 1080p (1920x1080 = 2,073,600 pixels) -> 1,125ms
|
||||
// 4K (3840x2160 = 8,294,400 pixels) -> 4,500ms
|
||||
const uint64_t base_pixels = 1280 * 720; // 720p reference
|
||||
const uint64_t base_timeout_ms = 500;
|
||||
const uint64_t current_pixels = static_cast<uint64_t>(m_width) * m_height;
|
||||
const uint64_t timeout_ms = std::max<uint64_t>(base_timeout_ms,
|
||||
(current_pixels * base_timeout_ms) / base_pixels);
|
||||
|
||||
LOGF_DEBUG("[DecodeToSurface] Adaptive timeout: %llums for %dx%d (%llu pixels)",
|
||||
timeout_ms, m_width, m_height, current_pixels);
|
||||
|
||||
if (!my_slot.frame_ready.wait_for(lock, std::chrono::milliseconds(timeout_ms),
|
||||
[&my_slot]() { return my_slot.is_ready.load(); })) {
|
||||
// Timeout - decode took too long
|
||||
LOGF_ERROR("[DecodeToSurface] Decode timeout for slot %d", my_slot_idx);
|
||||
LOGF_ERROR("[DecodeToSurface] Decode timeout for slot %d after %llums", my_slot_idx, timeout_ms);
|
||||
my_slot.in_use.store(false);
|
||||
m_returnCounter.fetch_add(1); // Skip to avoid deadlock
|
||||
return false;
|
||||
@@ -1443,6 +1474,8 @@ bool NVDECAV1Decoder::DecodeToSurface(const uint8_t* packet_data, size_t packet_
|
||||
|
||||
LOGF_DEBUG("[DecodeToSurface] Released slot %d", my_slot_idx);
|
||||
|
||||
// Note: the pending submission is intentionally NOT released here (nor right after cuvidParseVideoData); it stays marked in_use until a later submission overwrites its ring-buffer slot
|
||||
|
||||
// 10. Advance return counter (FIFO order)
|
||||
m_returnCounter.fetch_add(1);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user