diff --git a/vav2/docs/working/NVDEC_State_Machine_Refactoring.md b/vav2/docs/working/NVDEC_State_Machine_Refactoring.md new file mode 100644 index 0000000..538b974 --- /dev/null +++ b/vav2/docs/working/NVDEC_State_Machine_Refactoring.md @@ -0,0 +1,416 @@ +# NVDEC Decoder State Machine Refactoring Design + +## Problem Statement + +The current `NVDECAV1Decoder::DecodeToSurface()` has excessive complexity: +- **13+ state variables** tracked across multiple atomic flags and mutexes +- **9+ conditional branches** with nested conditions +- **~150 lines** in a single function +- **High cyclomatic complexity** (9 independent branches allow up to 2^9 = 512 distinct code paths) + +This makes the code: +- Hard to maintain and debug +- Difficult to test comprehensively +- Prone to race conditions and edge cases +- Challenging to extend with new features + +## Solution: State Machine Pattern + +### Core Design Principle + +**Consolidate all decoder state into a single enum** with clear transitions, replacing scattered atomic flags and conditional checks. 
+ +### State Machine States + +```cpp +enum class DecoderState { + UNINITIALIZED, // Before Initialize() is called + READY, // Initialized and ready for decoding + BUFFERING, // Initial buffering (0-15 frames) + DECODING, // Normal frame-by-frame decoding + FLUSHING, // End-of-file reached, draining DPB + FLUSH_COMPLETE, // All frames drained + ERROR // Unrecoverable error state +}; +``` + +### State Transitions + +``` +UNINITIALIZED → READY (Initialize() called successfully) +READY → BUFFERING (First DecodeToSurface() call) +BUFFERING → DECODING (Display queue has frames) +DECODING → FLUSHING (End-of-file reached, NULL packet) +FLUSHING → FLUSH_COMPLETE (Display queue empty) +FLUSH_COMPLETE → READY (Reset() called) +* → ERROR (Any state can transition to ERROR on failure) +ERROR → READY (Reset() called) +``` + +### State Machine Class + +```cpp +class DecoderStateMachine { +public: + DecoderStateMachine() : m_state(DecoderState::UNINITIALIZED) {} + + // State queries + DecoderState GetState() const { return m_state.load(); } + bool IsState(DecoderState state) const { return m_state.load() == state; } + bool CanDecode() const { + auto state = m_state.load(); + return state == DecoderState::READY || + state == DecoderState::BUFFERING || + state == DecoderState::DECODING || + state == DecoderState::FLUSHING; + } + + // State transitions + bool TransitionTo(DecoderState new_state) { + DecoderState expected = m_state.load(); + if (IsValidTransition(expected, new_state)) { + m_state.store(new_state); + LOGF_DEBUG("[DecoderStateMachine] State transition: %s → %s", + StateToString(expected), StateToString(new_state)); + return true; + } + LOGF_ERROR("[DecoderStateMachine] Invalid transition: %s → %s", + StateToString(expected), StateToString(new_state)); + return false; + } + + // Specific transition helpers + void OnInitializeSuccess() { + TransitionTo(DecoderState::READY); + } + + void OnFirstPacket() { + if (IsState(DecoderState::READY)) { + 
TransitionTo(DecoderState::BUFFERING); + } + } + + void OnBufferingComplete(size_t queue_size) { + if (IsState(DecoderState::BUFFERING) && queue_size > 0) { + TransitionTo(DecoderState::DECODING); + } + } + + void OnEndOfFile() { + if (IsState(DecoderState::DECODING) || IsState(DecoderState::BUFFERING)) { + TransitionTo(DecoderState::FLUSHING); + } + } + + void OnFlushComplete() { + if (IsState(DecoderState::FLUSHING)) { + TransitionTo(DecoderState::FLUSH_COMPLETE); + } + } + + void OnError() { + TransitionTo(DecoderState::ERROR); + } + + void OnReset() { + TransitionTo(DecoderState::READY); + } + +private: + std::atomic<DecoderState> m_state; + + bool IsValidTransition(DecoderState from, DecoderState to) const { + // Define valid state transitions + switch (from) { + case DecoderState::UNINITIALIZED: + return to == DecoderState::READY || to == DecoderState::ERROR; + case DecoderState::READY: + return to == DecoderState::BUFFERING || to == DecoderState::ERROR; + case DecoderState::BUFFERING: + return to == DecoderState::DECODING || to == DecoderState::FLUSHING || + to == DecoderState::ERROR || to == DecoderState::READY; + case DecoderState::DECODING: + return to == DecoderState::FLUSHING || to == DecoderState::ERROR || + to == DecoderState::READY; + case DecoderState::FLUSHING: + return to == DecoderState::FLUSH_COMPLETE || to == DecoderState::ERROR || + to == DecoderState::READY; + case DecoderState::FLUSH_COMPLETE: + return to == DecoderState::READY || to == DecoderState::ERROR; + case DecoderState::ERROR: + return to == DecoderState::READY; + default: + return false; + } + } + + const char* StateToString(DecoderState state) const { + switch (state) { + case DecoderState::UNINITIALIZED: return "UNINITIALIZED"; + case DecoderState::READY: return "READY"; + case DecoderState::BUFFERING: return "BUFFERING"; + case DecoderState::DECODING: return "DECODING"; + case DecoderState::FLUSHING: return "FLUSHING"; + case DecoderState::FLUSH_COMPLETE: return "FLUSH_COMPLETE"; + case 
DecoderState::ERROR: return "ERROR"; + default: return "UNKNOWN"; + } + } +}; +``` + +## Refactored DecodeToSurface() + +### Before (Complex Branching): + +```cpp +bool DecodeToSurface(...) { + // Step 1: Check if initialized + if (!m_initialized) { ... } + + // Handle NULL packet_data as flush mode + if (!packet_data || packet_size == 0) { + m_endOfFileReached = true; + } + + // Step 2: Submit packet + if (m_endOfFileReached) { + // Flush mode logic + } else { + // Normal mode logic + } + + // Step 3: Check initial buffering + if (m_displayQueue.empty() && !m_initialBufferingComplete) { + // Buffering logic + } + if (!m_displayQueue.empty() && !m_initialBufferingComplete) { + m_initialBufferingComplete = true; + } + + // Step 4: Pop from display queue + if (m_displayQueue.empty()) { + if (m_endOfFileReached) { + // Flush complete logic + } else { + // Error - queue empty unexpectedly + } + } + + // ... (continues for 150 more lines) +} +``` + +### After (State Machine): + +```cpp +bool DecodeToSurface(const uint8_t* packet_data, size_t packet_size, + VavCoreSurfaceType target_type, + void* target_surface, + VideoFrame& output_frame) { + // State validation + if (!m_stateMachine.CanDecode()) { + LOGF_ERROR("[DecodeToSurface] Invalid state: %s", + m_stateMachine.GetStateString()); + return false; + } + + // Handle end-of-file + if (!packet_data || packet_size == 0) { + return HandleFlushMode(output_frame); + } + + // Delegate to state-specific handler + switch (m_stateMachine.GetState()) { + case DecoderState::READY: + case DecoderState::BUFFERING: + return HandleBufferingMode(packet_data, packet_size, target_type, + target_surface, output_frame); + case DecoderState::DECODING: + return HandleDecodingMode(packet_data, packet_size, target_type, + target_surface, output_frame); + default: + LOGF_ERROR("[DecodeToSurface] Unexpected state in DecodeToSurface"); + return false; + } +} +``` + +### Helper Methods (State-Specific Logic): + +```cpp +bool 
HandleBufferingMode(const uint8_t* packet_data, size_t packet_size, + VavCoreSurfaceType target_type, + void* target_surface, + VideoFrame& output_frame) { + // Transition to buffering on first packet + if (m_stateMachine.IsState(DecoderState::READY)) { + m_stateMachine.OnFirstPacket(); + } + + // Submit packet to NVDEC + if (!SubmitPacketToParser(packet_data, packet_size)) { + return false; + } + + // Check if buffering is complete + { + std::lock_guard lock(m_displayMutex); + if (m_displayQueue.empty()) { + // Still buffering + return false; // VAVCORE_PACKET_ACCEPTED + } else { + // Buffering complete + m_stateMachine.OnBufferingComplete(m_displayQueue.size()); + // Fall through to decode the first frame + } + } + + return RetrieveAndRenderFrame(target_type, target_surface, output_frame); +} + +bool HandleDecodingMode(const uint8_t* packet_data, size_t packet_size, + VavCoreSurfaceType target_type, + void* target_surface, + VideoFrame& output_frame) { + // Submit packet to NVDEC + if (!SubmitPacketToParser(packet_data, packet_size)) { + return false; + } + + // Retrieve and render frame + return RetrieveAndRenderFrame(target_type, target_surface, output_frame); +} + +bool HandleFlushMode(VideoFrame& output_frame) { + // Transition to flushing if not already + if (!m_stateMachine.IsState(DecoderState::FLUSHING)) { + m_stateMachine.OnEndOfFile(); + } + + // Submit end-of-stream packet + if (!SubmitFlushPacket()) { + return false; + } + + // Check if flush is complete + { + std::lock_guard lock(m_displayMutex); + if (m_displayQueue.empty()) { + m_stateMachine.OnFlushComplete(); + return false; // VAVCORE_END_OF_STREAM + } + } + + // Still have frames to drain + return RetrieveAndRenderFrame(...); +} +``` + +## Removed/Consolidated State Variables + +### Before: +```cpp +// 13+ state variables +std::atomic m_initialBufferingComplete{false}; +std::atomic m_endOfFileReached{false}; +std::atomic m_converterNeedsReinit{false}; +std::atomic m_submissionCounter{0}; 
+std::atomic m_returnCounter{0}; +std::atomic m_pollingRunning{false}; +std::mutex m_frameQueueMutex; +std::mutex m_cudaContextMutex; +std::mutex m_submissionMutex; +std::mutex m_displayMutex; +std::queue m_displayQueue; +FrameSlot m_frameSlots[16]; // Each has 5 atomic flags +``` + +### After: +```cpp +// Single state machine + minimal supporting variables +DecoderStateMachine m_stateMachine; + +// Still needed (but usage clarified by state machine): +std::mutex m_displayMutex; +std::queue m_displayQueue; +FrameSlot m_frameSlots[16]; // Frame-specific state (not global decoder state) +std::atomic m_submissionCounter{0}; // Submission ordering +std::mutex m_submissionMutex; +``` + +**Eliminated:** +- `m_initialBufferingComplete` → Replaced by `DecoderState::BUFFERING` vs `DECODING` +- `m_endOfFileReached` → Replaced by `DecoderState::FLUSHING` +- `m_converterNeedsReinit` → Moved to NV12ToRGBAConverter internal state + +## Benefits + +### 1. Complexity Reduction +- **13+ state variables → 1 state machine** with 7 well-defined states +- **9+ conditional branches → State-driven dispatch** (1 switch statement) +- **~150 lines → ~40 lines** per state handler (modular functions) + +### 2. Improved Maintainability +- **Clear state transitions** with validation (no illegal states) +- **State-specific logic** isolated in dedicated functions +- **Easy debugging** with state transition logging + +### 3. Better Testability +- **Test individual states** independently +- **Verify state transitions** explicitly +- **Mock state machine** for unit tests + +### 4. 
Enhanced Readability +- **Self-documenting code** (state names describe decoder status) +- **Linear flow** instead of nested conditions +- **Clear intent** from state-specific handler names + +## Implementation Plan + +### Phase 1: Create State Machine Class (CURRENT) +- [x] Design state machine enum and transitions +- [ ] Implement DecoderStateMachine class +- [ ] Add state transition logging + +### Phase 2: Extract Helper Methods +- [ ] Create `SubmitPacketToParser()` +- [ ] Create `RetrieveAndRenderFrame()` +- [ ] Create `SubmitFlushPacket()` + +### Phase 3: Refactor DecodeToSurface() +- [ ] Replace state flags with state machine +- [ ] Implement `HandleBufferingMode()` +- [ ] Implement `HandleDecodingMode()` +- [ ] Implement `HandleFlushMode()` + +### Phase 4: Update Other Methods +- [ ] Update `Initialize()` → call `m_stateMachine.OnInitializeSuccess()` +- [ ] Update `Reset()` → call `m_stateMachine.OnReset()` +- [ ] Update `Cleanup()` → call `m_stateMachine.TransitionTo(DecoderState::UNINITIALIZED)` (requires adding a `* → UNINITIALIZED` rule to `IsValidTransition()`, which currently rejects every transition into `UNINITIALIZED`) + +### Phase 5: Remove Obsolete State Variables +- [ ] Remove `m_initialBufferingComplete` +- [ ] Remove `m_endOfFileReached` +- [ ] Verify no regressions with existing tests + +## Testing Strategy + +### Unit Tests +- State transition validation (legal/illegal transitions) +- State-specific handler behavior +- Error state recovery + +### Integration Tests +- Full decode pipeline with state transitions +- Edge cases (empty files, flush mode, errors) +- Multi-threaded decoding with state machine + +### Regression Tests +- Existing RedSurfaceNVDECTest +- Vav2PlayerHeadless tests +- Vav2Player GUI tests + +--- +**Status**: Design complete, implementation in progress +**Last Updated**: 2025-10-11 diff --git a/vav2/docs/working/Triple_Buffering_Refactoring_Design.md b/vav2/docs/working/Triple_Buffering_Refactoring_Design.md new file mode 100644 index 0000000..650da60 --- /dev/null +++ b/vav2/docs/working/Triple_Buffering_Refactoring_Design.md @@ -0,0 +1,1139 @@ +# Triple Buffering 
Refactoring Design + +**문서 작성일**: 2025-10-10 +**작성자**: Claude Code +**상태**: 설계 완료, 구현 대기 중 + +## 📋 목차 +1. [개요](#개요) +2. [현재 문제점](#현재-문제점) +3. [새로운 설계](#새로운-설계) +4. [구현 계획](#구현-계획) +5. [테스트 계획](#테스트-계획) + +--- + +## 개요 + +### 목적 +현재 staging texture 기반 복사 구조를 제거하고, 진정한 triple buffering 구조로 리팩토링하여 30fps 디코딩 + 60fps 렌더링을 안정적으로 지원 + +### 핵심 아이디어 +- **Staging texture 제거**: 불필요한 복사 및 동기화 오버헤드 제거 +- **명확한 버퍼 역할 분리**: Render, Decode, Idle 3가지 상태로 텍스처 관리 +- **33ms 간격 프레임 전환**: 디코딩 완료 시 렌더링/디코딩 인덱스 동시 전환 + +### 기대 효과 +- ✅ 코드 복잡도 감소 (staging texture 관련 ~200줄 제거) +- ✅ 명확한 소유권 (렌더링 중인 텍스처는 디코더가 건드리지 않음) +- ✅ 자연스러운 동기화 (33ms 간격으로 자동 동기화) +- ✅ NULL 텍스처 문제 근본 해결 + +--- + +## 현재 문제점 + +### 1. Staging Texture 기반 복사 구조의 문제 +``` +DecodeToSurface → texture[0/1/2] → CopyToStagingTexture → staging → Render + (33ms) (GPU copy) (읽기 전용) +``` + +**문제점**: +- **불필요한 복사**: GPU 메모리 간 복사 오버헤드 +- **복잡한 동기화**: CopyToStagingTexture + WaitForCopyCompletion +- **애매한 소유권**: 원본 텍스처가 언제 재사용 가능한지 불명확 +- **NULL 텍스처 버그**: Frame 19에서 texture[0]이 NULL이 되는 문제 + +### 2. 현재 코드의 동기화 구조 +```cpp +// FrameProcessor.cpp (lines 107-139) +if (m_framesDecoded >= 16) { + ID3D12Resource* rgbaTexture = m_renderer->GetNextRGBATextureForCUDAInterop(); + result = vavcore_decode_to_surface(player, VAVCORE_SURFACE_D3D12_RESOURCE, rgbaTexture, &vavFrame); + + if (result == VAVCORE_SUCCESS) { + auto backend = m_renderer->GetRGBASurfaceBackend(); + if (backend) { + // 문제: 여기서 staging으로 복사하는데 원본 텍스처 재사용 타이밍이 애매함 + HRESULT hr = backend->CopyToStagingTexture(rgbaTexture); + hr = backend->WaitForCopyCompletion(); + } + } +} +``` + +### 3. Triple Buffering 순환 문제 +``` +Frame 16: texture[0] → staging → render +Frame 17: texture[1] → staging → render (33ms) +Frame 18: texture[2] → staging → render (33ms) +Frame 19: texture[0] 재사용 시도 → NULL ❌ +``` + +--- + +## 새로운 설계 + +### 1. 
Triple Buffering 구조 + +#### 버퍼 상태 정의 +``` +texture[0]: RENDERING (현재 화면에 출력 중) +texture[1]: IDLE (대기 중, 이미 디코딩 완료) +texture[2]: DECODING (현재 디코딩 작업 중) +``` + +#### 프레임 전환 시퀀스 +``` +초기화 단계 (Frames 0-15): + DecodeToSurface(NULL) × 16번 → VavCore 내부 CUDA DPB 채우기 + +Triple Buffer 채우기 (Frames 16-18): + Frame 16: DecodeToSurface → texture[0] + Frame 17: DecodeToSurface → texture[1] + Frame 18: DecodeToSurface → texture[2] + +정상 디코딩/렌더링 (Frame 19+): + [State: R=0, D=0] + Render: texture[0] 화면 출력 (60fps로 여러 번) + + 33ms 후 디코딩 완료... + [State: R=1, D=1] + AdvanceFrame() 호출: + - m_renderTextureIndex = 1 (texture[1]로 렌더링 전환) + - m_decodeTextureIndex = 1 (texture[1]을 다음 디코딩 타겟으로) + DecodeToSurface → texture[0] 덮어쓰기 (이제 안전) + Render: texture[1] 화면 출력 + + 33ms 후 디코딩 완료... + [State: R=2, D=2] + AdvanceFrame() 호출: + - m_renderTextureIndex = 2 + - m_decodeTextureIndex = 2 + DecodeToSurface → texture[1] 덮어쓰기 + Render: texture[2] 화면 출력 +``` + +### 2. 클래스 구조 변경 + +#### RGBASurfaceBackend.h 변경사항 +```cpp +class RGBASurfaceBackend : public IVideoBackend { +public: + // 삭제될 메서드 + // ❌ ID3D12Resource* GetNextVideoTexture(); + // ❌ ID3D12Resource* GetStagingTexture() const; + // ❌ HRESULT CopyToStagingTexture(ID3D12Resource* sourceTexture); + // ❌ HRESULT WaitForCopyCompletion(); + + // 새로운 메서드 + // ✅ GetCurrentRenderTexture() - 렌더링에서 사용 + ID3D12Resource* GetCurrentRenderTexture() const { + return m_rgbaTextures[m_renderTextureIndex].Get(); + } + + // ✅ GetNextDecodeTexture() - 디코딩에서 사용 + ID3D12Resource* GetNextDecodeTexture() const { + return m_rgbaTextures[m_decodeTextureIndex].Get(); + } + + // ✅ AdvanceFrame() - 33ms마다 호출 (디코딩 완료 시) + void AdvanceFrame() { + // 렌더링을 다음 텍스처로 전환 + m_renderTextureIndex = (m_renderTextureIndex + 1) % BUFFER_COUNT; + // 디코딩도 다음 텍스처로 전환 (이전 렌더링 텍스처를 덮어씀) + m_decodeTextureIndex = (m_decodeTextureIndex + 1) % BUFFER_COUNT; + + LOGF_INFO("[RGBASurfaceBackend] AdvanceFrame: render=%d, decode=%d", + m_renderTextureIndex, m_decodeTextureIndex); + } + + // ✅ 
GetRenderTextureIndex() - 디버깅용 + int GetRenderTextureIndex() const { return m_renderTextureIndex; } + int GetDecodeTextureIndex() const { return m_decodeTextureIndex; } + +private: + // 삭제될 멤버 변수 + // ❌ ComPtr<ID3D12Resource> m_stagingTexture; + // ❌ ComPtr<ID3D12CommandAllocator> m_copyCommandAllocator; + // ❌ ComPtr<ID3D12GraphicsCommandList> m_copyCommandList; + // ❌ ComPtr<ID3D12Fence> m_copyFence; + // ❌ UINT64 m_copyFenceValue = 0; + // ❌ HANDLE m_copyFenceEvent = nullptr; + // ❌ bool m_firstCopy = true; + // ❌ int m_currentTextureIndex = 0; + + // 새로운 멤버 변수 + // ✅ 렌더링용 텍스처 인덱스 (현재 화면에 출력 중) + int m_renderTextureIndex = 0; + + // ✅ 디코딩용 텍스처 인덱스 (다음 디코딩 타겟) + int m_decodeTextureIndex = 0; + + // 기존 유지 + ComPtr<ID3D12Resource> m_rgbaTextures[BUFFER_COUNT]; // 3개 텍스처 유지 +}; +``` + +#### FrameProcessor.cpp 변경사항 +```cpp +bool FrameProcessor::ProcessFrame(VavCorePlayer* player, + std::function<void(bool)> onComplete) +{ + // ... (기존 초기 검증 코드 유지) + + VavCoreVideoFrame vavFrame = {}; + VavCoreResult result; + + if (m_decoderType == VAVCORE_DECODER_DAV1D) { + // DAV1D: CPU 디코딩 (기존 로직 유지) + result = vavcore_decode_next_frame(player, &vavFrame); + if (result == VAVCORE_SUCCESS) { + vavFrame.surface_type = VAVCORE_SURFACE_CPU; + } + } else { + // NVDEC/Hardware: D3D12 surface decoding with triple buffering + + // Phase 1: Initial 16-frame buffering (NULL surface) + if (m_framesDecoded < 16) { + LOGF_DEBUG("[FrameProcessor] Initial buffering phase: frame %llu/16", m_framesDecoded.load()); + result = vavcore_decode_to_surface(player, VAVCORE_SURFACE_D3D12_RESOURCE, nullptr, &vavFrame); + } + // Phase 2: Fill triple buffer (frames 16, 17, 18) + else if (m_framesDecoded < 19) { + LOGF_DEBUG("[FrameProcessor] Filling triple buffer: frame %llu/19", m_framesDecoded.load()); + + auto backend = m_renderer->GetRGBASurfaceBackend(); + ID3D12Resource* decodeTexture = backend->GetNextDecodeTexture(); + + result = vavcore_decode_to_surface(player, VAVCORE_SURFACE_D3D12_RESOURCE, decodeTexture, &vavFrame); + + if (result == VAVCORE_SUCCESS) { + // Triple buffer 채우기 완료 시 인덱스 전진 + 
backend->AdvanceFrame(); + } + } + // Phase 3: Normal decoding (frame 19+) + else { + auto backend = m_renderer->GetRGBASurfaceBackend(); + + // 다음 디코딩용 텍스처 가져오기 (현재 렌더링 중이 아닌 텍스처) + ID3D12Resource* decodeTexture = backend->GetNextDecodeTexture(); + + LOGF_DEBUG("[FrameProcessor] Normal decoding: decode_idx=%d, render_idx=%d", + backend->GetDecodeTextureIndex(), backend->GetRenderTextureIndex()); + + result = vavcore_decode_to_surface(player, VAVCORE_SURFACE_D3D12_RESOURCE, decodeTexture, &vavFrame); + + if (result == VAVCORE_SUCCESS) { + // 디코딩 완료 - 렌더링/디코딩 인덱스 전환 + backend->AdvanceFrame(); + LOGF_INFO("[FrameProcessor] Frame decoded, advanced to render_idx=%d", + backend->GetRenderTextureIndex()); + } + } + } + + // Phase 1 & 2: Buffering 단계는 렌더링 안 함 + if (m_framesDecoded < 19) { + if (result == VAVCORE_PACKET_ACCEPTED || result == VAVCORE_SUCCESS) { + m_framesDecoded++; + m_frameProcessing.store(false); + if (onComplete) onComplete(true); + return true; + } + } + + // Phase 3: 정상 렌더링 (기존 로직 유지) + if (result != VAVCORE_SUCCESS) { + // ... (기존 에러 처리) + } + + m_framesDecoded++; + + // 렌더링 큐에 추가 (기존 로직 유지) + bool enqueued = m_dispatcherQueue.TryEnqueue([this, vavFrame, onComplete, player, processStart]() { + // ... (기존 렌더링 로직) + }); + + return true; +} +``` + +#### D3D12VideoRenderer.cpp 변경사항 +```cpp +HRESULT D3D12VideoRenderer::RenderVideoFrame(const VavCoreVideoFrame& frame, VavCorePlayer* player) +{ + // ... 
(기존 초기 검증 코드) + + if (frame.surface_type == VAVCORE_SURFACE_D3D12_RESOURCE) { + auto backend = m_rgbaSurfaceBackend.get(); + if (!backend) { + return E_NOT_VALID_STATE; + } + + // 현재 렌더링용 텍스처 사용 (이미 디코딩 완료된 안정적인 텍스처) + ID3D12Resource* renderTexture = backend->GetCurrentRenderTexture(); + + if (!renderTexture) { + LOGF_ERROR("[D3D12VideoRenderer] Current render texture is NULL!"); + return E_INVALIDARG; + } + + LOGF_DEBUG("[D3D12VideoRenderer] Rendering texture at index %d", + backend->GetRenderTextureIndex()); + + // SRV 업데이트 (렌더링용 텍스처로) + UpdateSRVForTexture(renderTexture); + + // 렌더링 수행 (기존 RenderToBackBuffer 로직) + hr = backend->RenderToBackBuffer(frame, backBuffer, commandList.Get(), rtvHandle); + } + + // ... (나머지 기존 로직) +} +``` + +### 3. CreateSrvHeap() 수정 +```cpp +HRESULT RGBASurfaceBackend::CreateSrvHeap() { + // SRV를 동적으로 업데이트하거나, 3개 텍스처 모두 SRV 생성 후 인덱스로 선택 + + // Option A: 단일 SRV, 매 프레임 업데이트 (간단) + D3D12_DESCRIPTOR_HEAP_DESC srvHeapDesc = {}; + srvHeapDesc.NumDescriptors = 1; + srvHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + srvHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + + HRESULT hr = m_device->CreateDescriptorHeap(&srvHeapDesc, IID_PPV_ARGS(&m_srvHeap)); + if (FAILED(hr)) return hr; + + // 초기에는 texture[0]의 SRV 생성 + UpdateSRVForCurrentRenderTexture(); + + return S_OK; +} + +// 새로운 헬퍼 메서드 +HRESULT RGBASurfaceBackend::UpdateSRVForCurrentRenderTexture() { + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + srvDesc.Texture2D.MipLevels = 1; + + CD3DX12_CPU_DESCRIPTOR_HANDLE srvHandle(m_srvHeap->GetCPUDescriptorHandleForHeapStart()); + + m_device->CreateShaderResourceView( + m_rgbaTextures[m_renderTextureIndex].Get(), + &srvDesc, + srvHandle + ); + + return S_OK; +} +``` + +--- + +## 구현 계획 + +### Phase 1: RGBASurfaceBackend 리팩토링 + +#### Step 1.1: 멤버 변수 
정리 +- [ ] `RGBASurfaceBackend.h`: Staging texture 관련 멤버 변수 제거 +- [ ] `RGBASurfaceBackend.h`: 새로운 인덱스 변수 추가 (`m_renderTextureIndex`, `m_decodeTextureIndex`) +- [ ] `RGBASurfaceBackend.h`: 새로운 메서드 선언 추가 + +**파일**: `D:\Project\video-av1\vav2\platforms\windows\applications\vav2player\Vav2Player\src\Rendering\RGBASurfaceBackend.h` + +**제거할 코드** (lines 84-96): +```cpp +// ❌ 제거 +ComPtr<ID3D12Resource> m_stagingTexture; +ComPtr<ID3D12CommandAllocator> m_copyCommandAllocator; +ComPtr<ID3D12GraphicsCommandList> m_copyCommandList; +ComPtr<ID3D12Fence> m_copyFence; +UINT64 m_copyFenceValue = 0; +HANDLE m_copyFenceEvent = nullptr; +int m_currentTextureIndex = 0; +bool m_firstCopy = true; +``` + +**추가할 코드**: +```cpp +// ✅ 추가 +int m_renderTextureIndex = 0; // Current texture being rendered +int m_decodeTextureIndex = 0; // Next texture for decoding +``` + +#### Step 1.2: 메서드 시그니처 변경 +- [ ] `RGBASurfaceBackend.h`: 삭제될 메서드 선언 제거 +- [ ] `RGBASurfaceBackend.h`: 새로운 메서드 선언 추가 + +**파일**: `D:\Project\video-av1\vav2\platforms\windows\applications\vav2player\Vav2Player\src\Rendering\RGBASurfaceBackend.h` + +**제거할 메서드 선언** (lines 45-56): +```cpp +// ❌ 제거 +ID3D12Resource* GetNextVideoTexture(); +HRESULT CopyToStagingTexture(ID3D12Resource* sourceTexture); +HRESULT WaitForCopyCompletion(); +ID3D12Resource* GetStagingTexture() const; +int GetCurrentTextureIndex() const; +``` + +**추가할 메서드 선언**: +```cpp +// ✅ 추가 +ID3D12Resource* GetCurrentRenderTexture() const; +ID3D12Resource* GetNextDecodeTexture() const; +void AdvanceFrame(); +int GetRenderTextureIndex() const; +int GetDecodeTextureIndex() const; +HRESULT UpdateSRVForCurrentRenderTexture(); +``` + +#### Step 1.3: 구현 파일 수정 +- [ ] `RGBASurfaceBackend.cpp`: `CreateVideoTexture()` 수정 (staging texture 생성 제거) +- [ ] `RGBASurfaceBackend.cpp`: `Shutdown()` 수정 (staging texture 정리 제거) +- [ ] `RGBASurfaceBackend.cpp`: 삭제될 메서드 구현 제거 +- [ ] `RGBASurfaceBackend.cpp`: 새로운 메서드 구현 추가 + +**파일**: `D:\Project\video-av1\vav2\platforms\windows\applications\vav2player\Vav2Player\src\Rendering\RGBASurfaceBackend.cpp` + +**수정 범위**: +- Lines 
80-223: `CreateVideoTexture()` - staging texture 생성 코드 제거 +- Lines 44-78: `Shutdown()` - staging texture 정리 코드 제거 +- Lines 608-627: `GetNextVideoTexture()` - 완전 제거 +- Lines 628-696: `CopyToStagingTexture()` - 완전 제거 +- Lines 698-744: `WaitForCopyCompletion()` - 완전 제거 + +**새로 추가할 구현**: +```cpp +ID3D12Resource* RGBASurfaceBackend::GetCurrentRenderTexture() const { + return m_rgbaTextures[m_renderTextureIndex].Get(); +} + +ID3D12Resource* RGBASurfaceBackend::GetNextDecodeTexture() const { + return m_rgbaTextures[m_decodeTextureIndex].Get(); +} + +void RGBASurfaceBackend::AdvanceFrame() { + int prevRender = m_renderTextureIndex; + int prevDecode = m_decodeTextureIndex; + + m_renderTextureIndex = (m_renderTextureIndex + 1) % BUFFER_COUNT; + m_decodeTextureIndex = (m_decodeTextureIndex + 1) % BUFFER_COUNT; + + LOGF_INFO("[RGBASurfaceBackend] AdvanceFrame: render %d->%d, decode %d->%d", + prevRender, m_renderTextureIndex, prevDecode, m_decodeTextureIndex); + + // Update SRV to point to new render texture + UpdateSRVForCurrentRenderTexture(); +} + +int RGBASurfaceBackend::GetRenderTextureIndex() const { + return m_renderTextureIndex; +} + +int RGBASurfaceBackend::GetDecodeTextureIndex() const { + return m_decodeTextureIndex; +} + +HRESULT RGBASurfaceBackend::UpdateSRVForCurrentRenderTexture() { + if (!m_srvHeap || !m_rgbaTextures[m_renderTextureIndex]) { + return E_NOT_VALID_STATE; + } + + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + srvDesc.Texture2D.MipLevels = 1; + + CD3DX12_CPU_DESCRIPTOR_HANDLE srvHandle(m_srvHeap->GetCPUDescriptorHandleForHeapStart()); + + m_device->CreateShaderResourceView( + m_rgbaTextures[m_renderTextureIndex].Get(), + &srvDesc, + srvHandle + ); + + LOGF_DEBUG("[RGBASurfaceBackend] Updated SRV for render texture[%d]", m_renderTextureIndex); + + return S_OK; +} +``` + 
+#### Step 1.4: CreateVideoTexture() 간소화 +- [ ] Staging texture 생성 코드 제거 (lines 143-223) +- [ ] Copy command allocator/list 생성 제거 +- [ ] Copy fence 생성 제거 + +**수정 후 CreateVideoTexture() 구조**: +```cpp +HRESULT RGBASurfaceBackend::CreateVideoTexture(uint32_t width, uint32_t height) { + LOGF_INFO("[RGBASurfaceBackend] CreateVideoTexture called: %ux%u", width, height); + m_videoWidth = width; + m_videoHeight = height; + + HRESULT hr = S_OK; + + // Create RGBA texture descriptor for CUDA Surface Object write + D3D12_RESOURCE_DESC rgbaTextureDesc = {}; + // ... (기존 descriptor 설정) + + D3D12_HEAP_PROPERTIES defaultHeapProps = {}; + defaultHeapProps.Type = D3D12_HEAP_TYPE_DEFAULT; + + // Create triple-buffered textures + for (int i = 0; i < BUFFER_COUNT; i++) { + hr = m_device->CreateCommittedResource( + &defaultHeapProps, + D3D12_HEAP_FLAG_SHARED, + &rgbaTextureDesc, + D3D12_RESOURCE_STATE_COMMON, + nullptr, + IID_PPV_ARGS(&m_rgbaTextures[i]) + ); + + if (FAILED(hr)) { + LOGF_ERROR("[RGBASurfaceBackend] Failed to create RGBA texture[%d]: 0x%08X", i, hr); + for (int j = 0; j < i; j++) { + m_rgbaTextures[j].Reset(); + } + return hr; + } + + LOGF_INFO("[RGBASurfaceBackend] Created RGBA texture[%d]: %p", i, m_rgbaTextures[i].Get()); + } + + m_renderTextureIndex = 0; + m_decodeTextureIndex = 0; + + LOGF_INFO("[RGBASurfaceBackend] All %d RGBA textures created successfully", BUFFER_COUNT); + + // Create SRV for rendering + hr = CreateSrvHeap(); + if (FAILED(hr)) { + return hr; + } + + // Update constant buffer + hr = UpdateConstantBuffer(); + if (FAILED(hr)) { + return hr; + } + + return S_OK; +} +``` + +#### Step 1.5: CreateSrvHeap() 수정 +- [ ] Staging texture 대신 현재 render texture로 SRV 생성 +- [ ] 초기화 시 texture[0]의 SRV 생성 + +**파일**: `D:\Project\video-av1\vav2\platforms\windows\applications\vav2player\Vav2Player\src\Rendering\RGBASurfaceBackend.cpp` + +**수정 위치**: Lines 379-409 + +**수정 후 코드**: +```cpp +HRESULT RGBASurfaceBackend::CreateSrvHeap() { + // Create descriptor heap with 1 
descriptor for current render texture + D3D12_DESCRIPTOR_HEAP_DESC srvHeapDesc = {}; + srvHeapDesc.NumDescriptors = 1; + srvHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + srvHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + + HRESULT hr = m_device->CreateDescriptorHeap(&srvHeapDesc, IID_PPV_ARGS(&m_srvHeap)); + if (FAILED(hr)) { + return hr; + } + + // Create initial SRV for texture[0] (m_renderTextureIndex = 0) + hr = UpdateSRVForCurrentRenderTexture(); + if (FAILED(hr)) { + return hr; + } + + LOGF_INFO("[RGBASurfaceBackend] Created SRV heap for render texture"); + + return S_OK; +} +``` + +#### Step 1.6: RenderToBackBuffer() 수정 +- [ ] Staging texture 참조 제거 +- [ ] 현재 render texture 사용하도록 변경 +- [ ] Staging texture 관련 주석 제거 + +**파일**: `D:\Project\video-av1\vav2\platforms\windows\applications\vav2player\Vav2Player\src\Rendering\RGBASurfaceBackend.cpp` + +**수정 위치**: Lines 225-301 (RenderToBackBuffer 함수 전체) + +**주요 변경사항**: +```cpp +// Line 233-238: 기존 staging texture 사용 제거 +// ❌ 제거 +ID3D12Resource* renderTexture = m_stagingTexture.Get(); +if (!renderTexture) { + LOGF_ERROR("[RGBASurfaceBackend] RenderToBackBuffer: staging texture is NULL!"); + return E_INVALIDARG; +} + +// ✅ 추가 +ID3D12Resource* renderTexture = m_rgbaTextures[m_renderTextureIndex].Get(); +if (!renderTexture) { + LOGF_ERROR("[RGBASurfaceBackend] RenderToBackBuffer: render texture[%d] is NULL!", m_renderTextureIndex); + return E_INVALIDARG; +} + +LOGF_DEBUG("[RGBASurfaceBackend] RenderToBackBuffer: using texture[%d], ptr=%p", + m_renderTextureIndex, renderTexture); + +// Line 241-243: Staging texture 관련 주석 제거 +// ❌ 제거 +// Staging texture is already in PIXEL_SHADER_RESOURCE state (set by CopyToStagingTexture) +// No barrier needed here + +// ✅ 추가 +// Render texture is in COMMON state (CUDA managed) +// No barrier needed for reading in pixel shader +``` + +#### Step 1.7: Shutdown() 간소화 +- [ ] Staging texture Release 제거 +- [ ] Copy command objects Release 제거 +- [ ] Copy fence Release 
제거 + +**파일**: `D:\Project\video-av1\vav2\platforms\windows\applications\vav2player\Vav2Player\src\Rendering\RGBASurfaceBackend.cpp` + +**수정 위치**: Lines 44-78 + +**제거할 코드** (lines 59-72): +```cpp +// ❌ 제거 +// Release staging texture and copy command objects +m_copyCommandList.Reset(); +m_copyCommandAllocator.Reset(); +m_stagingTexture.Reset(); + +// Close fence event handle +if (m_copyFenceEvent != nullptr) { + CloseHandle(m_copyFenceEvent); + m_copyFenceEvent = nullptr; +} + +// Release fence +m_copyFence.Reset(); +``` + +**수정 후 Shutdown() 구조**: +```cpp +void RGBASurfaceBackend::Shutdown() { + // Release resources + m_constantBuffer.Reset(); + m_pixelShaderBlob.Reset(); + m_vertexShaderBlob.Reset(); + m_srvHeap.Reset(); + m_pipelineState.Reset(); + m_rootSignature.Reset(); + + // Release all texture buffers + for (int i = 0; i < BUFFER_COUNT; i++) { + m_rgbaTextures[i].Reset(); + } + m_renderTextureIndex = 0; + m_decodeTextureIndex = 0; + + // Clear references (not owned) + m_device = nullptr; + m_commandQueue = nullptr; + + m_initialized = false; +} +``` + +### Phase 2: FrameProcessor 수정 + +#### Step 2.1: ProcessFrame() 로직 변경 +- [ ] 초기 16-frame buffering (NULL surface) - 기존 유지 +- [ ] Triple buffer 채우기 (frames 16-18) - 새로운 로직 +- [ ] 정상 디코딩 (frame 19+) - 새로운 로직 + +**파일**: `D:\Project\video-av1\vav2\platforms\windows\applications\vav2player\Vav2Player\src\Playback\FrameProcessor.cpp` + +**수정 위치**: Lines 88-140 (NVDEC/Hardware 디코딩 부분) + +**수정 후 코드**: +```cpp +} else { + // NVDEC/Hardware: D3D12 surface decoding with triple buffering + + // Phase 1: Initial 16-frame buffering (NULL surface) + if (m_framesDecoded < 16) { + LOGF_DEBUG("[FrameProcessor] Initial buffering phase: frame %llu/16", m_framesDecoded.load()); + + result = vavcore_decode_to_surface( + player, + VAVCORE_SURFACE_D3D12_RESOURCE, + nullptr, + &vavFrame + ); + } + // Phase 2: Fill triple buffer (frames 16, 17, 18) + else if (m_framesDecoded < 19) { + LOGF_DEBUG("[FrameProcessor] Filling triple buffer: 
frame %llu/19", m_framesDecoded.load()); + + auto backend = m_renderer->GetRGBASurfaceBackend(); + if (!backend) { + LOGF_ERROR("[FrameProcessor] RGBASurfaceBackend is NULL"); + m_frameProcessing.store(false); + if (onComplete) onComplete(false); + return false; + } + + ID3D12Resource* decodeTexture = backend->GetNextDecodeTexture(); + if (!decodeTexture) { + LOGF_ERROR("[FrameProcessor] Failed to get decode texture"); + m_frameProcessing.store(false); + if (onComplete) onComplete(false); + return false; + } + + result = vavcore_decode_to_surface( + player, + VAVCORE_SURFACE_D3D12_RESOURCE, + decodeTexture, + &vavFrame + ); + + if (result == VAVCORE_SUCCESS) { + // Triple buffer 채우기 완료 시 인덱스 전진 + backend->AdvanceFrame(); + LOGF_INFO("[FrameProcessor] Triple buffer[%llu] filled, advanced frame", m_framesDecoded.load()); + } + } + // Phase 3: Normal decoding (frame 19+) + else { + auto backend = m_renderer->GetRGBASurfaceBackend(); + if (!backend) { + LOGF_ERROR("[FrameProcessor] RGBASurfaceBackend is NULL"); + m_frameProcessing.store(false); + if (onComplete) onComplete(false); + return false; + } + + // 다음 디코딩용 텍스처 가져오기 (현재 렌더링 중이 아닌 텍스처) + ID3D12Resource* decodeTexture = backend->GetNextDecodeTexture(); + if (!decodeTexture) { + LOGF_ERROR("[FrameProcessor] Failed to get decode texture"); + m_frameProcessing.store(false); + if (onComplete) onComplete(false); + return false; + } + + LOGF_DEBUG("[FrameProcessor] Normal decoding: decode_idx=%d, render_idx=%d", + backend->GetDecodeTextureIndex(), backend->GetRenderTextureIndex()); + + result = vavcore_decode_to_surface( + player, + VAVCORE_SURFACE_D3D12_RESOURCE, + decodeTexture, + &vavFrame + ); + + if (result == VAVCORE_SUCCESS) { + // 디코딩 완료 - 렌더링/디코딩 인덱스 전환 + backend->AdvanceFrame(); + LOGF_INFO("[FrameProcessor] Frame decoded, advanced to render_idx=%d", + backend->GetRenderTextureIndex()); + } + } +} +``` + +#### Step 2.2: 렌더링 시작 조건 변경 +- [ ] Phase 1 & 2 (frames 0-18): 렌더링 안 함 +- [ ] Phase 3 (frame 19+): 정상 렌더링 
+ +**파일**: `D:\Project\video-av1\vav2\platforms\windows\applications\vav2player\Vav2Player\src\Playback\FrameProcessor.cpp` + +**수정 위치**: Lines 146-179 + +**수정 후 코드**: +```cpp +auto decodeEnd = std::chrono::high_resolution_clock::now(); +double decodeTime = std::chrono::duration<double, std::milli>(decodeEnd - decodeStart).count(); + +// Phase 1 & 2: Buffering 단계는 렌더링 안 함 +if (m_framesDecoded < 19) { + if (result == VAVCORE_PACKET_ACCEPTED || result == VAVCORE_SUCCESS) { + m_framesDecoded++; + LOGF_DEBUG("[FrameProcessor] Buffering frame %llu, no rendering yet", m_framesDecoded.load()); + m_frameProcessing.store(false); + if (onComplete) onComplete(true); + return true; + } +} + +// Error handling for all phases +if (result != VAVCORE_SUCCESS) { + if (result == VAVCORE_END_OF_STREAM) { + LOGF_INFO("[FrameProcessor] End of stream"); + m_frameProcessing.store(false); + if (onComplete) onComplete(true); + return false; + } + + if (result == VAVCORE_PACKET_ACCEPTED) { + // VavCore CUDA DPB buffering + LOGF_DEBUG("[FrameProcessor] PACKET ACCEPTED - Frame buffered"); + m_framesDecoded++; + m_frameProcessing.store(false); + if (onComplete) onComplete(true); + return true; + } + + // All other errors + m_decodeErrors++; + LOGF_ERROR("[FrameProcessor] Decode ERROR: result=%d", result); + m_frameProcessing.store(false); + if (onComplete) onComplete(false); + return false; +} + +m_framesDecoded++; +LOGF_INFO("[FrameProcessor] DECODE: %.1f ms", decodeTime); + +// Phase 3: Enqueue render on UI thread (frame 19+) +bool enqueued = m_dispatcherQueue.TryEnqueue([this, vavFrame, onComplete, player, processStart]() { + // ... 
(기존 렌더링 로직 유지) +}); +``` + +#### Step 2.3: CopyToStagingTexture 호출 제거 +- [ ] Lines 123-139의 CopyToStagingTexture + WaitForCopyCompletion 제거 + +**파일**: `D:\Project\video-av1\vav2\platforms\windows\applications\vav2player\Vav2Player\src\Playback\FrameProcessor.cpp` + +**제거할 코드** (lines 123-139): +```cpp +// ❌ 완전 제거 +// After successful decode, copy to staging texture for safe rendering +if (result == VAVCORE_SUCCESS) { + auto backend = m_renderer->GetRGBASurfaceBackend(); + if (backend) { + HRESULT hr = backend->CopyToStagingTexture(rgbaTexture); + if (FAILED(hr)) { + LOGF_ERROR("[FrameProcessor] Failed to copy to staging texture: 0x%08X", hr); + } else { + // Wait for GPU copy to complete before proceeding + hr = backend->WaitForCopyCompletion(); + if (FAILED(hr)) { + LOGF_ERROR("[FrameProcessor] Failed to wait for copy completion: 0x%08X", hr); + } else { + LOGF_INFO("[FrameProcessor] GPU copy completed, staging texture ready"); + } + } + } +} +``` + +### Phase 3: D3D12VideoRenderer 수정 + +#### Step 3.1: RenderVideoFrame() 수정 +- [ ] GetStagingTexture() 제거 +- [ ] GetCurrentRenderTexture() 사용 + +**파일**: `D:\Project\video-av1\vav2\platforms\windows\applications\vav2player\Vav2Player\src\Rendering\D3D12VideoRenderer.cpp` + +**수정 위치**: RenderVideoFrame() 함수 내부 + +**주요 변경사항**: +```cpp +// ❌ 제거 (기존 코드 없음, RenderToBackBuffer가 내부에서 처리) + +// ✅ RenderToBackBuffer에서 자동으로 GetCurrentRenderTexture() 사용 +// 추가 수정 불필요 (RGBASurfaceBackend::RenderToBackBuffer가 이미 올바른 텍스처 사용) +``` + +### Phase 4: 빌드 및 초기 테스트 + +#### Step 4.1: 빌드 오류 수정 +- [ ] Vav2Player.vcxproj 빌드 +- [ ] 컴파일 오류 수정 +- [ ] 링크 오류 수정 + +**빌드 명령어**: +```bash +cd "D:\Project\video-av1\vav2\platforms\windows\applications\vav2player\Vav2Player" +"/c/Program Files/Microsoft Visual Studio/2022/Community/MSBuild/Current/Bin/MSBuild.exe" Vav2Player.vcxproj //p:Configuration=Debug //p:Platform=x64 //v:minimal +``` + +#### Step 4.2: 기본 동작 테스트 +- [ ] 애플리케이션 실행 +- [ ] 비디오 로드 +- [ ] 초기 19프레임 버퍼링 확인 +- [ ] 정상 재생 시작 확인 + +**예상 로그 패턴**: 
+``` +[FrameProcessor] Initial buffering phase: frame 0/16 +... +[FrameProcessor] Initial buffering phase: frame 15/16 +[FrameProcessor] Filling triple buffer: frame 16/19 +[RGBASurfaceBackend] AdvanceFrame: render 0->1, decode 0->1 +[FrameProcessor] Triple buffer[16] filled, advanced frame +[FrameProcessor] Filling triple buffer: frame 17/19 +[RGBASurfaceBackend] AdvanceFrame: render 1->2, decode 1->2 +[FrameProcessor] Triple buffer[17] filled, advanced frame +[FrameProcessor] Filling triple buffer: frame 18/19 +[RGBASurfaceBackend] AdvanceFrame: render 2->0, decode 2->0 +[FrameProcessor] Triple buffer[18] filled, advanced frame +[FrameProcessor] Normal decoding: decode_idx=0, render_idx=0 +[FrameProcessor] Frame decoded, advanced to render_idx=1 +[FrameProcessor] DECODE: XX.X ms +[FrameProcessor] RENDER: XX.X ms | PRESENT: XX.X ms +``` + +--- + +## 테스트 계획 + +### Test Case 1: 초기 버퍼링 단계 검증 +**목적**: 16-frame + 3-frame 버퍼링이 정상 동작하는지 확인 + +**테스트 절차**: +1. 비디오 파일 로드 +2. 재생 시작 +3. time.log 확인 + +**예상 결과**: +``` +✅ Frame 0-15: "Initial buffering phase" 로그 16개 +✅ Frame 16-18: "Filling triple buffer" 로그 3개 +✅ Frame 19+: "Normal decoding" 로그 +✅ 각 triple buffer 채우기 후 "AdvanceFrame" 로그 +``` + +**실패 조건**: +- ❌ 19프레임 이전에 렌더링 시도 +- ❌ Triple buffer 채우기 중 인덱스 전환 실패 +- ❌ NULL 텍스처 에러 + +### Test Case 2: Triple Buffering 순환 검증 +**목적**: texture[0] → texture[1] → texture[2] → texture[0] 순환이 정상인지 확인 + +**테스트 절차**: +1. 30프레임 이상 재생 +2. time.log에서 render_idx, decode_idx 추적 + +**예상 결과**: +``` +✅ Frame 19: render_idx=0, decode_idx=0 +✅ Frame 20: render_idx=1, decode_idx=1 +✅ Frame 21: render_idx=2, decode_idx=2 +✅ Frame 22: render_idx=0, decode_idx=0 (순환 완료) +✅ 모든 프레임에서 NULL 텍스처 없음 +``` + +**실패 조건**: +- ❌ Frame 22에서 texture[0] NULL 발생 +- ❌ render_idx와 decode_idx가 동일하지 않음 +- ❌ 순환 패턴이 깨짐 + +### Test Case 3: 60fps 렌더링 안정성 +**목적**: 30fps 디코딩 + 60fps 렌더링 시나리오에서 안정성 확인 + +**테스트 절차**: +1. 60fps 모니터에서 비디오 재생 (VSync 활성화) +2. 1분 이상 재생 +3. 
프레임 드롭, 스터터링 확인 + +**예상 결과**: +``` +✅ 부드러운 60fps 렌더링 +✅ 프레임 드롭 0개 +✅ 같은 디코딩 프레임이 2번 렌더링됨 +✅ 메모리 누수 없음 +``` + +**실패 조건**: +- ❌ 스터터링 발생 +- ❌ 프레임 드롭 발생 +- ❌ 디코딩/렌더링 비동기 깨짐 + +### Test Case 4: Seek 및 Reset 동작 +**목적**: 탐색 및 재시작 시 triple buffering이 올바르게 재초기화되는지 확인 + +**테스트 절차**: +1. 비디오 중간으로 seek +2. 재생 재시작 +3. 버퍼링 단계가 다시 정상 실행되는지 확인 + +**예상 결과**: +``` +✅ Seek 후 16-frame buffering 재시작 +✅ Triple buffer 재초기화 +✅ render_idx, decode_idx 모두 0으로 리셋 +✅ 정상 재생 재개 +``` + +**실패 조건**: +- ❌ Seek 후 인덱스 리셋 실패 +- ❌ 버퍼링 단계 스킵 +- ❌ 이전 프레임 잔상 + +### Test Case 5: 성능 측정 +**목적**: Staging texture 제거 후 성능 개선 확인 + +**측정 항목**: +- 디코딩 시간 (ms) +- 렌더링 시간 (ms) +- Present 시간 (ms) +- 전체 프레임 처리 시간 (ms) + +**예상 결과**: +``` +✅ 디코딩 시간: 10-15ms (기존과 동일) +✅ 렌더링 시간: 0.4-0.8ms (기존과 동일 또는 개선) +✅ GPU copy 제거로 2-5ms 절약 +✅ 전체 처리 시간: 11-13ms (개선) +``` + +**비교 기준** (기존 staging texture 방식): +``` +[FrameProcessor] DECODE: 34.0 ms (late binding 포함) +[FrameProcessor] RENDER: 0.8 ms | PRESENT: 1.9 ms | TOTAL: 37.1 ms +``` + +--- + +## 위험 요소 및 대응 + +### 위험 1: SRV 업데이트 오버헤드 +**문제**: 매 프레임마다 `UpdateSRVForCurrentRenderTexture()` 호출 시 성능 저하 가능 + +**대응 방안**: +- Option A: SRV를 3개 생성하고 descriptor table로 선택 +- Option B: SRV 업데이트가 충분히 빠르다면 현재 방식 유지 (권장) + +**검증 방법**: +```cpp +auto srvUpdateStart = std::chrono::high_resolution_clock::now(); +UpdateSRVForCurrentRenderTexture(); +auto srvUpdateEnd = std::chrono::high_resolution_clock::now(); +double srvUpdateTime = std::chrono::duration<double, std::milli>(srvUpdateEnd - srvUpdateStart).count(); +LOGF_DEBUG("[RGBASurfaceBackend] SRV update time: %.3f ms", srvUpdateTime); +``` + +### 위험 2: 인덱스 동기화 실패 +**문제**: render_idx와 decode_idx가 올바르게 전환되지 않아 같은 텍스처를 동시에 읽기/쓰기 + +**대응 방안**: +- 모든 AdvanceFrame() 호출에 로깅 추가 +- 인덱스 충돌 감지 로직 추가 + +**검증 코드**: +```cpp +void RGBASurfaceBackend::AdvanceFrame() { + int nextRender = (m_renderTextureIndex + 1) % BUFFER_COUNT; + int nextDecode = (m_decodeTextureIndex + 1) % BUFFER_COUNT; + + // Safety check: render and decode should move together + if (nextRender != nextDecode) { + 
LOGF_ERROR("[RGBASurfaceBackend] Index mismatch! render=%d, decode=%d", nextRender, nextDecode); + } + + m_renderTextureIndex = nextRender; + m_decodeTextureIndex = nextDecode; +} +``` + +### 위험 3: Triple buffer 채우기 실패 +**문제**: Frames 16-18 디코딩 중 에러 발생 시 버퍼가 부분적으로만 채워짐 + +**대응 방안**: +- 에러 발생 시 재시도 로직 +- 최소 버퍼 확보 검증 + +**검증 코드**: +```cpp +if (m_framesDecoded >= 19) { + // Verify all buffers are filled + for (int i = 0; i < BUFFER_COUNT; i++) { + if (!m_rgbaTextures[i].Get()) { + LOGF_ERROR("[RGBASurfaceBackend] Triple buffer[%d] is NULL!", i); + return E_FAIL; + } + } +} +``` + +--- + +## 성공 기준 + +### 필수 달성 목표 +1. ✅ **빌드 성공**: 모든 컴파일/링크 오류 해결 +2. ✅ **NULL 텍스처 제거**: Frame 19+ NULL 텍스처 문제 완전 해결 +3. ✅ **부드러운 재생**: 30fps 디코딩 + 60fps 렌더링 안정적 동작 +4. ✅ **코드 단순화**: Staging texture 관련 ~200줄 제거 + +### 우수 달성 목표 +1. ✅ **성능 개선**: GPU copy 제거로 2-5ms 절약 +2. ✅ **메모리 효율**: Staging texture 제거로 VRAM 절약 +3. ✅ **코드 가독성**: 명확한 triple buffering 구조 + +### 탁월 달성 목표 +1. ✅ **확장성**: 다른 렌더링 백엔드에도 적용 가능한 구조 +2. ✅ **문서화**: 완전한 설계 문서 및 코드 주석 +3. 
✅ **테스트 커버리지**: 5개 테스트 케이스 모두 통과 + +--- + +## 일정 및 마일스톤 + +### Milestone 1: 코드 리팩토링 (1-2시간) +- [ ] Phase 1: RGBASurfaceBackend 리팩토링 +- [ ] Phase 2: FrameProcessor 수정 +- [ ] Phase 3: D3D12VideoRenderer 수정 +- [ ] Phase 4: 빌드 및 초기 테스트 + +### Milestone 2: 기능 검증 (30분) +- [ ] Test Case 1: 초기 버퍼링 단계 검증 +- [ ] Test Case 2: Triple Buffering 순환 검증 +- [ ] Test Case 3: 60fps 렌더링 안정성 + +### Milestone 3: 고급 테스트 (30분) +- [ ] Test Case 4: Seek 및 Reset 동작 +- [ ] Test Case 5: 성능 측정 +- [ ] 문서 업데이트 + +**총 예상 시간**: 2-3시간 + +--- + +## 참고 자료 + +### 관련 파일 +- `D:\Project\video-av1\vav2\platforms\windows\applications\vav2player\Vav2Player\src\Rendering\RGBASurfaceBackend.h` +- `D:\Project\video-av1\vav2\platforms\windows\applications\vav2player\Vav2Player\src\Rendering\RGBASurfaceBackend.cpp` +- `D:\Project\video-av1\vav2\platforms\windows\applications\vav2player\Vav2Player\src\Playback\FrameProcessor.h` +- `D:\Project\video-av1\vav2\platforms\windows\applications\vav2player\Vav2Player\src\Playback\FrameProcessor.cpp` +- `D:\Project\video-av1\vav2\platforms\windows\applications\vav2player\Vav2Player\src\Rendering\D3D12VideoRenderer.h` +- `D:\Project\video-av1\vav2\platforms\windows\applications\vav2player\Vav2Player\src\Rendering\D3D12VideoRenderer.cpp` + +### 기술 문서 +- [Vav2Player Stutter Fix Design](Vav2Player_Stutter_Fix_Design.md) - Staging texture 도입 배경 +- [VavCore NVDEC DPB Redesign](../../../docs/completed/windows/VavCore_NVDEC_DPB_Redesign.md) - CUDA DPB 구조 +- [Vav2Player NVDEC DPB Integration](../../../docs/completed/windows/Vav2Player_NVDEC_DPB_Integration.md) - Late binding 메커니즘 + +--- + +*문서 버전: 1.0* +*최종 수정: 2025-10-10* diff --git a/vav2/platforms/windows/applications/vav2player/Vav2Player/Vav2Player.vcxproj b/vav2/platforms/windows/applications/vav2player/Vav2Player/Vav2Player.vcxproj index 5ac3392..d3e4054 100644 --- a/vav2/platforms/windows/applications/vav2player/Vav2Player/Vav2Player.vcxproj +++ b/vav2/platforms/windows/applications/vav2player/Vav2Player/Vav2Player.vcxproj 
@@ -359,13 +359,19 @@ - echo Copying VavCore Debug DLL... -copy "$(ProjectDir)..\..\..\vavcore\lib\VavCore-debug.dll" "$(LayoutDir)\VavCore-debug.dll" -echo DLL copy completed. - Copying VavCore-debug.dll to output directory + echo Copying VavCore Debug DLL to AppX directory... +echo Source: "$(ProjectDir)..\..\..\vavcore\lib\VavCore-debug.dll" +echo Target: "$(LayoutDir)\VavCore-debug.dll" +copy /Y "$(ProjectDir)..\..\..\vavcore\lib\VavCore-debug.dll" "$(LayoutDir)\VavCore-debug.dll" +if errorlevel 1 ( + echo ERROR: Failed to copy VavCore-debug.dll + exit /b 1 +) +echo DLL copy completed successfully. + Copying VavCore-debug.dll to AppX directory - del "$(LayoutDir)\VavCore-debug.dll" + if exist "$(LayoutDir)\VavCore-debug.dll" del "$(LayoutDir)\VavCore-debug.dll" diff --git a/vav2/platforms/windows/applications/vav2player/Vav2Player/src/Playback/FrameProcessor.cpp b/vav2/platforms/windows/applications/vav2player/Vav2Player/src/Playback/FrameProcessor.cpp index 7050825..c58e3db 100644 --- a/vav2/platforms/windows/applications/vav2player/Vav2Player/src/Playback/FrameProcessor.cpp +++ b/vav2/platforms/windows/applications/vav2player/Vav2Player/src/Playback/FrameProcessor.cpp @@ -102,40 +102,65 @@ bool FrameProcessor::ProcessFrame(VavCorePlayer* player, // Expected: VAVCORE_PACKET_ACCEPTED for first 16 frames // No rendering during buffering phase - } else { - // Phase 2: Normal decoding with D3D12 surface (17th frame onwards) - ID3D12Resource* rgbaTexture = m_renderer->GetNextRGBATextureForCUDAInterop(); - if (!rgbaTexture) { - LOGF_ERROR("[FrameProcessor] Failed to get next RGBA texture"); + } + // Phase 2: Triple buffer filling (frames 16-18) + // Fill textures 0, 1, 2 before starting normal operation + else if (m_framesDecoded < 19) { + auto backend = m_renderer->GetRGBASurfaceBackend(); + if (!backend) { + LOGF_ERROR("[FrameProcessor] Failed to get RGBASurfaceBackend"); m_frameProcessing.store(false); if (onComplete) onComplete(false); return false; } + 
ID3D12Resource* decodeTexture = backend->GetNextDecodeTexture(); + int decodeIndex = backend->GetDecodeTextureIndex(); + LOGF_INFO("[FrameProcessor] Triple buffer filling: frame %llu -> texture[%d]", + m_framesDecoded.load(), decodeIndex); + result = vavcore_decode_to_surface( player, VAVCORE_SURFACE_D3D12_RESOURCE, - rgbaTexture, + decodeTexture, &vavFrame ); - // After successful decode, copy to staging texture for safe rendering + // After successful decode, advance decode index only (render index stays at 0) if (result == VAVCORE_SUCCESS) { - auto backend = m_renderer->GetRGBASurfaceBackend(); - if (backend) { - HRESULT hr = backend->CopyToStagingTexture(rgbaTexture); - if (FAILED(hr)) { - LOGF_ERROR("[FrameProcessor] Failed to copy to staging texture: 0x%08X", hr); - } else { - // Wait for GPU copy to complete before proceeding - hr = backend->WaitForCopyCompletion(); - if (FAILED(hr)) { - LOGF_ERROR("[FrameProcessor] Failed to wait for copy completion: 0x%08X", hr); - } else { - LOGF_INFO("[FrameProcessor] GPU copy completed, staging texture ready"); - } - } - } + backend->AdvanceDecodeOnly(); + LOGF_INFO("[FrameProcessor] Triple buffer filled: texture[%d] ready", decodeIndex); + } + } + // Phase 3: Normal operation (frame 19+) + // Render from current texture, decode into next texture + else { + auto backend = m_renderer->GetRGBASurfaceBackend(); + if (!backend) { + LOGF_ERROR("[FrameProcessor] Failed to get RGBASurfaceBackend"); + m_frameProcessing.store(false); + if (onComplete) onComplete(false); + return false; + } + + ID3D12Resource* decodeTexture = backend->GetNextDecodeTexture(); + int decodeIndex = backend->GetDecodeTextureIndex(); + int renderIndex = backend->GetRenderTextureIndex(); + LOGF_DEBUG("[FrameProcessor] Normal operation: render[%d], decode[%d]", + renderIndex, decodeIndex); + + result = vavcore_decode_to_surface( + player, + VAVCORE_SURFACE_D3D12_RESOURCE, + decodeTexture, + &vavFrame + ); + + // After successful decode, advance frame 
indices + if (result == VAVCORE_SUCCESS) { + backend->AdvanceFrame(); + LOGF_DEBUG("[FrameProcessor] Frame advanced: render[%d]->render[%d]", + renderIndex, backend->GetRenderTextureIndex()); } } } @@ -157,6 +182,10 @@ bool FrameProcessor::ProcessFrame(VavCorePlayer* player, // No frame is ready yet - VavCore will return it in a future call LOGF_DEBUG("[FrameProcessor] PACKET ACCEPTED - Frame buffered in VavCore CUDA DPB (16-frame buffering)"); + // CRITICAL: Increment m_framesDecoded for buffered packets + // This counter determines when we switch from NULL surface (buffering) to valid surface (rendering) + m_framesDecoded++; + // No action needed - just wait for next timing tick // VavCore will return the buffered frame when ready m_frameProcessing.store(false); diff --git a/vav2/platforms/windows/applications/vav2player/Vav2Player/src/Rendering/D3D12VideoRenderer.cpp b/vav2/platforms/windows/applications/vav2player/Vav2Player/src/Rendering/D3D12VideoRenderer.cpp index eed30b3..84d9144 100644 --- a/vav2/platforms/windows/applications/vav2player/Vav2Player/src/Rendering/D3D12VideoRenderer.cpp +++ b/vav2/platforms/windows/applications/vav2player/Vav2Player/src/Rendering/D3D12VideoRenderer.cpp @@ -268,13 +268,6 @@ ID3D12Resource* D3D12VideoRenderer::GetRGBATextureForCUDAInterop() const { return nullptr; } -ID3D12Resource* D3D12VideoRenderer::GetNextRGBATextureForCUDAInterop() { - if (m_rgbaSurfaceBackend) { - return m_rgbaSurfaceBackend->GetNextVideoTexture(); - } - return nullptr; -} - uint8_t* D3D12VideoRenderer::GetYMappedBuffer(uint32_t bufferIndex) const { if (m_yuv420pUploadBackend) { return m_yuv420pUploadBackend->GetYMappedBuffer(bufferIndex); @@ -496,6 +489,13 @@ IVideoBackend* D3D12VideoRenderer::SelectBackend(const VavCoreVideoFrame& frame) } HRESULT D3D12VideoRenderer::EnsureVideoTexture(const VavCoreVideoFrame& frame) { + // Skip if frame has invalid dimensions (can happen during CUDA DPB buffering) + if (frame.width == 0 || frame.height == 0) { + 
LOGF_DEBUG("[D3D12VideoRenderer] Skipping texture creation for invalid frame dimensions: %dx%d", + frame.width, frame.height); + return S_OK; // Not an error, just skip texture creation + } + // Check if we need to create/recreate video texture if (m_videoWidth != (uint32_t)frame.width || m_videoHeight != (uint32_t)frame.height) { IVideoBackend* backend = SelectBackend(frame); diff --git a/vav2/platforms/windows/applications/vav2player/Vav2Player/src/Rendering/D3D12VideoRenderer.h b/vav2/platforms/windows/applications/vav2player/Vav2Player/src/Rendering/D3D12VideoRenderer.h index 30de1d8..e6fd999 100644 --- a/vav2/platforms/windows/applications/vav2player/Vav2Player/src/Rendering/D3D12VideoRenderer.h +++ b/vav2/platforms/windows/applications/vav2player/Vav2Player/src/Rendering/D3D12VideoRenderer.h @@ -59,10 +59,9 @@ public: // Backend-specific texture access for CUDA interop ID3D12Resource* GetRGBATextureForCUDAInterop() const; - ID3D12Resource* GetNextRGBATextureForCUDAInterop(); // Rotates to next buffer for triple buffering ID3D12Resource* GetNV12TextureForCUDAInterop() const { return nullptr; } // Future: NV12DirectBackend - // Get RGBASurfaceBackend for staging texture operations + // Get RGBASurfaceBackend for triple buffer management RGBASurfaceBackend* GetRGBASurfaceBackend() const { return m_rgbaSurfaceBackend.get(); } // Legacy YUV420P upload buffer access (for backward compatibility) diff --git a/vav2/platforms/windows/applications/vav2player/Vav2Player/src/Rendering/RGBASurfaceBackend.cpp b/vav2/platforms/windows/applications/vav2player/Vav2Player/src/Rendering/RGBASurfaceBackend.cpp index 409adad..f87a688 100644 --- a/vav2/platforms/windows/applications/vav2player/Vav2Player/src/Rendering/RGBASurfaceBackend.cpp +++ b/vav2/platforms/windows/applications/vav2player/Vav2Player/src/Rendering/RGBASurfaceBackend.cpp @@ -54,21 +54,8 @@ void RGBASurfaceBackend::Shutdown() { for (int i = 0; i < BUFFER_COUNT; i++) { m_rgbaTextures[i].Reset(); } - 
m_currentTextureIndex = 0; - - // Release staging texture and copy command objects - m_copyCommandList.Reset(); - m_copyCommandAllocator.Reset(); - m_stagingTexture.Reset(); - - // Close fence event handle - if (m_copyFenceEvent != nullptr) { - CloseHandle(m_copyFenceEvent); - m_copyFenceEvent = nullptr; - } - - // Release fence - m_copyFence.Reset(); + m_renderTextureIndex = 0; + m_decodeTextureIndex = 0; // Clear references (not owned) m_device = nullptr; @@ -78,15 +65,13 @@ void RGBASurfaceBackend::Shutdown() { } HRESULT RGBASurfaceBackend::CreateVideoTexture(uint32_t width, uint32_t height) { + LOGF_INFO("[RGBASurfaceBackend] CreateVideoTexture called: %ux%u", width, height); m_videoWidth = width; m_videoHeight = height; HRESULT hr = S_OK; // Create RGBA texture descriptor for CUDA Surface Object write - // Format: DXGI_FORMAT_R8G8B8A8_UNORM (4 bytes per pixel) - // Flags: D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS (enables CUDA Surface Object creation) - // Layout: D3D12_TEXTURE_LAYOUT_UNKNOWN (tiled, CUDA Surface Objects handle this automatically) D3D12_RESOURCE_DESC rgbaTextureDesc = {}; rgbaTextureDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; rgbaTextureDesc.Width = width; @@ -96,8 +81,8 @@ HRESULT RGBASurfaceBackend::CreateVideoTexture(uint32_t width, uint32_t height) rgbaTextureDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; rgbaTextureDesc.SampleDesc.Count = 1; rgbaTextureDesc.SampleDesc.Quality = 0; - rgbaTextureDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; // Tiled layout - rgbaTextureDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; // Enable CUDA write + rgbaTextureDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; + rgbaTextureDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; D3D12_HEAP_PROPERTIES defaultHeapProps = {}; defaultHeapProps.Type = D3D12_HEAP_TYPE_DEFAULT; @@ -108,110 +93,48 @@ HRESULT RGBASurfaceBackend::CreateVideoTexture(uint32_t width, uint32_t height) for (int i = 0; i < BUFFER_COUNT; i++) { hr = 
m_device->CreateCommittedResource( &defaultHeapProps, - D3D12_HEAP_FLAG_SHARED, // Required for CUDA interop + D3D12_HEAP_FLAG_SHARED, &rgbaTextureDesc, - D3D12_RESOURCE_STATE_COMMON, // CUDA will transition as needed + D3D12_RESOURCE_STATE_COMMON, nullptr, IID_PPV_ARGS(&m_rgbaTextures[i]) ); if (FAILED(hr)) { - // Cleanup already created textures + LOGF_ERROR("[RGBASurfaceBackend] Failed to create RGBA texture[%d]: 0x%08X", i, hr); for (int j = 0; j < i; j++) { m_rgbaTextures[j].Reset(); } return hr; } + + LOGF_INFO("[RGBASurfaceBackend] Created RGBA texture[%d]: %p", i, m_rgbaTextures[i].Get()); } - m_currentTextureIndex = 0; + // Triple buffer filling logic: + // - Frames 16-18 fill textures 0, 1, 2 (decode only, no rendering yet) + // - Frame 19+ normal operation (decode into different texture than render) + // + // Initial state for filling phase: + // - decodeIndex = 0 (will fill texture[0], then [1], then [2]) + // - renderIndex = 2 (will render from texture[2] after filling completes) + // + // After filling completes (frame 18): + // - decodeIndex = 0 (wraps back after filling [2]) + // - renderIndex = 2 (will render from texture[2] at frame 19) + // - Frame 19: render from [2], decode into [0] (no conflict!) 
+ m_renderTextureIndex = 2; + m_decodeTextureIndex = 0; - // Create staging texture (same format, but no UAV flag - only for rendering) - D3D12_RESOURCE_DESC stagingTextureDesc = rgbaTextureDesc; - stagingTextureDesc.Flags = D3D12_RESOURCE_FLAG_NONE; // No CUDA access needed + LOGF_INFO("[RGBASurfaceBackend] All %d RGBA textures created successfully", BUFFER_COUNT); - hr = m_device->CreateCommittedResource( - &defaultHeapProps, - D3D12_HEAP_FLAG_NONE, - &stagingTextureDesc, - D3D12_RESOURCE_STATE_COPY_DEST, // Initial state for receiving copies - nullptr, - IID_PPV_ARGS(&m_stagingTexture) - ); - - if (FAILED(hr)) { - LOGF_ERROR("[RGBASurfaceBackend] Failed to create staging texture: 0x%08X", hr); - for (int i = 0; i < BUFFER_COUNT; i++) { - m_rgbaTextures[i].Reset(); - } - return hr; - } - - // Create fence for GPU copy synchronization - hr = m_device->CreateFence( - 0, - D3D12_FENCE_FLAG_NONE, - IID_PPV_ARGS(&m_copyFence) - ); - - if (FAILED(hr)) { - LOGF_ERROR("[RGBASurfaceBackend] Failed to create copy fence: 0x%08X", hr); - m_stagingTexture.Reset(); - for (int i = 0; i < BUFFER_COUNT; i++) { - m_rgbaTextures[i].Reset(); - } - return hr; - } - - // Create fence event for CPU wait - m_copyFenceEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr); - if (m_copyFenceEvent == nullptr) { - hr = HRESULT_FROM_WIN32(GetLastError()); - LOGF_ERROR("[RGBASurfaceBackend] Failed to create fence event: 0x%08X", hr); - m_copyFence.Reset(); - m_stagingTexture.Reset(); - for (int i = 0; i < BUFFER_COUNT; i++) { - m_rgbaTextures[i].Reset(); - } - return hr; - } - - LOGF_INFO("[RGBASurfaceBackend] Copy fence and event created successfully"); - - // Create command allocator and list for texture copy operations - hr = m_device->CreateCommandAllocator( - D3D12_COMMAND_LIST_TYPE_DIRECT, - IID_PPV_ARGS(&m_copyCommandAllocator) - ); - if (FAILED(hr)) { - LOGF_ERROR("[RGBASurfaceBackend] Failed to create copy command allocator: 0x%08X", hr); - return hr; - } - - hr = 
m_device->CreateCommandList( - 0, - D3D12_COMMAND_LIST_TYPE_DIRECT, - m_copyCommandAllocator.Get(), - nullptr, - IID_PPV_ARGS(&m_copyCommandList) - ); - if (FAILED(hr)) { - LOGF_ERROR("[RGBASurfaceBackend] Failed to create copy command list: 0x%08X", hr); - return hr; - } - - // Close the command list (will be reset when needed) - m_copyCommandList->Close(); - - LOGF_INFO("[RGBASurfaceBackend] Created staging texture for safe rendering"); - - // Create SRV for RGBA texture + // Create SRV for rendering hr = CreateSrvHeap(); if (FAILED(hr)) { return hr; } - // Update constant buffer with new aspect ratio + // Update constant buffer hr = UpdateConstantBuffer(); if (FAILED(hr)) { return hr; @@ -226,8 +149,7 @@ HRESULT RGBASurfaceBackend::RenderToBackBuffer( ID3D12GraphicsCommandList* commandList, D3D12_CPU_DESCRIPTOR_HANDLE rtvHandle) { - // RGBASurfaceBackend doesn't need RTV (uses CopyResource) - (void)rtvHandle; + (void)rtvHandle; // RGBASurfaceBackend doesn't use external RTV if (!m_initialized) { return E_NOT_VALID_STATE; } @@ -236,17 +158,18 @@ HRESULT RGBASurfaceBackend::RenderToBackBuffer( return E_INVALIDARG; } - // Use staging texture for rendering (safe from decoder overwrites) - ID3D12Resource* renderTexture = m_stagingTexture.Get(); + // Use current render texture (already decoded, safe to read) + ID3D12Resource* renderTexture = m_rgbaTextures[m_renderTextureIndex].Get(); if (!renderTexture) { - LOGF_ERROR("[RGBASurfaceBackend] RenderToBackBuffer: staging texture is NULL!"); + LOGF_ERROR("[RGBASurfaceBackend] RenderToBackBuffer: render texture[%d] is NULL!", m_renderTextureIndex); return E_INVALIDARG; } - LOGF_DEBUG("[RGBASurfaceBackend] RenderToBackBuffer: using staging texture, ptr=%p", renderTexture); + LOGF_DEBUG("[RGBASurfaceBackend] RenderToBackBuffer: using texture[%d], ptr=%p", + m_renderTextureIndex, renderTexture); - // Staging texture is already in PIXEL_SHADER_RESOURCE state (set by CopyToStagingTexture) - // No barrier needed here + // Render 
texture is in COMMON state (CUDA managed) + // No barrier needed for reading in pixel shader // Transition back buffer to render target D3D12_RESOURCE_BARRIER barrierToRT = {}; @@ -258,13 +181,12 @@ HRESULT RGBASurfaceBackend::RenderToBackBuffer( barrierToRT.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; commandList->ResourceBarrier(1, &barrierToRT); - // Create RTV for back buffer (not needed anymore - use rtvHandle from parameter) + // Create RTV for back buffer D3D12_CPU_DESCRIPTOR_HANDLE backBufferRtvHandle; D3D12_RENDER_TARGET_VIEW_DESC rtvDesc = {}; rtvDesc.Format = DXGI_FORMAT_B8G8R8A8_UNORM; rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; - // Create temporary RTV heap for back buffer ComPtr rtvHeap; D3D12_DESCRIPTOR_HEAP_DESC rtvHeapDesc = {}; rtvHeapDesc.NumDescriptors = 1; @@ -287,7 +209,7 @@ HRESULT RGBASurfaceBackend::RenderToBackBuffer( ID3D12DescriptorHeap* heaps[] = { m_srvHeap.Get() }; commandList->SetDescriptorHeaps(1, heaps); - // Use descriptor for staging texture (index 0, the only descriptor) + // Use SRV for current render texture CD3DX12_GPU_DESCRIPTOR_HANDLE srvHandle(m_srvHeap->GetGPUDescriptorHandleForHeapStart()); commandList->SetGraphicsRootDescriptorTable(0, srvHandle); commandList->SetGraphicsRootConstantBufferView(1, m_constantBuffer->GetGPUVirtualAddress()); @@ -314,7 +236,7 @@ HRESULT RGBASurfaceBackend::RenderToBackBuffer( // Draw fullscreen quad commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); - commandList->DrawInstanced(6, 1, 0, 0); // Fullscreen quad (2 triangles) + commandList->DrawInstanced(6, 1, 0, 0); // Transition back buffer to present D3D12_RESOURCE_BARRIER barrierToPresent = {}; @@ -326,9 +248,6 @@ HRESULT RGBASurfaceBackend::RenderToBackBuffer( barrierToPresent.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; commandList->ResourceBarrier(1, &barrierToPresent); - // Staging texture remains in PIXEL_SHADER_RESOURCE state (no transition needed) - // It 
will be transitioned back to COPY_DEST when CopyToStagingTexture is called next - return S_OK; } @@ -536,9 +455,9 @@ HRESULT RGBASurfaceBackend::CreatePipelineState() { } HRESULT RGBASurfaceBackend::CreateSrvHeap() { - // Create descriptor heap with 1 descriptor for staging texture + // Create descriptor heap with 1 descriptor for current render texture D3D12_DESCRIPTOR_HEAP_DESC srvHeapDesc = {}; - srvHeapDesc.NumDescriptors = 1; // Only need SRV for staging texture + srvHeapDesc.NumDescriptors = 1; srvHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; srvHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; @@ -547,22 +466,13 @@ HRESULT RGBASurfaceBackend::CreateSrvHeap() { return hr; } - // Create SRV for staging texture (the only texture used for rendering) - D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; - srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; - srvDesc.Texture2D.MipLevels = 1; + // Create initial SRV for texture[0] (m_renderTextureIndex = 0) + hr = UpdateSRVForCurrentRenderTexture(); + if (FAILED(hr)) { + return hr; + } - CD3DX12_CPU_DESCRIPTOR_HANDLE srvHandle(m_srvHeap->GetCPUDescriptorHandleForHeapStart()); - - m_device->CreateShaderResourceView( - m_stagingTexture.Get(), - &srvDesc, - srvHandle - ); - - LOGF_INFO("[RGBASurfaceBackend] Created SRV for staging texture"); + LOGF_INFO("[RGBASurfaceBackend] Created SRV heap for render texture"); return S_OK; } @@ -605,138 +515,80 @@ HRESULT RGBASurfaceBackend::UpdateConstantBuffer() { return S_OK; } -ID3D12Resource* RGBASurfaceBackend::GetNextVideoTexture() { - // Rotate to next buffer index - int prevIndex = m_currentTextureIndex; - m_currentTextureIndex = (m_currentTextureIndex + 1) % BUFFER_COUNT; - - LOGF_INFO("[RGBASurfaceBackend] GetNextVideoTexture: %d -> %d, texture=%p", - prevIndex, m_currentTextureIndex, m_rgbaTextures[m_currentTextureIndex].Get()); - 
- return m_rgbaTextures[m_currentTextureIndex].Get(); +// Triple buffering management functions +ID3D12Resource* RGBASurfaceBackend::GetCurrentRenderTexture() const { + return m_rgbaTextures[m_renderTextureIndex].Get(); } -HRESULT RGBASurfaceBackend::CopyToStagingTexture(ID3D12Resource* sourceTexture) { - if (!m_initialized || !m_stagingTexture || !sourceTexture) { +ID3D12Resource* RGBASurfaceBackend::GetNextDecodeTexture() const { + return m_rgbaTextures[m_decodeTextureIndex].Get(); +} + +void RGBASurfaceBackend::AdvanceDecodeOnly() { + int prevDecode = m_decodeTextureIndex; + + // Filling phase: Only advance decode index, render index stays at 0 + // This is used during frames 16-18 to fill all three textures + m_decodeTextureIndex = (m_decodeTextureIndex + 1) % BUFFER_COUNT; + + LOGF_INFO("[RGBASurfaceBackend] AdvanceDecodeOnly: decode %d->%d (render stays at %d)", + prevDecode, m_decodeTextureIndex, m_renderTextureIndex); +} + +void RGBASurfaceBackend::AdvanceFrame() { + int prevRender = m_renderTextureIndex; + int prevDecode = m_decodeTextureIndex; + + // Triple buffering advance logic: + // After Frame N decodes into decodeTexture, we want: + // - Render from the PREVIOUS frame's texture (N-1), not the current one! + // - Decode into the oldest texture (N-2) + // + // Current state: render=R, decode=D + // After decode completes: the old render texture becomes new decode target + // The old decode texture will be rendered NEXT frame (not this frame!) + // + // Example with 3 textures: + // State: render=2, decode=0 + // - Frame 19 decodes into texture[0] (now contains frame 19) + // - Advance: render=2 (still showing frame 18!), decode=1 + // - Frame 20 decodes into texture[1] (now contains frame 20) + // - Advance: render=0 (now showing frame 19), decode=2 + // + // This ensures we NEVER render from a texture that was just written! 
+ m_decodeTextureIndex = m_renderTextureIndex; // Old render texture becomes next decode target + m_renderTextureIndex = prevDecode; // Old decode texture becomes new render texture + + LOGF_INFO("[RGBASurfaceBackend] AdvanceFrame: render %d->%d, decode %d->%d", + prevRender, m_renderTextureIndex, prevDecode, m_decodeTextureIndex); + + // Update SRV to point to new render texture + HRESULT hr = UpdateSRVForCurrentRenderTexture(); + if (FAILED(hr)) { + LOGF_ERROR("[RGBASurfaceBackend] Failed to update SRV: 0x%08X", hr); + } +} + +HRESULT RGBASurfaceBackend::UpdateSRVForCurrentRenderTexture() { + if (!m_srvHeap || !m_rgbaTextures[m_renderTextureIndex]) { return E_NOT_VALID_STATE; } - // Reset command allocator and list - HRESULT hr = m_copyCommandAllocator->Reset(); - if (FAILED(hr)) { - LOGF_ERROR("[RGBASurfaceBackend] Failed to reset copy command allocator: 0x%08X", hr); - return hr; - } + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + srvDesc.Texture2D.MipLevels = 1; - hr = m_copyCommandList->Reset(m_copyCommandAllocator.Get(), nullptr); - if (FAILED(hr)) { - LOGF_ERROR("[RGBASurfaceBackend] Failed to reset copy command list: 0x%08X", hr); - return hr; - } + CD3DX12_CPU_DESCRIPTOR_HANDLE srvHandle(m_srvHeap->GetCPUDescriptorHandleForHeapStart()); - // Transition source texture to COPY_SOURCE - D3D12_RESOURCE_BARRIER sourceBarrier = {}; - sourceBarrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - sourceBarrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; - sourceBarrier.Transition.pResource = sourceTexture; - sourceBarrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COMMON; // CUDA uses COMMON - sourceBarrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; - sourceBarrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - - // Transition staging texture to 
COPY_DEST (only if not first copy) - // First copy: staging texture already in COPY_DEST state (created with that state) - // Subsequent copies: staging texture in PIXEL_SHADER_RESOURCE state (from previous render) - if (m_firstCopy) { - // First copy: only transition source - m_copyCommandList->ResourceBarrier(1, &sourceBarrier); - m_firstCopy = false; - LOGF_DEBUG("[RGBASurfaceBackend] First copy: staging texture already in COPY_DEST state"); - } else { - // Subsequent copies: transition both staging and source - D3D12_RESOURCE_BARRIER stagingToCopyDest = {}; - stagingToCopyDest.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - stagingToCopyDest.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; - stagingToCopyDest.Transition.pResource = m_stagingTexture.Get(); - stagingToCopyDest.Transition.StateBefore = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; - stagingToCopyDest.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST; - stagingToCopyDest.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - - D3D12_RESOURCE_BARRIER barriers[] = { stagingToCopyDest, sourceBarrier }; - m_copyCommandList->ResourceBarrier(2, barriers); - } - - // Copy texture - m_copyCommandList->CopyResource(m_stagingTexture.Get(), sourceTexture); - - // Transition source back to COMMON (for CUDA) - sourceBarrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE; - sourceBarrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COMMON; - m_copyCommandList->ResourceBarrier(1, &sourceBarrier); - - // Transition staging texture to PIXEL_SHADER_RESOURCE for rendering - D3D12_RESOURCE_BARRIER stagingBarrier = {}; - stagingBarrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - stagingBarrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; - stagingBarrier.Transition.pResource = m_stagingTexture.Get(); - stagingBarrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; - stagingBarrier.Transition.StateAfter = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; - stagingBarrier.Transition.Subresource 
= D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - - m_copyCommandList->ResourceBarrier(1, &stagingBarrier); - - // Close command list - hr = m_copyCommandList->Close(); - if (FAILED(hr)) { - LOGF_ERROR("[RGBASurfaceBackend] Failed to close copy command list: 0x%08X", hr); - return hr; - } - - // Execute command list - ID3D12CommandList* commandLists[] = { m_copyCommandList.Get() }; - m_commandQueue->ExecuteCommandLists(1, commandLists); - - // Signal fence after copy submission - m_copyFenceValue++; - hr = m_commandQueue->Signal(m_copyFence.Get(), m_copyFenceValue); - if (FAILED(hr)) { - LOGF_ERROR("[RGBASurfaceBackend] Failed to signal copy fence: 0x%08X", hr); - return hr; - } - - LOGF_DEBUG("[RGBASurfaceBackend] GPU copy submitted (fence value: %llu)", m_copyFenceValue); - - return S_OK; -} - -HRESULT RGBASurfaceBackend::WaitForCopyCompletion() { - if (!m_copyFence || m_copyFenceEvent == nullptr) { - LOGF_ERROR("[RGBASurfaceBackend] Copy fence or event not initialized"); - return E_NOT_VALID_STATE; - } - - // Check if copy already completed - if (m_copyFence->GetCompletedValue() >= m_copyFenceValue) { - LOGF_DEBUG("[RGBASurfaceBackend] GPU copy already complete (fence value: %llu)", m_copyFenceValue); - return S_OK; // Already complete - } - - // Wait for GPU copy to complete - HRESULT hr = m_copyFence->SetEventOnCompletion( - m_copyFenceValue, - m_copyFenceEvent + m_device->CreateShaderResourceView( + m_rgbaTextures[m_renderTextureIndex].Get(), + &srvDesc, + srvHandle ); - if (FAILED(hr)) { - LOGF_ERROR("[RGBASurfaceBackend] SetEventOnCompletion failed: 0x%08X", hr); - return hr; - } - DWORD waitResult = WaitForSingleObject(m_copyFenceEvent, 5000); // 5 second timeout - if (waitResult != WAIT_OBJECT_0) { - LOGF_ERROR("[RGBASurfaceBackend] Wait failed or timed out: %lu", waitResult); - return E_FAIL; - } - - LOGF_DEBUG("[RGBASurfaceBackend] GPU copy completed (fence value: %llu)", m_copyFenceValue); + LOGF_DEBUG("[RGBASurfaceBackend] Updated SRV for render 
texture[%d]", m_renderTextureIndex); return S_OK; } diff --git a/vav2/platforms/windows/applications/vav2player/Vav2Player/src/Rendering/RGBASurfaceBackend.h b/vav2/platforms/windows/applications/vav2player/Vav2Player/src/Rendering/RGBASurfaceBackend.h index dc3f506..6857190 100644 --- a/vav2/platforms/windows/applications/vav2player/Vav2Player/src/Rendering/RGBASurfaceBackend.h +++ b/vav2/platforms/windows/applications/vav2player/Vav2Player/src/Rendering/RGBASurfaceBackend.h @@ -39,24 +39,25 @@ public: HRESULT CreateVideoTexture(uint32_t width, uint32_t height) override; ID3D12Resource* GetVideoTexture() const override { - return m_rgbaTextures[m_currentTextureIndex].Get(); + return m_rgbaTextures[m_renderTextureIndex].Get(); } - // Get next available texture for decoding (rotates buffer index) - ID3D12Resource* GetNextVideoTexture(); + // Triple buffering management + // Get current texture being rendered to screen + ID3D12Resource* GetCurrentRenderTexture() const; - // Get current rendering texture index - int GetCurrentTextureIndex() const { return m_currentTextureIndex; } + // Get next texture for decoding (not currently being rendered) + ID3D12Resource* GetNextDecodeTexture() const; - // Staging texture management for safe rendering - // Copy decoder texture to stable staging texture - HRESULT CopyToStagingTexture(ID3D12Resource* sourceTexture); + // Advance frame: switch render/decode indices after decoding completes (normal operation) + void AdvanceFrame(); - // Wait for GPU copy to complete - HRESULT WaitForCopyCompletion(); + // Advance decode only: move to next decode texture without changing render index (filling phase) + void AdvanceDecodeOnly(); - // Get stable staging texture for rendering (never overwritten by decoder) - ID3D12Resource* GetStagingTexture() const { return m_stagingTexture.Get(); } + // Get current indices for debugging + int GetRenderTextureIndex() const { return m_renderTextureIndex; } + int GetDecodeTextureIndex() const { return 
m_decodeTextureIndex; } HRESULT RenderToBackBuffer( const VavCoreVideoFrame& frame, @@ -73,27 +74,19 @@ private: ID3D12Device* m_device = nullptr; ID3D12CommandQueue* m_commandQueue = nullptr; - // RGBA video textures (triple buffering) - Decoder writes here + // RGBA video textures (triple buffering) // Format: DXGI_FORMAT_R8G8B8A8_UNORM // Flags: D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS (for CUDA Surface Object) // Layout: D3D12_TEXTURE_LAYOUT_UNKNOWN (tiled, handled by CUDA Surface Objects) + // + // Triple buffering roles: + // - m_renderTextureIndex: Currently rendering to screen (safe to read) + // - m_decodeTextureIndex: Next target for decoding (safe to write) + // - Third texture: Idle, previously decoded (ready to become render texture) static const int BUFFER_COUNT = 3; ComPtr m_rgbaTextures[BUFFER_COUNT]; - int m_currentTextureIndex = 0; - - // Staging texture - Stable copy for rendering (decoder never touches this) - // This texture is copied from m_rgbaTextures at 30fps - // Renderer always reads from this texture (safe from race conditions) - ComPtr m_stagingTexture; - - // Command allocator and list for async texture copy - ComPtr m_copyCommandAllocator; - ComPtr m_copyCommandList; - - // GPU synchronization for copy operations - ComPtr m_copyFence; - UINT64 m_copyFenceValue = 0; - HANDLE m_copyFenceEvent = nullptr; + int m_renderTextureIndex = 0; // Texture currently being rendered + int m_decodeTextureIndex = 0; // Texture for next decode operation // Graphics pipeline for simple RGBA texture sampling ComPtr m_rootSignature; @@ -122,7 +115,6 @@ private: uint32_t m_height = 0; // Container height uint32_t m_videoWidth = 0; uint32_t m_videoHeight = 0; - bool m_firstCopy = true; // Track first copy to handle initial state // Helper methods HRESULT CreateGraphicsResources(); @@ -131,6 +123,7 @@ private: HRESULT CreatePipelineState(); HRESULT CreateSrvHeap(); HRESULT UpdateConstantBuffer(); + HRESULT UpdateSRVForCurrentRenderTexture(); }; } // 
namespace Vav2Player diff --git a/vav2/platforms/windows/vavcore/src/Decoder/NVDECAV1Decoder.cpp b/vav2/platforms/windows/vavcore/src/Decoder/NVDECAV1Decoder.cpp index a1ca7f9..29ca8d4 100644 --- a/vav2/platforms/windows/vavcore/src/Decoder/NVDECAV1Decoder.cpp +++ b/vav2/platforms/windows/vavcore/src/Decoder/NVDECAV1Decoder.cpp @@ -1374,12 +1374,19 @@ int CUDAAPI NVDECAV1Decoder::HandlePictureDisplay(void* user_data, CUVIDPARSERDI slot.pts = timestamp; slot.ready_for_display.store(true); - // Enqueue picture_index for display queue (for B-frame reordering) + // Enqueue DisplayQueueEntry with PTS for B-frame reordering { std::lock_guard lock(decoder->m_displayMutex); - decoder->m_displayQueue.push(pic_idx); - LOGF_DEBUG("[HandlePictureDisplay] Pushed picture_index=%d (pts=%lld) to display queue (size: %zu)", - pic_idx, timestamp, decoder->m_displayQueue.size()); + + DisplayQueueEntry entry; + entry.frame_slot_index = pic_idx; + entry.pts = timestamp; + entry.submission_id = slot.submission_id; + + decoder->m_displayQueue.push(entry); + + LOGF_DEBUG("[HandlePictureDisplay] Pushed DisplayQueueEntry: slot=%d, pts=%lld, submission_id=%llu (queue size: %zu)", + pic_idx, timestamp, slot.submission_id, decoder->m_displayQueue.size()); } return 1; @@ -1572,11 +1579,17 @@ bool NVDECAV1Decoder::DecodeToSurface(const uint8_t* packet_data, size_t packet_ VideoFrame& output_frame) { LOGF_DEBUG("[DecodeToSurface] Called with target_type=%d", static_cast(target_type)); - if (!m_initialized || !packet_data) { - LOGF_ERROR("[DecodeToSurface] Not initialized or null packet_data"); + if (!m_initialized) { + LOGF_ERROR("[DecodeToSurface] Not initialized"); return false; } + // Handle NULL packet_data as flush mode (end of file reached) + if (!packet_data || packet_size == 0) { + LOGF_DEBUG("[DecodeToSurface] NULL packet - flush mode (end of file)"); + m_state = DecoderState::FLUSHING; + } + // Set CUDA context for current thread { std::lock_guard contextLock(m_cudaContextMutex); @@ 
-1617,13 +1630,25 @@ bool NVDECAV1Decoder::DecodeToSurface(const uint8_t* packet_data, size_t packet_ // ===== Step 2: Submit packet to NVDEC parser ===== // This triggers HandlePictureDecode (if new frame) and HandlePictureDisplay (always) CUVIDSOURCEDATAPACKET packet = {}; - packet.payload = packet_data; - packet.payload_size = static_cast(packet_size); - packet.flags = CUVID_PKT_ENDOFPICTURE; - packet.timestamp = 0; // Not used - NVDEC parser overwrites this value - LOGF_INFO("[DecodeToSurface] Calling cuvidParseVideoData (submission_id=%llu)...", - my_submission_id); + // Handle flush mode (NULL packet) + if (m_state == DecoderState::FLUSHING) { + // Flush mode: send end-of-stream packet to drain CUDA DPB + packet.flags = CUVID_PKT_ENDOFSTREAM; + packet.payload = nullptr; + packet.payload_size = 0; + LOGF_INFO("[DecodeToSurface] Flush mode: sending ENDOFSTREAM packet (submission_id=%llu)", + my_submission_id); + } else { + // Normal mode: send actual packet data + packet.payload = packet_data; + packet.payload_size = static_cast(packet_size); + packet.flags = CUVID_PKT_ENDOFPICTURE; + packet.timestamp = 0; // Not used - NVDEC parser overwrites this value + + LOGF_INFO("[DecodeToSurface] Normal mode: calling cuvidParseVideoData (submission_id=%llu)...", + my_submission_id); + } CUresult result = cuvidParseVideoData(m_parser, &packet); // cuvidParseVideoData is SYNCHRONOUS - all callbacks execute before return @@ -1647,33 +1672,63 @@ bool NVDECAV1Decoder::DecodeToSurface(const uint8_t* packet_data, size_t packet_ { std::lock_guard lock(m_displayMutex); - // During initial buffering, accept packets until display queue has frames - if (m_displayQueue.empty() && !m_initialBufferingComplete) { - LOGF_DEBUG("[DecodeToSurface] PACKET ACCEPTED - Initial buffering (queue size: 0)"); - return VAVCORE_PACKET_ACCEPTED; + // Transition from READY to BUFFERING on first packet + if (m_state == DecoderState::READY && m_displayQueue.empty()) { + m_state = 
DecoderState::BUFFERING; + LOGF_DEBUG("[DecodeToSurface] State transition: READY → BUFFERING"); } - // Once we have frames in queue, mark buffering complete - if (!m_displayQueue.empty() && !m_initialBufferingComplete) { - m_initialBufferingComplete = true; - LOGF_INFO("[DecodeToSurface] Initial buffering complete, queue size: %zu", m_displayQueue.size()); + // During initial buffering, accept packets until display queue has frames + if (m_displayQueue.empty() && m_state == DecoderState::BUFFERING) { + LOGF_DEBUG("[DecodeToSurface] PACKET ACCEPTED - Initial buffering (queue size: 0)"); + // Return false to indicate no frame yet (still buffering) + // The C API wrapper will convert this to VAVCORE_PACKET_ACCEPTED + return false; + } + + // Once we have frames in queue, transition to DECODING + if (!m_displayQueue.empty() && m_state == DecoderState::BUFFERING) { + m_state = DecoderState::DECODING; + LOGF_INFO("[DecodeToSurface] State transition: BUFFERING → DECODING (queue size: %zu)", m_displayQueue.size()); } } - // ===== Step 4: Pop from display queue to get picture_index ===== + // ===== Step 4: Pop from display queue to get picture_index (PTS-ordered) ===== + DisplayQueueEntry entry; int pic_idx = -1; { std::lock_guard lock(m_displayMutex); if (m_displayQueue.empty()) { - LOGF_ERROR("[DecodeToSurface] Display queue EMPTY after buffering complete (SHOULD NOT HAPPEN!)"); - return false; + // Check if we're in flush mode + if (m_state == DecoderState::FLUSHING) { + // Flush mode: no more frames in CUDA DPB + // Return false to indicate no frame, caller will check end-of-stream + LOGF_INFO("[DecodeToSurface] Flush complete: all frames drained from CUDA DPB"); + + // Release pending submission before returning + { + std::lock_guard lock2(m_submissionMutex); + m_pendingSubmissions[pending_idx].in_use.store(false); + } + + // Return false - the C API wrapper will convert this to VAVCORE_END_OF_STREAM + // when combined with file reader's IsEndOfFile() check + return 
false; + } else { + // Normal mode: queue empty unexpectedly + LOGF_ERROR("[DecodeToSurface] Display queue EMPTY after buffering complete (SHOULD NOT HAPPEN!)"); + return false; + } } - pic_idx = m_displayQueue.front(); + // Pop from priority queue (PTS-ordered) + entry = m_displayQueue.top(); m_displayQueue.pop(); - LOGF_INFO("[DecodeToSurface] Popped picture_index=%d from display queue (queue size now: %zu)", - pic_idx, m_displayQueue.size()); + pic_idx = entry.frame_slot_index; + + LOGF_INFO("[DecodeToSurface] Popped DisplayQueueEntry: slot=%d, pts=%lld, submission_id=%llu (queue size now: %zu)", + pic_idx, entry.pts, entry.submission_id, m_displayQueue.size()); } if (pic_idx < 0 || pic_idx >= RING_BUFFER_SIZE) { @@ -1697,21 +1752,37 @@ bool NVDECAV1Decoder::DecodeToSurface(const uint8_t* packet_data, size_t packet_ LOGF_DEBUG("[DecodeToSurface] Frame slot %d ready for display", pic_idx); - // ===== Step 6: Copy from CUDA DPB to target surface ===== - if (!CopyFromCUDADPB(pic_idx, target_type, target_surface, output_frame)) { + // ===== Step 6: Update target_surface for this frame ===== + // CRITICAL: SwapChain provides different target_surface each frame! + // Always update slot.target_surface to current one. 
+ LOGF_DEBUG("[DecodeToSurface] Updating target_surface: %p -> %p (pic_idx=%d)", + slot.target_surface, target_surface, pic_idx); + + if (target_surface == nullptr) { + LOGF_ERROR("[DecodeToSurface] ERROR: target_surface is NULL for pic_idx=%d", pic_idx); + return false; + } + + // Always update to current target_surface (SwapChain back buffer changes each frame) + slot.target_surface = target_surface; + slot.surface_type = target_type; + + // ===== Step 7: Copy from CUDA DPB to target surface ===== + // Now use slot.target_surface which is guaranteed to be valid (either from decode or late binding) + if (!CopyFromCUDADPB(pic_idx, slot.surface_type, slot.target_surface, output_frame)) { LOGF_ERROR("[DecodeToSurface] CopyFromCUDADPB failed for picture_index=%d", pic_idx); return false; } LOGF_INFO("[DecodeToSurface] SUCCESS - Frame rendered from CUDA DPB (pic_idx=%d)", pic_idx); - // ===== Step 7: Mark slot as reusable ===== + // ===== Step 8: Mark slot as reusable ===== slot.ready_for_display.store(false); slot.in_use.store(false); LOGF_DEBUG("[DecodeToSurface] Released frame slot %d", pic_idx); - // ===== Step 8: Release pending submission ===== + // ===== Step 9: Release pending submission ===== { std::lock_guard lock(m_submissionMutex); m_pendingSubmissions[pending_idx].in_use.store(false); diff --git a/vav2/platforms/windows/vavcore/src/Decoder/NVDECAV1Decoder.h b/vav2/platforms/windows/vavcore/src/Decoder/NVDECAV1Decoder.h index fb3ce4a..c5590a7 100644 --- a/vav2/platforms/windows/vavcore/src/Decoder/NVDECAV1Decoder.h +++ b/vav2/platforms/windows/vavcore/src/Decoder/NVDECAV1Decoder.h @@ -102,6 +102,14 @@ protected: void LogCUDAError(CUresult result, const std::string& operation) const; private: + // Decoder state enum (simple inline approach) + enum class DecoderState { + READY, // Initialized and ready for first packet + BUFFERING, // Initial buffering (0-15 frames) + DECODING, // Normal frame-by-frame decoding + FLUSHING // End-of-file reached, draining DPB 
+ }; + // CUDA and NVDEC objects CUcontext m_cuContext = nullptr; CUdevice m_cudaDevice = 0; @@ -224,7 +232,23 @@ private: void PollingThreadFunc(); // Polling thread function // Display-only packet handling (B-frame reordering) - std::queue<int> m_displayQueue; // Queue of picture_index from HandlePictureDisplay + // DisplayQueueEntry: Frame information for PTS-based reordering + struct DisplayQueueEntry { + int frame_slot_index; // FrameSlot index in m_frameSlots[] + int64_t pts; // Presentation timestamp + uint64_t submission_id; // Original submission order + }; + + // PTSComparator: PTS ascending order (Min-heap for earliest PTS first) + struct PTSComparator { + bool operator()(const DisplayQueueEntry& a, const DisplayQueueEntry& b) const { + return a.pts > b.pts; // Min-heap: smallest PTS has highest priority + } + }; + + std::priority_queue<DisplayQueueEntry, + std::vector<DisplayQueueEntry>, + PTSComparator> m_displayQueue; // PTS-based priority queue std::mutex m_displayMutex; // Helper methods @@ -245,8 +269,8 @@ private: bool CopyFromCUDADPB(int pic_idx, VavCoreSurfaceType target_type, void* target_surface, VideoFrame& output_frame); - // Initial buffering state - std::atomic<bool> m_initialBufferingComplete{false}; + // Decoder state (replaces m_initialBufferingComplete and m_endOfFileReached) + DecoderState m_state = DecoderState::READY; // NV12ToRGBAConverter reinitialization flag (set by HandleVideoSequence) std::atomic<bool> m_converterNeedsReinit{false};