diff --git a/vav2/docs/working/NVDEC_State_Machine_Refactoring.md b/vav2/docs/working/NVDEC_State_Machine_Refactoring.md
new file mode 100644
index 0000000..538b974
--- /dev/null
+++ b/vav2/docs/working/NVDEC_State_Machine_Refactoring.md
@@ -0,0 +1,416 @@
+# NVDEC Decoder State Machine Refactoring Design
+
+## Problem Statement
+
+The current `NVDECAV1Decoder::DecodeToSurface()` has excessive complexity:
+- **13+ state variables** tracked across multiple atomic flags and mutexes
+- **9+ conditional branches** with nested conditions
+- **~150 lines** in a single function
+- **High path complexity** (9 independent branches allow up to 2^9 = 512 distinct code paths)
+
+This makes the code:
+- Hard to maintain and debug
+- Difficult to test comprehensively
+- Prone to race conditions and edge cases
+- Challenging to extend with new features
+
+## Solution: State Machine Pattern
+
+### Core Design Principle
+
+**Consolidate all decoder state into a single enum** with clear transitions, replacing scattered atomic flags and conditional checks.
+
+### State Machine States
+
+```cpp
+enum class DecoderState {
+    UNINITIALIZED,              // Before Initialize() is called
+    READY,                      // Initialized and ready for decoding
+    BUFFERING,                  // Initial buffering (0-15 frames)
+    DECODING,                   // Normal frame-by-frame decoding
+    FLUSHING,                   // End-of-file reached, draining DPB
+    FLUSH_COMPLETE,             // All frames drained
+    ERROR                       // Unrecoverable error state
+};
+```
+
+### State Transitions
+
+```
+UNINITIALIZED → READY                 (Initialize() called successfully)
+READY → BUFFERING                     (First DecodeToSurface() call)
+BUFFERING → DECODING                  (Display queue has frames)
+BUFFERING/DECODING → FLUSHING         (End-of-file reached, NULL packet)
+FLUSHING → FLUSH_COMPLETE             (Display queue empty)
+BUFFERING/DECODING/FLUSHING/FLUSH_COMPLETE → READY  (Reset() called)
+* → ERROR                             (Any state can transition to ERROR on failure)
+ERROR → READY                         (Reset() called)
+```
+
+### State Machine Class
+
+```cpp
+class DecoderStateMachine {
+public:
+    DecoderStateMachine() : m_state(DecoderState::UNINITIALIZED) {}
+
+    // State queries
+    DecoderState GetState() const { return m_state.load(); }
+    bool IsState(DecoderState state) const { return m_state.load() == state; }
+    bool CanDecode() const {
+        auto state = m_state.load();
+        return state == DecoderState::READY ||
+               state == DecoderState::BUFFERING ||
+               state == DecoderState::DECODING ||
+               state == DecoderState::FLUSHING;
+    }
+
+    // State transitions
+    bool TransitionTo(DecoderState new_state) {
+        DecoderState expected = m_state.load();
+        if (IsValidTransition(expected, new_state)) {
+            m_state.store(new_state);
+            LOGF_DEBUG("[DecoderStateMachine] State transition: %s → %s",
+                       StateToString(expected), StateToString(new_state));
+            return true;
+        }
+        LOGF_ERROR("[DecoderStateMachine] Invalid transition: %s → %s",
+                   StateToString(expected), StateToString(new_state));
+        return false;
+    }
+
+    // Specific transition helpers
+    void OnInitializeSuccess() {
+        TransitionTo(DecoderState::READY);
+    }
+
+    void OnFirstPacket() {
+        if (IsState(DecoderState::READY)) {
+            TransitionTo(DecoderState::BUFFERING);
+        }
+    }
+
+    void OnBufferingComplete(size_t queue_size) {
+        if (IsState(DecoderState::BUFFERING) && queue_size > 0) {
+            TransitionTo(DecoderState::DECODING);
+        }
+    }
+
+    void OnEndOfFile() {
+        if (IsState(DecoderState::DECODING) || IsState(DecoderState::BUFFERING)) {
+            TransitionTo(DecoderState::FLUSHING);
+        }
+    }
+
+    void OnFlushComplete() {
+        if (IsState(DecoderState::FLUSHING)) {
+            TransitionTo(DecoderState::FLUSH_COMPLETE);
+        }
+    }
+
+    void OnError() {
+        TransitionTo(DecoderState::ERROR);
+    }
+
+    void OnReset() {
+        TransitionTo(DecoderState::READY);
+    }
+
+private:
+    std::atomic<DecoderState> m_state;
+
+    bool IsValidTransition(DecoderState from, DecoderState to) const {
+        // Define valid state transitions
+        switch (from) {
+            case DecoderState::UNINITIALIZED:
+                return to == DecoderState::READY || to == DecoderState::ERROR;
+            case DecoderState::READY:
+                return to == DecoderState::BUFFERING || to == DecoderState::ERROR;
+            case DecoderState::BUFFERING:
+                return to == DecoderState::DECODING || to == DecoderState::FLUSHING ||
+                       to == DecoderState::ERROR || to == DecoderState::READY;
+            case DecoderState::DECODING:
+                return to == DecoderState::FLUSHING || to == DecoderState::ERROR ||
+                       to == DecoderState::READY;
+            case DecoderState::FLUSHING:
+                return to == DecoderState::FLUSH_COMPLETE || to == DecoderState::ERROR ||
+                       to == DecoderState::READY;
+            case DecoderState::FLUSH_COMPLETE:
+                return to == DecoderState::READY || to == DecoderState::ERROR;
+            case DecoderState::ERROR:
+                return to == DecoderState::READY;
+            default:
+                return false;
+        }
+    }
+
+    const char* StateToString(DecoderState state) const {
+        switch (state) {
+            case DecoderState::UNINITIALIZED: return "UNINITIALIZED";
+            case DecoderState::READY: return "READY";
+            case DecoderState::BUFFERING: return "BUFFERING";
+            case DecoderState::DECODING: return "DECODING";
+            case DecoderState::FLUSHING: return "FLUSHING";
+            case DecoderState::FLUSH_COMPLETE: return "FLUSH_COMPLETE";
+            case DecoderState::ERROR: return "ERROR";
+            default: return "UNKNOWN";
+        }
+    }
+};
+```
+
+## Refactored DecodeToSurface()
+
+### Before (Complex Branching):
+
+```cpp
+bool DecodeToSurface(...) {
+    // Step 1: Check if initialized
+    if (!m_initialized) { ... }
+
+    // Handle NULL packet_data as flush mode
+    if (!packet_data || packet_size == 0) {
+        m_endOfFileReached = true;
+    }
+
+    // Step 2: Submit packet
+    if (m_endOfFileReached) {
+        // Flush mode logic
+    } else {
+        // Normal mode logic
+    }
+
+    // Step 3: Check initial buffering
+    if (m_displayQueue.empty() && !m_initialBufferingComplete) {
+        // Buffering logic
+    }
+    if (!m_displayQueue.empty() && !m_initialBufferingComplete) {
+        m_initialBufferingComplete = true;
+    }
+
+    // Step 4: Pop from display queue
+    if (m_displayQueue.empty()) {
+        if (m_endOfFileReached) {
+            // Flush complete logic
+        } else {
+            // Error - queue empty unexpectedly
+        }
+    }
+
+    // ... (continues; the full function is ~150 lines)
+}
+```
+
+### After (State Machine):
+
+```cpp
+bool DecodeToSurface(const uint8_t* packet_data, size_t packet_size,
+                     VavCoreSurfaceType target_type,
+                     void* target_surface,
+                     VideoFrame& output_frame) {
+    // State validation
+    if (!m_stateMachine.CanDecode()) {
+        LOGF_ERROR("[DecodeToSurface] Invalid state: %s",
+                   m_stateMachine.GetStateString());
+        return false;
+    }
+
+    // Handle end-of-file
+    if (!packet_data || packet_size == 0) {
+        return HandleFlushMode(output_frame);
+    }
+
+    // Delegate to state-specific handler
+    switch (m_stateMachine.GetState()) {
+        case DecoderState::READY:
+        case DecoderState::BUFFERING:
+            return HandleBufferingMode(packet_data, packet_size, target_type,
+                                        target_surface, output_frame);
+        case DecoderState::DECODING:
+            return HandleDecodingMode(packet_data, packet_size, target_type,
+                                       target_surface, output_frame);
+        default:
+            LOGF_ERROR("[DecodeToSurface] Unexpected state in DecodeToSurface");
+            return false;
+    }
+}
+```
+
+### Helper Methods (State-Specific Logic):
+
+```cpp
+bool HandleBufferingMode(const uint8_t* packet_data, size_t packet_size,
+                         VavCoreSurfaceType target_type,
+                         void* target_surface,
+                         VideoFrame& output_frame) {
+    // Transition to buffering on first packet
+    if (m_stateMachine.IsState(DecoderState::READY)) {
+        m_stateMachine.OnFirstPacket();
+    }
+
+    // Submit packet to NVDEC
+    if (!SubmitPacketToParser(packet_data, packet_size)) {
+        return false;
+    }
+
+    // Check if buffering is complete
+    {
+        std::lock_guard<std::mutex> lock(m_displayMutex);
+        if (m_displayQueue.empty()) {
+            // Still buffering
+            return false; // VAVCORE_PACKET_ACCEPTED
+        } else {
+            // Buffering complete
+            m_stateMachine.OnBufferingComplete(m_displayQueue.size());
+            // Fall through to decode the first frame
+        }
+    }
+
+    return RetrieveAndRenderFrame(target_type, target_surface, output_frame);
+}
+
+bool HandleDecodingMode(const uint8_t* packet_data, size_t packet_size,
+                        VavCoreSurfaceType target_type,
+                        void* target_surface,
+                        VideoFrame& output_frame) {
+    // Submit packet to NVDEC
+    if (!SubmitPacketToParser(packet_data, packet_size)) {
+        return false;
+    }
+
+    // Retrieve and render frame
+    return RetrieveAndRenderFrame(target_type, target_surface, output_frame);
+}
+
+bool HandleFlushMode(VideoFrame& output_frame) {
+    // Transition to flushing if not already
+    if (!m_stateMachine.IsState(DecoderState::FLUSHING)) {
+        m_stateMachine.OnEndOfFile();
+    }
+
+    // Submit end-of-stream packet
+    if (!SubmitFlushPacket()) {
+        return false;
+    }
+
+    // Check if flush is complete
+    {
+        std::lock_guard<std::mutex> lock(m_displayMutex);
+        if (m_displayQueue.empty()) {
+            m_stateMachine.OnFlushComplete();
+            return false; // VAVCORE_END_OF_STREAM
+        }
+    }
+
+    // Still have frames to drain
+    return RetrieveAndRenderFrame(...);
+}
+```
+
+## Removed/Consolidated State Variables
+
+### Before:
+```cpp
+// 13+ state variables
+std::atomic<bool> m_initialBufferingComplete{false};
+std::atomic<bool> m_endOfFileReached{false};
+std::atomic<bool> m_converterNeedsReinit{false};
+std::atomic<uint64_t> m_submissionCounter{0};
+std::atomic<uint64_t> m_returnCounter{0};
+std::atomic<bool> m_pollingRunning{false};
+std::mutex m_frameQueueMutex;
+std::mutex m_cudaContextMutex;
+std::mutex m_submissionMutex;
+std::mutex m_displayMutex;
+std::queue<int> m_displayQueue;
+FrameSlot m_frameSlots[16]; // Each has 5 atomic flags
+```
+
+### After:
+```cpp
+// Single state machine + minimal supporting variables
+DecoderStateMachine m_stateMachine;
+
+// Still needed (but usage clarified by state machine):
+std::mutex m_displayMutex;
+std::queue<int> m_displayQueue;
+FrameSlot m_frameSlots[16]; // Frame-specific state (not global decoder state)
+std::atomic<uint64_t> m_submissionCounter{0}; // Submission ordering
+std::mutex m_submissionMutex;
+```
+
+**Eliminated:**
+- `m_initialBufferingComplete` → Replaced by `DecoderState::BUFFERING` vs `DECODING`
+- `m_endOfFileReached` → Replaced by `DecoderState::FLUSHING`
+- `m_converterNeedsReinit` → Moved to NV12ToRGBAConverter internal state
+
+## Benefits
+
+### 1. Complexity Reduction
+- **13+ state variables → 1 state machine** with 7 well-defined states (the display queue, its mutex, and the submission counter/mutex remain as supporting members)
+- **9+ conditional branches → State-driven dispatch** (1 switch statement)
+- **~150 lines → ~40 lines** per state handler (modular functions)
+
+### 2. Improved Maintainability
+- **Clear state transitions** with validation (illegal transitions are rejected and logged)
+- **State-specific logic** isolated in dedicated functions
+- **Easy debugging** with state transition logging
+
+### 3. Better Testability
+- **Test individual states** independently
+- **Verify state transitions** explicitly
+- **Mock state machine** for unit tests
+
+### 4. Enhanced Readability
+- **Self-documenting code** (state names describe decoder status)
+- **Linear flow** instead of nested conditions
+- **Clear intent** from state-specific handler names
+
+## Implementation Plan
+
+### Phase 1: Create State Machine Class (CURRENT)
+- [x] Design state machine enum and transitions
+- [ ] Implement DecoderStateMachine class
+- [ ] Add state transition logging
+
+### Phase 2: Extract Helper Methods
+- [ ] Create `SubmitPacketToParser()`
+- [ ] Create `RetrieveAndRenderFrame()`
+- [ ] Create `SubmitFlushPacket()`
+
+### Phase 3: Refactor DecodeToSurface()
+- [ ] Replace state flags with state machine
+- [ ] Implement `HandleBufferingMode()`
+- [ ] Implement `HandleDecodingMode()`
+- [ ] Implement `HandleFlushMode()`
+
+### Phase 4: Update Other Methods
+- [ ] Update `Initialize()` → call `m_stateMachine.OnInitializeSuccess()`
+- [ ] Update `Reset()` → call `m_stateMachine.OnReset()`
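+- [ ] Update `Cleanup()` → call `m_stateMachine.TransitionTo(DecoderState::UNINITIALIZED)` (requires adding `* → UNINITIALIZED` to `IsValidTransition()`, which currently rejects every transition to `UNINITIALIZED`)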
+
+### Phase 5: Remove Obsolete State Variables
+- [ ] Remove `m_initialBufferingComplete`
+- [ ] Remove `m_endOfFileReached`
+- [ ] Verify no regressions with existing tests
+
+## Testing Strategy
+
+### Unit Tests
+- State transition validation (legal/illegal transitions)
+- State-specific handler behavior
+- Error state recovery
+
+### Integration Tests
+- Full decode pipeline with state transitions
+- Edge cases (empty files, flush mode, errors)
+- Multi-threaded decoding with state machine
+
+### Regression Tests
+- Existing RedSurfaceNVDECTest
+- Vav2PlayerHeadless tests
+- Vav2Player GUI tests
+
+---
+**Status**: Design complete, implementation in progress
+**Last Updated**: 2025-10-11
diff --git a/vav2/docs/working/Triple_Buffering_Refactoring_Design.md b/vav2/docs/working/Triple_Buffering_Refactoring_Design.md
new file mode 100644
index 0000000..650da60
--- /dev/null
+++ b/vav2/docs/working/Triple_Buffering_Refactoring_Design.md
@@ -0,0 +1,1139 @@
+# Triple Buffering Refactoring Design
+
+**문서 작성일**: 2025-10-10
+**작성자**: Claude Code
+**상태**: 설계 완료, 구현 대기 중
+
+## 📋 목차
+1. [개요](#개요)
+2. [현재 문제점](#현재-문제점)
+3. [새로운 설계](#새로운-설계)
+4. [구현 계획](#구현-계획)
+5. [테스트 계획](#테스트-계획)
+
+---
+
+## 개요
+
+### 목적
+현재 staging texture 기반 복사 구조를 제거하고, 진정한 triple buffering 구조로 리팩토링하여 30fps 디코딩 + 60fps 렌더링을 안정적으로 지원
+
+### 핵심 아이디어
+- **Staging texture 제거**: 불필요한 복사 및 동기화 오버헤드 제거
+- **명확한 버퍼 역할 분리**: Render, Decode, Idle 3가지 상태로 텍스처 관리
+- **33ms 간격 프레임 전환**: 디코딩 완료 시 렌더링/디코딩 인덱스 동시 전환
+
+### 기대 효과
+- ✅ 코드 복잡도 감소 (staging texture 관련 ~200줄 제거)
+- ✅ 명확한 소유권 (렌더링 중인 텍스처는 디코더가 건드리지 않음)
+- ✅ 자연스러운 동기화 (33ms 간격으로 자동 동기화)
+- ✅ NULL 텍스처 문제 근본 해결
+
+---
+
+## 현재 문제점
+
+### 1. Staging Texture 기반 복사 구조의 문제
+```
+DecodeToSurface → texture[0/1/2] → CopyToStagingTexture → staging → Render
+                     (33ms)              (GPU copy)         (읽기 전용)
+```
+
+**문제점**:
+- **불필요한 복사**: GPU 메모리 간 복사 오버헤드
+- **복잡한 동기화**: CopyToStagingTexture + WaitForCopyCompletion
+- **애매한 소유권**: 원본 텍스처가 언제 재사용 가능한지 불명확
+- **NULL 텍스처 버그**: Frame 19에서 texture[0]이 NULL이 되는 문제
+
+### 2. 현재 코드의 동기화 구조
+```cpp
+// FrameProcessor.cpp (lines 107-139)
+if (m_framesDecoded >= 16) {
+    ID3D12Resource* rgbaTexture = m_renderer->GetNextRGBATextureForCUDAInterop();
+    result = vavcore_decode_to_surface(player, VAVCORE_SURFACE_D3D12_RESOURCE, rgbaTexture, &vavFrame);
+
+    if (result == VAVCORE_SUCCESS) {
+        auto backend = m_renderer->GetRGBASurfaceBackend();
+        if (backend) {
+            // 문제: 여기서 staging으로 복사하는데 원본 텍스처 재사용 타이밍이 애매함
+            HRESULT hr = backend->CopyToStagingTexture(rgbaTexture);
+            hr = backend->WaitForCopyCompletion();
+        }
+    }
+}
+```
+
+### 3. Triple Buffering 순환 문제
+```
+Frame 16: texture[0] → staging → render
+Frame 17: texture[1] → staging → render (33ms)
+Frame 18: texture[2] → staging → render (33ms)
+Frame 19: texture[0] 재사용 시도 → NULL ❌
+```
+
+---
+
+## 새로운 설계
+
+### 1. Triple Buffering 구조
+
+#### 버퍼 상태 정의
+```
+texture[0]: RENDERING  (현재 화면에 출력 중)
+texture[1]: IDLE       (대기 중, 이미 디코딩 완료)
+texture[2]: DECODING   (현재 디코딩 작업 중)
+```
+
+#### 프레임 전환 시퀀스
+```
+초기화 단계 (Frames 0-15):
+  DecodeToSurface(NULL) × 16번 → VavCore 내부 CUDA DPB 채우기
+
+Triple Buffer 채우기 (Frames 16-18):
+  Frame 16: DecodeToSurface → texture[0]
+  Frame 17: DecodeToSurface → texture[1]
+  Frame 18: DecodeToSurface → texture[2]
+
+정상 디코딩/렌더링 (Frame 19+):
+  [State: R=0, D=0]
+  Render: texture[0] 화면 출력 (60fps로 여러 번)
+
+  33ms 후 디코딩 완료...
+  [State: R=1, D=1]
+  AdvanceFrame() 호출:
+    - m_renderTextureIndex = 1 (texture[1]로 렌더링 전환)
+    - m_decodeTextureIndex = 1 (texture[1]을 다음 디코딩 타겟으로 — ⚠️ 검토 필요: 렌더링 중인 텍스처와 동일한 인덱스)
+  DecodeToSurface → texture[0] 덮어쓰기 (이제 안전; 단, 디코딩 인덱스가 렌더링 인덱스보다 한 칸 뒤인 0이어야 성립)
+  Render: texture[1] 화면 출력
+
+  33ms 후 디코딩 완료...
+  [State: R=2, D=2]
+  AdvanceFrame() 호출:
+    - m_renderTextureIndex = 2
+    - m_decodeTextureIndex = 2
+  DecodeToSurface → texture[1] 덮어쓰기
+  Render: texture[2] 화면 출력
+```
+
+### 2. 클래스 구조 변경
+
+#### RGBASurfaceBackend.h 변경사항
+```cpp
+class RGBASurfaceBackend : public IVideoBackend {
+public:
+    // 삭제될 메서드
+    // ❌ ID3D12Resource* GetNextVideoTexture();
+    // ❌ ID3D12Resource* GetStagingTexture() const;
+    // ❌ HRESULT CopyToStagingTexture(ID3D12Resource* sourceTexture);
+    // ❌ HRESULT WaitForCopyCompletion();
+
+    // 새로운 메서드
+    // ✅ GetCurrentRenderTexture() - 렌더링에서 사용
+    ID3D12Resource* GetCurrentRenderTexture() const {
+        return m_rgbaTextures[m_renderTextureIndex].Get();
+    }
+
+    // ✅ GetNextDecodeTexture() - 디코딩에서 사용
+    ID3D12Resource* GetNextDecodeTexture() const {
+        return m_rgbaTextures[m_decodeTextureIndex].Get();
+    }
+
+    // ✅ AdvanceFrame() - 33ms마다 호출 (디코딩 완료 시)
+    void AdvanceFrame() {
+        // 렌더링을 다음 텍스처로 전환
+        m_renderTextureIndex = (m_renderTextureIndex + 1) % BUFFER_COUNT;
+        // 디코딩도 다음 텍스처로 전환 (이전 렌더링 텍스처를 덮어씀)
+        m_decodeTextureIndex = (m_decodeTextureIndex + 1) % BUFFER_COUNT;
+
+        LOGF_INFO("[RGBASurfaceBackend] AdvanceFrame: render=%d, decode=%d",
+                  m_renderTextureIndex, m_decodeTextureIndex);
+    }
+
+    // ✅ GetRenderTextureIndex() - 디버깅용
+    int GetRenderTextureIndex() const { return m_renderTextureIndex; }
+    int GetDecodeTextureIndex() const { return m_decodeTextureIndex; }
+
+private:
+    // 삭제될 멤버 변수
+    // ❌ ComPtr<ID3D12Resource> m_stagingTexture;
+    // ❌ ComPtr<ID3D12CommandAllocator> m_copyCommandAllocator;
+    // ❌ ComPtr<ID3D12GraphicsCommandList> m_copyCommandList;
+    // ❌ ComPtr<ID3D12Fence> m_copyFence;
+    // ❌ UINT64 m_copyFenceValue = 0;
+    // ❌ HANDLE m_copyFenceEvent = nullptr;
+    // ❌ bool m_firstCopy = true;
+    // ❌ int m_currentTextureIndex = 0;
+
+    // 새로운 멤버 변수
+    // ✅ 렌더링용 텍스처 인덱스 (현재 화면에 출력 중)
+    int m_renderTextureIndex = 0;
+
+    // ✅ 디코딩용 텍스처 인덱스 (다음 디코딩 타겟)
+    int m_decodeTextureIndex = 0;
+
+    // 기존 유지
+    ComPtr<ID3D12Resource> m_rgbaTextures[BUFFER_COUNT];  // 3개 텍스처 유지
+};
+```
+
+#### FrameProcessor.cpp 변경사항
+```cpp
+bool FrameProcessor::ProcessFrame(VavCorePlayer* player,
+                                  std::function<void(bool success)> onComplete)
+{
+    // ... (기존 초기 검증 코드 유지)
+
+    VavCoreVideoFrame vavFrame = {};
+    VavCoreResult result;
+
+    if (m_decoderType == VAVCORE_DECODER_DAV1D) {
+        // DAV1D: CPU 디코딩 (기존 로직 유지)
+        result = vavcore_decode_next_frame(player, &vavFrame);
+        if (result == VAVCORE_SUCCESS) {
+            vavFrame.surface_type = VAVCORE_SURFACE_CPU;
+        }
+    } else {
+        // NVDEC/Hardware: D3D12 surface decoding with triple buffering
+
+        // Phase 1: Initial 16-frame buffering (NULL surface)
+        if (m_framesDecoded < 16) {
+            LOGF_DEBUG("[FrameProcessor] Initial buffering phase: frame %llu/16", m_framesDecoded.load());
+            result = vavcore_decode_to_surface(player, VAVCORE_SURFACE_D3D12_RESOURCE, nullptr, &vavFrame);
+        }
+        // Phase 2: Fill triple buffer (frames 16, 17, 18)
+        else if (m_framesDecoded < 19) {
+            LOGF_DEBUG("[FrameProcessor] Filling triple buffer: frame %llu/19", m_framesDecoded.load());
+
+            auto backend = m_renderer->GetRGBASurfaceBackend();
+            ID3D12Resource* decodeTexture = backend->GetNextDecodeTexture();
+
+            result = vavcore_decode_to_surface(player, VAVCORE_SURFACE_D3D12_RESOURCE, decodeTexture, &vavFrame);
+
+            if (result == VAVCORE_SUCCESS) {
+                // Triple buffer 채우기 완료 시 인덱스 전진
+                backend->AdvanceFrame();
+            }
+        }
+        // Phase 3: Normal decoding (frame 19+)
+        else {
+            auto backend = m_renderer->GetRGBASurfaceBackend();
+
+            // 다음 디코딩용 텍스처 가져오기 (현재 렌더링 중이 아닌 텍스처)
+            ID3D12Resource* decodeTexture = backend->GetNextDecodeTexture();
+
+            LOGF_DEBUG("[FrameProcessor] Normal decoding: decode_idx=%d, render_idx=%d",
+                      backend->GetDecodeTextureIndex(), backend->GetRenderTextureIndex());
+
+            result = vavcore_decode_to_surface(player, VAVCORE_SURFACE_D3D12_RESOURCE, decodeTexture, &vavFrame);
+
+            if (result == VAVCORE_SUCCESS) {
+                // 디코딩 완료 - 렌더링/디코딩 인덱스 전환
+                backend->AdvanceFrame();
+                LOGF_INFO("[FrameProcessor] Frame decoded, advanced to render_idx=%d",
+                         backend->GetRenderTextureIndex());
+            }
+        }
+    }
+
+    // Phase 1 & 2: Buffering 단계는 렌더링 안 함
+    if (m_framesDecoded < 19) {
+        if (result == VAVCORE_PACKET_ACCEPTED || result == VAVCORE_SUCCESS) {
+            m_framesDecoded++;
+            m_frameProcessing.store(false);
+            if (onComplete) onComplete(true);
+            return true;
+        }
+    }
+
+    // Phase 3: 정상 렌더링 (기존 로직 유지)
+    if (result != VAVCORE_SUCCESS) {
+        // ... (기존 에러 처리)
+    }
+
+    m_framesDecoded++;
+
+    // 렌더링 큐에 추가 (기존 로직 유지)
+    bool enqueued = m_dispatcherQueue.TryEnqueue([this, vavFrame, onComplete, player, processStart]() {
+        // ... (기존 렌더링 로직)
+    });
+
+    return true;
+}
+```
+
+#### D3D12VideoRenderer.cpp 변경사항
+```cpp
+HRESULT D3D12VideoRenderer::RenderVideoFrame(const VavCoreVideoFrame& frame, VavCorePlayer* player)
+{
+    // ... (기존 초기 검증 코드)
+
+    if (frame.surface_type == VAVCORE_SURFACE_D3D12_RESOURCE) {
+        auto backend = m_rgbaSurfaceBackend.get();
+        if (!backend) {
+            return E_NOT_VALID_STATE;
+        }
+
+        // 현재 렌더링용 텍스처 사용 (이미 디코딩 완료된 안정적인 텍스처)
+        ID3D12Resource* renderTexture = backend->GetCurrentRenderTexture();
+
+        if (!renderTexture) {
+            LOGF_ERROR("[D3D12VideoRenderer] Current render texture is NULL!");
+            return E_INVALIDARG;
+        }
+
+        LOGF_DEBUG("[D3D12VideoRenderer] Rendering texture at index %d",
+                   backend->GetRenderTextureIndex());
+
+        // SRV 업데이트 (렌더링용 텍스처로)
+        UpdateSRVForTexture(renderTexture);
+
+        // 렌더링 수행 (기존 RenderToBackBuffer 로직)
+        hr = backend->RenderToBackBuffer(frame, backBuffer, commandList.Get(), rtvHandle);
+    }
+
+    // ... (나머지 기존 로직)
+}
+```
+
+### 3. CreateSrvHeap() 수정
+```cpp
+HRESULT RGBASurfaceBackend::CreateSrvHeap() {
+    // SRV를 동적으로 업데이트하거나, 3개 텍스처 모두 SRV 생성 후 인덱스로 선택
+
+    // Option A: 단일 SRV, 매 프레임 업데이트 (간단)
+    D3D12_DESCRIPTOR_HEAP_DESC srvHeapDesc = {};
+    srvHeapDesc.NumDescriptors = 1;
+    srvHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
+    srvHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
+
+    HRESULT hr = m_device->CreateDescriptorHeap(&srvHeapDesc, IID_PPV_ARGS(&m_srvHeap));
+    if (FAILED(hr)) return hr;
+
+    // 초기에는 texture[0]의 SRV 생성
+    UpdateSRVForCurrentRenderTexture();
+
+    return S_OK;
+}
+
+// 새로운 헬퍼 메서드
+HRESULT RGBASurfaceBackend::UpdateSRVForCurrentRenderTexture() {
+    D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
+    srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
+    srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
+    srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
+    srvDesc.Texture2D.MipLevels = 1;
+
+    CD3DX12_CPU_DESCRIPTOR_HANDLE srvHandle(m_srvHeap->GetCPUDescriptorHandleForHeapStart());
+
+    m_device->CreateShaderResourceView(
+        m_rgbaTextures[m_renderTextureIndex].Get(),
+        &srvDesc,
+        srvHandle
+    );
+
+    return S_OK;
+}
+```
+
+---
+
+## 구현 계획
+
+### Phase 1: RGBASurfaceBackend 리팩토링
+
+#### Step 1.1: 멤버 변수 정리
+- [ ] `RGBASurfaceBackend.h`: Staging texture 관련 멤버 변수 제거
+- [ ] `RGBASurfaceBackend.h`: 새로운 인덱스 변수 추가 (`m_renderTextureIndex`, `m_decodeTextureIndex`)
+- [ ] `RGBASurfaceBackend.h`: 새로운 메서드 선언 추가
+
+**파일**: `D:\Project\video-av1\vav2\platforms\windows\applications\vav2player\Vav2Player\src\Rendering\RGBASurfaceBackend.h`
+
+**제거할 코드** (lines 84-96):
+```cpp
+// ❌ 제거
+ComPtr<ID3D12Resource> m_stagingTexture;
+ComPtr<ID3D12CommandAllocator> m_copyCommandAllocator;
+ComPtr<ID3D12GraphicsCommandList> m_copyCommandList;
+ComPtr<ID3D12Fence> m_copyFence;
+UINT64 m_copyFenceValue = 0;
+HANDLE m_copyFenceEvent = nullptr;
+int m_currentTextureIndex = 0;
+bool m_firstCopy = true;
+```
+
+**추가할 코드**:
+```cpp
+// ✅ 추가
+int m_renderTextureIndex = 0;  // Current texture being rendered
+int m_decodeTextureIndex = 0;  // Next texture for decoding
+```
+
+#### Step 1.2: 메서드 시그니처 변경
+- [ ] `RGBASurfaceBackend.h`: 삭제될 메서드 선언 제거
+- [ ] `RGBASurfaceBackend.h`: 새로운 메서드 선언 추가
+
+**파일**: `D:\Project\video-av1\vav2\platforms\windows\applications\vav2player\Vav2Player\src\Rendering\RGBASurfaceBackend.h`
+
+**제거할 메서드 선언** (lines 45-56):
+```cpp
+// ❌ 제거
+ID3D12Resource* GetNextVideoTexture();
+HRESULT CopyToStagingTexture(ID3D12Resource* sourceTexture);
+HRESULT WaitForCopyCompletion();
+ID3D12Resource* GetStagingTexture() const;
+int GetCurrentTextureIndex() const;
+```
+
+**추가할 메서드 선언**:
+```cpp
+// ✅ 추가
+ID3D12Resource* GetCurrentRenderTexture() const;
+ID3D12Resource* GetNextDecodeTexture() const;
+void AdvanceFrame();
+int GetRenderTextureIndex() const;
+int GetDecodeTextureIndex() const;
+HRESULT UpdateSRVForCurrentRenderTexture();
+```
+
+#### Step 1.3: 구현 파일 수정
+- [ ] `RGBASurfaceBackend.cpp`: `CreateVideoTexture()` 수정 (staging texture 생성 제거)
+- [ ] `RGBASurfaceBackend.cpp`: `Shutdown()` 수정 (staging texture 정리 제거)
+- [ ] `RGBASurfaceBackend.cpp`: 삭제될 메서드 구현 제거
+- [ ] `RGBASurfaceBackend.cpp`: 새로운 메서드 구현 추가
+
+**파일**: `D:\Project\video-av1\vav2\platforms\windows\applications\vav2player\Vav2Player\src\Rendering\RGBASurfaceBackend.cpp`
+
+**수정 범위**:
+- Lines 80-223: `CreateVideoTexture()` - staging texture 생성 코드 제거
+- Lines 44-78: `Shutdown()` - staging texture 정리 코드 제거
+- Lines 608-627: `GetNextVideoTexture()` - 완전 제거
+- Lines 628-696: `CopyToStagingTexture()` - 완전 제거
+- Lines 698-744: `WaitForCopyCompletion()` - 완전 제거
+
+**새로 추가할 구현**:
+```cpp
+ID3D12Resource* RGBASurfaceBackend::GetCurrentRenderTexture() const {
+    return m_rgbaTextures[m_renderTextureIndex].Get();
+}
+
+ID3D12Resource* RGBASurfaceBackend::GetNextDecodeTexture() const {
+    return m_rgbaTextures[m_decodeTextureIndex].Get();
+}
+
+void RGBASurfaceBackend::AdvanceFrame() {
+    int prevRender = m_renderTextureIndex;
+    int prevDecode = m_decodeTextureIndex;
+
+    m_renderTextureIndex = (m_renderTextureIndex + 1) % BUFFER_COUNT;
+    m_decodeTextureIndex = (m_decodeTextureIndex + 1) % BUFFER_COUNT;
+
+    LOGF_INFO("[RGBASurfaceBackend] AdvanceFrame: render %d->%d, decode %d->%d",
+              prevRender, m_renderTextureIndex, prevDecode, m_decodeTextureIndex);
+
+    // Update SRV to point to new render texture
+    UpdateSRVForCurrentRenderTexture();
+}
+
+int RGBASurfaceBackend::GetRenderTextureIndex() const {
+    return m_renderTextureIndex;
+}
+
+int RGBASurfaceBackend::GetDecodeTextureIndex() const {
+    return m_decodeTextureIndex;
+}
+
+HRESULT RGBASurfaceBackend::UpdateSRVForCurrentRenderTexture() {
+    if (!m_srvHeap || !m_rgbaTextures[m_renderTextureIndex]) {
+        return E_NOT_VALID_STATE;
+    }
+
+    D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
+    srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
+    srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
+    srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
+    srvDesc.Texture2D.MipLevels = 1;
+
+    CD3DX12_CPU_DESCRIPTOR_HANDLE srvHandle(m_srvHeap->GetCPUDescriptorHandleForHeapStart());
+
+    m_device->CreateShaderResourceView(
+        m_rgbaTextures[m_renderTextureIndex].Get(),
+        &srvDesc,
+        srvHandle
+    );
+
+    LOGF_DEBUG("[RGBASurfaceBackend] Updated SRV for render texture[%d]", m_renderTextureIndex);
+
+    return S_OK;
+}
+```
+
+#### Step 1.4: CreateVideoTexture() 간소화
+- [ ] Staging texture 생성 코드 제거 (lines 143-223)
+- [ ] Copy command allocator/list 생성 제거
+- [ ] Copy fence 생성 제거
+
+**수정 후 CreateVideoTexture() 구조**:
+```cpp
+HRESULT RGBASurfaceBackend::CreateVideoTexture(uint32_t width, uint32_t height) {
+    LOGF_INFO("[RGBASurfaceBackend] CreateVideoTexture called: %ux%u", width, height);
+    m_videoWidth = width;
+    m_videoHeight = height;
+
+    HRESULT hr = S_OK;
+
+    // Create RGBA texture descriptor for CUDA Surface Object write
+    D3D12_RESOURCE_DESC rgbaTextureDesc = {};
+    // ... (기존 descriptor 설정)
+
+    D3D12_HEAP_PROPERTIES defaultHeapProps = {};
+    defaultHeapProps.Type = D3D12_HEAP_TYPE_DEFAULT;
+
+    // Create triple-buffered textures
+    for (int i = 0; i < BUFFER_COUNT; i++) {
+        hr = m_device->CreateCommittedResource(
+            &defaultHeapProps,
+            D3D12_HEAP_FLAG_SHARED,
+            &rgbaTextureDesc,
+            D3D12_RESOURCE_STATE_COMMON,
+            nullptr,
+            IID_PPV_ARGS(&m_rgbaTextures[i])
+        );
+
+        if (FAILED(hr)) {
+            LOGF_ERROR("[RGBASurfaceBackend] Failed to create RGBA texture[%d]: 0x%08X", i, hr);
+            for (int j = 0; j < i; j++) {
+                m_rgbaTextures[j].Reset();
+            }
+            return hr;
+        }
+
+        LOGF_INFO("[RGBASurfaceBackend] Created RGBA texture[%d]: %p", i, m_rgbaTextures[i].Get());
+    }
+
+    m_renderTextureIndex = 0;
+    m_decodeTextureIndex = 0;
+
+    LOGF_INFO("[RGBASurfaceBackend] All %d RGBA textures created successfully", BUFFER_COUNT);
+
+    // Create SRV for rendering
+    hr = CreateSrvHeap();
+    if (FAILED(hr)) {
+        return hr;
+    }
+
+    // Update constant buffer
+    hr = UpdateConstantBuffer();
+    if (FAILED(hr)) {
+        return hr;
+    }
+
+    return S_OK;
+}
+```
+
+#### Step 1.5: CreateSrvHeap() 수정
+- [ ] Staging texture 대신 현재 render texture로 SRV 생성
+- [ ] 초기화 시 texture[0]의 SRV 생성
+
+**파일**: `D:\Project\video-av1\vav2\platforms\windows\applications\vav2player\Vav2Player\src\Rendering\RGBASurfaceBackend.cpp`
+
+**수정 위치**: Lines 379-409
+
+**수정 후 코드**:
+```cpp
+HRESULT RGBASurfaceBackend::CreateSrvHeap() {
+    // Creates a shader-visible CBV/SRV/UAV descriptor heap with a single slot, then fills that slot via UpdateSRVForCurrentRenderTexture(); returns the first failing HRESULT, or S_OK
+    D3D12_DESCRIPTOR_HEAP_DESC srvHeapDesc = {};
+    srvHeapDesc.NumDescriptors = 1;  // single slot, used for the current render texture
+    srvHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
+    srvHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
+
+    HRESULT hr = m_device->CreateDescriptorHeap(&srvHeapDesc, IID_PPV_ARGS(&m_srvHeap));
+    if (FAILED(hr)) {
+        return hr;  // heap creation failed; the error is passed to the caller unchanged
+    }
+
+    // Create initial SRV for texture[0] (m_renderTextureIndex = 0)
+    hr = UpdateSRVForCurrentRenderTexture();
+    if (FAILED(hr)) {
+        return hr;  // SRV creation failed; the heap created above is left in m_srvHeap
+    }
+
+    LOGF_INFO("[RGBASurfaceBackend] Created SRV heap for render texture");
+
+    return S_OK;
+}
+```
+
+#### Step 1.6: RenderToBackBuffer() 수정
+- [ ] Staging texture 참조 제거
+- [ ] 현재 render texture 사용하도록 변경
+- [ ] Staging texture 관련 주석 제거
+
+**파일**: `D:\Project\video-av1\vav2\platforms\windows\applications\vav2player\Vav2Player\src\Rendering\RGBASurfaceBackend.cpp`
+
+**수정 위치**: Lines 225-301 (RenderToBackBuffer 함수 전체)
+
+**주요 변경사항**:
+```cpp
+// Line 233-238: 기존 staging texture 사용 제거
+// ❌ 제거
+ID3D12Resource* renderTexture = m_stagingTexture.Get();
+if (!renderTexture) {
+    LOGF_ERROR("[RGBASurfaceBackend] RenderToBackBuffer: staging texture is NULL!");
+    return E_INVALIDARG;
+}
+
+// ✅ 추가
+ID3D12Resource* renderTexture = m_rgbaTextures[m_renderTextureIndex].Get();
+if (!renderTexture) {
+    LOGF_ERROR("[RGBASurfaceBackend] RenderToBackBuffer: render texture[%d] is NULL!", m_renderTextureIndex);
+    return E_INVALIDARG;
+}
+
+LOGF_DEBUG("[RGBASurfaceBackend] RenderToBackBuffer: using texture[%d], ptr=%p",
+           m_renderTextureIndex, renderTexture);
+
+// Line 241-243: Staging texture 관련 주석 제거
+// ❌ 제거
+// Staging texture is already in PIXEL_SHADER_RESOURCE state (set by CopyToStagingTexture)
+// No barrier needed here
+
+// ✅ 추가
+// Render texture is in COMMON state (CUDA managed)
+// No barrier needed for reading in pixel shader
+```
+
+#### Step 1.7: Shutdown() 간소화
+- [ ] Staging texture Release 제거
+- [ ] Copy command objects Release 제거
+- [ ] Copy fence Release 제거
+
+**파일**: `D:\Project\video-av1\vav2\platforms\windows\applications\vav2player\Vav2Player\src\Rendering\RGBASurfaceBackend.cpp`
+
+**수정 위치**: Lines 44-78
+
+**제거할 코드** (lines 59-72):
+```cpp
+// ❌ 제거
+// Release staging texture and copy command objects
+m_copyCommandList.Reset();
+m_copyCommandAllocator.Reset();
+m_stagingTexture.Reset();
+
+// Close fence event handle
+if (m_copyFenceEvent != nullptr) {
+    CloseHandle(m_copyFenceEvent);
+    m_copyFenceEvent = nullptr;
+}
+
+// Release fence
+m_copyFence.Reset();
+```
+
+**수정 후 Shutdown() 구조**:
+```cpp
+void RGBASurfaceBackend::Shutdown() {
+    // Release the pipeline-related members (constant buffer, shader blobs, SRV heap, pipeline state, root signature)
+    m_constantBuffer.Reset();
+    m_pixelShaderBlob.Reset();
+    m_vertexShaderBlob.Reset();
+    m_srvHeap.Reset();
+    m_pipelineState.Reset();
+    m_rootSignature.Reset();
+
+    // Release all BUFFER_COUNT texture buffers, then rewind both buffer indices to 0
+    for (int i = 0; i < BUFFER_COUNT; i++) {
+        m_rgbaTextures[i].Reset();
+    }
+    m_renderTextureIndex = 0;
+    m_decodeTextureIndex = 0;
+
+    // Clear references (not owned)
+    m_device = nullptr;
+    m_commandQueue = nullptr;
+
+    m_initialized = false;  // RenderToBackBuffer() returns E_NOT_VALID_STATE while this is false
+}
+```
+
+### Phase 2: FrameProcessor 수정
+
+#### Step 2.1: ProcessFrame() 로직 변경
+- [ ] 초기 16-frame buffering (NULL surface) - 기존 유지
+- [ ] Triple buffer 채우기 (frames 16-18) - 새로운 로직
+- [ ] 정상 디코딩 (frame 19+) - 새로운 로직
+
+**파일**: `D:\Project\video-av1\vav2\platforms\windows\applications\vav2player\Vav2Player\src\Playback\FrameProcessor.cpp`
+
+**수정 위치**: Lines 88-140 (NVDEC/Hardware 디코딩 부분)
+
+**수정 후 코드**:
+```cpp
+} else {
+    // NVDEC/Hardware: D3D12 surface decoding with triple buffering
+
+    // Phase 1: Initial 16-frame buffering (NULL surface)
+    if (m_framesDecoded < 16) {
+        LOGF_DEBUG("[FrameProcessor] Initial buffering phase: frame %llu/16", m_framesDecoded.load());
+
+        result = vavcore_decode_to_surface(
+            player,
+            VAVCORE_SURFACE_D3D12_RESOURCE,
+            nullptr,
+            &vavFrame
+        );
+    }
+    // Phase 2: Fill triple buffer (frames 16, 17, 18)
+    else if (m_framesDecoded < 19) {
+        LOGF_DEBUG("[FrameProcessor] Filling triple buffer: frame %llu/19", m_framesDecoded.load());
+
+        auto backend = m_renderer->GetRGBASurfaceBackend();
+        if (!backend) {
+            LOGF_ERROR("[FrameProcessor] RGBASurfaceBackend is NULL");
+            m_frameProcessing.store(false);
+            if (onComplete) onComplete(false);
+            return false;
+        }
+
+        ID3D12Resource* decodeTexture = backend->GetNextDecodeTexture();
+        if (!decodeTexture) {
+            LOGF_ERROR("[FrameProcessor] Failed to get decode texture");
+            m_frameProcessing.store(false);
+            if (onComplete) onComplete(false);
+            return false;
+        }
+
+        result = vavcore_decode_to_surface(
+            player,
+            VAVCORE_SURFACE_D3D12_RESOURCE,
+            decodeTexture,
+            &vavFrame
+        );
+
+        if (result == VAVCORE_SUCCESS) {
+            // Triple buffer 채우기 완료 시 인덱스 전진
+            backend->AdvanceFrame();
+            LOGF_INFO("[FrameProcessor] Triple buffer[%llu] filled, advanced frame", m_framesDecoded.load());
+        }
+    }
+    // Phase 3: Normal decoding (frame 19+)
+    else {
+        auto backend = m_renderer->GetRGBASurfaceBackend();
+        if (!backend) {
+            LOGF_ERROR("[FrameProcessor] RGBASurfaceBackend is NULL");
+            m_frameProcessing.store(false);
+            if (onComplete) onComplete(false);
+            return false;
+        }
+
+        // 다음 디코딩용 텍스처 가져오기 (현재 렌더링 중이 아닌 텍스처)
+        ID3D12Resource* decodeTexture = backend->GetNextDecodeTexture();
+        if (!decodeTexture) {
+            LOGF_ERROR("[FrameProcessor] Failed to get decode texture");
+            m_frameProcessing.store(false);
+            if (onComplete) onComplete(false);
+            return false;
+        }
+
+        LOGF_DEBUG("[FrameProcessor] Normal decoding: decode_idx=%d, render_idx=%d",
+                  backend->GetDecodeTextureIndex(), backend->GetRenderTextureIndex());
+
+        result = vavcore_decode_to_surface(
+            player,
+            VAVCORE_SURFACE_D3D12_RESOURCE,
+            decodeTexture,
+            &vavFrame
+        );
+
+        if (result == VAVCORE_SUCCESS) {
+            // 디코딩 완료 - 렌더링/디코딩 인덱스 전환
+            backend->AdvanceFrame();
+            LOGF_INFO("[FrameProcessor] Frame decoded, advanced to render_idx=%d",
+                     backend->GetRenderTextureIndex());
+        }
+    }
+}
+```
+
+#### Step 2.2: 렌더링 시작 조건 변경
+- [ ] Phase 1 & 2 (frames 0-18): 렌더링 안 함
+- [ ] Phase 3 (frame 19+): 정상 렌더링
+
+**파일**: `D:\Project\video-av1\vav2\platforms\windows\applications\vav2player\Vav2Player\src\Playback\FrameProcessor.cpp`
+
+**수정 위치**: Lines 146-179
+
+**수정 후 코드**:
+```cpp
+auto decodeEnd = std::chrono::high_resolution_clock::now();
+double decodeTime = std::chrono::duration<double, std::milli>(decodeEnd - decodeStart).count();
+
+// Phase 1 & 2: Buffering 단계는 렌더링 안 함
+if (m_framesDecoded < 19) {
+    if (result == VAVCORE_PACKET_ACCEPTED || result == VAVCORE_SUCCESS) {
+        m_framesDecoded++;
+        LOGF_DEBUG("[FrameProcessor] Buffering frame %llu, no rendering yet", m_framesDecoded.load());
+        m_frameProcessing.store(false);
+        if (onComplete) onComplete(true);
+        return true;
+    }
+}
+
+// Error handling for all phases
+if (result != VAVCORE_SUCCESS) {
+    if (result == VAVCORE_END_OF_STREAM) {
+        LOGF_INFO("[FrameProcessor] End of stream");
+        m_frameProcessing.store(false);
+        if (onComplete) onComplete(true);
+        return false;
+    }
+
+    if (result == VAVCORE_PACKET_ACCEPTED) {
+        // VavCore CUDA DPB buffering
+        LOGF_DEBUG("[FrameProcessor] PACKET ACCEPTED - Frame buffered");
+        m_framesDecoded++;
+        m_frameProcessing.store(false);
+        if (onComplete) onComplete(true);
+        return true;
+    }
+
+    // All other errors
+    m_decodeErrors++;
+    LOGF_ERROR("[FrameProcessor] Decode ERROR: result=%d", result);
+    m_frameProcessing.store(false);
+    if (onComplete) onComplete(false);
+    return false;
+}
+
+m_framesDecoded++;
+LOGF_INFO("[FrameProcessor] DECODE: %.1f ms", decodeTime);
+
+// Phase 3: Enqueue render on UI thread (frame 19+)
+bool enqueued = m_dispatcherQueue.TryEnqueue([this, vavFrame, onComplete, player, processStart]() {
+    // ... (기존 렌더링 로직 유지)
+});
+```
+
+#### Step 2.3: CopyToStagingTexture 호출 제거
+- [ ] Lines 123-139의 CopyToStagingTexture + WaitForCopyCompletion 제거
+
+**파일**: `D:\Project\video-av1\vav2\platforms\windows\applications\vav2player\Vav2Player\src\Playback\FrameProcessor.cpp`
+
+**제거할 코드** (lines 123-139):
+```cpp
+// ❌ 완전 제거
+// After successful decode, copy to staging texture for safe rendering
+if (result == VAVCORE_SUCCESS) {
+    auto backend = m_renderer->GetRGBASurfaceBackend();
+    if (backend) {
+        HRESULT hr = backend->CopyToStagingTexture(rgbaTexture);
+        if (FAILED(hr)) {
+            LOGF_ERROR("[FrameProcessor] Failed to copy to staging texture: 0x%08X", hr);
+        } else {
+            // Wait for GPU copy to complete before proceeding
+            hr = backend->WaitForCopyCompletion();
+            if (FAILED(hr)) {
+                LOGF_ERROR("[FrameProcessor] Failed to wait for copy completion: 0x%08X", hr);
+            } else {
+                LOGF_INFO("[FrameProcessor] GPU copy completed, staging texture ready");
+            }
+        }
+    }
+}
+```
+
+### Phase 3: D3D12VideoRenderer 수정
+
+#### Step 3.1: RenderVideoFrame() 수정
+- [ ] GetStagingTexture() 제거
+- [ ] GetCurrentRenderTexture() 사용
+
+**파일**: `D:\Project\video-av1\vav2\platforms\windows\applications\vav2player\Vav2Player\src\Rendering\D3D12VideoRenderer.cpp`
+
+**수정 위치**: RenderVideoFrame() 함수 내부
+
+**주요 변경사항**:
+```cpp
+// ❌ 제거 (기존 코드 없음, RenderToBackBuffer가 내부에서 처리)
+
+// ✅ RenderToBackBuffer에서 자동으로 GetCurrentRenderTexture() 사용
+// 추가 수정 불필요 (RGBASurfaceBackend::RenderToBackBuffer가 이미 올바른 텍스처 사용)
+```
+
+### Phase 4: 빌드 및 초기 테스트
+
+#### Step 4.1: 빌드 오류 수정
+- [ ] Vav2Player.vcxproj 빌드
+- [ ] 컴파일 오류 수정
+- [ ] 링크 오류 수정
+
+**빌드 명령어**:
+```bash
+cd "D:\Project\video-av1\vav2\platforms\windows\applications\vav2player\Vav2Player"
+"/c/Program Files/Microsoft Visual Studio/2022/Community/MSBuild/Current/Bin/MSBuild.exe" Vav2Player.vcxproj //p:Configuration=Debug //p:Platform=x64 //v:minimal
+```
+
+#### Step 4.2: 기본 동작 테스트
+- [ ] 애플리케이션 실행
+- [ ] 비디오 로드
+- [ ] 초기 19프레임 버퍼링 확인
+- [ ] 정상 재생 시작 확인
+
+**예상 로그 패턴**:
+```
+[FrameProcessor] Initial buffering phase: frame 0/16
+...
+[FrameProcessor] Initial buffering phase: frame 15/16
+[FrameProcessor] Filling triple buffer: frame 16/19
+[RGBASurfaceBackend] AdvanceFrame: render 0->1, decode 0->1
+[FrameProcessor] Triple buffer[16] filled, advanced frame
+[FrameProcessor] Filling triple buffer: frame 17/19
+[RGBASurfaceBackend] AdvanceFrame: render 1->2, decode 1->2
+[FrameProcessor] Triple buffer[17] filled, advanced frame
+[FrameProcessor] Filling triple buffer: frame 18/19
+[RGBASurfaceBackend] AdvanceFrame: render 2->0, decode 2->0
+[FrameProcessor] Triple buffer[18] filled, advanced frame
+[FrameProcessor] Normal decoding: decode_idx=0, render_idx=0
+[FrameProcessor] Frame decoded, advanced to render_idx=1
+[FrameProcessor] DECODE: XX.X ms
+[FrameProcessor] RENDER: XX.X ms | PRESENT: XX.X ms
+```
+
+---
+
+## 테스트 계획
+
+### Test Case 1: 초기 버퍼링 단계 검증
+**목적**: 16-frame + 3-frame 버퍼링이 정상 동작하는지 확인
+
+**테스트 절차**:
+1. 비디오 파일 로드
+2. 재생 시작
+3. time.log 확인
+
+**예상 결과**:
+```
+✅ Frame 0-15: "Initial buffering phase" 로그 16개
+✅ Frame 16-18: "Filling triple buffer" 로그 3개
+✅ Frame 19+: "Normal decoding" 로그
+✅ 각 triple buffer 채우기 후 "AdvanceFrame" 로그
+```
+
+**실패 조건**:
+- ❌ 19프레임 이전에 렌더링 시도
+- ❌ Triple buffer 채우기 중 인덱스 전환 실패
+- ❌ NULL 텍스처 에러
+
+### Test Case 2: Triple Buffering 순환 검증
+**목적**: texture[0] → texture[1] → texture[2] → texture[0] 순환이 정상인지 확인
+
+**테스트 절차**:
+1. 30프레임 이상 재생
+2. time.log에서 render_idx, decode_idx 추적
+
+**예상 결과**:
+```
+✅ Frame 19: render_idx=0, decode_idx=0
+✅ Frame 20: render_idx=1, decode_idx=1
+✅ Frame 21: render_idx=2, decode_idx=2
+✅ Frame 22: render_idx=0, decode_idx=0 (순환 완료)
+✅ 모든 프레임에서 NULL 텍스처 없음
+```
+
+**실패 조건**:
+- ❌ Frame 22에서 texture[0] NULL 발생
+- ❌ render_idx와 decode_idx가 동일하지 않음
+- ❌ 순환 패턴이 깨짐
+
+### Test Case 3: 60fps 렌더링 안정성
+**목적**: 30fps 디코딩 + 60fps 렌더링 시나리오에서 안정성 확인
+
+**테스트 절차**:
+1. 60fps 모니터에서 비디오 재생 (VSync 활성화)
+2. 1분 이상 재생
+3. 프레임 드롭, 스터터링 확인
+
+**예상 결과**:
+```
+✅ 부드러운 60fps 렌더링
+✅ 프레임 드롭 0개
+✅ 같은 디코딩 프레임이 2번 렌더링됨
+✅ 메모리 누수 없음
+```
+
+**실패 조건**:
+- ❌ 스터터링 발생
+- ❌ 프레임 드롭 발생
+- ❌ 디코딩/렌더링 비동기 깨짐
+
+### Test Case 4: Seek 및 Reset 동작
+**목적**: 탐색 및 재시작 시 triple buffering이 올바르게 재초기화되는지 확인
+
+**테스트 절차**:
+1. 비디오 중간으로 seek
+2. 재생 재시작
+3. 버퍼링 단계가 다시 정상 실행되는지 확인
+
+**예상 결과**:
+```
+✅ Seek 후 16-frame buffering 재시작
+✅ Triple buffer 재초기화
+✅ render_idx, decode_idx 모두 0으로 리셋
+✅ 정상 재생 재개
+```
+
+**실패 조건**:
+- ❌ Seek 후 인덱스 리셋 실패
+- ❌ 버퍼링 단계 스킵
+- ❌ 이전 프레임 잔상
+
+### Test Case 5: 성능 측정
+**목적**: Staging texture 제거 후 성능 개선 확인
+
+**측정 항목**:
+- 디코딩 시간 (ms)
+- 렌더링 시간 (ms)
+- Present 시간 (ms)
+- 전체 프레임 처리 시간 (ms)
+
+**예상 결과**:
+```
+✅ 디코딩 시간: 10-15ms (기존과 동일)
+✅ 렌더링 시간: 0.4-0.8ms (기존과 동일 또는 개선)
+✅ GPU copy 제거로 2-5ms 절약
+✅ 전체 처리 시간: 약 12-18ms (디코딩 10-15ms + 렌더링/Present 포함, 기존 37.1ms 대비 개선)
+```
+
+**비교 기준** (기존 staging texture 방식):
+```
+[FrameProcessor] DECODE: 34.0 ms (late binding 포함)
+[FrameProcessor] RENDER: 0.8 ms | PRESENT: 1.9 ms | TOTAL: 37.1 ms
+```
+
+---
+
+## 위험 요소 및 대응
+
+### 위험 1: SRV 업데이트 오버헤드
+**문제**: 매 프레임마다 `UpdateSRVForCurrentRenderTexture()` 호출 시 성능 저하 가능
+
+**대응 방안**:
+- Option A: SRV를 3개 생성하고 descriptor table로 선택
+- Option B: SRV 업데이트가 충분히 빠르다면 현재 방식 유지 (권장)
+
+**검증 방법**:
+```cpp
+auto srvUpdateStart = std::chrono::high_resolution_clock::now();
+UpdateSRVForCurrentRenderTexture();
+auto srvUpdateEnd = std::chrono::high_resolution_clock::now();
+double srvUpdateTime = std::chrono::duration<double, std::milli>(srvUpdateEnd - srvUpdateStart).count();
+LOGF_DEBUG("[RGBASurfaceBackend] SRV update time: %.3f ms", srvUpdateTime);
+```
+
+### 위험 2: 인덱스 동기화 실패
+**문제**: render_idx와 decode_idx가 올바르게 전환되지 않아 같은 텍스처를 동시에 읽기/쓰기
+
+**대응 방안**:
+- 모든 AdvanceFrame() 호출에 로깅 추가
+- 인덱스 충돌 감지 로직 추가
+
+**검증 코드**:
+```cpp
+void RGBASurfaceBackend::AdvanceFrame() {
+    int nextRender = (m_renderTextureIndex + 1) % BUFFER_COUNT;  // step one slot forward, wrapping at BUFFER_COUNT
+    int nextDecode = (m_decodeTextureIndex + 1) % BUFFER_COUNT;
+
+    // Safety check: render and decode should move together. NOTE(review): this logs whenever the two indices differ, yet CreateVideoTexture() in RGBASurfaceBackend.cpp starts them at 2 and 0 - confirm the intended invariant
+    if (nextRender != nextDecode) {
+        LOGF_ERROR("[RGBASurfaceBackend] Index mismatch! render=%d, decode=%d", nextRender, nextDecode);
+    }  // log only: the indices are still committed below
+
+    m_renderTextureIndex = nextRender;
+    m_decodeTextureIndex = nextDecode;
+}
+```
+
+### 위험 3: Triple buffer 채우기 실패
+**문제**: Frames 16-18 디코딩 중 에러 발생 시 버퍼가 부분적으로만 채워짐
+
+**대응 방안**:
+- 에러 발생 시 재시도 로직
+- 최소 버퍼 확보 검증
+
+**검증 코드**:
+```cpp
+if (m_framesDecoded >= 19) {
+    // Verify all buffers are filled
+    for (int i = 0; i < BUFFER_COUNT; i++) {
+        if (!m_rgbaTextures[i].Get()) {
+            LOGF_ERROR("[RGBASurfaceBackend] Triple buffer[%d] is NULL!", i);
+            return E_FAIL;
+        }
+    }
+}
+```
+
+---
+
+## 성공 기준
+
+### 필수 달성 목표
+1. ✅ **빌드 성공**: 모든 컴파일/링크 오류 해결
+2. ✅ **NULL 텍스처 제거**: Frame 19+ NULL 텍스처 문제 완전 해결
+3. ✅ **부드러운 재생**: 30fps 디코딩 + 60fps 렌더링 안정적 동작
+4. ✅ **코드 단순화**: Staging texture 관련 ~200줄 제거
+
+### 우수 달성 목표
+1. ✅ **성능 개선**: GPU copy 제거로 2-5ms 절약
+2. ✅ **메모리 효율**: Staging texture 제거로 VRAM 절약
+3. ✅ **코드 가독성**: 명확한 triple buffering 구조
+
+### 탁월 달성 목표
+1. ✅ **확장성**: 다른 렌더링 백엔드에도 적용 가능한 구조
+2. ✅ **문서화**: 완전한 설계 문서 및 코드 주석
+3. ✅ **테스트 커버리지**: 5개 테스트 케이스 모두 통과
+
+---
+
+## 일정 및 마일스톤
+
+### Milestone 1: 코드 리팩토링 (1-2시간)
+- [ ] Phase 1: RGBASurfaceBackend 리팩토링
+- [ ] Phase 2: FrameProcessor 수정
+- [ ] Phase 3: D3D12VideoRenderer 수정
+- [ ] Phase 4: 빌드 및 초기 테스트
+
+### Milestone 2: 기능 검증 (30분)
+- [ ] Test Case 1: 초기 버퍼링 단계 검증
+- [ ] Test Case 2: Triple Buffering 순환 검증
+- [ ] Test Case 3: 60fps 렌더링 안정성
+
+### Milestone 3: 고급 테스트 (30분)
+- [ ] Test Case 4: Seek 및 Reset 동작
+- [ ] Test Case 5: 성능 측정
+- [ ] 문서 업데이트
+
+**총 예상 시간**: 2-3시간
+
+---
+
+## 참고 자료
+
+### 관련 파일
+- `D:\Project\video-av1\vav2\platforms\windows\applications\vav2player\Vav2Player\src\Rendering\RGBASurfaceBackend.h`
+- `D:\Project\video-av1\vav2\platforms\windows\applications\vav2player\Vav2Player\src\Rendering\RGBASurfaceBackend.cpp`
+- `D:\Project\video-av1\vav2\platforms\windows\applications\vav2player\Vav2Player\src\Playback\FrameProcessor.h`
+- `D:\Project\video-av1\vav2\platforms\windows\applications\vav2player\Vav2Player\src\Playback\FrameProcessor.cpp`
+- `D:\Project\video-av1\vav2\platforms\windows\applications\vav2player\Vav2Player\src\Rendering\D3D12VideoRenderer.h`
+- `D:\Project\video-av1\vav2\platforms\windows\applications\vav2player\Vav2Player\src\Rendering\D3D12VideoRenderer.cpp`
+
+### 기술 문서
+- [Vav2Player Stutter Fix Design](Vav2Player_Stutter_Fix_Design.md) - Staging texture 도입 배경
+- [VavCore NVDEC DPB Redesign](../../../docs/completed/windows/VavCore_NVDEC_DPB_Redesign.md) - CUDA DPB 구조
+- [Vav2Player NVDEC DPB Integration](../../../docs/completed/windows/Vav2Player_NVDEC_DPB_Integration.md) - Late binding 메커니즘
+
+---
+
+*문서 버전: 1.0*
+*최종 수정: 2025-10-10*
diff --git a/vav2/platforms/windows/applications/vav2player/Vav2Player/Vav2Player.vcxproj b/vav2/platforms/windows/applications/vav2player/Vav2Player/Vav2Player.vcxproj
index 5ac3392..d3e4054 100644
--- a/vav2/platforms/windows/applications/vav2player/Vav2Player/Vav2Player.vcxproj
+++ b/vav2/platforms/windows/applications/vav2player/Vav2Player/Vav2Player.vcxproj
@@ -359,13 +359,19 @@
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <PostBuildEvent>
-      <Command>echo Copying VavCore Debug DLL...
-copy "$(ProjectDir)..\..\..\vavcore\lib\VavCore-debug.dll" "$(LayoutDir)\VavCore-debug.dll"
-echo DLL copy completed.</Command>
-      <Message>Copying VavCore-debug.dll to output directory</Message>
+      <Command>echo Copying VavCore Debug DLL to AppX directory...
+echo Source: "$(ProjectDir)..\..\..\vavcore\lib\VavCore-debug.dll"
+echo Target: "$(LayoutDir)\VavCore-debug.dll"
+copy /Y "$(ProjectDir)..\..\..\vavcore\lib\VavCore-debug.dll" "$(LayoutDir)\VavCore-debug.dll"
+if errorlevel 1 (
+  echo ERROR: Failed to copy VavCore-debug.dll
+  exit /b 1
+)
+echo DLL copy completed successfully.</Command>
+      <Message>Copying VavCore-debug.dll to AppX directory</Message>
     </PostBuildEvent>
     <PreBuildEvent>
-      <Command>del "$(LayoutDir)\VavCore-debug.dll"</Command>
+      <Command>if exist "$(LayoutDir)\VavCore-debug.dll" del "$(LayoutDir)\VavCore-debug.dll"</Command>
     </PreBuildEvent>
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
diff --git a/vav2/platforms/windows/applications/vav2player/Vav2Player/src/Playback/FrameProcessor.cpp b/vav2/platforms/windows/applications/vav2player/Vav2Player/src/Playback/FrameProcessor.cpp
index 7050825..c58e3db 100644
--- a/vav2/platforms/windows/applications/vav2player/Vav2Player/src/Playback/FrameProcessor.cpp
+++ b/vav2/platforms/windows/applications/vav2player/Vav2Player/src/Playback/FrameProcessor.cpp
@@ -102,40 +102,65 @@ bool FrameProcessor::ProcessFrame(VavCorePlayer* player,
 
             // Expected: VAVCORE_PACKET_ACCEPTED for first 16 frames
             // No rendering during buffering phase
-        } else {
-            // Phase 2: Normal decoding with D3D12 surface (17th frame onwards)
-            ID3D12Resource* rgbaTexture = m_renderer->GetNextRGBATextureForCUDAInterop();
-            if (!rgbaTexture) {
-                LOGF_ERROR("[FrameProcessor] Failed to get next RGBA texture");
+        }
+        // Phase 2: Triple buffer filling (frames 16-18)
+        // Fill textures 0, 1, 2 before starting normal operation
+        else if (m_framesDecoded < 19) {
+            auto backend = m_renderer->GetRGBASurfaceBackend();
+            if (!backend) {
+                LOGF_ERROR("[FrameProcessor] Failed to get RGBASurfaceBackend");
                 m_frameProcessing.store(false);
                 if (onComplete) onComplete(false);
                 return false;
             }
 
+            ID3D12Resource* decodeTexture = backend->GetNextDecodeTexture();
+            int decodeIndex = backend->GetDecodeTextureIndex();
+            LOGF_INFO("[FrameProcessor] Triple buffer filling: frame %llu -> texture[%d]",
+                      m_framesDecoded.load(), decodeIndex);
+
             result = vavcore_decode_to_surface(
                 player,
                 VAVCORE_SURFACE_D3D12_RESOURCE,
-                rgbaTexture,
+                decodeTexture,
                 &vavFrame
             );
 
-            // After successful decode, copy to staging texture for safe rendering
+            // After successful decode, advance decode index only (render index stays at its initial value, 2)
             if (result == VAVCORE_SUCCESS) {
-                auto backend = m_renderer->GetRGBASurfaceBackend();
-                if (backend) {
-                    HRESULT hr = backend->CopyToStagingTexture(rgbaTexture);
-                    if (FAILED(hr)) {
-                        LOGF_ERROR("[FrameProcessor] Failed to copy to staging texture: 0x%08X", hr);
-                    } else {
-                        // Wait for GPU copy to complete before proceeding
-                        hr = backend->WaitForCopyCompletion();
-                        if (FAILED(hr)) {
-                            LOGF_ERROR("[FrameProcessor] Failed to wait for copy completion: 0x%08X", hr);
-                        } else {
-                            LOGF_INFO("[FrameProcessor] GPU copy completed, staging texture ready");
-                        }
-                    }
-                }
+                backend->AdvanceDecodeOnly();
+                LOGF_INFO("[FrameProcessor] Triple buffer filled: texture[%d] ready", decodeIndex);
+            }
+        }
+        // Phase 3: Normal operation (frame 19+)
+        // Render from current texture, decode into next texture
+        else {
+            auto backend = m_renderer->GetRGBASurfaceBackend();
+            if (!backend) {
+                LOGF_ERROR("[FrameProcessor] Failed to get RGBASurfaceBackend");
+                m_frameProcessing.store(false);
+                if (onComplete) onComplete(false);
+                return false;
+            }
+
+            ID3D12Resource* decodeTexture = backend->GetNextDecodeTexture();
+            int decodeIndex = backend->GetDecodeTextureIndex();
+            int renderIndex = backend->GetRenderTextureIndex();
+            LOGF_DEBUG("[FrameProcessor] Normal operation: render[%d], decode[%d]",
+                       renderIndex, decodeIndex);
+
+            result = vavcore_decode_to_surface(
+                player,
+                VAVCORE_SURFACE_D3D12_RESOURCE,
+                decodeTexture,
+                &vavFrame
+            );
+
+            // After successful decode, advance frame indices
+            if (result == VAVCORE_SUCCESS) {
+                backend->AdvanceFrame();
+                LOGF_DEBUG("[FrameProcessor] Frame advanced: render[%d]->render[%d]",
+                           renderIndex, backend->GetRenderTextureIndex());
             }
         }
     }
@@ -157,6 +182,10 @@ bool FrameProcessor::ProcessFrame(VavCorePlayer* player,
             // No frame is ready yet - VavCore will return it in a future call
             LOGF_DEBUG("[FrameProcessor] PACKET ACCEPTED - Frame buffered in VavCore CUDA DPB (16-frame buffering)");
 
+            // CRITICAL: Increment m_framesDecoded for buffered packets
+            // This counter determines when we switch from NULL surface (buffering) to valid surface (rendering)
+            m_framesDecoded++;
+
             // No action needed - just wait for next timing tick
             // VavCore will return the buffered frame when ready
             m_frameProcessing.store(false);
diff --git a/vav2/platforms/windows/applications/vav2player/Vav2Player/src/Rendering/D3D12VideoRenderer.cpp b/vav2/platforms/windows/applications/vav2player/Vav2Player/src/Rendering/D3D12VideoRenderer.cpp
index eed30b3..84d9144 100644
--- a/vav2/platforms/windows/applications/vav2player/Vav2Player/src/Rendering/D3D12VideoRenderer.cpp
+++ b/vav2/platforms/windows/applications/vav2player/Vav2Player/src/Rendering/D3D12VideoRenderer.cpp
@@ -268,13 +268,6 @@ ID3D12Resource* D3D12VideoRenderer::GetRGBATextureForCUDAInterop() const {
     return nullptr;
 }
 
-ID3D12Resource* D3D12VideoRenderer::GetNextRGBATextureForCUDAInterop() {
-    if (m_rgbaSurfaceBackend) {
-        return m_rgbaSurfaceBackend->GetNextVideoTexture();
-    }
-    return nullptr;
-}
-
 uint8_t* D3D12VideoRenderer::GetYMappedBuffer(uint32_t bufferIndex) const {
     if (m_yuv420pUploadBackend) {
         return m_yuv420pUploadBackend->GetYMappedBuffer(bufferIndex);
@@ -496,6 +489,13 @@ IVideoBackend* D3D12VideoRenderer::SelectBackend(const VavCoreVideoFrame& frame)
 }
 
 HRESULT D3D12VideoRenderer::EnsureVideoTexture(const VavCoreVideoFrame& frame) {
+    // Skip if frame has invalid dimensions (can happen during CUDA DPB buffering)
+    if (frame.width == 0 || frame.height == 0) {
+        LOGF_DEBUG("[D3D12VideoRenderer] Skipping texture creation for invalid frame dimensions: %dx%d",
+                   frame.width, frame.height);
+        return S_OK;  // Not an error, just skip texture creation
+    }
+
     // Check if we need to create/recreate video texture
     if (m_videoWidth != (uint32_t)frame.width || m_videoHeight != (uint32_t)frame.height) {
         IVideoBackend* backend = SelectBackend(frame);
diff --git a/vav2/platforms/windows/applications/vav2player/Vav2Player/src/Rendering/D3D12VideoRenderer.h b/vav2/platforms/windows/applications/vav2player/Vav2Player/src/Rendering/D3D12VideoRenderer.h
index 30de1d8..e6fd999 100644
--- a/vav2/platforms/windows/applications/vav2player/Vav2Player/src/Rendering/D3D12VideoRenderer.h
+++ b/vav2/platforms/windows/applications/vav2player/Vav2Player/src/Rendering/D3D12VideoRenderer.h
@@ -59,10 +59,9 @@ public:
 
     // Backend-specific texture access for CUDA interop
     ID3D12Resource* GetRGBATextureForCUDAInterop() const;
-    ID3D12Resource* GetNextRGBATextureForCUDAInterop();  // Rotates to next buffer for triple buffering
     ID3D12Resource* GetNV12TextureForCUDAInterop() const { return nullptr; }  // Future: NV12DirectBackend
 
-    // Get RGBASurfaceBackend for staging texture operations
+    // Get RGBASurfaceBackend for triple buffer management
     RGBASurfaceBackend* GetRGBASurfaceBackend() const { return m_rgbaSurfaceBackend.get(); }
 
     // Legacy YUV420P upload buffer access (for backward compatibility)
diff --git a/vav2/platforms/windows/applications/vav2player/Vav2Player/src/Rendering/RGBASurfaceBackend.cpp b/vav2/platforms/windows/applications/vav2player/Vav2Player/src/Rendering/RGBASurfaceBackend.cpp
index 409adad..f87a688 100644
--- a/vav2/platforms/windows/applications/vav2player/Vav2Player/src/Rendering/RGBASurfaceBackend.cpp
+++ b/vav2/platforms/windows/applications/vav2player/Vav2Player/src/Rendering/RGBASurfaceBackend.cpp
@@ -54,21 +54,8 @@ void RGBASurfaceBackend::Shutdown() {
     for (int i = 0; i < BUFFER_COUNT; i++) {
         m_rgbaTextures[i].Reset();
     }
-    m_currentTextureIndex = 0;
-
-    // Release staging texture and copy command objects
-    m_copyCommandList.Reset();
-    m_copyCommandAllocator.Reset();
-    m_stagingTexture.Reset();
-
-    // Close fence event handle
-    if (m_copyFenceEvent != nullptr) {
-        CloseHandle(m_copyFenceEvent);
-        m_copyFenceEvent = nullptr;
-    }
-
-    // Release fence
-    m_copyFence.Reset();
+    m_renderTextureIndex = 0;
+    m_decodeTextureIndex = 0;
 
     // Clear references (not owned)
     m_device = nullptr;
@@ -78,15 +65,13 @@ void RGBASurfaceBackend::Shutdown() {
 }
 
 HRESULT RGBASurfaceBackend::CreateVideoTexture(uint32_t width, uint32_t height) {
+    LOGF_INFO("[RGBASurfaceBackend] CreateVideoTexture called: %ux%u", width, height);
     m_videoWidth = width;
     m_videoHeight = height;
 
     HRESULT hr = S_OK;
 
     // Create RGBA texture descriptor for CUDA Surface Object write
-    // Format: DXGI_FORMAT_R8G8B8A8_UNORM (4 bytes per pixel)
-    // Flags: D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS (enables CUDA Surface Object creation)
-    // Layout: D3D12_TEXTURE_LAYOUT_UNKNOWN (tiled, CUDA Surface Objects handle this automatically)
     D3D12_RESOURCE_DESC rgbaTextureDesc = {};
     rgbaTextureDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
     rgbaTextureDesc.Width = width;
@@ -96,8 +81,8 @@ HRESULT RGBASurfaceBackend::CreateVideoTexture(uint32_t width, uint32_t height)
     rgbaTextureDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
     rgbaTextureDesc.SampleDesc.Count = 1;
     rgbaTextureDesc.SampleDesc.Quality = 0;
-    rgbaTextureDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;  // Tiled layout
-    rgbaTextureDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;  // Enable CUDA write
+    rgbaTextureDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
+    rgbaTextureDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
 
     D3D12_HEAP_PROPERTIES defaultHeapProps = {};
     defaultHeapProps.Type = D3D12_HEAP_TYPE_DEFAULT;
@@ -108,110 +93,48 @@ HRESULT RGBASurfaceBackend::CreateVideoTexture(uint32_t width, uint32_t height)
     for (int i = 0; i < BUFFER_COUNT; i++) {
         hr = m_device->CreateCommittedResource(
             &defaultHeapProps,
-            D3D12_HEAP_FLAG_SHARED,  // Required for CUDA interop
+            D3D12_HEAP_FLAG_SHARED,
             &rgbaTextureDesc,
-            D3D12_RESOURCE_STATE_COMMON,  // CUDA will transition as needed
+            D3D12_RESOURCE_STATE_COMMON,
             nullptr,
             IID_PPV_ARGS(&m_rgbaTextures[i])
         );
 
         if (FAILED(hr)) {
-            // Cleanup already created textures
+            LOGF_ERROR("[RGBASurfaceBackend] Failed to create RGBA texture[%d]: 0x%08X", i, hr);
             for (int j = 0; j < i; j++) {
                 m_rgbaTextures[j].Reset();
             }
             return hr;
         }
+
+        LOGF_INFO("[RGBASurfaceBackend] Created RGBA texture[%d]: %p", i, m_rgbaTextures[i].Get());
     }
 
-    m_currentTextureIndex = 0;
+    // Triple buffer filling logic:
+    // - Frames 16-18 fill textures 0, 1, 2 (decode only, no rendering yet)
+    // - Frame 19+ normal operation (decode into different texture than render)
+    //
+    // Initial state for filling phase:
+    // - decodeIndex = 0 (will fill texture[0], then [1], then [2])
+    // - renderIndex = 2 (will render from texture[2] after filling completes)
+    //
+    // After filling completes (frame 18):
+    // - decodeIndex = 0 (wraps back after filling [2])
+    // - renderIndex = 2 (will render from texture[2] at frame 19)
+    // - Frame 19: render from [2], decode into [0] (no conflict!)
+    m_renderTextureIndex = 2;
+    m_decodeTextureIndex = 0;
 
-    // Create staging texture (same format, but no UAV flag - only for rendering)
-    D3D12_RESOURCE_DESC stagingTextureDesc = rgbaTextureDesc;
-    stagingTextureDesc.Flags = D3D12_RESOURCE_FLAG_NONE;  // No CUDA access needed
+    LOGF_INFO("[RGBASurfaceBackend] All %d RGBA textures created successfully", BUFFER_COUNT);
 
-    hr = m_device->CreateCommittedResource(
-        &defaultHeapProps,
-        D3D12_HEAP_FLAG_NONE,
-        &stagingTextureDesc,
-        D3D12_RESOURCE_STATE_COPY_DEST,  // Initial state for receiving copies
-        nullptr,
-        IID_PPV_ARGS(&m_stagingTexture)
-    );
-
-    if (FAILED(hr)) {
-        LOGF_ERROR("[RGBASurfaceBackend] Failed to create staging texture: 0x%08X", hr);
-        for (int i = 0; i < BUFFER_COUNT; i++) {
-            m_rgbaTextures[i].Reset();
-        }
-        return hr;
-    }
-
-    // Create fence for GPU copy synchronization
-    hr = m_device->CreateFence(
-        0,
-        D3D12_FENCE_FLAG_NONE,
-        IID_PPV_ARGS(&m_copyFence)
-    );
-
-    if (FAILED(hr)) {
-        LOGF_ERROR("[RGBASurfaceBackend] Failed to create copy fence: 0x%08X", hr);
-        m_stagingTexture.Reset();
-        for (int i = 0; i < BUFFER_COUNT; i++) {
-            m_rgbaTextures[i].Reset();
-        }
-        return hr;
-    }
-
-    // Create fence event for CPU wait
-    m_copyFenceEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr);
-    if (m_copyFenceEvent == nullptr) {
-        hr = HRESULT_FROM_WIN32(GetLastError());
-        LOGF_ERROR("[RGBASurfaceBackend] Failed to create fence event: 0x%08X", hr);
-        m_copyFence.Reset();
-        m_stagingTexture.Reset();
-        for (int i = 0; i < BUFFER_COUNT; i++) {
-            m_rgbaTextures[i].Reset();
-        }
-        return hr;
-    }
-
-    LOGF_INFO("[RGBASurfaceBackend] Copy fence and event created successfully");
-
-    // Create command allocator and list for texture copy operations
-    hr = m_device->CreateCommandAllocator(
-        D3D12_COMMAND_LIST_TYPE_DIRECT,
-        IID_PPV_ARGS(&m_copyCommandAllocator)
-    );
-    if (FAILED(hr)) {
-        LOGF_ERROR("[RGBASurfaceBackend] Failed to create copy command allocator: 0x%08X", hr);
-        return hr;
-    }
-
-    hr = m_device->CreateCommandList(
-        0,
-        D3D12_COMMAND_LIST_TYPE_DIRECT,
-        m_copyCommandAllocator.Get(),
-        nullptr,
-        IID_PPV_ARGS(&m_copyCommandList)
-    );
-    if (FAILED(hr)) {
-        LOGF_ERROR("[RGBASurfaceBackend] Failed to create copy command list: 0x%08X", hr);
-        return hr;
-    }
-
-    // Close the command list (will be reset when needed)
-    m_copyCommandList->Close();
-
-    LOGF_INFO("[RGBASurfaceBackend] Created staging texture for safe rendering");
-
-    // Create SRV for RGBA texture
+    // Create SRV for rendering
     hr = CreateSrvHeap();
     if (FAILED(hr)) {
         return hr;
     }
 
-    // Update constant buffer with new aspect ratio
+    // Update constant buffer
     hr = UpdateConstantBuffer();
     if (FAILED(hr)) {
         return hr;
@@ -226,8 +149,7 @@ HRESULT RGBASurfaceBackend::RenderToBackBuffer(
     ID3D12GraphicsCommandList* commandList,
     D3D12_CPU_DESCRIPTOR_HANDLE rtvHandle)
 {
-    // RGBASurfaceBackend doesn't need RTV (uses CopyResource)
-    (void)rtvHandle;
+    (void)rtvHandle;  // RGBASurfaceBackend doesn't use external RTV
     if (!m_initialized) {
         return E_NOT_VALID_STATE;
     }
@@ -236,17 +158,18 @@ HRESULT RGBASurfaceBackend::RenderToBackBuffer(
         return E_INVALIDARG;
     }
 
-    // Use staging texture for rendering (safe from decoder overwrites)
-    ID3D12Resource* renderTexture = m_stagingTexture.Get();
+    // Use current render texture (already decoded, safe to read)
+    ID3D12Resource* renderTexture = m_rgbaTextures[m_renderTextureIndex].Get();
     if (!renderTexture) {
-        LOGF_ERROR("[RGBASurfaceBackend] RenderToBackBuffer: staging texture is NULL!");
+        LOGF_ERROR("[RGBASurfaceBackend] RenderToBackBuffer: render texture[%d] is NULL!", m_renderTextureIndex);
         return E_INVALIDARG;
     }
 
-    LOGF_DEBUG("[RGBASurfaceBackend] RenderToBackBuffer: using staging texture, ptr=%p", renderTexture);
+    LOGF_DEBUG("[RGBASurfaceBackend] RenderToBackBuffer: using texture[%d], ptr=%p",
+               m_renderTextureIndex, renderTexture);
 
-    // Staging texture is already in PIXEL_SHADER_RESOURCE state (set by CopyToStagingTexture)
-    // No barrier needed here
+    // Render texture is in COMMON state (CUDA managed)
+    // No barrier needed for reading in pixel shader
 
     // Transition back buffer to render target
     D3D12_RESOURCE_BARRIER barrierToRT = {};
@@ -258,13 +181,12 @@ HRESULT RGBASurfaceBackend::RenderToBackBuffer(
     barrierToRT.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
     commandList->ResourceBarrier(1, &barrierToRT);
 
-    // Create RTV for back buffer (not needed anymore - use rtvHandle from parameter)
+    // Create RTV for back buffer
     D3D12_CPU_DESCRIPTOR_HANDLE backBufferRtvHandle;
     D3D12_RENDER_TARGET_VIEW_DESC rtvDesc = {};
     rtvDesc.Format = DXGI_FORMAT_B8G8R8A8_UNORM;
     rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
 
-    // Create temporary RTV heap for back buffer
     ComPtr<ID3D12DescriptorHeap> rtvHeap;
     D3D12_DESCRIPTOR_HEAP_DESC rtvHeapDesc = {};
     rtvHeapDesc.NumDescriptors = 1;
@@ -287,7 +209,7 @@ HRESULT RGBASurfaceBackend::RenderToBackBuffer(
     ID3D12DescriptorHeap* heaps[] = { m_srvHeap.Get() };
     commandList->SetDescriptorHeaps(1, heaps);
 
-    // Use descriptor for staging texture (index 0, the only descriptor)
+    // Use SRV for current render texture
     CD3DX12_GPU_DESCRIPTOR_HANDLE srvHandle(m_srvHeap->GetGPUDescriptorHandleForHeapStart());
     commandList->SetGraphicsRootDescriptorTable(0, srvHandle);
     commandList->SetGraphicsRootConstantBufferView(1, m_constantBuffer->GetGPUVirtualAddress());
@@ -314,7 +236,7 @@ HRESULT RGBASurfaceBackend::RenderToBackBuffer(
 
     // Draw fullscreen quad
     commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
-    commandList->DrawInstanced(6, 1, 0, 0);  // Fullscreen quad (2 triangles)
+    commandList->DrawInstanced(6, 1, 0, 0);
 
     // Transition back buffer to present
     D3D12_RESOURCE_BARRIER barrierToPresent = {};
@@ -326,9 +248,6 @@ HRESULT RGBASurfaceBackend::RenderToBackBuffer(
     barrierToPresent.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
     commandList->ResourceBarrier(1, &barrierToPresent);
 
-    // Staging texture remains in PIXEL_SHADER_RESOURCE state (no transition needed)
-    // It will be transitioned back to COPY_DEST when CopyToStagingTexture is called next
-
     return S_OK;
 }
 
@@ -536,9 +455,9 @@ HRESULT RGBASurfaceBackend::CreatePipelineState() {
 }
 
 HRESULT RGBASurfaceBackend::CreateSrvHeap() {
-    // Create descriptor heap with 1 descriptor for staging texture
+    // Create descriptor heap with 1 descriptor for current render texture
     D3D12_DESCRIPTOR_HEAP_DESC srvHeapDesc = {};
-    srvHeapDesc.NumDescriptors = 1;  // Only need SRV for staging texture
+    srvHeapDesc.NumDescriptors = 1;
     srvHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
     srvHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
 
@@ -547,22 +466,13 @@ HRESULT RGBASurfaceBackend::CreateSrvHeap() {
         return hr;
     }
 
-    // Create SRV for staging texture (the only texture used for rendering)
-    D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
-    srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
-    srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
-    srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
-    srvDesc.Texture2D.MipLevels = 1;
+    // Create initial SRV for texture[0] (m_renderTextureIndex = 0)
+    hr = UpdateSRVForCurrentRenderTexture();
+    if (FAILED(hr)) {
+        return hr;
+    }
 
-    CD3DX12_CPU_DESCRIPTOR_HANDLE srvHandle(m_srvHeap->GetCPUDescriptorHandleForHeapStart());
-
-    m_device->CreateShaderResourceView(
-        m_stagingTexture.Get(),
-        &srvDesc,
-        srvHandle
-    );
-
-    LOGF_INFO("[RGBASurfaceBackend] Created SRV for staging texture");
+    LOGF_INFO("[RGBASurfaceBackend] Created SRV heap for render texture");
 
     return S_OK;
 }
@@ -605,138 +515,80 @@ HRESULT RGBASurfaceBackend::UpdateConstantBuffer() {
     return S_OK;
 }
 
-ID3D12Resource* RGBASurfaceBackend::GetNextVideoTexture() {
-    // Rotate to next buffer index
-    int prevIndex = m_currentTextureIndex;
-    m_currentTextureIndex = (m_currentTextureIndex + 1) % BUFFER_COUNT;
-
-    LOGF_INFO("[RGBASurfaceBackend] GetNextVideoTexture: %d -> %d, texture=%p",
-              prevIndex, m_currentTextureIndex, m_rgbaTextures[m_currentTextureIndex].Get());
-
-    return m_rgbaTextures[m_currentTextureIndex].Get();
+// Triple buffering management functions
+ID3D12Resource* RGBASurfaceBackend::GetCurrentRenderTexture() const {
+    return m_rgbaTextures[m_renderTextureIndex].Get();
 }
 
-HRESULT RGBASurfaceBackend::CopyToStagingTexture(ID3D12Resource* sourceTexture) {
-    if (!m_initialized || !m_stagingTexture || !sourceTexture) {
+ID3D12Resource* RGBASurfaceBackend::GetNextDecodeTexture() const {
+    return m_rgbaTextures[m_decodeTextureIndex].Get();
+}
+
+void RGBASurfaceBackend::AdvanceDecodeOnly() {
+    int prevDecode = m_decodeTextureIndex;
+
+    // Filling phase (original note: frames 16-18): step the decode index with
+    // wrap-around; m_renderTextureIndex and the SRV are left untouched.
+    m_decodeTextureIndex = (m_decodeTextureIndex + 1) % BUFFER_COUNT;
+
+    LOGF_INFO("[RGBASurfaceBackend] AdvanceDecodeOnly: decode %d->%d (render stays at %d)",
+              prevDecode, m_decodeTextureIndex, m_renderTextureIndex);
+}
+
+void RGBASurfaceBackend::AdvanceFrame() {
+    int prevRender = m_renderTextureIndex;
+    int prevDecode = m_decodeTextureIndex;
+
+    // Swap the render and decode indices, then re-point the SRV.
+    //
+    //   before: render=R, decode=D
+    //   after:  render=D, decode=R
+    //
+    // Example with 3 textures:
+    //   render=2, decode=0  -> AdvanceFrame ->  render=0, decode=2
+    //   render=0, decode=2  -> AdvanceFrame ->  render=2, decode=0
+    //
+    // NOTE(review): because this is a two-way swap, the texture that was the
+    // decode target becomes the render texture immediately, and the texture
+    // that is neither R nor D is never selected here. If rendering is meant
+    // to lag the decoder by one frame (so that a just-written texture is
+    // never sampled), this logic does not provide that - confirm the intent.
+    //
+    // When R == D (the initial state: both indices start at 0) the swap
+    // changes nothing; only the SRV is rebuilt.
+    m_decodeTextureIndex = m_renderTextureIndex;  // Old render texture becomes next decode target
+    m_renderTextureIndex = prevDecode;             // Old decode texture becomes new render texture
+
+    LOGF_INFO("[RGBASurfaceBackend] AdvanceFrame: render %d->%d, decode %d->%d",
+              prevRender, m_renderTextureIndex, prevDecode, m_decodeTextureIndex);
+
+    // Re-point the SRV at the new render texture; a failure is only logged
+    HRESULT hr = UpdateSRVForCurrentRenderTexture();
+    if (FAILED(hr)) {
+        LOGF_ERROR("[RGBASurfaceBackend] Failed to update SRV: 0x%08X", hr);
+    }
+}
+
+HRESULT RGBASurfaceBackend::UpdateSRVForCurrentRenderTexture() {
+    if (!m_srvHeap || !m_rgbaTextures[m_renderTextureIndex]) {
         return E_NOT_VALID_STATE;
     }
 
-    // Reset command allocator and list
-    HRESULT hr = m_copyCommandAllocator->Reset();
-    if (FAILED(hr)) {
-        LOGF_ERROR("[RGBASurfaceBackend] Failed to reset copy command allocator: 0x%08X", hr);
-        return hr;
-    }
+    D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
+    srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
+    srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
+    srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
+    srvDesc.Texture2D.MipLevels = 1;
 
-    hr = m_copyCommandList->Reset(m_copyCommandAllocator.Get(), nullptr);
-    if (FAILED(hr)) {
-        LOGF_ERROR("[RGBASurfaceBackend] Failed to reset copy command list: 0x%08X", hr);
-        return hr;
-    }
+    CD3DX12_CPU_DESCRIPTOR_HANDLE srvHandle(m_srvHeap->GetCPUDescriptorHandleForHeapStart());
 
-    // Transition source texture to COPY_SOURCE
-    D3D12_RESOURCE_BARRIER sourceBarrier = {};
-    sourceBarrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
-    sourceBarrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
-    sourceBarrier.Transition.pResource = sourceTexture;
-    sourceBarrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COMMON;  // CUDA uses COMMON
-    sourceBarrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE;
-    sourceBarrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
-
-    // Transition staging texture to COPY_DEST (only if not first copy)
-    // First copy: staging texture already in COPY_DEST state (created with that state)
-    // Subsequent copies: staging texture in PIXEL_SHADER_RESOURCE state (from previous render)
-    if (m_firstCopy) {
-        // First copy: only transition source
-        m_copyCommandList->ResourceBarrier(1, &sourceBarrier);
-        m_firstCopy = false;
-        LOGF_DEBUG("[RGBASurfaceBackend] First copy: staging texture already in COPY_DEST state");
-    } else {
-        // Subsequent copies: transition both staging and source
-        D3D12_RESOURCE_BARRIER stagingToCopyDest = {};
-        stagingToCopyDest.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
-        stagingToCopyDest.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
-        stagingToCopyDest.Transition.pResource = m_stagingTexture.Get();
-        stagingToCopyDest.Transition.StateBefore = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
-        stagingToCopyDest.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST;
-        stagingToCopyDest.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
-
-        D3D12_RESOURCE_BARRIER barriers[] = { stagingToCopyDest, sourceBarrier };
-        m_copyCommandList->ResourceBarrier(2, barriers);
-    }
-
-    // Copy texture
-    m_copyCommandList->CopyResource(m_stagingTexture.Get(), sourceTexture);
-
-    // Transition source back to COMMON (for CUDA)
-    sourceBarrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE;
-    sourceBarrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COMMON;
-    m_copyCommandList->ResourceBarrier(1, &sourceBarrier);
-
-    // Transition staging texture to PIXEL_SHADER_RESOURCE for rendering
-    D3D12_RESOURCE_BARRIER stagingBarrier = {};
-    stagingBarrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
-    stagingBarrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
-    stagingBarrier.Transition.pResource = m_stagingTexture.Get();
-    stagingBarrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST;
-    stagingBarrier.Transition.StateAfter = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
-    stagingBarrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
-
-    m_copyCommandList->ResourceBarrier(1, &stagingBarrier);
-
-    // Close command list
-    hr = m_copyCommandList->Close();
-    if (FAILED(hr)) {
-        LOGF_ERROR("[RGBASurfaceBackend] Failed to close copy command list: 0x%08X", hr);
-        return hr;
-    }
-
-    // Execute command list
-    ID3D12CommandList* commandLists[] = { m_copyCommandList.Get() };
-    m_commandQueue->ExecuteCommandLists(1, commandLists);
-
-    // Signal fence after copy submission
-    m_copyFenceValue++;
-    hr = m_commandQueue->Signal(m_copyFence.Get(), m_copyFenceValue);
-    if (FAILED(hr)) {
-        LOGF_ERROR("[RGBASurfaceBackend] Failed to signal copy fence: 0x%08X", hr);
-        return hr;
-    }
-
-    LOGF_DEBUG("[RGBASurfaceBackend] GPU copy submitted (fence value: %llu)", m_copyFenceValue);
-
-    return S_OK;
-}
-
-HRESULT RGBASurfaceBackend::WaitForCopyCompletion() {
-    if (!m_copyFence || m_copyFenceEvent == nullptr) {
-        LOGF_ERROR("[RGBASurfaceBackend] Copy fence or event not initialized");
-        return E_NOT_VALID_STATE;
-    }
-
-    // Check if copy already completed
-    if (m_copyFence->GetCompletedValue() >= m_copyFenceValue) {
-        LOGF_DEBUG("[RGBASurfaceBackend] GPU copy already complete (fence value: %llu)", m_copyFenceValue);
-        return S_OK;  // Already complete
-    }
-
-    // Wait for GPU copy to complete
-    HRESULT hr = m_copyFence->SetEventOnCompletion(
-        m_copyFenceValue,
-        m_copyFenceEvent
+    m_device->CreateShaderResourceView(
+        m_rgbaTextures[m_renderTextureIndex].Get(),
+        &srvDesc,
+        srvHandle
     );
-    if (FAILED(hr)) {
-        LOGF_ERROR("[RGBASurfaceBackend] SetEventOnCompletion failed: 0x%08X", hr);
-        return hr;
-    }
 
-    DWORD waitResult = WaitForSingleObject(m_copyFenceEvent, 5000);  // 5 second timeout
-    if (waitResult != WAIT_OBJECT_0) {
-        LOGF_ERROR("[RGBASurfaceBackend] Wait failed or timed out: %lu", waitResult);
-        return E_FAIL;
-    }
-
-    LOGF_DEBUG("[RGBASurfaceBackend] GPU copy completed (fence value: %llu)", m_copyFenceValue);
+    LOGF_DEBUG("[RGBASurfaceBackend] Updated SRV for render texture[%d]", m_renderTextureIndex);
 
     return S_OK;
 }
diff --git a/vav2/platforms/windows/applications/vav2player/Vav2Player/src/Rendering/RGBASurfaceBackend.h b/vav2/platforms/windows/applications/vav2player/Vav2Player/src/Rendering/RGBASurfaceBackend.h
index dc3f506..6857190 100644
--- a/vav2/platforms/windows/applications/vav2player/Vav2Player/src/Rendering/RGBASurfaceBackend.h
+++ b/vav2/platforms/windows/applications/vav2player/Vav2Player/src/Rendering/RGBASurfaceBackend.h
@@ -39,24 +39,25 @@ public:
 
     HRESULT CreateVideoTexture(uint32_t width, uint32_t height) override;
     ID3D12Resource* GetVideoTexture() const override {
-        return m_rgbaTextures[m_currentTextureIndex].Get();
+        return m_rgbaTextures[m_renderTextureIndex].Get();
     }
 
-    // Get next available texture for decoding (rotates buffer index)
-    ID3D12Resource* GetNextVideoTexture();
+    // Triple buffering management
+    // Get current texture being rendered to screen
+    ID3D12Resource* GetCurrentRenderTexture() const;
 
-    // Get current rendering texture index
-    int GetCurrentTextureIndex() const { return m_currentTextureIndex; }
+    // Get next texture for decoding (not currently being rendered)
+    ID3D12Resource* GetNextDecodeTexture() const;
 
-    // Staging texture management for safe rendering
-    // Copy decoder texture to stable staging texture
-    HRESULT CopyToStagingTexture(ID3D12Resource* sourceTexture);
+    // Advance frame: switch render/decode indices after decoding completes (normal operation)
+    void AdvanceFrame();
 
-    // Wait for GPU copy to complete
-    HRESULT WaitForCopyCompletion();
+    // Advance decode only: move to next decode texture without changing render index (filling phase)
+    void AdvanceDecodeOnly();
 
-    // Get stable staging texture for rendering (never overwritten by decoder)
-    ID3D12Resource* GetStagingTexture() const { return m_stagingTexture.Get(); }
+    // Get current indices for debugging
+    int GetRenderTextureIndex() const { return m_renderTextureIndex; }
+    int GetDecodeTextureIndex() const { return m_decodeTextureIndex; }
 
     HRESULT RenderToBackBuffer(
         const VavCoreVideoFrame& frame,
@@ -73,27 +74,19 @@ private:
     ID3D12Device* m_device = nullptr;
     ID3D12CommandQueue* m_commandQueue = nullptr;
 
-    // RGBA video textures (triple buffering) - Decoder writes here
+    // RGBA video textures (triple buffering)
     // Format: DXGI_FORMAT_R8G8B8A8_UNORM
     // Flags: D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS (for CUDA Surface Object)
     // Layout: D3D12_TEXTURE_LAYOUT_UNKNOWN (tiled, handled by CUDA Surface Objects)
+    //
+    // Triple buffering roles:
+    // - m_renderTextureIndex: Currently rendering to screen (safe to read)
+    // - m_decodeTextureIndex: Next target for decoding (safe to write)
+    // - Third texture: Idle, previously decoded (ready to become render texture)
     static const int BUFFER_COUNT = 3;
     ComPtr<ID3D12Resource> m_rgbaTextures[BUFFER_COUNT];
-    int m_currentTextureIndex = 0;
-
-    // Staging texture - Stable copy for rendering (decoder never touches this)
-    // This texture is copied from m_rgbaTextures at 30fps
-    // Renderer always reads from this texture (safe from race conditions)
-    ComPtr<ID3D12Resource> m_stagingTexture;
-
-    // Command allocator and list for async texture copy
-    ComPtr<ID3D12CommandAllocator> m_copyCommandAllocator;
-    ComPtr<ID3D12GraphicsCommandList> m_copyCommandList;
-
-    // GPU synchronization for copy operations
-    ComPtr<ID3D12Fence> m_copyFence;
-    UINT64 m_copyFenceValue = 0;
-    HANDLE m_copyFenceEvent = nullptr;
+    int m_renderTextureIndex = 0;  // Index into m_rgbaTextures of the texture being rendered
+    int m_decodeTextureIndex = 0;  // Index of the next decode target; starts equal to the render index
 
     // Graphics pipeline for simple RGBA texture sampling
     ComPtr<ID3D12RootSignature> m_rootSignature;
@@ -122,7 +115,6 @@ private:
     uint32_t m_height = 0;  // Container height
     uint32_t m_videoWidth = 0;
     uint32_t m_videoHeight = 0;
-    bool m_firstCopy = true;  // Track first copy to handle initial state
 
     // Helper methods
     HRESULT CreateGraphicsResources();
@@ -131,6 +123,7 @@ private:
     HRESULT CreatePipelineState();
     HRESULT CreateSrvHeap();
     HRESULT UpdateConstantBuffer();
+    HRESULT UpdateSRVForCurrentRenderTexture();
 };
 
 } // namespace Vav2Player
diff --git a/vav2/platforms/windows/vavcore/src/Decoder/NVDECAV1Decoder.cpp b/vav2/platforms/windows/vavcore/src/Decoder/NVDECAV1Decoder.cpp
index a1ca7f9..29ca8d4 100644
--- a/vav2/platforms/windows/vavcore/src/Decoder/NVDECAV1Decoder.cpp
+++ b/vav2/platforms/windows/vavcore/src/Decoder/NVDECAV1Decoder.cpp
@@ -1374,12 +1374,19 @@ int CUDAAPI NVDECAV1Decoder::HandlePictureDisplay(void* user_data, CUVIDPARSERDI
     slot.pts = timestamp;
     slot.ready_for_display.store(true);
 
-    // Enqueue picture_index for display queue (for B-frame reordering)
+    // Enqueue DisplayQueueEntry with PTS for B-frame reordering
     {
         std::lock_guard<std::mutex> lock(decoder->m_displayMutex);
-        decoder->m_displayQueue.push(pic_idx);
-        LOGF_DEBUG("[HandlePictureDisplay] Pushed picture_index=%d (pts=%lld) to display queue (size: %zu)",
-                   pic_idx, timestamp, decoder->m_displayQueue.size());
+
+        DisplayQueueEntry entry;
+        entry.frame_slot_index = pic_idx;
+        entry.pts = timestamp;
+        entry.submission_id = slot.submission_id;
+
+        decoder->m_displayQueue.push(entry);
+
+        LOGF_DEBUG("[HandlePictureDisplay] Pushed DisplayQueueEntry: slot=%d, pts=%lld, submission_id=%llu (queue size: %zu)",
+                   pic_idx, timestamp, slot.submission_id, decoder->m_displayQueue.size());
     }
 
     return 1;
@@ -1572,11 +1579,17 @@ bool NVDECAV1Decoder::DecodeToSurface(const uint8_t* packet_data, size_t packet_
                                      VideoFrame& output_frame) {
     LOGF_DEBUG("[DecodeToSurface] Called with target_type=%d", static_cast<int>(target_type));
 
-    if (!m_initialized || !packet_data) {
-        LOGF_ERROR("[DecodeToSurface] Not initialized or null packet_data");
+    if (!m_initialized) {
+        LOGF_ERROR("[DecodeToSurface] Not initialized");
         return false;
     }
 
+    // Handle NULL packet_data as flush mode (end of file reached)
+    if (!packet_data || packet_size == 0) {
+        LOGF_DEBUG("[DecodeToSurface] NULL packet - flush mode (end of file)");
+        m_state = DecoderState::FLUSHING;
+    }
+
     // Set CUDA context for current thread
     {
         std::lock_guard<std::mutex> contextLock(m_cudaContextMutex);
@@ -1617,13 +1630,25 @@ bool NVDECAV1Decoder::DecodeToSurface(const uint8_t* packet_data, size_t packet_
         // ===== Step 2: Submit packet to NVDEC parser =====
         // This triggers HandlePictureDecode (if new frame) and HandlePictureDisplay (always)
         CUVIDSOURCEDATAPACKET packet = {};
-        packet.payload = packet_data;
-        packet.payload_size = static_cast<unsigned long>(packet_size);
-        packet.flags = CUVID_PKT_ENDOFPICTURE;
-        packet.timestamp = 0;  // Not used - NVDEC parser overwrites this value
 
-        LOGF_INFO("[DecodeToSurface] Calling cuvidParseVideoData (submission_id=%llu)...",
-                  my_submission_id);
+        // Handle flush mode (NULL packet)
+        if (m_state == DecoderState::FLUSHING) {
+            // Flush mode: send end-of-stream packet to drain CUDA DPB
+            packet.flags = CUVID_PKT_ENDOFSTREAM;
+            packet.payload = nullptr;
+            packet.payload_size = 0;
+            LOGF_INFO("[DecodeToSurface] Flush mode: sending ENDOFSTREAM packet (submission_id=%llu)",
+                      my_submission_id);
+        } else {
+            // Normal mode: send actual packet data
+            packet.payload = packet_data;
+            packet.payload_size = static_cast<unsigned long>(packet_size);
+            packet.flags = CUVID_PKT_ENDOFPICTURE;
+            packet.timestamp = 0;  // Not used - NVDEC parser overwrites this value
+
+            LOGF_INFO("[DecodeToSurface] Normal mode: calling cuvidParseVideoData (submission_id=%llu)...",
+                      my_submission_id);
+        }
 
         CUresult result = cuvidParseVideoData(m_parser, &packet);
         // cuvidParseVideoData is SYNCHRONOUS - all callbacks execute before return
@@ -1647,33 +1672,63 @@ bool NVDECAV1Decoder::DecodeToSurface(const uint8_t* packet_data, size_t packet_
         {
             std::lock_guard<std::mutex> lock(m_displayMutex);
 
-            // During initial buffering, accept packets until display queue has frames
-            if (m_displayQueue.empty() && !m_initialBufferingComplete) {
-                LOGF_DEBUG("[DecodeToSurface] PACKET ACCEPTED - Initial buffering (queue size: 0)");
-                return VAVCORE_PACKET_ACCEPTED;
+            // Transition from READY to BUFFERING on first packet
+            if (m_state == DecoderState::READY && m_displayQueue.empty()) {
+                m_state = DecoderState::BUFFERING;
+                LOGF_DEBUG("[DecodeToSurface] State transition: READY → BUFFERING");
             }
 
-            // Once we have frames in queue, mark buffering complete
-            if (!m_displayQueue.empty() && !m_initialBufferingComplete) {
-                m_initialBufferingComplete = true;
-                LOGF_INFO("[DecodeToSurface] Initial buffering complete, queue size: %zu", m_displayQueue.size());
+            // During initial buffering, accept packets until display queue has frames
+            if (m_displayQueue.empty() && m_state == DecoderState::BUFFERING) {
+                LOGF_DEBUG("[DecodeToSurface] PACKET ACCEPTED - Initial buffering (queue size: 0)");
+                // Return false to indicate no frame yet (still buffering)
+                // The C API wrapper will convert this to VAVCORE_PACKET_ACCEPTED
+                return false;
+            }
+
+            // Once we have frames in queue, transition to DECODING
+            if (!m_displayQueue.empty() && m_state == DecoderState::BUFFERING) {
+                m_state = DecoderState::DECODING;
+                LOGF_INFO("[DecodeToSurface] State transition: BUFFERING → DECODING (queue size: %zu)", m_displayQueue.size());
             }
         }
 
-        // ===== Step 4: Pop from display queue to get picture_index =====
+        // ===== Step 4: Pop from display queue to get picture_index (PTS-ordered) =====
+        DisplayQueueEntry entry;
         int pic_idx = -1;
         {
             std::lock_guard<std::mutex> lock(m_displayMutex);
 
             if (m_displayQueue.empty()) {
-                LOGF_ERROR("[DecodeToSurface] Display queue EMPTY after buffering complete (SHOULD NOT HAPPEN!)");
-                return false;
+                // Check if we're in flush mode
+                if (m_state == DecoderState::FLUSHING) {
+                    // Flush mode: no more frames in CUDA DPB
+                    // Return false to indicate no frame, caller will check end-of-stream
+                    LOGF_INFO("[DecodeToSurface] Flush complete: all frames drained from CUDA DPB");
+
+                    // Release pending submission before returning
+                    {
+                        std::lock_guard<std::mutex> lock2(m_submissionMutex);
+                        m_pendingSubmissions[pending_idx].in_use.store(false);
+                    }
+
+                    // Return false - the C API wrapper will convert this to VAVCORE_END_OF_STREAM
+                    // when combined with file reader's IsEndOfFile() check
+                    return false;
+                } else {
+                    // Normal mode: queue empty unexpectedly
+                    LOGF_ERROR("[DecodeToSurface] Display queue EMPTY after buffering complete (SHOULD NOT HAPPEN!)");
+                    return false;
+                }
             }
 
-            pic_idx = m_displayQueue.front();
+            // Pop from priority queue (PTS-ordered)
+            entry = m_displayQueue.top();
             m_displayQueue.pop();
-            LOGF_INFO("[DecodeToSurface] Popped picture_index=%d from display queue (queue size now: %zu)",
-                     pic_idx, m_displayQueue.size());
+            pic_idx = entry.frame_slot_index;
+
+            LOGF_INFO("[DecodeToSurface] Popped DisplayQueueEntry: slot=%d, pts=%lld, submission_id=%llu (queue size now: %zu)",
+                     pic_idx, entry.pts, entry.submission_id, m_displayQueue.size());
         }
 
         if (pic_idx < 0 || pic_idx >= RING_BUFFER_SIZE) {
@@ -1697,21 +1752,37 @@ bool NVDECAV1Decoder::DecodeToSurface(const uint8_t* packet_data, size_t packet_
 
         LOGF_DEBUG("[DecodeToSurface] Frame slot %d ready for display", pic_idx);
 
-        // ===== Step 6: Copy from CUDA DPB to target surface =====
-        if (!CopyFromCUDADPB(pic_idx, target_type, target_surface, output_frame)) {
+        // ===== Step 6: Update target_surface for this frame =====
+        // CRITICAL: SwapChain provides different target_surface each frame!
+        // Always update slot.target_surface to current one.
+        LOGF_DEBUG("[DecodeToSurface] Updating target_surface: %p -> %p (pic_idx=%d)",
+                   slot.target_surface, target_surface, pic_idx);
+
+        if (target_surface == nullptr) {
+            LOGF_ERROR("[DecodeToSurface] ERROR: target_surface is NULL for pic_idx=%d", pic_idx);
+            return false;
+        }
+
+        // Always update to current target_surface (SwapChain back buffer changes each frame)
+        slot.target_surface = target_surface;
+        slot.surface_type = target_type;
+
+        // ===== Step 7: Copy from CUDA DPB to target surface =====
+        // slot.target_surface/surface_type were just set from this call's non-null target_surface and target_type
+        if (!CopyFromCUDADPB(pic_idx, slot.surface_type, slot.target_surface, output_frame)) {
             LOGF_ERROR("[DecodeToSurface] CopyFromCUDADPB failed for picture_index=%d", pic_idx);
             return false;
         }
 
         LOGF_INFO("[DecodeToSurface] SUCCESS - Frame rendered from CUDA DPB (pic_idx=%d)", pic_idx);
 
-        // ===== Step 7: Mark slot as reusable =====
+        // ===== Step 8: Mark slot as reusable =====
         slot.ready_for_display.store(false);
         slot.in_use.store(false);
 
         LOGF_DEBUG("[DecodeToSurface] Released frame slot %d", pic_idx);
 
-        // ===== Step 8: Release pending submission =====
+        // ===== Step 9: Release pending submission =====
         {
             std::lock_guard<std::mutex> lock(m_submissionMutex);
             m_pendingSubmissions[pending_idx].in_use.store(false);
diff --git a/vav2/platforms/windows/vavcore/src/Decoder/NVDECAV1Decoder.h b/vav2/platforms/windows/vavcore/src/Decoder/NVDECAV1Decoder.h
index fb3ce4a..c5590a7 100644
--- a/vav2/platforms/windows/vavcore/src/Decoder/NVDECAV1Decoder.h
+++ b/vav2/platforms/windows/vavcore/src/Decoder/NVDECAV1Decoder.h
@@ -102,6 +102,14 @@ protected:
     void LogCUDAError(CUresult result, const std::string& operation) const;
 
 private:
+    // Decode pipeline phase held in m_state and advanced by DecodeToSurface()
+    enum class DecoderState {
+        READY,           // No packet handled yet; moves to BUFFERING while the display queue is empty
+        BUFFERING,       // Packets accepted but display queue still empty; moves to DECODING once it has frames
+        DECODING,        // Normal frame-by-frame decoding
+        FLUSHING         // NULL/zero-size packet seen; ENDOFSTREAM is sent and the display queue is drained
+    };
+
     // CUDA and NVDEC objects
     CUcontext m_cuContext = nullptr;
     CUdevice m_cudaDevice = 0;
@@ -224,7 +232,23 @@ private:
     void PollingThreadFunc();                     // Polling thread function
 
     // Display-only packet handling (B-frame reordering)
-    std::queue<int> m_displayQueue;  // Queue of picture_index from HandlePictureDisplay
+    // DisplayQueueEntry: one displayable frame, pushed by HandlePictureDisplay for PTS-based reordering
+    struct DisplayQueueEntry {
+        int frame_slot_index;    // Picture index (pic_idx) of the frame slot
+        int64_t pts;             // Presentation timestamp; the queue's sort key
+        uint64_t submission_id;  // submission_id copied from the frame slot
+    };
+
+    // PTSComparator: smallest PTS pops first. NOTE(review): entries with equal PTS have no defined pop order
+    struct PTSComparator {
+        bool operator()(const DisplayQueueEntry& a, const DisplayQueueEntry& b) const {
+            return a.pts > b.pts;  // Min-heap: smallest PTS has highest priority
+        }
+    };
+
+    std::priority_queue<DisplayQueueEntry,
+                        std::vector<DisplayQueueEntry>,
+                        PTSComparator> m_displayQueue;  // PTS-based priority queue
     std::mutex m_displayMutex;
 
     // Helper methods
@@ -245,8 +269,8 @@ private:
     bool CopyFromCUDADPB(int pic_idx, VavCoreSurfaceType target_type,
                          void* target_surface, VideoFrame& output_frame);
 
-    // Initial buffering state
-    std::atomic<bool> m_initialBufferingComplete{false};
+    // Decoder state (replaces m_initialBufferingComplete and m_endOfFileReached). NOTE(review): plain enum, not atomic - confirm all accesses are serialized
+    DecoderState m_state = DecoderState::READY;
 
     // NV12ToRGBAConverter reinitialization flag (set by HandleVideoSequence)
     std::atomic<bool> m_converterNeedsReinit{false};