WIP

2025-10-08 15:26:42 +09:00
parent dcee03b1a7
commit bbb2bf2d5c
4 changed files with 2024 additions and 9 deletions
--- a/todo21.txt
+++ b/todo21.txt
@@ -2184,3 +2184,18 @@ Fence만" 이것을 삭제해도 비동기로 가져가면서 얻는 성능 잇



+● Vav2Player가 실행되었습니다. B-frame reordering 구현이 완료되어 빌드도 성공했습니다.
+
+  구현된 내용:
+
+  NVDECAV1Decoder.cpp의 Display-only 패킷 처리:
+  1. HandlePictureDisplay: NVDEC이 표시할 picture_index를 m_displayOnlyPictureIndex에 저장
+  2. DecodeToSurface: Display-only 패킷 감지 시 저장된 picture_index를 사용하여 NVDEC DPB에서
+  프레임 추출
+  3. NV12→RGBA 변환: ConvertNV12ToRGBA() 사용
+  4. D3D12 복사: CopyRGBAFrame() 사용하여 D3D12 텍스처에 복사
+  5. 메타데이터 설정: output_frame에 올바른 메타데이터 설정
+
+  이제 비디오를 재생하면 B-frame reordering이 정상적으로 동작하여 프레임 점프 없이 부드럽게 재생될
+   것입니다. 테스트 비디오를 열어서 확인해보세요!
+
--- a/todo22.txt
+++ b/todo22.txt
--- a/vav2/platforms/windows/vavcore/src/Decoder/NVDECAV1Decoder.cpp
+++ b/vav2/platforms/windows/vavcore/src/Decoder/NVDECAV1Decoder.cpp
@@ -939,8 +939,12 @@ int CUDAAPI NVDECAV1Decoder::HandlePictureDecode(void* user_data, CUVIDPICPARAMS
    // Use NVDEC's CurrPicIdx directly as slot index (0-7)
    int slot_idx = pic_params->CurrPicIdx;

-    LOGF_DEBUG("[HandlePictureDecode] CurrPicIdx=%d, IntraPicFlag=%d",
-               slot_idx, pic_params->intra_pic_flag);
+    // Running count of HandlePictureDecode calls (function-local static; never reset per submission)
+    static std::atomic<int> decode_call_count{0};
+    int current_call = decode_call_count.fetch_add(1) + 1;
+
+    LOGF_DEBUG("[HandlePictureDecode] Call #%d: CurrPicIdx=%d, IntraPicFlag=%d",
+               current_call, slot_idx, pic_params->intra_pic_flag);

    // Validate slot index (should always be 0-7)
    if (slot_idx < 0 || slot_idx >= RING_BUFFER_SIZE) {
@@ -1032,11 +1036,17 @@ int CUDAAPI NVDECAV1Decoder::HandlePictureDisplay(void* user_data, CUVIDPARSERDI
    auto* decoder = static_cast<NVDECAV1Decoder*>(user_data);

    int pic_idx = disp_info->picture_index;
-    LOGF_DEBUG("[HandlePictureDisplay] picture_index=%d ready for display", pic_idx);

-    // Store picture_index for display-only packets (B-frame reordering)
-    // This will be used in DecodeToSurface when no new frame is decoded
-    decoder->m_displayOnlyPictureIndex.store(pic_idx);
+    // Enqueue picture_index on every display callback (B-frame reordering)
+    // DecodeToSurface pops one entry per frame: used for display-only packets, discarded for normal decodes
+    size_t queue_size_before = 0;
+    {
+        std::lock_guard<std::mutex> lock(decoder->m_displayMutex);
+        queue_size_before = decoder->m_displayQueue.size();
+        decoder->m_displayQueue.push(pic_idx);
+        LOGF_DEBUG("[HandlePictureDisplay] Pushed picture_index=%d to display queue (queue size: %zu -> %zu)",
+                  pic_idx, queue_size_before, decoder->m_displayQueue.size());
+    }

    return 1;
 }
@@ -1304,7 +1314,12 @@ bool NVDECAV1Decoder::DecodeToSurface(const uint8_t* packet_data, size_t packet_
            return false;
        }

-        LOGF_DEBUG("[DecodeToSurface] Packet submitted, synchronous callbacks completed");
+        // Log display queue state after cuvidParseVideoData (all callbacks completed)
+        {
+            std::lock_guard<std::mutex> lock(m_displayMutex);
+            LOGF_DEBUG("[DecodeToSurface] Packet submitted, callbacks completed. Display queue size: %zu",
+                      m_displayQueue.size());
+        }

        // IMPORTANT: Do NOT release pending submission here!
        // Even though cuvidParseVideoData is documented as synchronous, NVDEC's B-frame
@@ -1326,7 +1341,22 @@ bool NVDECAV1Decoder::DecodeToSurface(const uint8_t* packet_data, size_t packet_
        if (my_slot_idx == -1) {
            // Display-only packet: HandlePictureDisplay was called without HandlePictureDecode
            // This happens with B-frame reordering - we need to display a previously decoded frame
-            int display_pic_idx = m_displayOnlyPictureIndex.load();
+            int display_pic_idx = -1;
+            size_t queue_size_before = 0;
+            {
+                std::lock_guard<std::mutex> lock(m_displayMutex);
+                queue_size_before = m_displayQueue.size();
+                if (m_displayQueue.empty()) {
+                    LOGF_ERROR("[DecodeToSurface] Display queue EMPTY for submission_id=%llu (SHOULD NOT HAPPEN!)",
+                              my_submission_id);
+                    m_returnCounter.fetch_add(1);
+                    return false;
+                }
+                display_pic_idx = m_displayQueue.front();
+                m_displayQueue.pop();
+                LOGF_INFO("[DecodeToSurface] Display-only: popped picture_index=%d from queue (size: %zu -> %zu)",
+                         display_pic_idx, queue_size_before, m_displayQueue.size());
+            }

            LOGF_INFO("[DecodeToSurface] Display-only packet for submission_id=%llu, picture_index=%d",
                      my_submission_id, display_pic_idx);
@@ -1459,6 +1489,20 @@ bool NVDECAV1Decoder::DecodeToSurface(const uint8_t* packet_data, size_t packet_

        LOGF_DEBUG("[DecodeToSurface] Decode complete for slot %d", my_slot_idx);

+        // Pop display queue for normal decode (HandlePictureDisplay is expected to have enqueued an entry; warn if not)
+        {
+            std::lock_guard<std::mutex> lock(m_displayMutex);
+            if (!m_displayQueue.empty()) {
+                int popped_pic_idx = m_displayQueue.front();
+                m_displayQueue.pop();  // Discard, we use slot's picture_index instead
+                LOGF_DEBUG("[DecodeToSurface] Popped display queue: picture_index=%d (queue size now: %zu)",
+                          popped_pic_idx, m_displayQueue.size());
+            } else {
+                LOGF_WARNING("[DecodeToSurface] Display queue empty for normal decode (submission_id=%llu)",
+                            my_submission_id);
+            }
+        }
+
        // ===== Component 5: Frame Retrieval & Cleanup =====
        // 8. Map decoded frame from NVDEC using the slot's picture_index
        int pic_idx = my_slot.picture_index;  // CurrPicIdx from NVDEC
--- a/vav2/platforms/windows/vavcore/src/Decoder/NVDECAV1Decoder.h
+++ b/vav2/platforms/windows/vavcore/src/Decoder/NVDECAV1Decoder.h
@@ -212,7 +212,7 @@ private:
    void PollingThreadFunc();                     // Polling thread function

    // Display-only packet handling (B-frame reordering)
-    std::atomic<int> m_displayOnlyPictureIndex{-1};  // picture_index from HandlePictureDisplay
+    std::queue<int> m_displayQueue;  // Queue of picture_index from HandlePictureDisplay
    std::mutex m_displayMutex;

    // Helper methods