This commit is contained in:
2025-10-08 15:26:42 +09:00
parent dcee03b1a7
commit bbb2bf2d5c
4 changed files with 2024 additions and 9 deletions

View File

@@ -2184,3 +2184,18 @@ Fence만" 이것을 삭제해도 비동기로 가져가면서 얻는 성능 잇
● Vav2Player가 실행되었습니다. B-frame reordering 구현이 완료되어 빌드도 성공했습니다.
구현된 내용:
NVDECAV1Decoder.cpp의 Display-only 패킷 처리:
1. HandlePictureDisplay: NVDEC이 표시할 picture_index를 m_displayOnlyPictureIndex에 저장
2. DecodeToSurface: Display-only 패킷 감지 시 저장된 picture_index를 사용하여 NVDEC DPB에서
프레임 추출
3. NV12→RGBA 변환: ConvertNV12ToRGBA() 사용
4. D3D12 복사: CopyRGBAFrame() 사용하여 D3D12 텍스처에 복사
5. 메타데이터 설정: output_frame에 올바른 메타데이터 설정
이제 비디오를 재생하면 B-frame reordering이 정상적으로 동작하여 프레임 점프 없이 부드럽게 재생될
것입니다. 테스트 비디오를 열어서 확인해보세요!

1956
todo22.txt Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -939,8 +939,12 @@ int CUDAAPI NVDECAV1Decoder::HandlePictureDecode(void* user_data, CUVIDPICPARAMS
// Use NVDEC's CurrPicIdx directly as slot index (0-7)
int slot_idx = pic_params->CurrPicIdx;
LOGF_DEBUG("[HandlePictureDecode] CurrPicIdx=%d, IntraPicFlag=%d",
slot_idx, pic_params->intra_pic_flag);
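// Running count of HandlePictureDecode invocations, used only for logging.
// NOTE: the counter is a function-local static, so it is shared by every decoder
// instance and is not reset per submission in the code shown here.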
static std::atomic<int> decode_call_count{0};
int current_call = decode_call_count.fetch_add(1) + 1;
LOGF_DEBUG("[HandlePictureDecode] Call #%d: CurrPicIdx=%d, IntraPicFlag=%d",
current_call, slot_idx, pic_params->intra_pic_flag);
// Validate slot index (should always be 0-7)
if (slot_idx < 0 || slot_idx >= RING_BUFFER_SIZE) {
@@ -1032,11 +1036,17 @@ int CUDAAPI NVDECAV1Decoder::HandlePictureDisplay(void* user_data, CUVIDPARSERDI
auto* decoder = static_cast<NVDECAV1Decoder*>(user_data);
int pic_idx = disp_info->picture_index;
LOGF_DEBUG("[HandlePictureDisplay] picture_index=%d ready for display", pic_idx);
// Store picture_index for display-only packets (B-frame reordering)
// This will be used in DecodeToSurface when no new frame is decoded
decoder->m_displayOnlyPictureIndex.store(pic_idx);
// Enqueue picture_index for every HandlePictureDisplay call (needed for B-frame reordering).
// DecodeToSurface pops one entry per packet: display-only packets use the popped value,
// normal decodes pop and discard it.
size_t queue_size_before = 0;
{
std::lock_guard<std::mutex> lock(decoder->m_displayMutex);
queue_size_before = decoder->m_displayQueue.size();
decoder->m_displayQueue.push(pic_idx);
LOGF_DEBUG("[HandlePictureDisplay] Pushed picture_index=%d to display queue (queue size: %zu -> %zu)",
pic_idx, queue_size_before, decoder->m_displayQueue.size());
}
return 1;
}
@@ -1304,7 +1314,12 @@ bool NVDECAV1Decoder::DecodeToSurface(const uint8_t* packet_data, size_t packet_
return false;
}
LOGF_DEBUG("[DecodeToSurface] Packet submitted, synchronous callbacks completed");
// Log display queue state after cuvidParseVideoData (all callbacks completed)
{
std::lock_guard<std::mutex> lock(m_displayMutex);
LOGF_DEBUG("[DecodeToSurface] Packet submitted, callbacks completed. Display queue size: %zu",
m_displayQueue.size());
}
// IMPORTANT: Do NOT release pending submission here!
// Even though cuvidParseVideoData is documented as synchronous, NVDEC's B-frame
@@ -1326,7 +1341,22 @@ bool NVDECAV1Decoder::DecodeToSurface(const uint8_t* packet_data, size_t packet_
if (my_slot_idx == -1) {
// Display-only packet: HandlePictureDisplay was called without HandlePictureDecode
// This happens with B-frame reordering - we need to display a previously decoded frame
int display_pic_idx = m_displayOnlyPictureIndex.load();
int display_pic_idx = -1;
size_t queue_size_before = 0;
{
std::lock_guard<std::mutex> lock(m_displayMutex);
queue_size_before = m_displayQueue.size();
if (m_displayQueue.empty()) {
LOGF_ERROR("[DecodeToSurface] Display queue EMPTY for submission_id=%llu (SHOULD NOT HAPPEN!)",
my_submission_id);
m_returnCounter.fetch_add(1);
return false;
}
display_pic_idx = m_displayQueue.front();
m_displayQueue.pop();
LOGF_INFO("[DecodeToSurface] Display-only: popped picture_index=%d from queue (size: %zu -> %zu)",
display_pic_idx, queue_size_before, m_displayQueue.size());
}
LOGF_INFO("[DecodeToSurface] Display-only packet for submission_id=%llu, picture_index=%d",
my_submission_id, display_pic_idx);
@@ -1459,6 +1489,20 @@ bool NVDECAV1Decoder::DecodeToSurface(const uint8_t* packet_data, size_t packet_
LOGF_DEBUG("[DecodeToSurface] Decode complete for slot %d", my_slot_idx);
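// Pop one display-queue entry for a normal decode. This assumes HandlePictureDisplay was
// also called for this frame; if the queue is empty we only log a warning and continue.
// NOTE(review): the popped value is not compared against the slot's picture_index, so a
// reordered display entry could be discarded here - confirm this is intended.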
{
std::lock_guard<std::mutex> lock(m_displayMutex);
if (!m_displayQueue.empty()) {
int popped_pic_idx = m_displayQueue.front();
m_displayQueue.pop(); // Discard, we use slot's picture_index instead
LOGF_DEBUG("[DecodeToSurface] Popped display queue: picture_index=%d (queue size now: %zu)",
popped_pic_idx, m_displayQueue.size());
} else {
LOGF_WARNING("[DecodeToSurface] Display queue empty for normal decode (submission_id=%llu)",
my_submission_id);
}
}
// ===== Component 5: Frame Retrieval & Cleanup =====
// 8. Map decoded frame from NVDEC using the slot's picture_index
int pic_idx = my_slot.picture_index; // CurrPicIdx from NVDEC

View File

@@ -212,7 +212,7 @@ private:
void PollingThreadFunc(); // Polling thread function
// Display-only packet handling (B-frame reordering)
std::atomic<int> m_displayOnlyPictureIndex{-1}; // picture_index from HandlePictureDisplay
std::queue<int> m_displayQueue; // Queue of picture_index from HandlePictureDisplay
std::mutex m_displayMutex;
// Helper methods