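Fix bug: match NVDEC decode callbacks to the newest active pending submission, treat display-only packets as "no frame", and scale the decode wait timeout with resolution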
This commit is contained in:
@@ -904,34 +904,41 @@ int CUDAAPI NVDECAV1Decoder::HandlePictureDecode(void* user_data, CUVIDPICPARAMS
|
||||
|
||||
DecodeSlot& slot = decoder->m_ringBuffer[slot_idx];
|
||||
|
||||
// Find pending submission context using most recent submission_id
|
||||
// cuvidParseVideoData is SYNCHRONOUS - the callback is for the packet we just submitted
|
||||
// Therefore, m_submissionCounter - 1 is the submission_id for THIS packet
|
||||
// Find pending submission context for this decode slot
|
||||
// CRITICAL: Search for the HIGHEST submission_id (most recent packet being processed)
|
||||
// This handles the case where the callbacks for one packet are delivered during a later cuvidParseVideoData call (frame reordering) instead of during that packet's own call
|
||||
uint64_t submission_id = 0;
|
||||
size_t pending_idx = 0;
|
||||
bool found = false;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(decoder->m_submissionMutex);
|
||||
|
||||
// Get the most recent submission (the one that triggered this callback)
|
||||
uint64_t current_submission_id = decoder->m_submissionCounter.load() - 1;
|
||||
pending_idx = current_submission_id % RING_BUFFER_SIZE;
|
||||
// Search backwards from current submission counter to find most recent active pending
|
||||
uint64_t current_counter = decoder->m_submissionCounter.load();
|
||||
uint64_t max_submission_id = 0;
|
||||
bool found_any = false;
|
||||
|
||||
auto& pending = decoder->m_pendingSubmissions[pending_idx];
|
||||
for (size_t i = 0; i < RING_BUFFER_SIZE; i++) {
|
||||
auto& pending = decoder->m_pendingSubmissions[i];
|
||||
|
||||
// Verify this pending submission is in use and matches the slot
|
||||
if (pending.in_use.load()) {
|
||||
// Copy pending submission context to decode slot
|
||||
slot.target_surface = pending.target_surface;
|
||||
slot.surface_type = pending.surface_type;
|
||||
slot.submission_id = pending.submission_id;
|
||||
submission_id = pending.submission_id;
|
||||
if (pending.in_use.load()) {
|
||||
// Found an active pending submission
|
||||
if (!found_any || pending.submission_id > max_submission_id) {
|
||||
// This is the newest one so far
|
||||
slot.target_surface = pending.target_surface;
|
||||
slot.surface_type = pending.surface_type;
|
||||
slot.submission_id = pending.submission_id;
|
||||
submission_id = pending.submission_id;
|
||||
pending_idx = i;
|
||||
max_submission_id = pending.submission_id;
|
||||
found_any = true;
|
||||
found = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Release pending slot for reuse
|
||||
pending.in_use.store(false);
|
||||
found = true;
|
||||
} else {
|
||||
LOGF_ERROR("[HandlePictureDecode] Pending submission slot %zu not in use!", pending_idx);
|
||||
if (!found) {
|
||||
LOGF_ERROR("[HandlePictureDecode] No active pending submission found for slot %d", slot_idx);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1181,12 +1188,11 @@ bool NVDECAV1Decoder::DecodeToSurface(const uint8_t* packet_data, size_t packet_
|
||||
LOGF_DEBUG("[DecodeToSurface] Allocated submission_id=%llu, pending_idx=%zu",
|
||||
my_submission_id, pending_idx);
|
||||
|
||||
// 2. Wait if pending slot is still in use (overflow protection)
|
||||
while (m_pendingSubmissions[pending_idx].in_use.load()) {
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(1));
|
||||
}
|
||||
|
||||
// 3. Store submission context in ring buffer slot
|
||||
// 2. Store submission context in ring buffer slot (overwrite old data)
|
||||
// No need to wait - ring buffer naturally cycles after 16 submissions
|
||||
// Old pending submissions will be overwritten, which is safe because:
|
||||
// - Decode slots already have their copy of pending data
|
||||
// - 16 slots is enough buffer for B-frame reordering
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_submissionMutex);
|
||||
auto& pending = m_pendingSubmissions[pending_idx];
|
||||
@@ -1194,7 +1200,7 @@ bool NVDECAV1Decoder::DecodeToSurface(const uint8_t* packet_data, size_t packet_
|
||||
pending.target_surface = target_surface;
|
||||
pending.surface_type = target_type;
|
||||
pending.submission_id = my_submission_id;
|
||||
pending.in_use.store(true);
|
||||
pending.in_use.store(true); // Mark as active for HandlePictureDecode search
|
||||
}
|
||||
|
||||
LOGF_DEBUG("[DecodeToSurface] Prepared submission_id=%llu, pending_idx=%zu",
|
||||
@@ -1212,7 +1218,7 @@ bool NVDECAV1Decoder::DecodeToSurface(const uint8_t* packet_data, size_t packet_
|
||||
my_submission_id);
|
||||
|
||||
CUresult result = cuvidParseVideoData(m_parser, &packet);
|
||||
// cuvidParseVideoData is SYNCHRONOUS - HandlePictureDecode called before return
|
||||
// cuvidParseVideoData is SYNCHRONOUS - all callbacks execute before return
|
||||
|
||||
if (result != CUDA_SUCCESS) {
|
||||
LOGF_ERROR("[DecodeToSurface] cuvidParseVideoData failed with code %d", result);
|
||||
@@ -1227,7 +1233,14 @@ bool NVDECAV1Decoder::DecodeToSurface(const uint8_t* packet_data, size_t packet_
|
||||
return false;
|
||||
}
|
||||
|
||||
LOGF_DEBUG("[DecodeToSurface] Packet submitted, callback completed");
|
||||
LOGF_DEBUG("[DecodeToSurface] Packet submitted, synchronous callbacks completed");
|
||||
|
||||
// IMPORTANT: Do NOT release pending submission here!
|
||||
// Even though cuvidParseVideoData is documented as synchronous, NVDEC's B-frame
|
||||
// reordering means callbacks from THIS packet may execute during FUTURE packets.
|
||||
// Pending submissions will naturally be overwritten when ring buffer wraps (16 slots).
|
||||
LOGF_DEBUG("[DecodeToSurface] Keeping pending_idx=%zu active (will be reused after %d submissions)",
|
||||
pending_idx, RING_BUFFER_SIZE);
|
||||
|
||||
// ===== Component 4: Wait and Retrieve =====
|
||||
// 5. Find which slot NVDEC used (check all slots for our submission_id)
|
||||
@@ -1240,8 +1253,13 @@ bool NVDECAV1Decoder::DecodeToSurface(const uint8_t* packet_data, size_t packet_
|
||||
}
|
||||
|
||||
if (my_slot_idx == -1) {
|
||||
LOGF_ERROR("[DecodeToSurface] Failed to find slot for submission_id=%llu", my_submission_id);
|
||||
return false;
|
||||
// Display-only packet: HandlePictureDisplay was called without HandlePictureDecode
|
||||
// This happens when a packet only triggers display of a previously decoded frame
|
||||
// No new frame was decoded, so we return false to indicate no frame is available
|
||||
LOGF_DEBUG("[DecodeToSurface] Display-only packet (no decode) for submission_id=%llu - returning false", my_submission_id);
|
||||
|
||||
m_returnCounter.fetch_add(1); // Advance counter to unblock FIFO queue
|
||||
return false; // No frame decoded - caller should use previous frame
|
||||
}
|
||||
|
||||
DecodeSlot& my_slot = m_ringBuffer[my_slot_idx];
|
||||
@@ -1254,14 +1272,27 @@ bool NVDECAV1Decoder::DecodeToSurface(const uint8_t* packet_data, size_t packet_
|
||||
|
||||
LOGF_DEBUG("[DecodeToSurface] My turn! submission_id=%llu", my_submission_id);
|
||||
|
||||
// 7. Wait for decode to complete
|
||||
// 7. Wait for decode to complete with adaptive timeout based on resolution
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(my_slot.slot_mutex);
|
||||
|
||||
if (!my_slot.frame_ready.wait_for(lock, std::chrono::milliseconds(500),
|
||||
// Adaptive timeout: base 500ms for 720p, scale by pixel count
|
||||
// 720p (1280x720 = 921,600 pixels) -> 500ms
|
||||
// 1080p (1920x1080 = 2,073,600 pixels) -> 1,125ms
|
||||
// 4K (3840x2160 = 8,294,400 pixels) -> 4,500ms
|
||||
const uint64_t base_pixels = 1280 * 720; // 720p reference
|
||||
const uint64_t base_timeout_ms = 500;
|
||||
const uint64_t current_pixels = static_cast<uint64_t>(m_width) * m_height;
|
||||
const uint64_t timeout_ms = std::max<uint64_t>(base_timeout_ms,
|
||||
(current_pixels * base_timeout_ms) / base_pixels);
|
||||
|
||||
LOGF_DEBUG("[DecodeToSurface] Adaptive timeout: %llums for %dx%d (%llu pixels)",
|
||||
timeout_ms, m_width, m_height, current_pixels);
|
||||
|
||||
if (!my_slot.frame_ready.wait_for(lock, std::chrono::milliseconds(timeout_ms),
|
||||
[&my_slot]() { return my_slot.is_ready.load(); })) {
|
||||
// Timeout - decode took too long
|
||||
LOGF_ERROR("[DecodeToSurface] Decode timeout for slot %d", my_slot_idx);
|
||||
LOGF_ERROR("[DecodeToSurface] Decode timeout for slot %d after %llums", my_slot_idx, timeout_ms);
|
||||
my_slot.in_use.store(false);
|
||||
m_returnCounter.fetch_add(1); // Skip to avoid deadlock
|
||||
return false;
|
||||
@@ -1443,6 +1474,8 @@ bool NVDECAV1Decoder::DecodeToSurface(const uint8_t* packet_data, size_t packet_
|
||||
|
||||
LOGF_DEBUG("[DecodeToSurface] Released slot %d", my_slot_idx);
|
||||
|
||||
// Note: the pending submission is intentionally NOT released here or after cuvidParseVideoData; it stays marked in_use until its ring buffer entry is overwritten by a later submission
|
||||
|
||||
// 10. Advance return counter (FIFO order)
|
||||
m_returnCounter.fetch_add(1);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user