WIP
This commit is contained in:
416
vav2/docs/working/NVDEC_State_Machine_Refactoring.md
Normal file
416
vav2/docs/working/NVDEC_State_Machine_Refactoring.md
Normal file
@@ -0,0 +1,416 @@
|
||||
# NVDEC Decoder State Machine Refactoring Design
|
||||
|
||||
## Problem Statement
|
||||
|
||||
The current `NVDECAV1Decoder::DecodeToSurface()` has excessive complexity:
|
||||
- **13+ state variables** tracked across multiple atomic flags and mutexes
|
||||
- **9+ conditional branches** with nested conditions
|
||||
- **~150 lines** in a single function
|
||||
- **High path complexity** (9 branch points yield up to 2^9 = 512 possible execution paths; cyclomatic complexity itself is ~10)
|
||||
|
||||
This makes the code:
|
||||
- Hard to maintain and debug
|
||||
- Difficult to test comprehensively
|
||||
- Prone to race conditions and edge cases
|
||||
- Challenging to extend with new features
|
||||
|
||||
## Solution: State Machine Pattern
|
||||
|
||||
### Core Design Principle
|
||||
|
||||
**Consolidate all decoder state into a single enum** with clear transitions, replacing scattered atomic flags and conditional checks.
|
||||
|
||||
### State Machine States
|
||||
|
||||
```cpp
|
||||
enum class DecoderState {
|
||||
UNINITIALIZED, // Before Initialize() is called
|
||||
READY, // Initialized and ready for decoding
|
||||
BUFFERING, // Initial buffering (0-15 frames)
|
||||
DECODING, // Normal frame-by-frame decoding
|
||||
FLUSHING, // End-of-file reached, draining DPB
|
||||
FLUSH_COMPLETE, // All frames drained
|
||||
ERROR // Unrecoverable error state
|
||||
};
|
||||
```
|
||||
|
||||
### State Transitions
|
||||
|
||||
```
|
||||
UNINITIALIZED → READY (Initialize() called successfully)
|
||||
READY → BUFFERING (First DecodeToSurface() call)
|
||||
BUFFERING → DECODING (Display queue has frames)
|
||||
DECODING → FLUSHING (End-of-file reached, NULL packet)
|
||||
FLUSHING → FLUSH_COMPLETE (Display queue empty)
|
||||
FLUSH_COMPLETE → READY (Reset() called)
|
||||
* → ERROR (Any state can transition to ERROR on failure)
|
||||
ERROR → READY (Reset() called)
BUFFERING/DECODING/FLUSHING → READY (Reset() called mid-stream)
|
||||
```
|
||||
|
||||
### State Machine Class
|
||||
|
||||
```cpp
|
||||
class DecoderStateMachine {
|
||||
public:
|
||||
DecoderStateMachine() : m_state(DecoderState::UNINITIALIZED) {}
|
||||
|
||||
// State queries
|
||||
DecoderState GetState() const { return m_state.load(); }
|
||||
bool IsState(DecoderState state) const { return m_state.load() == state; }
|
||||
bool CanDecode() const {
|
||||
auto state = m_state.load();
|
||||
return state == DecoderState::READY ||
|
||||
state == DecoderState::BUFFERING ||
|
||||
state == DecoderState::DECODING ||
|
||||
state == DecoderState::FLUSHING;
|
||||
}
|
||||
|
||||
// State transitions
|
||||
bool TransitionTo(DecoderState new_state) {
|
||||
DecoderState expected = m_state.load();
|
||||
if (IsValidTransition(expected, new_state)) {
|
||||
m_state.store(new_state);
|
||||
LOGF_DEBUG("[DecoderStateMachine] State transition: %s → %s",
|
||||
StateToString(expected), StateToString(new_state));
|
||||
return true;
|
||||
}
|
||||
LOGF_ERROR("[DecoderStateMachine] Invalid transition: %s → %s",
|
||||
StateToString(expected), StateToString(new_state));
|
||||
return false;
|
||||
}
|
||||
|
||||
// Specific transition helpers
|
||||
void OnInitializeSuccess() {
|
||||
TransitionTo(DecoderState::READY);
|
||||
}
|
||||
|
||||
void OnFirstPacket() {
|
||||
if (IsState(DecoderState::READY)) {
|
||||
TransitionTo(DecoderState::BUFFERING);
|
||||
}
|
||||
}
|
||||
|
||||
void OnBufferingComplete(size_t queue_size) {
|
||||
if (IsState(DecoderState::BUFFERING) && queue_size > 0) {
|
||||
TransitionTo(DecoderState::DECODING);
|
||||
}
|
||||
}
|
||||
|
||||
void OnEndOfFile() {
|
||||
if (IsState(DecoderState::DECODING) || IsState(DecoderState::BUFFERING)) {
|
||||
TransitionTo(DecoderState::FLUSHING);
|
||||
}
|
||||
}
|
||||
|
||||
void OnFlushComplete() {
|
||||
if (IsState(DecoderState::FLUSHING)) {
|
||||
TransitionTo(DecoderState::FLUSH_COMPLETE);
|
||||
}
|
||||
}
|
||||
|
||||
void OnError() {
|
||||
TransitionTo(DecoderState::ERROR);
|
||||
}
|
||||
|
||||
void OnReset() {
|
||||
TransitionTo(DecoderState::READY);
|
||||
}
|
||||
|
||||
private:
|
||||
std::atomic<DecoderState> m_state;
|
||||
|
||||
bool IsValidTransition(DecoderState from, DecoderState to) const {
|
||||
// Define valid state transitions
|
||||
switch (from) {
|
||||
case DecoderState::UNINITIALIZED:
|
||||
return to == DecoderState::READY || to == DecoderState::ERROR;
|
||||
case DecoderState::READY:
|
||||
return to == DecoderState::BUFFERING || to == DecoderState::ERROR;
|
||||
case DecoderState::BUFFERING:
|
||||
return to == DecoderState::DECODING || to == DecoderState::FLUSHING ||
|
||||
to == DecoderState::ERROR || to == DecoderState::READY;
|
||||
case DecoderState::DECODING:
|
||||
return to == DecoderState::FLUSHING || to == DecoderState::ERROR ||
|
||||
to == DecoderState::READY;
|
||||
case DecoderState::FLUSHING:
|
||||
return to == DecoderState::FLUSH_COMPLETE || to == DecoderState::ERROR ||
|
||||
to == DecoderState::READY;
|
||||
case DecoderState::FLUSH_COMPLETE:
|
||||
return to == DecoderState::READY || to == DecoderState::ERROR;
|
||||
case DecoderState::ERROR:
|
||||
return to == DecoderState::READY;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
const char* StateToString(DecoderState state) const {
|
||||
switch (state) {
|
||||
case DecoderState::UNINITIALIZED: return "UNINITIALIZED";
|
||||
case DecoderState::READY: return "READY";
|
||||
case DecoderState::BUFFERING: return "BUFFERING";
|
||||
case DecoderState::DECODING: return "DECODING";
|
||||
case DecoderState::FLUSHING: return "FLUSHING";
|
||||
case DecoderState::FLUSH_COMPLETE: return "FLUSH_COMPLETE";
|
||||
case DecoderState::ERROR: return "ERROR";
|
||||
default: return "UNKNOWN";
|
||||
}
|
||||
}
|
||||
};
|
||||
```
|
||||
|
||||
## Refactored DecodeToSurface()
|
||||
|
||||
### Before (Complex Branching):
|
||||
|
||||
```cpp
|
||||
bool DecodeToSurface(...) {
|
||||
// Step 1: Check if initialized
|
||||
if (!m_initialized) { ... }
|
||||
|
||||
// Handle NULL packet_data as flush mode
|
||||
if (!packet_data || packet_size == 0) {
|
||||
m_endOfFileReached = true;
|
||||
}
|
||||
|
||||
// Step 2: Submit packet
|
||||
if (m_endOfFileReached) {
|
||||
// Flush mode logic
|
||||
} else {
|
||||
// Normal mode logic
|
||||
}
|
||||
|
||||
// Step 3: Check initial buffering
|
||||
if (m_displayQueue.empty() && !m_initialBufferingComplete) {
|
||||
// Buffering logic
|
||||
}
|
||||
if (!m_displayQueue.empty() && !m_initialBufferingComplete) {
|
||||
m_initialBufferingComplete = true;
|
||||
}
|
||||
|
||||
// Step 4: Pop from display queue
|
||||
if (m_displayQueue.empty()) {
|
||||
if (m_endOfFileReached) {
|
||||
// Flush complete logic
|
||||
} else {
|
||||
// Error - queue empty unexpectedly
|
||||
}
|
||||
}
|
||||
|
||||
// ... (continues for 150 more lines)
|
||||
}
|
||||
```
|
||||
|
||||
### After (State Machine):
|
||||
|
||||
```cpp
|
||||
bool DecodeToSurface(const uint8_t* packet_data, size_t packet_size,
|
||||
VavCoreSurfaceType target_type,
|
||||
void* target_surface,
|
||||
VideoFrame& output_frame) {
|
||||
// State validation
|
||||
if (!m_stateMachine.CanDecode()) {
|
||||
LOGF_ERROR("[DecodeToSurface] Invalid state: %s",
|
||||
m_stateMachine.GetStateString());
|
||||
return false;
|
||||
}
|
||||
|
||||
// Handle end-of-file
|
||||
if (!packet_data || packet_size == 0) {
|
||||
return HandleFlushMode(output_frame);
|
||||
}
|
||||
|
||||
// Delegate to state-specific handler
|
||||
switch (m_stateMachine.GetState()) {
|
||||
case DecoderState::READY:
|
||||
case DecoderState::BUFFERING:
|
||||
return HandleBufferingMode(packet_data, packet_size, target_type,
|
||||
target_surface, output_frame);
|
||||
case DecoderState::DECODING:
|
||||
return HandleDecodingMode(packet_data, packet_size, target_type,
|
||||
target_surface, output_frame);
|
||||
default:
|
||||
LOGF_ERROR("[DecodeToSurface] Unexpected state in DecodeToSurface");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Helper Methods (State-Specific Logic):
|
||||
|
||||
```cpp
|
||||
bool HandleBufferingMode(const uint8_t* packet_data, size_t packet_size,
|
||||
VavCoreSurfaceType target_type,
|
||||
void* target_surface,
|
||||
VideoFrame& output_frame) {
|
||||
// Transition to buffering on first packet
|
||||
if (m_stateMachine.IsState(DecoderState::READY)) {
|
||||
m_stateMachine.OnFirstPacket();
|
||||
}
|
||||
|
||||
// Submit packet to NVDEC
|
||||
if (!SubmitPacketToParser(packet_data, packet_size)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check if buffering is complete
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_displayMutex);
|
||||
if (m_displayQueue.empty()) {
|
||||
// Still buffering
|
||||
return false; // VAVCORE_PACKET_ACCEPTED
|
||||
} else {
|
||||
// Buffering complete
|
||||
m_stateMachine.OnBufferingComplete(m_displayQueue.size());
|
||||
// Fall through to decode the first frame
|
||||
}
|
||||
}
|
||||
|
||||
return RetrieveAndRenderFrame(target_type, target_surface, output_frame);
|
||||
}
|
||||
|
||||
bool HandleDecodingMode(const uint8_t* packet_data, size_t packet_size,
|
||||
VavCoreSurfaceType target_type,
|
||||
void* target_surface,
|
||||
VideoFrame& output_frame) {
|
||||
// Submit packet to NVDEC
|
||||
if (!SubmitPacketToParser(packet_data, packet_size)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Retrieve and render frame
|
||||
return RetrieveAndRenderFrame(target_type, target_surface, output_frame);
|
||||
}
|
||||
|
||||
bool HandleFlushMode(VideoFrame& output_frame) {
|
||||
// Transition to flushing if not already
|
||||
if (!m_stateMachine.IsState(DecoderState::FLUSHING)) {
|
||||
m_stateMachine.OnEndOfFile();
|
||||
}
|
||||
|
||||
// Submit end-of-stream packet
|
||||
if (!SubmitFlushPacket()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check if flush is complete
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_displayMutex);
|
||||
if (m_displayQueue.empty()) {
|
||||
m_stateMachine.OnFlushComplete();
|
||||
return false; // VAVCORE_END_OF_STREAM
|
||||
}
|
||||
}
|
||||
|
||||
// Still have frames to drain
|
||||
return RetrieveAndRenderFrame(...);
|
||||
}
|
||||
```
|
||||
|
||||
## Removed/Consolidated State Variables
|
||||
|
||||
### Before:
|
||||
```cpp
|
||||
// 13+ state variables
|
||||
std::atomic<bool> m_initialBufferingComplete{false};
|
||||
std::atomic<bool> m_endOfFileReached{false};
|
||||
std::atomic<bool> m_converterNeedsReinit{false};
|
||||
std::atomic<uint64_t> m_submissionCounter{0};
|
||||
std::atomic<uint64_t> m_returnCounter{0};
|
||||
std::atomic<bool> m_pollingRunning{false};
|
||||
std::mutex m_frameQueueMutex;
|
||||
std::mutex m_cudaContextMutex;
|
||||
std::mutex m_submissionMutex;
|
||||
std::mutex m_displayMutex;
|
||||
std::queue<int> m_displayQueue;
|
||||
FrameSlot m_frameSlots[16]; // Each has 5 atomic flags
|
||||
```
|
||||
|
||||
### After:
|
||||
```cpp
|
||||
// Single state machine + minimal supporting variables
|
||||
DecoderStateMachine m_stateMachine;
|
||||
|
||||
// Still needed (but usage clarified by state machine):
|
||||
std::mutex m_displayMutex;
|
||||
std::queue<int> m_displayQueue;
|
||||
FrameSlot m_frameSlots[16]; // Frame-specific state (not global decoder state)
|
||||
std::atomic<uint64_t> m_submissionCounter{0}; // Submission ordering
|
||||
std::mutex m_submissionMutex;
|
||||
```
|
||||
|
||||
**Eliminated:**
|
||||
- `m_initialBufferingComplete` → Replaced by `DecoderState::BUFFERING` vs `DECODING`
|
||||
- `m_endOfFileReached` → Replaced by `DecoderState::FLUSHING`
|
||||
- `m_converterNeedsReinit` → Moved to NV12ToRGBAConverter internal state
|
||||
|
||||
## Benefits
|
||||
|
||||
### 1. Complexity Reduction
|
||||
- **13+ state variables → 1 state machine** with 7 well-defined states
|
||||
- **9+ conditional branches → State-driven dispatch** (1 switch statement)
|
||||
- **~150 lines → ~40 lines** per state handler (modular functions)
|
||||
|
||||
### 2. Improved Maintainability
|
||||
- **Clear state transitions** with validation (no illegal states)
|
||||
- **State-specific logic** isolated in dedicated functions
|
||||
- **Easy debugging** with state transition logging
|
||||
|
||||
### 3. Better Testability
|
||||
- **Test individual states** independently
|
||||
- **Verify state transitions** explicitly
|
||||
- **Mock state machine** for unit tests
|
||||
|
||||
### 4. Enhanced Readability
|
||||
- **Self-documenting code** (state names describe decoder status)
|
||||
- **Linear flow** instead of nested conditions
|
||||
- **Clear intent** from state-specific handler names
|
||||
|
||||
## Implementation Plan
|
||||
|
||||
### Phase 1: Create State Machine Class (CURRENT)
|
||||
- [x] Design state machine enum and transitions
|
||||
- [ ] Implement DecoderStateMachine class
|
||||
- [ ] Add state transition logging
|
||||
|
||||
### Phase 2: Extract Helper Methods
|
||||
- [ ] Create `SubmitPacketToParser()`
|
||||
- [ ] Create `RetrieveAndRenderFrame()`
|
||||
- [ ] Create `SubmitFlushPacket()`
|
||||
|
||||
### Phase 3: Refactor DecodeToSurface()
|
||||
- [ ] Replace state flags with state machine
|
||||
- [ ] Implement `HandleBufferingMode()`
|
||||
- [ ] Implement `HandleDecodingMode()`
|
||||
- [ ] Implement `HandleFlushMode()`
|
||||
|
||||
### Phase 4: Update Other Methods
|
||||
- [ ] Update `Initialize()` → call `m_stateMachine.OnInitializeSuccess()`
|
||||
- [ ] Update `Reset()` → call `m_stateMachine.OnReset()`
|
||||
- [ ] Update `Cleanup()` → call `m_stateMachine.TransitionTo(UNINITIALIZED)`
|
||||
|
||||
### Phase 5: Remove Obsolete State Variables
|
||||
- [ ] Remove `m_initialBufferingComplete`
|
||||
- [ ] Remove `m_endOfFileReached`
|
||||
- [ ] Verify no regressions with existing tests
|
||||
|
||||
## Testing Strategy
|
||||
|
||||
### Unit Tests
|
||||
- State transition validation (legal/illegal transitions)
|
||||
- State-specific handler behavior
|
||||
- Error state recovery
|
||||
|
||||
### Integration Tests
|
||||
- Full decode pipeline with state transitions
|
||||
- Edge cases (empty files, flush mode, errors)
|
||||
- Multi-threaded decoding with state machine
|
||||
|
||||
### Regression Tests
|
||||
- Existing RedSurfaceNVDECTest
|
||||
- Vav2PlayerHeadless tests
|
||||
- Vav2Player GUI tests
|
||||
|
||||
---
|
||||
**Status**: Design complete, implementation in progress
|
||||
**Last Updated**: 2025-10-11
|
||||
1139
vav2/docs/working/Triple_Buffering_Refactoring_Design.md
Normal file
1139
vav2/docs/working/Triple_Buffering_Refactoring_Design.md
Normal file
File diff suppressed because it is too large
Load Diff
@@ -359,13 +359,19 @@
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<PostBuildEvent>
|
||||
<Command>echo Copying VavCore Debug DLL...
|
||||
copy "$(ProjectDir)..\..\..\vavcore\lib\VavCore-debug.dll" "$(LayoutDir)\VavCore-debug.dll"
|
||||
echo DLL copy completed.</Command>
|
||||
<Message>Copying VavCore-debug.dll to output directory</Message>
|
||||
<Command>echo Copying VavCore Debug DLL to AppX directory...
|
||||
echo Source: "$(ProjectDir)..\..\..\vavcore\lib\VavCore-debug.dll"
|
||||
echo Target: "$(LayoutDir)\VavCore-debug.dll"
|
||||
copy /Y "$(ProjectDir)..\..\..\vavcore\lib\VavCore-debug.dll" "$(LayoutDir)\VavCore-debug.dll"
|
||||
if errorlevel 1 (
|
||||
echo ERROR: Failed to copy VavCore-debug.dll
|
||||
exit /b 1
|
||||
)
|
||||
echo DLL copy completed successfully.</Command>
|
||||
<Message>Copying VavCore-debug.dll to AppX directory</Message>
|
||||
</PostBuildEvent>
|
||||
<PreBuildEvent>
|
||||
<Command>del "$(LayoutDir)\VavCore-debug.dll"</Command>
|
||||
<Command>if exist "$(LayoutDir)\VavCore-debug.dll" del "$(LayoutDir)\VavCore-debug.dll"</Command>
|
||||
</PreBuildEvent>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
|
||||
@@ -102,40 +102,65 @@ bool FrameProcessor::ProcessFrame(VavCorePlayer* player,
|
||||
|
||||
// Expected: VAVCORE_PACKET_ACCEPTED for first 16 frames
|
||||
// No rendering during buffering phase
|
||||
} else {
|
||||
// Phase 2: Normal decoding with D3D12 surface (17th frame onwards)
|
||||
ID3D12Resource* rgbaTexture = m_renderer->GetNextRGBATextureForCUDAInterop();
|
||||
if (!rgbaTexture) {
|
||||
LOGF_ERROR("[FrameProcessor] Failed to get next RGBA texture");
|
||||
}
|
||||
// Phase 2: Triple buffer filling (frames 16-18)
|
||||
// Fill textures 0, 1, 2 before starting normal operation
|
||||
else if (m_framesDecoded < 19) {
|
||||
auto backend = m_renderer->GetRGBASurfaceBackend();
|
||||
if (!backend) {
|
||||
LOGF_ERROR("[FrameProcessor] Failed to get RGBASurfaceBackend");
|
||||
m_frameProcessing.store(false);
|
||||
if (onComplete) onComplete(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
ID3D12Resource* decodeTexture = backend->GetNextDecodeTexture();
|
||||
int decodeIndex = backend->GetDecodeTextureIndex();
|
||||
LOGF_INFO("[FrameProcessor] Triple buffer filling: frame %llu -> texture[%d]",
|
||||
m_framesDecoded.load(), decodeIndex);
|
||||
|
||||
result = vavcore_decode_to_surface(
|
||||
player,
|
||||
VAVCORE_SURFACE_D3D12_RESOURCE,
|
||||
rgbaTexture,
|
||||
decodeTexture,
|
||||
&vavFrame
|
||||
);
|
||||
|
||||
// After successful decode, copy to staging texture for safe rendering
|
||||
// After successful decode, advance decode index only (render index stays at 0)
|
||||
if (result == VAVCORE_SUCCESS) {
|
||||
auto backend = m_renderer->GetRGBASurfaceBackend();
|
||||
if (backend) {
|
||||
HRESULT hr = backend->CopyToStagingTexture(rgbaTexture);
|
||||
if (FAILED(hr)) {
|
||||
LOGF_ERROR("[FrameProcessor] Failed to copy to staging texture: 0x%08X", hr);
|
||||
} else {
|
||||
// Wait for GPU copy to complete before proceeding
|
||||
hr = backend->WaitForCopyCompletion();
|
||||
if (FAILED(hr)) {
|
||||
LOGF_ERROR("[FrameProcessor] Failed to wait for copy completion: 0x%08X", hr);
|
||||
} else {
|
||||
LOGF_INFO("[FrameProcessor] GPU copy completed, staging texture ready");
|
||||
}
|
||||
}
|
||||
}
|
||||
backend->AdvanceDecodeOnly();
|
||||
LOGF_INFO("[FrameProcessor] Triple buffer filled: texture[%d] ready", decodeIndex);
|
||||
}
|
||||
}
|
||||
// Phase 3: Normal operation (frame 19+)
|
||||
// Render from current texture, decode into next texture
|
||||
else {
|
||||
auto backend = m_renderer->GetRGBASurfaceBackend();
|
||||
if (!backend) {
|
||||
LOGF_ERROR("[FrameProcessor] Failed to get RGBASurfaceBackend");
|
||||
m_frameProcessing.store(false);
|
||||
if (onComplete) onComplete(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
ID3D12Resource* decodeTexture = backend->GetNextDecodeTexture();
|
||||
int decodeIndex = backend->GetDecodeTextureIndex();
|
||||
int renderIndex = backend->GetRenderTextureIndex();
|
||||
LOGF_DEBUG("[FrameProcessor] Normal operation: render[%d], decode[%d]",
|
||||
renderIndex, decodeIndex);
|
||||
|
||||
result = vavcore_decode_to_surface(
|
||||
player,
|
||||
VAVCORE_SURFACE_D3D12_RESOURCE,
|
||||
decodeTexture,
|
||||
&vavFrame
|
||||
);
|
||||
|
||||
// After successful decode, advance frame indices
|
||||
if (result == VAVCORE_SUCCESS) {
|
||||
backend->AdvanceFrame();
|
||||
LOGF_DEBUG("[FrameProcessor] Frame advanced: render[%d]->render[%d]",
|
||||
renderIndex, backend->GetRenderTextureIndex());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -157,6 +182,10 @@ bool FrameProcessor::ProcessFrame(VavCorePlayer* player,
|
||||
// No frame is ready yet - VavCore will return it in a future call
|
||||
LOGF_DEBUG("[FrameProcessor] PACKET ACCEPTED - Frame buffered in VavCore CUDA DPB (16-frame buffering)");
|
||||
|
||||
// CRITICAL: Increment m_framesDecoded for buffered packets
|
||||
// This counter determines when we switch from NULL surface (buffering) to valid surface (rendering)
|
||||
m_framesDecoded++;
|
||||
|
||||
// No action needed - just wait for next timing tick
|
||||
// VavCore will return the buffered frame when ready
|
||||
m_frameProcessing.store(false);
|
||||
|
||||
@@ -268,13 +268,6 @@ ID3D12Resource* D3D12VideoRenderer::GetRGBATextureForCUDAInterop() const {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
ID3D12Resource* D3D12VideoRenderer::GetNextRGBATextureForCUDAInterop() {
|
||||
if (m_rgbaSurfaceBackend) {
|
||||
return m_rgbaSurfaceBackend->GetNextVideoTexture();
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
uint8_t* D3D12VideoRenderer::GetYMappedBuffer(uint32_t bufferIndex) const {
|
||||
if (m_yuv420pUploadBackend) {
|
||||
return m_yuv420pUploadBackend->GetYMappedBuffer(bufferIndex);
|
||||
@@ -496,6 +489,13 @@ IVideoBackend* D3D12VideoRenderer::SelectBackend(const VavCoreVideoFrame& frame)
|
||||
}
|
||||
|
||||
HRESULT D3D12VideoRenderer::EnsureVideoTexture(const VavCoreVideoFrame& frame) {
|
||||
// Skip if frame has invalid dimensions (can happen during CUDA DPB buffering)
|
||||
if (frame.width == 0 || frame.height == 0) {
|
||||
LOGF_DEBUG("[D3D12VideoRenderer] Skipping texture creation for invalid frame dimensions: %dx%d",
|
||||
frame.width, frame.height);
|
||||
return S_OK; // Not an error, just skip texture creation
|
||||
}
|
||||
|
||||
// Check if we need to create/recreate video texture
|
||||
if (m_videoWidth != (uint32_t)frame.width || m_videoHeight != (uint32_t)frame.height) {
|
||||
IVideoBackend* backend = SelectBackend(frame);
|
||||
|
||||
@@ -59,10 +59,9 @@ public:
|
||||
|
||||
// Backend-specific texture access for CUDA interop
|
||||
ID3D12Resource* GetRGBATextureForCUDAInterop() const;
|
||||
ID3D12Resource* GetNextRGBATextureForCUDAInterop(); // Rotates to next buffer for triple buffering
|
||||
ID3D12Resource* GetNV12TextureForCUDAInterop() const { return nullptr; } // Future: NV12DirectBackend
|
||||
|
||||
// Get RGBASurfaceBackend for staging texture operations
|
||||
// Get RGBASurfaceBackend for triple buffer management
|
||||
RGBASurfaceBackend* GetRGBASurfaceBackend() const { return m_rgbaSurfaceBackend.get(); }
|
||||
|
||||
// Legacy YUV420P upload buffer access (for backward compatibility)
|
||||
|
||||
@@ -54,21 +54,8 @@ void RGBASurfaceBackend::Shutdown() {
|
||||
for (int i = 0; i < BUFFER_COUNT; i++) {
|
||||
m_rgbaTextures[i].Reset();
|
||||
}
|
||||
m_currentTextureIndex = 0;
|
||||
|
||||
// Release staging texture and copy command objects
|
||||
m_copyCommandList.Reset();
|
||||
m_copyCommandAllocator.Reset();
|
||||
m_stagingTexture.Reset();
|
||||
|
||||
// Close fence event handle
|
||||
if (m_copyFenceEvent != nullptr) {
|
||||
CloseHandle(m_copyFenceEvent);
|
||||
m_copyFenceEvent = nullptr;
|
||||
}
|
||||
|
||||
// Release fence
|
||||
m_copyFence.Reset();
|
||||
m_renderTextureIndex = 0;
|
||||
m_decodeTextureIndex = 0;
|
||||
|
||||
// Clear references (not owned)
|
||||
m_device = nullptr;
|
||||
@@ -78,15 +65,13 @@ void RGBASurfaceBackend::Shutdown() {
|
||||
}
|
||||
|
||||
HRESULT RGBASurfaceBackend::CreateVideoTexture(uint32_t width, uint32_t height) {
|
||||
LOGF_INFO("[RGBASurfaceBackend] CreateVideoTexture called: %ux%u", width, height);
|
||||
m_videoWidth = width;
|
||||
m_videoHeight = height;
|
||||
|
||||
HRESULT hr = S_OK;
|
||||
|
||||
// Create RGBA texture descriptor for CUDA Surface Object write
|
||||
// Format: DXGI_FORMAT_R8G8B8A8_UNORM (4 bytes per pixel)
|
||||
// Flags: D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS (enables CUDA Surface Object creation)
|
||||
// Layout: D3D12_TEXTURE_LAYOUT_UNKNOWN (tiled, CUDA Surface Objects handle this automatically)
|
||||
D3D12_RESOURCE_DESC rgbaTextureDesc = {};
|
||||
rgbaTextureDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
|
||||
rgbaTextureDesc.Width = width;
|
||||
@@ -96,8 +81,8 @@ HRESULT RGBASurfaceBackend::CreateVideoTexture(uint32_t width, uint32_t height)
|
||||
rgbaTextureDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
|
||||
rgbaTextureDesc.SampleDesc.Count = 1;
|
||||
rgbaTextureDesc.SampleDesc.Quality = 0;
|
||||
rgbaTextureDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; // Tiled layout
|
||||
rgbaTextureDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; // Enable CUDA write
|
||||
rgbaTextureDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
|
||||
rgbaTextureDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
|
||||
|
||||
D3D12_HEAP_PROPERTIES defaultHeapProps = {};
|
||||
defaultHeapProps.Type = D3D12_HEAP_TYPE_DEFAULT;
|
||||
@@ -108,110 +93,48 @@ HRESULT RGBASurfaceBackend::CreateVideoTexture(uint32_t width, uint32_t height)
|
||||
for (int i = 0; i < BUFFER_COUNT; i++) {
|
||||
hr = m_device->CreateCommittedResource(
|
||||
&defaultHeapProps,
|
||||
D3D12_HEAP_FLAG_SHARED, // Required for CUDA interop
|
||||
D3D12_HEAP_FLAG_SHARED,
|
||||
&rgbaTextureDesc,
|
||||
D3D12_RESOURCE_STATE_COMMON, // CUDA will transition as needed
|
||||
D3D12_RESOURCE_STATE_COMMON,
|
||||
nullptr,
|
||||
IID_PPV_ARGS(&m_rgbaTextures[i])
|
||||
);
|
||||
|
||||
if (FAILED(hr)) {
|
||||
// Cleanup already created textures
|
||||
LOGF_ERROR("[RGBASurfaceBackend] Failed to create RGBA texture[%d]: 0x%08X", i, hr);
|
||||
for (int j = 0; j < i; j++) {
|
||||
m_rgbaTextures[j].Reset();
|
||||
}
|
||||
return hr;
|
||||
}
|
||||
|
||||
LOGF_INFO("[RGBASurfaceBackend] Created RGBA texture[%d]: %p", i, m_rgbaTextures[i].Get());
|
||||
}
|
||||
|
||||
m_currentTextureIndex = 0;
|
||||
// Triple buffer filling logic:
|
||||
// - Frames 16-18 fill textures 0, 1, 2 (decode only, no rendering yet)
|
||||
// - Frame 19+ normal operation (decode into different texture than render)
|
||||
//
|
||||
// Initial state for filling phase:
|
||||
// - decodeIndex = 0 (will fill texture[0], then [1], then [2])
|
||||
// - renderIndex = 2 (will render from texture[2] after filling completes)
|
||||
//
|
||||
// After filling completes (frame 18):
|
||||
// - decodeIndex = 0 (wraps back after filling [2])
|
||||
// - renderIndex = 2 (will render from texture[2] at frame 19)
|
||||
// - Frame 19: render from [2], decode into [0] (no conflict!)
|
||||
m_renderTextureIndex = 2;
|
||||
m_decodeTextureIndex = 0;
|
||||
|
||||
// Create staging texture (same format, but no UAV flag - only for rendering)
|
||||
D3D12_RESOURCE_DESC stagingTextureDesc = rgbaTextureDesc;
|
||||
stagingTextureDesc.Flags = D3D12_RESOURCE_FLAG_NONE; // No CUDA access needed
|
||||
LOGF_INFO("[RGBASurfaceBackend] All %d RGBA textures created successfully", BUFFER_COUNT);
|
||||
|
||||
hr = m_device->CreateCommittedResource(
|
||||
&defaultHeapProps,
|
||||
D3D12_HEAP_FLAG_NONE,
|
||||
&stagingTextureDesc,
|
||||
D3D12_RESOURCE_STATE_COPY_DEST, // Initial state for receiving copies
|
||||
nullptr,
|
||||
IID_PPV_ARGS(&m_stagingTexture)
|
||||
);
|
||||
|
||||
if (FAILED(hr)) {
|
||||
LOGF_ERROR("[RGBASurfaceBackend] Failed to create staging texture: 0x%08X", hr);
|
||||
for (int i = 0; i < BUFFER_COUNT; i++) {
|
||||
m_rgbaTextures[i].Reset();
|
||||
}
|
||||
return hr;
|
||||
}
|
||||
|
||||
// Create fence for GPU copy synchronization
|
||||
hr = m_device->CreateFence(
|
||||
0,
|
||||
D3D12_FENCE_FLAG_NONE,
|
||||
IID_PPV_ARGS(&m_copyFence)
|
||||
);
|
||||
|
||||
if (FAILED(hr)) {
|
||||
LOGF_ERROR("[RGBASurfaceBackend] Failed to create copy fence: 0x%08X", hr);
|
||||
m_stagingTexture.Reset();
|
||||
for (int i = 0; i < BUFFER_COUNT; i++) {
|
||||
m_rgbaTextures[i].Reset();
|
||||
}
|
||||
return hr;
|
||||
}
|
||||
|
||||
// Create fence event for CPU wait
|
||||
m_copyFenceEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr);
|
||||
if (m_copyFenceEvent == nullptr) {
|
||||
hr = HRESULT_FROM_WIN32(GetLastError());
|
||||
LOGF_ERROR("[RGBASurfaceBackend] Failed to create fence event: 0x%08X", hr);
|
||||
m_copyFence.Reset();
|
||||
m_stagingTexture.Reset();
|
||||
for (int i = 0; i < BUFFER_COUNT; i++) {
|
||||
m_rgbaTextures[i].Reset();
|
||||
}
|
||||
return hr;
|
||||
}
|
||||
|
||||
LOGF_INFO("[RGBASurfaceBackend] Copy fence and event created successfully");
|
||||
|
||||
// Create command allocator and list for texture copy operations
|
||||
hr = m_device->CreateCommandAllocator(
|
||||
D3D12_COMMAND_LIST_TYPE_DIRECT,
|
||||
IID_PPV_ARGS(&m_copyCommandAllocator)
|
||||
);
|
||||
if (FAILED(hr)) {
|
||||
LOGF_ERROR("[RGBASurfaceBackend] Failed to create copy command allocator: 0x%08X", hr);
|
||||
return hr;
|
||||
}
|
||||
|
||||
hr = m_device->CreateCommandList(
|
||||
0,
|
||||
D3D12_COMMAND_LIST_TYPE_DIRECT,
|
||||
m_copyCommandAllocator.Get(),
|
||||
nullptr,
|
||||
IID_PPV_ARGS(&m_copyCommandList)
|
||||
);
|
||||
if (FAILED(hr)) {
|
||||
LOGF_ERROR("[RGBASurfaceBackend] Failed to create copy command list: 0x%08X", hr);
|
||||
return hr;
|
||||
}
|
||||
|
||||
// Close the command list (will be reset when needed)
|
||||
m_copyCommandList->Close();
|
||||
|
||||
LOGF_INFO("[RGBASurfaceBackend] Created staging texture for safe rendering");
|
||||
|
||||
// Create SRV for RGBA texture
|
||||
// Create SRV for rendering
|
||||
hr = CreateSrvHeap();
|
||||
if (FAILED(hr)) {
|
||||
return hr;
|
||||
}
|
||||
|
||||
// Update constant buffer with new aspect ratio
|
||||
// Update constant buffer
|
||||
hr = UpdateConstantBuffer();
|
||||
if (FAILED(hr)) {
|
||||
return hr;
|
||||
@@ -226,8 +149,7 @@ HRESULT RGBASurfaceBackend::RenderToBackBuffer(
|
||||
ID3D12GraphicsCommandList* commandList,
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE rtvHandle)
|
||||
{
|
||||
// RGBASurfaceBackend doesn't need RTV (uses CopyResource)
|
||||
(void)rtvHandle;
|
||||
(void)rtvHandle; // RGBASurfaceBackend doesn't use external RTV
|
||||
if (!m_initialized) {
|
||||
return E_NOT_VALID_STATE;
|
||||
}
|
||||
@@ -236,17 +158,18 @@ HRESULT RGBASurfaceBackend::RenderToBackBuffer(
|
||||
return E_INVALIDARG;
|
||||
}
|
||||
|
||||
// Use staging texture for rendering (safe from decoder overwrites)
|
||||
ID3D12Resource* renderTexture = m_stagingTexture.Get();
|
||||
// Use current render texture (already decoded, safe to read)
|
||||
ID3D12Resource* renderTexture = m_rgbaTextures[m_renderTextureIndex].Get();
|
||||
if (!renderTexture) {
|
||||
LOGF_ERROR("[RGBASurfaceBackend] RenderToBackBuffer: staging texture is NULL!");
|
||||
LOGF_ERROR("[RGBASurfaceBackend] RenderToBackBuffer: render texture[%d] is NULL!", m_renderTextureIndex);
|
||||
return E_INVALIDARG;
|
||||
}
|
||||
|
||||
LOGF_DEBUG("[RGBASurfaceBackend] RenderToBackBuffer: using staging texture, ptr=%p", renderTexture);
|
||||
LOGF_DEBUG("[RGBASurfaceBackend] RenderToBackBuffer: using texture[%d], ptr=%p",
|
||||
m_renderTextureIndex, renderTexture);
|
||||
|
||||
// Staging texture is already in PIXEL_SHADER_RESOURCE state (set by CopyToStagingTexture)
|
||||
// No barrier needed here
|
||||
// Render texture is in COMMON state (CUDA managed)
|
||||
// No barrier needed for reading in pixel shader
|
||||
|
||||
// Transition back buffer to render target
|
||||
D3D12_RESOURCE_BARRIER barrierToRT = {};
|
||||
@@ -258,13 +181,12 @@ HRESULT RGBASurfaceBackend::RenderToBackBuffer(
|
||||
barrierToRT.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
|
||||
commandList->ResourceBarrier(1, &barrierToRT);
|
||||
|
||||
// Create RTV for back buffer (not needed anymore - use rtvHandle from parameter)
|
||||
// Create RTV for back buffer
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE backBufferRtvHandle;
|
||||
D3D12_RENDER_TARGET_VIEW_DESC rtvDesc = {};
|
||||
rtvDesc.Format = DXGI_FORMAT_B8G8R8A8_UNORM;
|
||||
rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
|
||||
|
||||
// Create temporary RTV heap for back buffer
|
||||
ComPtr<ID3D12DescriptorHeap> rtvHeap;
|
||||
D3D12_DESCRIPTOR_HEAP_DESC rtvHeapDesc = {};
|
||||
rtvHeapDesc.NumDescriptors = 1;
|
||||
@@ -287,7 +209,7 @@ HRESULT RGBASurfaceBackend::RenderToBackBuffer(
|
||||
ID3D12DescriptorHeap* heaps[] = { m_srvHeap.Get() };
|
||||
commandList->SetDescriptorHeaps(1, heaps);
|
||||
|
||||
// Use descriptor for staging texture (index 0, the only descriptor)
|
||||
// Use SRV for current render texture
|
||||
CD3DX12_GPU_DESCRIPTOR_HANDLE srvHandle(m_srvHeap->GetGPUDescriptorHandleForHeapStart());
|
||||
commandList->SetGraphicsRootDescriptorTable(0, srvHandle);
|
||||
commandList->SetGraphicsRootConstantBufferView(1, m_constantBuffer->GetGPUVirtualAddress());
|
||||
@@ -314,7 +236,7 @@ HRESULT RGBASurfaceBackend::RenderToBackBuffer(
|
||||
|
||||
// Draw fullscreen quad
|
||||
commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
|
||||
commandList->DrawInstanced(6, 1, 0, 0); // Fullscreen quad (2 triangles)
|
||||
commandList->DrawInstanced(6, 1, 0, 0);
|
||||
|
||||
// Transition back buffer to present
|
||||
D3D12_RESOURCE_BARRIER barrierToPresent = {};
|
||||
@@ -326,9 +248,6 @@ HRESULT RGBASurfaceBackend::RenderToBackBuffer(
|
||||
barrierToPresent.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
|
||||
commandList->ResourceBarrier(1, &barrierToPresent);
|
||||
|
||||
// Staging texture remains in PIXEL_SHADER_RESOURCE state (no transition needed)
|
||||
// It will be transitioned back to COPY_DEST when CopyToStagingTexture is called next
|
||||
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
@@ -536,9 +455,9 @@ HRESULT RGBASurfaceBackend::CreatePipelineState() {
|
||||
}
|
||||
|
||||
HRESULT RGBASurfaceBackend::CreateSrvHeap() {
|
||||
// Create descriptor heap with 1 descriptor for staging texture
|
||||
// Create descriptor heap with 1 descriptor for current render texture
|
||||
D3D12_DESCRIPTOR_HEAP_DESC srvHeapDesc = {};
|
||||
srvHeapDesc.NumDescriptors = 1; // Only need SRV for staging texture
|
||||
srvHeapDesc.NumDescriptors = 1;
|
||||
srvHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
|
||||
srvHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
|
||||
|
||||
@@ -547,22 +466,13 @@ HRESULT RGBASurfaceBackend::CreateSrvHeap() {
|
||||
return hr;
|
||||
}
|
||||
|
||||
// Create SRV for staging texture (the only texture used for rendering)
|
||||
D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
|
||||
srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
|
||||
srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
|
||||
srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
|
||||
srvDesc.Texture2D.MipLevels = 1;
|
||||
// Create initial SRV for texture[0] (m_renderTextureIndex = 0)
|
||||
hr = UpdateSRVForCurrentRenderTexture();
|
||||
if (FAILED(hr)) {
|
||||
return hr;
|
||||
}
|
||||
|
||||
CD3DX12_CPU_DESCRIPTOR_HANDLE srvHandle(m_srvHeap->GetCPUDescriptorHandleForHeapStart());
|
||||
|
||||
m_device->CreateShaderResourceView(
|
||||
m_stagingTexture.Get(),
|
||||
&srvDesc,
|
||||
srvHandle
|
||||
);
|
||||
|
||||
LOGF_INFO("[RGBASurfaceBackend] Created SRV for staging texture");
|
||||
LOGF_INFO("[RGBASurfaceBackend] Created SRV heap for render texture");
|
||||
|
||||
return S_OK;
|
||||
}
|
||||
@@ -605,138 +515,80 @@ HRESULT RGBASurfaceBackend::UpdateConstantBuffer() {
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
ID3D12Resource* RGBASurfaceBackend::GetNextVideoTexture() {
|
||||
// Rotate to next buffer index
|
||||
int prevIndex = m_currentTextureIndex;
|
||||
m_currentTextureIndex = (m_currentTextureIndex + 1) % BUFFER_COUNT;
|
||||
|
||||
LOGF_INFO("[RGBASurfaceBackend] GetNextVideoTexture: %d -> %d, texture=%p",
|
||||
prevIndex, m_currentTextureIndex, m_rgbaTextures[m_currentTextureIndex].Get());
|
||||
|
||||
return m_rgbaTextures[m_currentTextureIndex].Get();
|
||||
// Triple buffering management functions
|
||||
ID3D12Resource* RGBASurfaceBackend::GetCurrentRenderTexture() const {
|
||||
return m_rgbaTextures[m_renderTextureIndex].Get();
|
||||
}
|
||||
|
||||
HRESULT RGBASurfaceBackend::CopyToStagingTexture(ID3D12Resource* sourceTexture) {
|
||||
if (!m_initialized || !m_stagingTexture || !sourceTexture) {
|
||||
ID3D12Resource* RGBASurfaceBackend::GetNextDecodeTexture() const {
|
||||
return m_rgbaTextures[m_decodeTextureIndex].Get();
|
||||
}
|
||||
|
||||
void RGBASurfaceBackend::AdvanceDecodeOnly() {
|
||||
int prevDecode = m_decodeTextureIndex;
|
||||
|
||||
// Filling phase: Only advance decode index, render index stays at 0
|
||||
// This is used during frames 16-18 to fill all three textures
|
||||
m_decodeTextureIndex = (m_decodeTextureIndex + 1) % BUFFER_COUNT;
|
||||
|
||||
LOGF_INFO("[RGBASurfaceBackend] AdvanceDecodeOnly: decode %d->%d (render stays at %d)",
|
||||
prevDecode, m_decodeTextureIndex, m_renderTextureIndex);
|
||||
}
|
||||
|
||||
void RGBASurfaceBackend::AdvanceFrame() {
|
||||
int prevRender = m_renderTextureIndex;
|
||||
int prevDecode = m_decodeTextureIndex;
|
||||
|
||||
// Triple buffering advance logic:
|
||||
// After Frame N decodes into decodeTexture, we want:
|
||||
// - Render from the PREVIOUS frame's texture (N-1), not the current one!
|
||||
// - Decode into the oldest texture (N-2)
|
||||
//
|
||||
// Current state: render=R, decode=D
|
||||
// After decode completes: the old render texture becomes new decode target
|
||||
// The old decode texture will be rendered NEXT frame (not this frame!)
|
||||
//
|
||||
// Example with 3 textures:
|
||||
// State: render=2, decode=0
|
||||
// - Frame 19 decodes into texture[0] (now contains frame 19)
|
||||
// - Advance: render=2 (still showing frame 18!), decode=1
|
||||
// - Frame 20 decodes into texture[1] (now contains frame 20)
|
||||
// - Advance: render=0 (now showing frame 19), decode=2
|
||||
//
|
||||
// This ensures we NEVER render from a texture that was just written!
|
||||
m_decodeTextureIndex = m_renderTextureIndex; // Old render texture becomes next decode target
|
||||
m_renderTextureIndex = prevDecode; // Old decode texture becomes new render texture
|
||||
|
||||
LOGF_INFO("[RGBASurfaceBackend] AdvanceFrame: render %d->%d, decode %d->%d",
|
||||
prevRender, m_renderTextureIndex, prevDecode, m_decodeTextureIndex);
|
||||
|
||||
// Update SRV to point to new render texture
|
||||
HRESULT hr = UpdateSRVForCurrentRenderTexture();
|
||||
if (FAILED(hr)) {
|
||||
LOGF_ERROR("[RGBASurfaceBackend] Failed to update SRV: 0x%08X", hr);
|
||||
}
|
||||
}
|
||||
|
||||
HRESULT RGBASurfaceBackend::UpdateSRVForCurrentRenderTexture() {
|
||||
if (!m_srvHeap || !m_rgbaTextures[m_renderTextureIndex]) {
|
||||
return E_NOT_VALID_STATE;
|
||||
}
|
||||
|
||||
// Reset command allocator and list
|
||||
HRESULT hr = m_copyCommandAllocator->Reset();
|
||||
if (FAILED(hr)) {
|
||||
LOGF_ERROR("[RGBASurfaceBackend] Failed to reset copy command allocator: 0x%08X", hr);
|
||||
return hr;
|
||||
}
|
||||
D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
|
||||
srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
|
||||
srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
|
||||
srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
|
||||
srvDesc.Texture2D.MipLevels = 1;
|
||||
|
||||
hr = m_copyCommandList->Reset(m_copyCommandAllocator.Get(), nullptr);
|
||||
if (FAILED(hr)) {
|
||||
LOGF_ERROR("[RGBASurfaceBackend] Failed to reset copy command list: 0x%08X", hr);
|
||||
return hr;
|
||||
}
|
||||
CD3DX12_CPU_DESCRIPTOR_HANDLE srvHandle(m_srvHeap->GetCPUDescriptorHandleForHeapStart());
|
||||
|
||||
// Transition source texture to COPY_SOURCE
|
||||
D3D12_RESOURCE_BARRIER sourceBarrier = {};
|
||||
sourceBarrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
|
||||
sourceBarrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
|
||||
sourceBarrier.Transition.pResource = sourceTexture;
|
||||
sourceBarrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COMMON; // CUDA uses COMMON
|
||||
sourceBarrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE;
|
||||
sourceBarrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
|
||||
|
||||
// Transition staging texture to COPY_DEST (only if not first copy)
|
||||
// First copy: staging texture already in COPY_DEST state (created with that state)
|
||||
// Subsequent copies: staging texture in PIXEL_SHADER_RESOURCE state (from previous render)
|
||||
if (m_firstCopy) {
|
||||
// First copy: only transition source
|
||||
m_copyCommandList->ResourceBarrier(1, &sourceBarrier);
|
||||
m_firstCopy = false;
|
||||
LOGF_DEBUG("[RGBASurfaceBackend] First copy: staging texture already in COPY_DEST state");
|
||||
} else {
|
||||
// Subsequent copies: transition both staging and source
|
||||
D3D12_RESOURCE_BARRIER stagingToCopyDest = {};
|
||||
stagingToCopyDest.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
|
||||
stagingToCopyDest.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
|
||||
stagingToCopyDest.Transition.pResource = m_stagingTexture.Get();
|
||||
stagingToCopyDest.Transition.StateBefore = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
|
||||
stagingToCopyDest.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST;
|
||||
stagingToCopyDest.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
|
||||
|
||||
D3D12_RESOURCE_BARRIER barriers[] = { stagingToCopyDest, sourceBarrier };
|
||||
m_copyCommandList->ResourceBarrier(2, barriers);
|
||||
}
|
||||
|
||||
// Copy texture
|
||||
m_copyCommandList->CopyResource(m_stagingTexture.Get(), sourceTexture);
|
||||
|
||||
// Transition source back to COMMON (for CUDA)
|
||||
sourceBarrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE;
|
||||
sourceBarrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COMMON;
|
||||
m_copyCommandList->ResourceBarrier(1, &sourceBarrier);
|
||||
|
||||
// Transition staging texture to PIXEL_SHADER_RESOURCE for rendering
|
||||
D3D12_RESOURCE_BARRIER stagingBarrier = {};
|
||||
stagingBarrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
|
||||
stagingBarrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
|
||||
stagingBarrier.Transition.pResource = m_stagingTexture.Get();
|
||||
stagingBarrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST;
|
||||
stagingBarrier.Transition.StateAfter = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
|
||||
stagingBarrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
|
||||
|
||||
m_copyCommandList->ResourceBarrier(1, &stagingBarrier);
|
||||
|
||||
// Close command list
|
||||
hr = m_copyCommandList->Close();
|
||||
if (FAILED(hr)) {
|
||||
LOGF_ERROR("[RGBASurfaceBackend] Failed to close copy command list: 0x%08X", hr);
|
||||
return hr;
|
||||
}
|
||||
|
||||
// Execute command list
|
||||
ID3D12CommandList* commandLists[] = { m_copyCommandList.Get() };
|
||||
m_commandQueue->ExecuteCommandLists(1, commandLists);
|
||||
|
||||
// Signal fence after copy submission
|
||||
m_copyFenceValue++;
|
||||
hr = m_commandQueue->Signal(m_copyFence.Get(), m_copyFenceValue);
|
||||
if (FAILED(hr)) {
|
||||
LOGF_ERROR("[RGBASurfaceBackend] Failed to signal copy fence: 0x%08X", hr);
|
||||
return hr;
|
||||
}
|
||||
|
||||
LOGF_DEBUG("[RGBASurfaceBackend] GPU copy submitted (fence value: %llu)", m_copyFenceValue);
|
||||
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
HRESULT RGBASurfaceBackend::WaitForCopyCompletion() {
|
||||
if (!m_copyFence || m_copyFenceEvent == nullptr) {
|
||||
LOGF_ERROR("[RGBASurfaceBackend] Copy fence or event not initialized");
|
||||
return E_NOT_VALID_STATE;
|
||||
}
|
||||
|
||||
// Check if copy already completed
|
||||
if (m_copyFence->GetCompletedValue() >= m_copyFenceValue) {
|
||||
LOGF_DEBUG("[RGBASurfaceBackend] GPU copy already complete (fence value: %llu)", m_copyFenceValue);
|
||||
return S_OK; // Already complete
|
||||
}
|
||||
|
||||
// Wait for GPU copy to complete
|
||||
HRESULT hr = m_copyFence->SetEventOnCompletion(
|
||||
m_copyFenceValue,
|
||||
m_copyFenceEvent
|
||||
m_device->CreateShaderResourceView(
|
||||
m_rgbaTextures[m_renderTextureIndex].Get(),
|
||||
&srvDesc,
|
||||
srvHandle
|
||||
);
|
||||
if (FAILED(hr)) {
|
||||
LOGF_ERROR("[RGBASurfaceBackend] SetEventOnCompletion failed: 0x%08X", hr);
|
||||
return hr;
|
||||
}
|
||||
|
||||
DWORD waitResult = WaitForSingleObject(m_copyFenceEvent, 5000); // 5 second timeout
|
||||
if (waitResult != WAIT_OBJECT_0) {
|
||||
LOGF_ERROR("[RGBASurfaceBackend] Wait failed or timed out: %lu", waitResult);
|
||||
return E_FAIL;
|
||||
}
|
||||
|
||||
LOGF_DEBUG("[RGBASurfaceBackend] GPU copy completed (fence value: %llu)", m_copyFenceValue);
|
||||
LOGF_DEBUG("[RGBASurfaceBackend] Updated SRV for render texture[%d]", m_renderTextureIndex);
|
||||
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
@@ -39,24 +39,25 @@ public:
|
||||
|
||||
HRESULT CreateVideoTexture(uint32_t width, uint32_t height) override;
|
||||
ID3D12Resource* GetVideoTexture() const override {
|
||||
return m_rgbaTextures[m_currentTextureIndex].Get();
|
||||
return m_rgbaTextures[m_renderTextureIndex].Get();
|
||||
}
|
||||
|
||||
// Get next available texture for decoding (rotates buffer index)
|
||||
ID3D12Resource* GetNextVideoTexture();
|
||||
// Triple buffering management
|
||||
// Get current texture being rendered to screen
|
||||
ID3D12Resource* GetCurrentRenderTexture() const;
|
||||
|
||||
// Get current rendering texture index
|
||||
int GetCurrentTextureIndex() const { return m_currentTextureIndex; }
|
||||
// Get next texture for decoding (not currently being rendered)
|
||||
ID3D12Resource* GetNextDecodeTexture() const;
|
||||
|
||||
// Staging texture management for safe rendering
|
||||
// Copy decoder texture to stable staging texture
|
||||
HRESULT CopyToStagingTexture(ID3D12Resource* sourceTexture);
|
||||
// Advance frame: switch render/decode indices after decoding completes (normal operation)
|
||||
void AdvanceFrame();
|
||||
|
||||
// Wait for GPU copy to complete
|
||||
HRESULT WaitForCopyCompletion();
|
||||
// Advance decode only: move to next decode texture without changing render index (filling phase)
|
||||
void AdvanceDecodeOnly();
|
||||
|
||||
// Get stable staging texture for rendering (never overwritten by decoder)
|
||||
ID3D12Resource* GetStagingTexture() const { return m_stagingTexture.Get(); }
|
||||
// Get current indices for debugging
|
||||
int GetRenderTextureIndex() const { return m_renderTextureIndex; }
|
||||
int GetDecodeTextureIndex() const { return m_decodeTextureIndex; }
|
||||
|
||||
HRESULT RenderToBackBuffer(
|
||||
const VavCoreVideoFrame& frame,
|
||||
@@ -73,27 +74,19 @@ private:
|
||||
ID3D12Device* m_device = nullptr;
|
||||
ID3D12CommandQueue* m_commandQueue = nullptr;
|
||||
|
||||
// RGBA video textures (triple buffering) - Decoder writes here
|
||||
// RGBA video textures (triple buffering)
|
||||
// Format: DXGI_FORMAT_R8G8B8A8_UNORM
|
||||
// Flags: D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS (for CUDA Surface Object)
|
||||
// Layout: D3D12_TEXTURE_LAYOUT_UNKNOWN (tiled, handled by CUDA Surface Objects)
|
||||
//
|
||||
// Triple buffering roles:
|
||||
// - m_renderTextureIndex: Currently rendering to screen (safe to read)
|
||||
// - m_decodeTextureIndex: Next target for decoding (safe to write)
|
||||
// - Third texture: Idle, previously decoded (ready to become render texture)
|
||||
static const int BUFFER_COUNT = 3;
|
||||
ComPtr<ID3D12Resource> m_rgbaTextures[BUFFER_COUNT];
|
||||
int m_currentTextureIndex = 0;
|
||||
|
||||
// Staging texture - Stable copy for rendering (decoder never touches this)
|
||||
// This texture is copied from m_rgbaTextures at 30fps
|
||||
// Renderer always reads from this texture (safe from race conditions)
|
||||
ComPtr<ID3D12Resource> m_stagingTexture;
|
||||
|
||||
// Command allocator and list for async texture copy
|
||||
ComPtr<ID3D12CommandAllocator> m_copyCommandAllocator;
|
||||
ComPtr<ID3D12GraphicsCommandList> m_copyCommandList;
|
||||
|
||||
// GPU synchronization for copy operations
|
||||
ComPtr<ID3D12Fence> m_copyFence;
|
||||
UINT64 m_copyFenceValue = 0;
|
||||
HANDLE m_copyFenceEvent = nullptr;
|
||||
int m_renderTextureIndex = 0; // Texture currently being rendered
|
||||
int m_decodeTextureIndex = 0; // Texture for next decode operation
|
||||
|
||||
// Graphics pipeline for simple RGBA texture sampling
|
||||
ComPtr<ID3D12RootSignature> m_rootSignature;
|
||||
@@ -122,7 +115,6 @@ private:
|
||||
uint32_t m_height = 0; // Container height
|
||||
uint32_t m_videoWidth = 0;
|
||||
uint32_t m_videoHeight = 0;
|
||||
bool m_firstCopy = true; // Track first copy to handle initial state
|
||||
|
||||
// Helper methods
|
||||
HRESULT CreateGraphicsResources();
|
||||
@@ -131,6 +123,7 @@ private:
|
||||
HRESULT CreatePipelineState();
|
||||
HRESULT CreateSrvHeap();
|
||||
HRESULT UpdateConstantBuffer();
|
||||
HRESULT UpdateSRVForCurrentRenderTexture();
|
||||
};
|
||||
|
||||
} // namespace Vav2Player
|
||||
|
||||
@@ -1374,12 +1374,19 @@ int CUDAAPI NVDECAV1Decoder::HandlePictureDisplay(void* user_data, CUVIDPARSERDI
|
||||
slot.pts = timestamp;
|
||||
slot.ready_for_display.store(true);
|
||||
|
||||
// Enqueue picture_index for display queue (for B-frame reordering)
|
||||
// Enqueue DisplayQueueEntry with PTS for B-frame reordering
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(decoder->m_displayMutex);
|
||||
decoder->m_displayQueue.push(pic_idx);
|
||||
LOGF_DEBUG("[HandlePictureDisplay] Pushed picture_index=%d (pts=%lld) to display queue (size: %zu)",
|
||||
pic_idx, timestamp, decoder->m_displayQueue.size());
|
||||
|
||||
DisplayQueueEntry entry;
|
||||
entry.frame_slot_index = pic_idx;
|
||||
entry.pts = timestamp;
|
||||
entry.submission_id = slot.submission_id;
|
||||
|
||||
decoder->m_displayQueue.push(entry);
|
||||
|
||||
LOGF_DEBUG("[HandlePictureDisplay] Pushed DisplayQueueEntry: slot=%d, pts=%lld, submission_id=%llu (queue size: %zu)",
|
||||
pic_idx, timestamp, slot.submission_id, decoder->m_displayQueue.size());
|
||||
}
|
||||
|
||||
return 1;
|
||||
@@ -1572,11 +1579,17 @@ bool NVDECAV1Decoder::DecodeToSurface(const uint8_t* packet_data, size_t packet_
|
||||
VideoFrame& output_frame) {
|
||||
LOGF_DEBUG("[DecodeToSurface] Called with target_type=%d", static_cast<int>(target_type));
|
||||
|
||||
if (!m_initialized || !packet_data) {
|
||||
LOGF_ERROR("[DecodeToSurface] Not initialized or null packet_data");
|
||||
if (!m_initialized) {
|
||||
LOGF_ERROR("[DecodeToSurface] Not initialized");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Handle NULL packet_data as flush mode (end of file reached)
|
||||
if (!packet_data || packet_size == 0) {
|
||||
LOGF_DEBUG("[DecodeToSurface] NULL packet - flush mode (end of file)");
|
||||
m_state = DecoderState::FLUSHING;
|
||||
}
|
||||
|
||||
// Set CUDA context for current thread
|
||||
{
|
||||
std::lock_guard<std::mutex> contextLock(m_cudaContextMutex);
|
||||
@@ -1617,13 +1630,25 @@ bool NVDECAV1Decoder::DecodeToSurface(const uint8_t* packet_data, size_t packet_
|
||||
// ===== Step 2: Submit packet to NVDEC parser =====
|
||||
// This triggers HandlePictureDecode (if new frame) and HandlePictureDisplay (always)
|
||||
CUVIDSOURCEDATAPACKET packet = {};
|
||||
packet.payload = packet_data;
|
||||
packet.payload_size = static_cast<unsigned long>(packet_size);
|
||||
packet.flags = CUVID_PKT_ENDOFPICTURE;
|
||||
packet.timestamp = 0; // Not used - NVDEC parser overwrites this value
|
||||
|
||||
LOGF_INFO("[DecodeToSurface] Calling cuvidParseVideoData (submission_id=%llu)...",
|
||||
my_submission_id);
|
||||
// Handle flush mode (NULL packet)
|
||||
if (m_state == DecoderState::FLUSHING) {
|
||||
// Flush mode: send end-of-stream packet to drain CUDA DPB
|
||||
packet.flags = CUVID_PKT_ENDOFSTREAM;
|
||||
packet.payload = nullptr;
|
||||
packet.payload_size = 0;
|
||||
LOGF_INFO("[DecodeToSurface] Flush mode: sending ENDOFSTREAM packet (submission_id=%llu)",
|
||||
my_submission_id);
|
||||
} else {
|
||||
// Normal mode: send actual packet data
|
||||
packet.payload = packet_data;
|
||||
packet.payload_size = static_cast<unsigned long>(packet_size);
|
||||
packet.flags = CUVID_PKT_ENDOFPICTURE;
|
||||
packet.timestamp = 0; // Not used - NVDEC parser overwrites this value
|
||||
|
||||
LOGF_INFO("[DecodeToSurface] Normal mode: calling cuvidParseVideoData (submission_id=%llu)...",
|
||||
my_submission_id);
|
||||
}
|
||||
|
||||
CUresult result = cuvidParseVideoData(m_parser, &packet);
|
||||
// cuvidParseVideoData is SYNCHRONOUS - all callbacks execute before return
|
||||
@@ -1647,33 +1672,63 @@ bool NVDECAV1Decoder::DecodeToSurface(const uint8_t* packet_data, size_t packet_
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_displayMutex);
|
||||
|
||||
// During initial buffering, accept packets until display queue has frames
|
||||
if (m_displayQueue.empty() && !m_initialBufferingComplete) {
|
||||
LOGF_DEBUG("[DecodeToSurface] PACKET ACCEPTED - Initial buffering (queue size: 0)");
|
||||
return VAVCORE_PACKET_ACCEPTED;
|
||||
// Transition from READY to BUFFERING on first packet
|
||||
if (m_state == DecoderState::READY && m_displayQueue.empty()) {
|
||||
m_state = DecoderState::BUFFERING;
|
||||
LOGF_DEBUG("[DecodeToSurface] State transition: READY → BUFFERING");
|
||||
}
|
||||
|
||||
// Once we have frames in queue, mark buffering complete
|
||||
if (!m_displayQueue.empty() && !m_initialBufferingComplete) {
|
||||
m_initialBufferingComplete = true;
|
||||
LOGF_INFO("[DecodeToSurface] Initial buffering complete, queue size: %zu", m_displayQueue.size());
|
||||
// During initial buffering, accept packets until display queue has frames
|
||||
if (m_displayQueue.empty() && m_state == DecoderState::BUFFERING) {
|
||||
LOGF_DEBUG("[DecodeToSurface] PACKET ACCEPTED - Initial buffering (queue size: 0)");
|
||||
// Return false to indicate no frame yet (still buffering)
|
||||
// The C API wrapper will convert this to VAVCORE_PACKET_ACCEPTED
|
||||
return false;
|
||||
}
|
||||
|
||||
// Once we have frames in queue, transition to DECODING
|
||||
if (!m_displayQueue.empty() && m_state == DecoderState::BUFFERING) {
|
||||
m_state = DecoderState::DECODING;
|
||||
LOGF_INFO("[DecodeToSurface] State transition: BUFFERING → DECODING (queue size: %zu)", m_displayQueue.size());
|
||||
}
|
||||
}
|
||||
|
||||
// ===== Step 4: Pop from display queue to get picture_index =====
|
||||
// ===== Step 4: Pop from display queue to get picture_index (PTS-ordered) =====
|
||||
DisplayQueueEntry entry;
|
||||
int pic_idx = -1;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_displayMutex);
|
||||
|
||||
if (m_displayQueue.empty()) {
|
||||
LOGF_ERROR("[DecodeToSurface] Display queue EMPTY after buffering complete (SHOULD NOT HAPPEN!)");
|
||||
return false;
|
||||
// Check if we're in flush mode
|
||||
if (m_state == DecoderState::FLUSHING) {
|
||||
// Flush mode: no more frames in CUDA DPB
|
||||
// Return false to indicate no frame, caller will check end-of-stream
|
||||
LOGF_INFO("[DecodeToSurface] Flush complete: all frames drained from CUDA DPB");
|
||||
|
||||
// Release pending submission before returning
|
||||
{
|
||||
std::lock_guard<std::mutex> lock2(m_submissionMutex);
|
||||
m_pendingSubmissions[pending_idx].in_use.store(false);
|
||||
}
|
||||
|
||||
// Return false - the C API wrapper will convert this to VAVCORE_END_OF_STREAM
|
||||
// when combined with file reader's IsEndOfFile() check
|
||||
return false;
|
||||
} else {
|
||||
// Normal mode: queue empty unexpectedly
|
||||
LOGF_ERROR("[DecodeToSurface] Display queue EMPTY after buffering complete (SHOULD NOT HAPPEN!)");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
pic_idx = m_displayQueue.front();
|
||||
// Pop from priority queue (PTS-ordered)
|
||||
entry = m_displayQueue.top();
|
||||
m_displayQueue.pop();
|
||||
LOGF_INFO("[DecodeToSurface] Popped picture_index=%d from display queue (queue size now: %zu)",
|
||||
pic_idx, m_displayQueue.size());
|
||||
pic_idx = entry.frame_slot_index;
|
||||
|
||||
LOGF_INFO("[DecodeToSurface] Popped DisplayQueueEntry: slot=%d, pts=%lld, submission_id=%llu (queue size now: %zu)",
|
||||
pic_idx, entry.pts, entry.submission_id, m_displayQueue.size());
|
||||
}
|
||||
|
||||
if (pic_idx < 0 || pic_idx >= RING_BUFFER_SIZE) {
|
||||
@@ -1697,21 +1752,37 @@ bool NVDECAV1Decoder::DecodeToSurface(const uint8_t* packet_data, size_t packet_
|
||||
|
||||
LOGF_DEBUG("[DecodeToSurface] Frame slot %d ready for display", pic_idx);
|
||||
|
||||
// ===== Step 6: Copy from CUDA DPB to target surface =====
|
||||
if (!CopyFromCUDADPB(pic_idx, target_type, target_surface, output_frame)) {
|
||||
// ===== Step 6: Update target_surface for this frame =====
|
||||
// CRITICAL: SwapChain provides different target_surface each frame!
|
||||
// Always update slot.target_surface to current one.
|
||||
LOGF_DEBUG("[DecodeToSurface] Updating target_surface: %p -> %p (pic_idx=%d)",
|
||||
slot.target_surface, target_surface, pic_idx);
|
||||
|
||||
if (target_surface == nullptr) {
|
||||
LOGF_ERROR("[DecodeToSurface] ERROR: target_surface is NULL for pic_idx=%d", pic_idx);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Always update to current target_surface (SwapChain back buffer changes each frame)
|
||||
slot.target_surface = target_surface;
|
||||
slot.surface_type = target_type;
|
||||
|
||||
// ===== Step 7: Copy from CUDA DPB to target surface =====
|
||||
// Now use slot.target_surface which is guaranteed to be valid (either from decode or late binding)
|
||||
if (!CopyFromCUDADPB(pic_idx, slot.surface_type, slot.target_surface, output_frame)) {
|
||||
LOGF_ERROR("[DecodeToSurface] CopyFromCUDADPB failed for picture_index=%d", pic_idx);
|
||||
return false;
|
||||
}
|
||||
|
||||
LOGF_INFO("[DecodeToSurface] SUCCESS - Frame rendered from CUDA DPB (pic_idx=%d)", pic_idx);
|
||||
|
||||
// ===== Step 7: Mark slot as reusable =====
|
||||
// ===== Step 8: Mark slot as reusable =====
|
||||
slot.ready_for_display.store(false);
|
||||
slot.in_use.store(false);
|
||||
|
||||
LOGF_DEBUG("[DecodeToSurface] Released frame slot %d", pic_idx);
|
||||
|
||||
// ===== Step 8: Release pending submission =====
|
||||
// ===== Step 9: Release pending submission =====
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_submissionMutex);
|
||||
m_pendingSubmissions[pending_idx].in_use.store(false);
|
||||
|
||||
@@ -102,6 +102,14 @@ protected:
|
||||
void LogCUDAError(CUresult result, const std::string& operation) const;
|
||||
|
||||
private:
|
||||
// Decoder state enum (simple inline approach)
|
||||
enum class DecoderState {
|
||||
READY, // Initialized and ready for first packet
|
||||
BUFFERING, // Initial buffering (0-15 frames)
|
||||
DECODING, // Normal frame-by-frame decoding
|
||||
FLUSHING // End-of-file reached, draining DPB
|
||||
};
|
||||
|
||||
// CUDA and NVDEC objects
|
||||
CUcontext m_cuContext = nullptr;
|
||||
CUdevice m_cudaDevice = 0;
|
||||
@@ -224,7 +232,23 @@ private:
|
||||
void PollingThreadFunc(); // Polling thread function
|
||||
|
||||
// Display-only packet handling (B-frame reordering)
|
||||
std::queue<int> m_displayQueue; // Queue of picture_index from HandlePictureDisplay
|
||||
// DisplayQueueEntry: Frame information for PTS-based reordering
|
||||
struct DisplayQueueEntry {
|
||||
int frame_slot_index; // FrameSlot index in m_frameSlots[]
|
||||
int64_t pts; // Presentation timestamp
|
||||
uint64_t submission_id; // Original submission order
|
||||
};
|
||||
|
||||
// PTSComparator: PTS ascending order (Min-heap for earliest PTS first)
|
||||
struct PTSComparator {
|
||||
bool operator()(const DisplayQueueEntry& a, const DisplayQueueEntry& b) const {
|
||||
return a.pts > b.pts; // Min-heap: smallest PTS has highest priority
|
||||
}
|
||||
};
|
||||
|
||||
std::priority_queue<DisplayQueueEntry,
|
||||
std::vector<DisplayQueueEntry>,
|
||||
PTSComparator> m_displayQueue; // PTS-based priority queue
|
||||
std::mutex m_displayMutex;
|
||||
|
||||
// Helper methods
|
||||
@@ -245,8 +269,8 @@ private:
|
||||
bool CopyFromCUDADPB(int pic_idx, VavCoreSurfaceType target_type,
|
||||
void* target_surface, VideoFrame& output_frame);
|
||||
|
||||
// Initial buffering state
|
||||
std::atomic<bool> m_initialBufferingComplete{false};
|
||||
// Decoder state (replaces m_initialBufferingComplete and m_endOfFileReached)
|
||||
DecoderState m_state = DecoderState::READY;
|
||||
|
||||
// NV12ToRGBAConverter reinitialization flag (set by HandleVideoSequence)
|
||||
std::atomic<bool> m_converterNeedsReinit{false};
|
||||
|
||||
Reference in New Issue
Block a user