GlobalFrameBudget Design
This commit is contained in:
586
vav2/docs/working/GlobalFrameBudget_Design.md
Normal file
586
vav2/docs/working/GlobalFrameBudget_Design.md
Normal file
@@ -0,0 +1,586 @@
|
||||
# GlobalFrameBudget Design Document
|
||||
|
||||
## 1. Overview
|
||||
|
||||
### Purpose
|
||||
|
||||
**Problem**: When 4 VideoPlayerControl2 instances play simultaneously, they all hit the initial buffering bottleneck (frames 16-18) at the same time, causing NVDEC queue overflow with QUEUE_DELAY of 35-42ms (exceeding the 33.33ms budget for 30fps).
|
||||
|
||||
**Solution**: Implement a global frame processing budget manager that limits concurrent frame processing during the bottleneck phase, reducing the load from 4 players to 3 maximum, bringing QUEUE_DELAY down to ~28-33ms (within budget).
|
||||
|
||||
### Key Constraints
|
||||
|
||||
- **NVDEC DPB_SIZE = 16**: Required by AV1 sequence header (min_num_decode_surfaces=9 for test video, up to 12+ for complex GOPs)
|
||||
- **INITIAL_BUFFERING = 16**: NVDEC requires full DPB filling for B-frame reordering
|
||||
- **Cannot reduce buffer sizes**: Tested DPB_SIZE=4/8 both crash with "Invalid CurrPicIdx"
|
||||
- **Must maintain sync**: All 4 players should remain synchronized after initial buffering
|
||||
|
||||
---
|
||||
|
||||
## 2. Architecture Diagram
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────┐
|
||||
│ GlobalFrameBudget │
|
||||
│ (Singleton) │
|
||||
├─────────────────────────────────────────────────────────────┤
|
||||
│ State: │
|
||||
│ - m_activeFrames: atomic<int> (current active frames) │
|
||||
│ - MAX_CONCURRENT_FRAMES = 3 (bottleneck phase limit) │
|
||||
├─────────────────────────────────────────────────────────────┤
|
||||
│ Public API: │
|
||||
│ + TryAcquireFrameSlot(playerId, frameNumber) → bool │
|
||||
│ + ReleaseFrameSlot(playerId) → void │
|
||||
│ + GetActiveFrameCount() → int │
|
||||
│ + GetStatistics() → BudgetStatistics │
|
||||
│ + ResetStatistics() → void │
|
||||
└─────────────────────────────────────────────────────────────┘
|
||||
▲
|
||||
│ uses
|
||||
┌────────────────┼────────────────┐
|
||||
│ │ │
|
||||
┌────▼─────┐ ┌────▼─────┐ ┌────▼─────┐
|
||||
│ Player#0 │ │ Player#1 │ │ Player#2 │ ...
|
||||
│FrameProc │ │FrameProc │ │FrameProc │
|
||||
└──────────┘ └──────────┘ └──────────┘
|
||||
|
||||
Call Flow:
|
||||
1. FrameProcessor::ProcessFrame()
|
||||
→ Check Phase == TRIPLE_FILLING?
|
||||
2. YES → TryAcquireFrameSlot()
|
||||
→ m_activeFrames < 3?
|
||||
3. YES → Proceed with decode
|
||||
→ ReleaseFrameSlot() after render completes
|
||||
4. NO → Skip frame (m_framesDropped++)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 3. Processing Phases
|
||||
|
||||
FrameProcessor operates in 3 distinct phases:
|
||||
|
||||
```cpp
|
||||
enum class Phase {
|
||||
INITIAL_BUFFERING, // frames 0-15: NULL surface submission to NVDEC DPB
|
||||
TRIPLE_FILLING, // frames 16-18: triple buffer filling (BOTTLENECK)
|
||||
NORMAL_PLAYBACK // frames 19+: stable rendering
|
||||
};
|
||||
```
|
||||
|
||||
### Phase Details
|
||||
|
||||
| Phase | Frame Range | Behavior | QUEUE_DELAY |
|
||||
|--------------------|-------------|-----------------------------------|------------------|
|
||||
| INITIAL_BUFFERING | 0-15 | NULL surface, no render | 6-15ms (stable) |
|
||||
| TRIPLE_FILLING | 16-18 | Fill triple buffer, first renders | 35-42ms (SPIKE) |
|
||||
| NORMAL_PLAYBACK | 19+ | Steady state rendering | 6-22ms (stable) |
|
||||
|
||||
**GlobalFrameBudget is only active during TRIPLE_FILLING phase.**
|
||||
|
||||
---
|
||||
|
||||
## 4. Class Interface
|
||||
|
||||
### Header: GlobalFrameBudget.h
|
||||
|
||||
```cpp
|
||||
namespace Vav2Player {
|
||||
|
||||
class GlobalFrameBudget
|
||||
{
|
||||
public:
|
||||
static GlobalFrameBudget& GetInstance();
|
||||
|
||||
// Acquire permission to process frame
|
||||
// Returns: true if slot acquired, false if budget limit reached
|
||||
bool TryAcquireFrameSlot(int playerId, uint64_t frameNumber);
|
||||
|
||||
// Release slot after processing complete
|
||||
void ReleaseFrameSlot(int playerId);
|
||||
|
||||
// Query current state
|
||||
int GetActiveFrameCount() const { return m_activeFrames.load(); }
|
||||
|
||||
// Statistics
|
||||
struct BudgetStatistics {
|
||||
uint64_t totalAcquireAttempts;
|
||||
uint64_t successfulAcquires;
|
||||
uint64_t rejectedAcquires;
|
||||
double rejectionRate;
|
||||
};
|
||||
|
||||
BudgetStatistics GetStatistics() const;
|
||||
void ResetStatistics();
|
||||
|
||||
private:
|
||||
GlobalFrameBudget() = default;
|
||||
~GlobalFrameBudget() = default;
|
||||
|
||||
// Disable copy/move
|
||||
GlobalFrameBudget(const GlobalFrameBudget&) = delete;
|
||||
GlobalFrameBudget& operator=(const GlobalFrameBudget&) = delete;
|
||||
|
||||
// Configuration
|
||||
static constexpr int MAX_CONCURRENT_FRAMES_BOTTLENECK = 3;
|
||||
|
||||
// State
|
||||
std::atomic<int> m_activeFrames{0};
|
||||
|
||||
// Statistics
|
||||
std::atomic<uint64_t> m_totalAcquireAttempts{0};
|
||||
std::atomic<uint64_t> m_successfulAcquires{0};
|
||||
std::atomic<uint64_t> m_rejectedAcquires{0};
|
||||
};
|
||||
|
||||
} // namespace Vav2Player
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 5. Integration with FrameProcessor
|
||||
|
||||
### FrameProcessor Changes
|
||||
|
||||
**FrameProcessor.h additions:**
|
||||
|
||||
```cpp
|
||||
class FrameProcessor
|
||||
{
|
||||
public:
|
||||
// Processing phase query
|
||||
enum class Phase {
|
||||
INITIAL_BUFFERING,
|
||||
TRIPLE_FILLING,
|
||||
NORMAL_PLAYBACK
|
||||
};
|
||||
|
||||
Phase GetCurrentPhase() const;
|
||||
|
||||
private:
|
||||
// Track if budget slot was acquired (for proper release)
|
||||
std::atomic<bool> m_budgetSlotAcquired{false};
|
||||
};
|
||||
```
|
||||
|
||||
**FrameProcessor.cpp integration:**
|
||||
|
||||
```cpp
|
||||
bool FrameProcessor::ProcessFrame(VavCorePlayer* player,
|
||||
std::function<void(bool)> onComplete)
|
||||
{
|
||||
// Existing: Skip if previous frame still processing
|
||||
if (m_frameProcessing.load()) {
|
||||
m_framesDropped++;
|
||||
return false;
|
||||
}
|
||||
|
||||
Phase currentPhase = GetCurrentPhase();
|
||||
|
||||
// NEW: Apply GlobalFrameBudget during bottleneck phase
|
||||
if (currentPhase == Phase::TRIPLE_FILLING) {
|
||||
if (!GlobalFrameBudget::GetInstance().TryAcquireFrameSlot(
|
||||
m_playerInstanceId, m_framesDecoded)) {
|
||||
|
||||
LOGF_DEBUG("[Player#%d] Frame %llu SKIPPED (global budget limit)",
|
||||
m_playerInstanceId, m_framesDecoded.load());
|
||||
m_framesDropped++;
|
||||
return false;
|
||||
}
|
||||
m_budgetSlotAcquired = true;
|
||||
}
|
||||
|
||||
m_frameProcessing = true;
|
||||
|
||||
// ... existing decode logic ...
|
||||
|
||||
// UI thread callback with budget release
|
||||
m_dispatcherQueue.TryEnqueue([this, renderIndex, onComplete]() {
|
||||
bool renderSuccess = m_renderer->RenderFrame(renderIndex);
|
||||
|
||||
// NEW: Release budget slot after render complete
|
||||
if (m_budgetSlotAcquired.load()) {
|
||||
GlobalFrameBudget::GetInstance().ReleaseFrameSlot(m_playerInstanceId);
|
||||
m_budgetSlotAcquired = false;
|
||||
}
|
||||
|
||||
m_frameProcessing = false;
|
||||
onComplete(renderSuccess);
|
||||
});
|
||||
|
||||
m_framesDecoded++;
|
||||
return true;
|
||||
}
|
||||
|
||||
FrameProcessor::Phase FrameProcessor::GetCurrentPhase() const
|
||||
{
|
||||
uint64_t decoded = m_framesDecoded.load();
|
||||
|
||||
if (decoded < VAVCORE_NVDEC_INITIAL_BUFFERING) {
|
||||
return Phase::INITIAL_BUFFERING;
|
||||
}
|
||||
else if (decoded < VAVCORE_NVDEC_INITIAL_BUFFERING + VAV2PLAYER_TRIPLE_BUFFER_SIZE) {
|
||||
return Phase::TRIPLE_FILLING;
|
||||
}
|
||||
else {
|
||||
return Phase::NORMAL_PLAYBACK;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 6. Simulation Scenario
|
||||
|
||||
### Timeline with 4 Players
|
||||
|
||||
```
|
||||
Initial State: m_activeFrames = 0, MAX = 3
|
||||
|
||||
t=0ms: All 4 players call Play(), start frame 0
|
||||
|
||||
Phase 1 (frames 0-15): INITIAL_BUFFERING
|
||||
- All 4 players process normally
|
||||
- No GlobalFrameBudget involvement
|
||||
- QUEUE_DELAY: 6-15ms (stable)
|
||||
|
||||
t=533ms: All 4 players reach frame 16
|
||||
|
||||
Phase 2 (frames 16-18): TRIPLE_FILLING (BOTTLENECK)
|
||||
|
||||
Frame 16:
|
||||
t=533ms: Player#0 TryAcquire → m_activeFrames: 0→1 ✅
|
||||
t=533ms: Player#1 TryAcquire → m_activeFrames: 1→2 ✅
|
||||
t=533ms: Player#2 TryAcquire → m_activeFrames: 2→3 ✅
|
||||
t=533ms: Player#3 TryAcquire → REJECTED (3 >= 3) ❌ [FRAME SKIPPED]
|
||||
|
||||
t=543ms: Player#0 render complete → Release → m_activeFrames: 3→2
|
||||
t=543ms: Player#3 ProcessFrame (retry frame 16) → TryAcquire → 2→3 ✅
|
||||
|
||||
Frame 17:
|
||||
Similar pattern: One player skips, retries after slot release
|
||||
|
||||
Frame 18:
|
||||
Similar pattern: One player skips, retries after slot release
|
||||
|
||||
t=633ms: All 4 players reach frame 19
|
||||
|
||||
Phase 3 (frames 19+): NORMAL_PLAYBACK
|
||||
- All 4 players process normally
|
||||
- No GlobalFrameBudget involvement
|
||||
- QUEUE_DELAY: 6-22ms (stable)
|
||||
|
||||
Result:
|
||||
- Bottleneck phase: 4 players → max 3 concurrent
|
||||
- NVDEC queue load: 25% reduction
|
||||
- QUEUE_DELAY: 35-42ms → ~28-33ms (within 33.33ms budget)
|
||||
- Player synchronization: Maintained (skipped frames retry immediately)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 7. Thread Safety
|
||||
|
||||
### Lock-Free Design
|
||||
|
||||
All operations use atomic primitives for thread safety without mutexes:
|
||||
|
||||
```cpp
|
||||
bool GlobalFrameBudget::TryAcquireFrameSlot(int playerId, uint64_t frameNumber)
|
||||
{
|
||||
// Atomic read
|
||||
int current = m_activeFrames.load(std::memory_order_acquire);
|
||||
|
||||
// Check limit
|
||||
if (current >= MAX_CONCURRENT_FRAMES_BOTTLENECK) {
|
||||
return false; // Fast path rejection
|
||||
}
|
||||
|
||||
// Lock-free CAS loop
|
||||
while (current < MAX_CONCURRENT_FRAMES_BOTTLENECK) {
|
||||
if (m_activeFrames.compare_exchange_weak(current, current + 1,
|
||||
std::memory_order_acq_rel,
|
||||
std::memory_order_acquire)) {
|
||||
return true; // Successfully acquired
|
||||
}
|
||||
// compare_exchange_weak failed - current was updated, retry
|
||||
}
|
||||
|
||||
return false; // Budget exhausted during retry
|
||||
}
|
||||
|
||||
void GlobalFrameBudget::ReleaseFrameSlot(int playerId)
|
||||
{
|
||||
// Atomic decrement
|
||||
m_activeFrames.fetch_sub(1, std::memory_order_acq_rel);
|
||||
}
|
||||
```
|
||||
|
||||
### Memory Ordering Rationale
|
||||
|
||||
- **acquire/release**: Ensures proper synchronization between acquire and release operations
|
||||
- **relaxed** (statistics): Non-critical counters, accuracy not critical for correctness
|
||||
|
||||
---
|
||||
|
||||
## 8. Performance Impact
|
||||
|
||||
### Expected Improvements
|
||||
|
||||
**Before (No GlobalFrameBudget):**
|
||||
- Frames 16-18: All 4 players decode simultaneously
|
||||
- NVDEC queue: 4 concurrent submissions
|
||||
- QUEUE_DELAY: 35-42ms (exceeds 33.33ms budget)
|
||||
- Result: Stutter/frame drops
|
||||
|
||||
**After (With GlobalFrameBudget):**
|
||||
- Frames 16-18: Max 3 players decode concurrently
|
||||
- NVDEC queue: 3 concurrent submissions (25% reduction)
|
||||
- QUEUE_DELAY: ~28-33ms (within 33.33ms budget)
|
||||
- Result: Smooth playback
|
||||
|
||||
### Metrics (baseline measured from time.log; "With Budget" values are projections)
|
||||
|
||||
| Metric | Without Budget | With Budget (Expected) |
|
||||
|-----------------------|----------------|------------------------|
|
||||
| QUEUE_DELAY (frame 16)| 35-42ms | 28-33ms |
|
||||
| Frames dropped | 0-2 | 1-3 (brief skip) |
|
||||
| Total playback time | ~600ms | ~650ms (+8% initially) |
|
||||
| Sync after frame 19 | Perfect | Perfect |
|
||||
|
||||
---
|
||||
|
||||
## 9. Statistics and Monitoring
|
||||
|
||||
### BudgetStatistics Structure
|
||||
|
||||
```cpp
|
||||
struct BudgetStatistics {
|
||||
uint64_t totalAcquireAttempts; // Total TryAcquireFrameSlot() calls
|
||||
uint64_t successfulAcquires; // Slots acquired successfully
|
||||
uint64_t rejectedAcquires; // Rejections due to budget limit
|
||||
double rejectionRate; // rejectedAcquires / totalAcquireAttempts
|
||||
};
|
||||
```
|
||||
|
||||
### Usage Example
|
||||
|
||||
```cpp
|
||||
// After playback test
|
||||
auto stats = GlobalFrameBudget::GetInstance().GetStatistics();
|
||||
|
||||
LOGF_INFO("GlobalFrameBudget Statistics:");
|
||||
LOGF_INFO(" Total attempts: %llu", stats.totalAcquireAttempts);
|
||||
LOGF_INFO(" Successful: %llu", stats.successfulAcquires);
|
||||
LOGF_INFO(" Rejected: %llu", stats.rejectedAcquires);
|
||||
LOGF_INFO(" Rejection rate: %.2f%%", stats.rejectionRate * 100.0);
|
||||
|
||||
// Expected results with 4 players:
|
||||
// Total attempts: ~15-18 (12 successful + 3-6 rejected; every rejected call is counted and retried)
// Successful: ~12 (4 players × 3 frames; all eventually succeed)
// Rejected: ~3-6 (transient rejections, immediate retry)
// Rejection rate: ~20-33% (acceptable due to immediate retry)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 10. Configuration Tuning
|
||||
|
||||
### MAX_CONCURRENT_FRAMES_BOTTLENECK
|
||||
|
||||
**Current value: 3**
|
||||
|
||||
Rationale:
|
||||
- 4 concurrent → 35-42ms QUEUE_DELAY (exceeds budget)
|
||||
- 3 concurrent → ~28-33ms QUEUE_DELAY (within budget)
|
||||
- 2 concurrent → Would be too conservative, longer total time
|
||||
|
||||
**Tuning guide:**
|
||||
- Decrease if QUEUE_DELAY still exceeds the budget (more aggressive load reduction)
- Increase if QUEUE_DELAY has comfortable headroom and the rejection rate is unnecessarily high
|
||||
- Monitor via BudgetStatistics.rejectionRate
|
||||
|
||||
### Phase Detection Thresholds
|
||||
|
||||
**Current values:**
|
||||
- INITIAL_BUFFERING: frames 0-15 (VAVCORE_NVDEC_INITIAL_BUFFERING)
|
||||
- TRIPLE_FILLING: frames 16-18 (+VAV2PLAYER_TRIPLE_BUFFER_SIZE)
|
||||
- NORMAL_PLAYBACK: frames 19+
|
||||
|
||||
**Tuning guide:**
|
||||
- Extend TRIPLE_FILLING range if stuttering persists after frame 18
|
||||
- Reduce if budget overhead is unnecessary
|
||||
|
||||
---
|
||||
|
||||
## 11. Error Handling
|
||||
|
||||
### Slot Leak Prevention
|
||||
|
||||
**Problem**: If ReleaseFrameSlot() is not called, m_activeFrames never decrements, causing permanent budget exhaustion.
|
||||
|
||||
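**Mitigation**: A sanity check in ReleaseFrameSlot() detects the opposite error (an unmatched extra release); a missed release is not detectable here and must be prevented by the cleanup pattern in the next subsection: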
|
||||
|
||||
```cpp
|
||||
void GlobalFrameBudget::ReleaseFrameSlot(int playerId)
|
||||
{
|
||||
int previous = m_activeFrames.fetch_sub(1, std::memory_order_acq_rel);
|
||||
|
||||
// Sanity check
|
||||
if (previous <= 0) {
|
||||
LOGF_ERROR("[GlobalFrameBudget] Player#%d attempted to release but m_activeFrames was %d!",
|
||||
playerId, previous);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Proper Cleanup Pattern
|
||||
|
||||
```cpp
|
||||
// In FrameProcessor::ProcessFrame()
|
||||
bool slotAcquired = false;
|
||||
|
||||
if (currentPhase == Phase::TRIPLE_FILLING) {
|
||||
if (GlobalFrameBudget::GetInstance().TryAcquireFrameSlot(...)) {
|
||||
slotAcquired = true;
|
||||
} else {
|
||||
return false; // Skip frame
|
||||
}
|
||||
}
|
||||
|
||||
// Ensure release happens in all code paths
|
||||
auto cleanup = [&]() {
|
||||
if (slotAcquired) {
|
||||
GlobalFrameBudget::GetInstance().ReleaseFrameSlot(...);
|
||||
}
|
||||
};
|
||||
|
||||
// Normal path: UI thread callback
|
||||
m_dispatcherQueue.TryEnqueue([cleanup, ...]() {
|
||||
// ... render ...
|
||||
cleanup();
|
||||
});
|
||||
|
||||
// Error path: immediate cleanup
|
||||
if (decodeError) {
|
||||
cleanup();
|
||||
return false;
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 12. Future Enhancements
|
||||
|
||||
### Adaptive Budget
|
||||
|
||||
Dynamically adjust MAX_CONCURRENT_FRAMES based on measured QUEUE_DELAY:
|
||||
|
||||
```cpp
|
||||
class AdaptiveFrameBudget : public GlobalFrameBudget
|
||||
{
|
||||
private:
|
||||
std::atomic<int> m_maxConcurrent{3}; // Dynamic limit
|
||||
|
||||
public:
|
||||
void UpdateBudget(double measuredQueueDelay) {
|
||||
if (measuredQueueDelay > 35.0) {
|
||||
// Too high, reduce concurrency
|
||||
m_maxConcurrent.store(std::max(1, m_maxConcurrent.load() - 1));
|
||||
} else if (measuredQueueDelay < 25.0) {
|
||||
// Safe margin, can increase
|
||||
m_maxConcurrent.store(std::min(4, m_maxConcurrent.load() + 1));
|
||||
}
|
||||
}
|
||||
};
|
||||
```
|
||||
|
||||
### Per-Decoder Budget
|
||||
|
||||
Different decoders may have different queue capacities:
|
||||
|
||||
```cpp
|
||||
struct DecoderBudget {
|
||||
int maxConcurrentNVDEC = 3;
|
||||
int maxConcurrentVPL = 4;
|
||||
int maxConcurrentAMF = 3;
|
||||
};
|
||||
```
|
||||
|
||||
### Priority-Based Slot Allocation
|
||||
|
||||
Assign priority to players for fair scheduling:
|
||||
|
||||
```cpp
|
||||
bool TryAcquireFrameSlot(int playerId, int priority, uint64_t frameNumber);
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 13. Testing Plan
|
||||
|
||||
### Unit Tests
|
||||
|
||||
1. **Basic slot acquisition:**
|
||||
- Acquire 3 slots → all succeed
|
||||
- Acquire 4th slot → fail
|
||||
|
||||
2. **Slot release:**
|
||||
- Acquire 3 → release 1 → acquire 1 more → succeed
|
||||
|
||||
3. **Statistics tracking:**
|
||||
- Verify counters increment correctly
|
||||
|
||||
### Integration Tests
|
||||
|
||||
1. **Single player:**
|
||||
- GlobalFrameBudget should not interfere
|
||||
- Verify normal playback
|
||||
|
||||
2. **4 simultaneous players:**
|
||||
- Monitor QUEUE_DELAY during frames 16-18
|
||||
- Verify stays within 33.33ms budget
|
||||
- Check synchronization after frame 19
|
||||
|
||||
3. **Stress test:**
|
||||
- 8 simultaneous players
|
||||
- Verify budget prevents complete stall
|
||||
|
||||
### Performance Benchmarks
|
||||
|
||||
Compare time.log with/without GlobalFrameBudget:
|
||||
- QUEUE_DELAY distribution
|
||||
- Total frame drop count
|
||||
- Playback smoothness (subjective)
|
||||
|
||||
---
|
||||
|
||||
## 14. Implementation Checklist
|
||||
|
||||
- [x] Create GlobalFrameBudget.h
|
||||
- [x] Implement GlobalFrameBudget.cpp
|
||||
- [ ] Add Phase enum to FrameProcessor.h
|
||||
- [ ] Implement GetCurrentPhase() in FrameProcessor.cpp
|
||||
- [ ] Integrate TryAcquireFrameSlot() in ProcessFrame()
|
||||
- [ ] Integrate ReleaseFrameSlot() in UI callback
|
||||
- [ ] Add m_budgetSlotAcquired tracking
|
||||
- [ ] Add GlobalFrameBudget.cpp to Vav2Player.vcxproj
|
||||
- [ ] Build and verify compilation
|
||||
- [ ] Test with 4 simultaneous players
|
||||
- [ ] Analyze time.log for improvements
|
||||
- [ ] Document final results
|
||||
|
||||
---
|
||||
|
||||
## 15. References
|
||||
|
||||
- **Original Issue**: time.log analysis showing 35-42ms QUEUE_DELAY spikes
|
||||
- **Root Cause**: 4 players simultaneously hitting frames 16-18 (triple buffer filling)
|
||||
- **DPB Size Investigation**: DPB_SIZE=4/8 crashes, must remain 16
|
||||
- **NVDEC Spec**: min_num_decode_surfaces=9 from AV1 sequence header (codec spec, not H/W)
|
||||
|
||||
---
|
||||
|
||||
*Document Version: 1.0*
|
||||
*Last Updated: 2025-10-11*
|
||||
*Author: Claude Code*
|
||||
@@ -195,6 +195,7 @@
|
||||
</ClInclude>
|
||||
<ClInclude Include="src\Playback\PlaybackController.h" />
|
||||
<ClInclude Include="src\Playback\FrameProcessor.h" />
|
||||
<ClInclude Include="src\Playback\GlobalFrameBudget.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ApplicationDefinition Include="App.xaml" />
|
||||
@@ -274,6 +275,7 @@
|
||||
</ClCompile>
|
||||
<ClCompile Include="src\Playback\PlaybackController.cpp" />
|
||||
<ClCompile Include="src\Playback\FrameProcessor.cpp" />
|
||||
<ClCompile Include="src\Playback\GlobalFrameBudget.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Midl Include="MainWindow.idl">
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
#include "pch.h"
|
||||
#include "FrameProcessor.h"
|
||||
#include "GlobalFrameBudget.h"
|
||||
#include "../Utils/DecoderTypeUtils.h"
|
||||
#include "../Logger/SimpleLogger.h"
|
||||
#include "../Logger/LogManager.h"
|
||||
@@ -78,6 +79,24 @@ bool FrameProcessor::ProcessFrame(VavCorePlayer* player,
|
||||
return false;
|
||||
}
|
||||
|
||||
// Apply GlobalFrameBudget during bottleneck phase (TRIPLE_FILLING)
|
||||
Phase currentPhase = GetCurrentPhase();
|
||||
if (currentPhase == Phase::TRIPLE_FILLING && m_decoderType != VAVCORE_DECODER_DAV1D) {
|
||||
if (!GlobalFrameBudget::GetInstance().TryAcquireFrameSlot(m_playerInstanceId, m_framesDecoded)) {
|
||||
// Budget limit reached - wait for next tick to retry
|
||||
// DO NOT increment m_framesDecoded - we must process this frame later
|
||||
m_frameProcessing.store(false);
|
||||
|
||||
LOGF_DEBUG("[Player#%d] [FrameProcessor] Frame %llu DEFERRED (GlobalFrameBudget limit reached, will retry)",
|
||||
m_playerInstanceId, m_framesDecoded.load());
|
||||
|
||||
return false;
|
||||
}
|
||||
m_budgetSlotAcquired = true;
|
||||
LOGF_DEBUG("[Player#%d] [FrameProcessor] Frame %llu: GlobalFrameBudget slot ACQUIRED",
|
||||
m_playerInstanceId, m_framesDecoded.load());
|
||||
}
|
||||
|
||||
// Decode strategy based on decoder type
|
||||
auto decodeStart = std::chrono::high_resolution_clock::now();
|
||||
VavCoreVideoFrame vavFrame = {};
|
||||
@@ -250,6 +269,14 @@ bool FrameProcessor::ProcessFrame(VavCorePlayer* player,
|
||||
}
|
||||
}
|
||||
|
||||
// Release GlobalFrameBudget slot after render complete
|
||||
if (m_budgetSlotAcquired.load()) {
|
||||
GlobalFrameBudget::GetInstance().ReleaseFrameSlot(m_playerInstanceId);
|
||||
m_budgetSlotAcquired.store(false);
|
||||
LOGF_DEBUG("[Player#%d] [FrameProcessor] GlobalFrameBudget slot RELEASED",
|
||||
m_playerInstanceId);
|
||||
}
|
||||
|
||||
m_frameProcessing.store(false);
|
||||
|
||||
if (onComplete) {
|
||||
@@ -267,4 +294,19 @@ bool FrameProcessor::ProcessFrame(VavCorePlayer* player,
|
||||
return true;
|
||||
}
|
||||
|
||||
// Maps the current decoded-frame counter onto a processing phase.
//
// Returns:
//   INITIAL_BUFFERING while fewer than VAVCORE_NVDEC_INITIAL_BUFFERING frames
//   have been decoded, TRIPLE_FILLING for the following
//   VAV2PLAYER_TRIPLE_BUFFER_SIZE frames, and NORMAL_PLAYBACK afterwards.
FrameProcessor::Phase FrameProcessor::GetCurrentPhase() const
{
    const uint64_t framesSoFar = m_framesDecoded.load();

    // Guard clause: still inside the initial buffering window.
    if (framesSoFar < VAVCORE_NVDEC_INITIAL_BUFFERING) {
        return Phase::INITIAL_BUFFERING;
    }

    const bool stillFilling =
        framesSoFar < VAVCORE_NVDEC_INITIAL_BUFFERING + VAV2PLAYER_TRIPLE_BUFFER_SIZE;

    return stillFilling ? Phase::TRIPLE_FILLING : Phase::NORMAL_PLAYBACK;
}
|
||||
|
||||
} // namespace Vav2Player
|
||||
|
||||
@@ -45,6 +45,15 @@ public:
|
||||
// Check if currently processing
|
||||
bool IsProcessing() const { return m_frameProcessing; }
|
||||
|
||||
// Processing phase (for adaptive behavior)
// The phase is computed by GetCurrentPhase() from m_framesDecoded compared against
// VAVCORE_NVDEC_INITIAL_BUFFERING and VAV2PLAYER_TRIPLE_BUFFER_SIZE.
// NOTE(review): the frame ranges quoted below assume those constants are 16 and 3 - confirm against their definitions.
|
||||
enum class Phase {
|
||||
INITIAL_BUFFERING, // frames 0-15: NULL surface submission to NVDEC DPB
|
||||
TRIPLE_FILLING, // frames 16-18: triple buffer filling (BOTTLENECK PHASE)
|
||||
NORMAL_PLAYBACK // frames 19+: stable rendering
|
||||
};
|
||||
|
||||
Phase GetCurrentPhase() const;
|
||||
|
||||
// Statistics
|
||||
uint64_t GetFramesDecoded() const { return m_framesDecoded; }
|
||||
uint64_t GetFramesDropped() const { return m_framesDropped; }
|
||||
@@ -64,6 +73,10 @@ private:
|
||||
// Processing state (prevents NVDEC surface queue overflow)
|
||||
std::atomic<bool> m_frameProcessing{false};
|
||||
|
||||
// GlobalFrameBudget slot state
|
||||
// True if current frame acquired a budget slot (must release after render)
|
||||
std::atomic<bool> m_budgetSlotAcquired{false};
|
||||
|
||||
// Statistics
|
||||
std::atomic<uint64_t> m_framesDecoded{0};
|
||||
std::atomic<uint64_t> m_framesDropped{0};
|
||||
|
||||
@@ -0,0 +1,96 @@
|
||||
#include "pch.h"
|
||||
#include "GlobalFrameBudget.h"
|
||||
#include "../Logger/SimpleLogger.h"
|
||||
|
||||
namespace Vav2Player {
|
||||
|
||||
// Returns the single shared GlobalFrameBudget object.
// The object is a function-local static, so it is created the first time
// this accessor is called.
GlobalFrameBudget& GlobalFrameBudget::GetInstance()
{
    static GlobalFrameBudget s_sharedBudget;
    return s_sharedBudget;
}
|
||||
|
||||
bool GlobalFrameBudget::TryAcquireFrameSlot(int playerId, uint64_t frameNumber)
|
||||
{
|
||||
m_totalAcquireAttempts.fetch_add(1, std::memory_order_relaxed);
|
||||
|
||||
// Load current active frame count
|
||||
int current = m_activeFrames.load(std::memory_order_acquire);
|
||||
|
||||
// Check if budget limit reached
|
||||
if (current >= MAX_CONCURRENT_FRAMES_BOTTLENECK) {
|
||||
m_rejectedAcquires.fetch_add(1, std::memory_order_relaxed);
|
||||
|
||||
LOGF_DEBUG("[GlobalFrameBudget] Player#%d frame %llu REJECTED (current: %d/%d active frames)",
|
||||
playerId, frameNumber, current, MAX_CONCURRENT_FRAMES_BOTTLENECK);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Try to atomically increment active frame count
|
||||
// Use compare_exchange to handle race conditions
|
||||
while (current < MAX_CONCURRENT_FRAMES_BOTTLENECK) {
|
||||
if (m_activeFrames.compare_exchange_weak(current, current + 1,
|
||||
std::memory_order_acq_rel,
|
||||
std::memory_order_acquire)) {
|
||||
// Successfully acquired slot
|
||||
m_successfulAcquires.fetch_add(1, std::memory_order_relaxed);
|
||||
|
||||
LOGF_DEBUG("[GlobalFrameBudget] Player#%d frame %llu ACQUIRED (active: %d/%d)",
|
||||
playerId, frameNumber, current + 1, MAX_CONCURRENT_FRAMES_BOTTLENECK);
|
||||
|
||||
return true;
|
||||
}
|
||||
// compare_exchange_weak failed - current was updated with new value, retry loop
|
||||
}
|
||||
|
||||
// Budget limit reached during retry
|
||||
m_rejectedAcquires.fetch_add(1, std::memory_order_relaxed);
|
||||
|
||||
LOGF_DEBUG("[GlobalFrameBudget] Player#%d frame %llu REJECTED after retry (current: %d/%d)",
|
||||
playerId, frameNumber, current, MAX_CONCURRENT_FRAMES_BOTTLENECK);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void GlobalFrameBudget::ReleaseFrameSlot(int playerId)
|
||||
{
|
||||
int previous = m_activeFrames.fetch_sub(1, std::memory_order_acq_rel);
|
||||
|
||||
LOGF_DEBUG("[GlobalFrameBudget] Player#%d RELEASED slot (active: %d→%d)",
|
||||
playerId, previous, previous - 1);
|
||||
|
||||
// Sanity check: ensure we don't go negative
|
||||
if (previous <= 0) {
|
||||
LOGF_ERROR("[GlobalFrameBudget] Player#%d attempted to release slot but m_activeFrames was %d!",
|
||||
playerId, previous);
|
||||
}
|
||||
}
|
||||
|
||||
// Builds a snapshot of the acquire counters.
//
// Returns: a BudgetStatistics whose rejectionRate is
//          rejectedAcquires / totalAcquireAttempts, or 0.0 when no attempt
//          has been recorded. The three counters are read with independent
//          relaxed loads.
GlobalFrameBudget::BudgetStatistics GlobalFrameBudget::GetStatistics() const
{
    const uint64_t attempts = m_totalAcquireAttempts.load(std::memory_order_relaxed);
    const uint64_t granted = m_successfulAcquires.load(std::memory_order_relaxed);
    const uint64_t denied = m_rejectedAcquires.load(std::memory_order_relaxed);

    BudgetStatistics snapshot;
    snapshot.totalAcquireAttempts = attempts;
    snapshot.successfulAcquires = granted;
    snapshot.rejectedAcquires = denied;

    // Avoid dividing by zero before the first attempt.
    snapshot.rejectionRate = (attempts > 0)
        ? static_cast<double>(denied) / static_cast<double>(attempts)
        : 0.0;

    return snapshot;
}
|
||||
|
||||
void GlobalFrameBudget::ResetStatistics()
|
||||
{
|
||||
m_totalAcquireAttempts.store(0, std::memory_order_relaxed);
|
||||
m_successfulAcquires.store(0, std::memory_order_relaxed);
|
||||
m_rejectedAcquires.store(0, std::memory_order_relaxed);
|
||||
|
||||
LOGF_INFO("[GlobalFrameBudget] Statistics reset");
|
||||
}
|
||||
|
||||
} // namespace Vav2Player
|
||||
@@ -0,0 +1,80 @@
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <cstdint>
|
||||
|
||||
namespace Vav2Player {
|
||||
|
||||
// Global frame processing budget manager for multi-player synchronization
//
// Purpose: Prevent NVDEC queue overflow when multiple VideoPlayerControl2 instances
//          hit the initial buffering bottleneck (frames 16-18) simultaneously.
//
// Strategy: Limit concurrent frame processing during bottleneck phase to reduce
//           QUEUE_DELAY from 35-42ms to ~28-33ms (below 33.33ms budget for 30fps).
//
// Thread Safety: All state is held in std::atomic members.
class GlobalFrameBudget
{
public:
    // Singleton instance
    static GlobalFrameBudget& GetInstance();

    // Try to acquire permission to process a frame
    //
    // Parameters:
    //   playerId: Unique player instance ID (for logging)
    //   frameNumber: Current frame number (for logging)
    //
    // Returns: true if slot acquired (proceed with decode), false if the
    //          budget limit is reached (do not decode this frame now)
    //
    // The result must not be ignored: a true result obliges the caller to
    // call ReleaseFrameSlot() exactly once.
    //
    // Thread Safety: Uses atomic compare_exchange for lock-free slot acquisition
    [[nodiscard]] bool TryAcquireFrameSlot(int playerId, uint64_t frameNumber);

    // Release frame slot after processing complete
    //
    // Must be called after decode+render completes to avoid slot leaks
    //
    // Thread Safety: Operates on the atomic active-frame counter
    void ReleaseFrameSlot(int playerId);

    // Query current active frame count (for debugging/monitoring)
    int GetActiveFrameCount() const { return m_activeFrames.load(); }

    // Statistics for performance analysis
    // All fields are zero-initialized so a default-constructed value is well defined.
    struct BudgetStatistics {
        uint64_t totalAcquireAttempts = 0;   // Total TryAcquireFrameSlot() calls
        uint64_t successfulAcquires = 0;     // Successful slot acquisitions
        uint64_t rejectedAcquires = 0;       // Rejected due to budget limit
        double rejectionRate = 0.0;          // rejectedAcquires / totalAcquireAttempts
    };

    BudgetStatistics GetStatistics() const;
    void ResetStatistics();

private:
    GlobalFrameBudget() = default;
    ~GlobalFrameBudget() = default;

    // Disable copy/move
    GlobalFrameBudget(const GlobalFrameBudget&) = delete;
    GlobalFrameBudget& operator=(const GlobalFrameBudget&) = delete;
    GlobalFrameBudget(GlobalFrameBudget&&) = delete;
    GlobalFrameBudget& operator=(GlobalFrameBudget&&) = delete;

    // Configuration: Maximum concurrent frames during bottleneck phase
    //
    // Rationale: With 4 players simultaneously processing frames 16-18:
    // - Without limit: QUEUE_DELAY 35-42ms (exceeds 33.33ms budget)
    // - With limit=3: QUEUE_DELAY ~28-33ms (within budget)
    // - 25% load reduction prevents NVDEC queue overflow
    static constexpr int MAX_CONCURRENT_FRAMES_BOTTLENECK = 3;

    // State: number of slots currently held
    std::atomic<int> m_activeFrames{0};

    // Statistics
    std::atomic<uint64_t> m_totalAcquireAttempts{0};
    std::atomic<uint64_t> m_successfulAcquires{0};
    std::atomic<uint64_t> m_rejectedAcquires{0};
};
|
||||
|
||||
} // namespace Vav2Player
|
||||
Reference in New Issue
Block a user