D3D12VideoRenderer Layered Architecture - Final Design v3

This commit is contained in:
2025-10-06 13:55:43 +09:00
parent 0019f2b106
commit 77b6246c67
25 changed files with 3066 additions and 7273 deletions

View File

@@ -118,7 +118,14 @@
"Bash(\"/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v13.0/bin/nvcc.exe\" -ptx -arch=sm_75 rgba_surface_write_kernel.cu -o rgba_surface_write_kernel.ptx)",
"Bash(\"C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v13.0/bin/nvcc.exe\" -ptx -arch=sm_75 -ccbin \"C:/Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.42.34433/bin/Hostx64/x64\" rgba_surface_write_kernel.cu -o rgba_surface_write_kernel.ptx)",
"Bash(\"C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v13.0/bin/nvcc.exe\" -ptx -arch=sm_75 -ccbin \"C:/Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.44.35207/bin/Hostx64/x64\" rgba_surface_write_kernel.cu -o rgba_surface_write_kernel.ptx)",
"Bash(\"D:/Project/video-av1/vav2/platforms/windows/tests/red-surface-nvdec/bin/Debug/RedSurfaceNVDECTest.exe\" \"D:/Project/video-av1/sample/test_4px_stripe_720p_av1.webm\")"
"Bash(\"D:/Project/video-av1/vav2/platforms/windows/tests/red-surface-nvdec/bin/Debug/RedSurfaceNVDECTest.exe\" \"D:/Project/video-av1/sample/test_4px_stripe_720p_av1.webm\")",
"Bash(__NEW_LINE__ sed -i 's/GetSupportedFormat/GetSupportedFormatId/g' RGBASurfaceBackend.h RGBASurfaceBackend.cpp YUV420PUploadBackend.h YUV420PUploadBackend.cpp D3D12VideoRenderer.h D3D12VideoRenderer.cpp)",
"Bash(__NEW_LINE__ sed -i 's/VAVCORE_COLOR_SPACE_RGB32/1/g' RGBASurfaceBackend.h)",
"Bash(__NEW_LINE__ sed -i 's/VAVCORE_COLOR_SPACE_YUV420P/2/g' YUV420PUploadBackend.h)",
"Bash(__NEW_LINE__ sed -i 's/VavCoreColorSpace colorSpace/const VavCoreVideoFrame\\& frame/g' D3D12VideoRenderer.h D3D12VideoRenderer.cpp)",
"Bash(__NEW_LINE__ sed -i 's/VavCoreColorSpace m_lastFrameFormat = VAVCORE_COLOR_SPACE_UNKNOWN;/uint32_t m_videoWidth = 0;\\n uint32_t m_videoHeight = 0;/g' D3D12VideoRenderer.h)",
"Bash(__NEW_LINE__ sed -i 's/frame\\.color_space/frame.surface_type/g' D3D12VideoRenderer.cpp YUV420PUploadBackend.cpp)",
"Bash(__NEW_LINE__ sed -i '2a#include \"\"d3dx12.h\"\"' RGBASurfaceBackend.cpp)"
],
"deny": [],
"ask": []

View File

@@ -1,299 +0,0 @@
<#
.SYNOPSIS
Find processes that have open file handles to a specific file or directory.
.DESCRIPTION
Uses Sysinternals handle.exe (NtHandle v5.0) to find all processes holding handles
to the specified file or directory path. Supports detailed output and process information.
.PARAMETER Path
The file or directory path to search for. If not specified, prompts for input.
.PARAMETER HandleExePath
Path to handle.exe. If not specified, auto-detects from common locations.
.PARAMETER ShowDetails
Show detailed handle information including handle ID, type, and user.
.PARAMETER OutputFile
Save results to the specified file (e.g., handle.out).
.EXAMPLE
.\Find-FileHandles.ps1 -Path "D:\Project\video-av1\vav2\docs"
.EXAMPLE
.\Find-FileHandles.ps1 -Path "D:\Project\video-av1\sample\test.webm" -ShowDetails
.EXAMPLE
.\Find-FileHandles.ps1 -Path "D:\Project" -OutputFile "handle.out"
.EXAMPLE
.\Find-FileHandles.ps1 -Path "D:\Project" -ShowDetails -OutputFile "handle.out"
#>
param (
[Parameter(Position=0)]
[string]$Path,
[Parameter(Mandatory=$false)]
[string]$HandleExePath,
[Parameter(Mandatory=$false)]
[switch]$ShowDetails,
[Parameter(Mandatory=$false)]
[string]$OutputFile
)
# Force UTF-8 console encoding (prevents garbled non-ASCII output such as Korean text)
[Console]::OutputEncoding = [System.Text.Encoding]::UTF8
$OutputEncoding = [System.Text.Encoding]::UTF8
# If the Path parameter was not supplied, prompt the user for it
if (-not $Path) {
$Path = Read-Host "Enter the file or directory path to search for handles"
# An empty answer is a hard error: there is nothing to search for
if (-not $Path) {
Write-Host "[ERROR] No path specified." -ForegroundColor Red
exit 1
}
}
# Validate and normalize the path
if (Test-Path $Path) {
# Existing path: search for its fully resolved form
$TargetPath = (Resolve-Path $Path).Path
} else {
# Missing path is only a warning: the search still runs with the raw string as typed
Write-Host "[WARNING] Path does not exist: $Path" -ForegroundColor Yellow
Write-Host "Searching anyway (path might be locked or inaccessible)..." -ForegroundColor Gray
$TargetPath = $Path
}
# --- CONFIGURATION ---
# Auto-detect handle.exe when -HandleExePath was not supplied
if (-not $HandleExePath) {
    $candidatePaths = @(
        "C:\Sysinternals\handle.exe",
        "$env:ProgramFiles\Sysinternals\handle.exe",
        "$env:USERPROFILE\Downloads\handle.exe",
        "$env:TEMP\handle.exe",
        "handle.exe" # relative: only matches a copy in the current directory
    )
    # The loop variable is deliberately NOT named $path: PowerShell variable names are
    # case-insensitive, so $path would overwrite the script's $Path parameter.
    foreach ($candidate in $candidatePaths) {
        if (Test-Path $candidate) {
            $HandleExePath = $candidate
            break
        }
    }
    # Fall back to locating handle.exe on PATH
    if (-not $HandleExePath) {
        try {
            $HandleExePath = (Get-Command "handle.exe" -ErrorAction Stop).Source
        } catch {
            $HandleExePath = $null
        }
    }
}
# Normalize to an absolute path. handle.exe is launched later from a background job,
# which may start in a different working directory than this script, so a relative
# path (auto-detected or user-supplied) could fail to resolve there.
if ($HandleExePath -and (Test-Path $HandleExePath)) {
    $HandleExePath = (Resolve-Path $HandleExePath).Path
}
# ---------------------
# Verify that handle.exe was located and actually exists; otherwise print
# download/installation instructions and abort with exit code 1
if (-not $HandleExePath -or -not (Test-Path $HandleExePath)) {
Write-Host "[ERROR] handle.exe not found!" -ForegroundColor Red
Write-Host ""
Write-Host "Please download handle.exe from:" -ForegroundColor Cyan
Write-Host "https://learn.microsoft.com/en-us/sysinternals/downloads/handle" -ForegroundColor Cyan
Write-Host ""
Write-Host "And place it in one of these locations:" -ForegroundColor Green
# Each entry is printed as its own bullet line
@(
"C:\Sysinternals\handle.exe",
"$env:ProgramFiles\Sysinternals\handle.exe",
"Or add it to your PATH"
) | ForEach-Object { Write-Host " - $_" -ForegroundColor Yellow }
exit 1
}
# Build the title banner dynamically, sized to the title text
$title = "Scanning for handles to: $TargetPath"
# Banner width: title length plus 4, but never narrower than 60 columns
$boxWidth = [Math]::Max($title.Length + 4, 60)
# Number of trailing spaces appended after the title text
$padding = $boxWidth - $title.Length - 2
Write-Host "$('═' * $boxWidth)" -ForegroundColor Cyan
Write-Host "$title$(' ' * $padding)" -ForegroundColor Cyan
Write-Host "$('═' * $boxWidth)" -ForegroundColor Cyan
Write-Host ""
# Execute handle.exe to get all open file handle information.
# The tool runs in a background job so a progress spinner can be drawn while it scans.
$handleOutput = ""
$outputLines = @()
$job = $null
try {
    Write-Host "[*] Executing handle.exe (this may take 10-30 seconds)..." -ForegroundColor Gray
    # Timer used for the elapsed-time display
    $stopwatch = [System.Diagnostics.Stopwatch]::StartNew()
    # Run handle.exe asynchronously; stderr is merged into the job's output stream
    $job = Start-Job -ScriptBlock {
        param($exePath, $targetPath)
        & $exePath -accepteula -nobanner "$targetPath" 2>&1
    } -ArgumentList $HandleExePath, $TargetPath
    # Progress spinner, redrawn in place via carriage return
    $spinnerChars = @('|', '/', '-', '\')
    $spinnerIndex = 0
    while ($job.State -eq 'Running') {
        $elapsed = $stopwatch.Elapsed.TotalSeconds
        Write-Host "`r[*] Scanning system handles... $($spinnerChars[$spinnerIndex]) [$([math]::Round($elapsed, 1))s]" -NoNewline -ForegroundColor Yellow
        # Wrap on the actual spinner length instead of a hard-coded 4
        $spinnerIndex = ($spinnerIndex + 1) % $spinnerChars.Count
        Start-Sleep -Milliseconds 200
    }
    $stopwatch.Stop()
    Write-Host "`r[✓] Scan completed in $([math]::Round($stopwatch.Elapsed.TotalSeconds, 1))s " -ForegroundColor Green
    # Collect the results. @(...) guarantees an array even when the job emitted
    # zero lines or exactly one line.
    $outputLines = @(Receive-Job -Job $job)
    $handleOutput = $outputLines
}
catch {
    Write-Host "[ERROR] Failed to execute handle.exe: $($_.Exception.Message)" -ForegroundColor Red
    exit 1
}
finally {
    # Always dispose of the job, including on the error path above, so a failed
    # run does not leave a background job behind in the session.
    if ($job) {
        Remove-Job -Job $job -Force -ErrorAction SilentlyContinue
    }
}
# Result list (ArrayList is used instead of array += for better performance)
$foundProcesses = New-Object System.Collections.ArrayList
$currentProcess = $null
# Debug: show the number of output lines
Write-Host "[DEBUG] Total output lines: $($outputLines.Count)" -ForegroundColor DarkGray
# Analyze handle.exe output line by line
foreach ($line in $outputLines) {
    # handle.exe reported that nothing matched.
    # Stop parsing but do NOT exit here: exiting would skip the -OutputFile save.
    # The empty-result check later in the script prints the "no handles" message
    # and exits with code 0 once the raw output has been written.
    if ($line -match "No matching handles found") {
        $foundProcesses.Clear()
        break
    }
    # Process line format: "processname.exe pid: 1234 type: File user: DOMAIN\User"
    # or: "processname.exe pid: 1234 user: DOMAIN\User"
    if ($line -match '^(\S+\.exe)\s+pid:\s+(\d+)\s+(.*)$') {
        # Copy the captures first: the nested -match below overwrites $matches
        $processName = $matches[1]
        $processId = [int]$matches[2]
        $remainder = $matches[3]
        # Extract the user name, if present
        $userName = ""
        if ($remainder -match 'user:\s+([^\s]+)') {
            $userName = $matches[1]
        }
        $currentProcess = [PSCustomObject]@{
            ProcessName = $processName
            PID = $processId
            User = $userName
            Handles = New-Object System.Collections.ArrayList
        }
        [void]$foundProcesses.Add($currentProcess)
        Write-Host "[DEBUG] Found process: $processName (PID: $processId)" -ForegroundColor DarkGray
    }
    # Handle detail line format: " 1A4: File (RWD) C:\path\to\file"
    # or the short form: " 1A4: C:\path\to\file"
    # Detail lines are attached to the most recently seen process line.
    elseif ($line -match '^\s+([0-9A-F]+):\s+(.+)$' -and $currentProcess) {
        $handleId = $matches[1]
        $handleDetails = $matches[2].Trim()
        # Split off a leading type word when present; otherwise assume "File"
        $handleType = "File"
        if ($handleDetails -match '^(\w+)\s+(.+)$') {
            $handleType = $matches[1]
            $handleDetails = $matches[2].Trim()
        }
        $handleInfo = [PSCustomObject]@{
            HandleID = $handleId
            Type = $handleType
            Details = $handleDetails
        }
        [void]$currentProcess.Handles.Add($handleInfo)
        Write-Host "[DEBUG] Handle: $handleId -> $handleDetails" -ForegroundColor DarkGray
    }
}
# Keep only the processes that actually own at least one parsed handle
$processesWithHandles = New-Object System.Collections.ArrayList
foreach ($proc in $foundProcesses) {
    if ($proc.Handles.Count -le 0) {
        Write-Host "[DEBUG] Removing process $($proc.ProcessName) (no handles found)" -ForegroundColor DarkGray
        continue
    }
    [void]$processesWithHandles.Add($proc)
}
$foundProcesses = $processesWithHandles

# Optionally persist the raw handle.exe output
if ($OutputFile) {
    $outputLines | Out-File -FilePath $OutputFile -Encoding UTF8
    Write-Host "[+] Raw output saved to: $OutputFile" -ForegroundColor Green
}

# Nothing left to report: print the all-clear message and finish successfully
if ($foundProcesses.Count -eq 0) {
    Write-Host "✓ No processes are holding handles to this path." -ForegroundColor Green
    exit 0
}
# Report every process that still holds handles, followed by remediation hints
Write-Host "⚠ Found $($foundProcesses.Count) process(es) with open handles:" -ForegroundColor Yellow
Write-Host ""
foreach ($proc in $foundProcesses) {
Write-Host "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" -ForegroundColor DarkGray
# Header line is assembled from several -NoNewline writes so each part gets its own color
Write-Host "Process: " -NoNewline
Write-Host "$($proc.ProcessName)" -ForegroundColor White -NoNewline
Write-Host " (PID: " -NoNewline
Write-Host "$($proc.PID)" -ForegroundColor Cyan -NoNewline
Write-Host ")"
# The user line is printed only when a user name was parsed
if ($proc.User) {
Write-Host " User: " -NoNewline -ForegroundColor Gray
Write-Host "$($proc.User)" -ForegroundColor Yellow
}
Write-Host " Handles: " -NoNewline -ForegroundColor Gray
Write-Host "$($proc.Handles.Count)" -ForegroundColor Magenta
# Always show the handle details
if ($proc.Handles.Count -gt 0) {
Write-Host ""
foreach ($handle in $proc.Handles) {
if ($ShowDetails) {
# Detailed mode: also show the handle ID and type
Write-Host " ├─ Handle ID: " -NoNewline -ForegroundColor DarkGray
Write-Host "$($handle.HandleID)" -ForegroundColor Cyan
Write-Host " │ Type: " -NoNewline -ForegroundColor DarkGray
Write-Host "$($handle.Type)" -ForegroundColor Green
Write-Host " │ Path: " -NoNewline -ForegroundColor DarkGray
Write-Host "$($handle.Details)" -ForegroundColor White
Write-Host "" -ForegroundColor DarkGray
} else {
# Default mode: show only the path
Write-Host "" -NoNewline -ForegroundColor Yellow
Write-Host "$($handle.Details)" -ForegroundColor White
}
}
}
Write-Host ""
}
Write-Host "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" -ForegroundColor DarkGray
Write-Host ""
# Remediation hints for releasing the handles
Write-Host "💡 To close these handles:" -ForegroundColor Yellow
Write-Host " 1. Close the applications manually" -ForegroundColor Gray
Write-Host " 2. Use Task Manager to end the processes" -ForegroundColor Gray
Write-Host " 3. Run: " -NoNewline -ForegroundColor Gray
Write-Host "Stop-Process -Id <PID> -Force" -ForegroundColor White
Write-Host ""

View File

@@ -0,0 +1,538 @@
# D3D12VideoRenderer Layered Architecture - Final Design v3
**Date**: 2025-10-06
**Status**: ✅ **FINAL APPROVED DESIGN** - Format + Method Naming Convention
**Supersedes**: SimpleGPURenderer_Layered_Architecture_Design_v2.md
**Key Decision**: Use Surface/Upload/Direct method naming (NO Hardware/Software)
---
## 🎯 Final Naming Convention
**Format**: `{PixelFormat}{Method}Backend`
**Approved Methods**:
- **Surface**: CUDA Surface Objects for tiled texture write
- **Upload**: CPU upload buffers + GPU compute shader
- **Direct**: Direct GPU rendering (future)
**Rejected Methods**:
- ❌ Hardware/Software - Too implementation-focused, not descriptive
---
## 📊 Final Backend Architecture
```
D3D12VideoRenderer (orchestrator)
├── RGBASurfaceBackend (handles VAVCORE_COLOR_SPACE_RGB32)
├── YUV420PUploadBackend (handles VAVCORE_COLOR_SPACE_YUV420P)
└── NV12DirectBackend (handles VAVCORE_COLOR_SPACE_NV12) [future]
```
**File Mapping**:
| Old Code | New Backend | Format + Method | Implementation |
|----------|-------------|-----------------|----------------|
| SimpleGPURenderer RGBA | `RGBASurfaceBackend` | RGB32 + Surface | NVDEC → CUDA RGBA → surf2Dwrite() → D3D12 |
| D3D12VideoRenderer (old) | `YUV420PUploadBackend` | YUV420P + Upload | dav1d → CPU upload → GPU YUV→RGB shader |
| Future NV12 | `NV12DirectBackend` | NV12 + Direct | NVDEC → D3D12 NV12 → Direct rendering |
**Benefits**:
- ✅ **Format clarity**: First word = pixel format (RGBA, YUV420P, NV12)
- ✅ **Method clarity**: Second word = rendering method (Surface, Upload, Direct)
- ✅ **Direct mapping**: Easy to map `VavCoreColorSpace` → backend class
- ✅ **No ambiguity**: "Surface" = CUDA Surface Objects, "Upload" = CPU buffers, "Direct" = GPU-direct
**Code Example**:
```cpp
void D3D12VideoRenderer::SelectBackend(const VavCoreVideoFrame& frame) {
switch (frame.color_space) {
case VAVCORE_COLOR_SPACE_RGB32:
m_activeBackend = m_rgbaSurfaceBackend.get(); // Surface method
break;
case VAVCORE_COLOR_SPACE_YUV420P:
m_activeBackend = m_yuv420pUploadBackend.get(); // Upload method
break;
case VAVCORE_COLOR_SPACE_NV12:
m_activeBackend = m_nv12DirectBackend.get(); // Direct method
break;
}
}
```
---
## 🚫 Rejected Naming Approaches
### ❌ Hardware/Software Naming
```cpp
// REJECTED - Too implementation-focused
RGBAHardwareBackend // What "hardware"? GPU? NVDEC? Confusing
YUV420PSoftwareBackend // Still uses GPU shaders, not really "software"
```
**Why rejected**: "Hardware/Software" describes implementation internals, not the rendering method visible to users
---
## ✅ Why Surface/Upload/Direct Works Better
**Surface (CUDA Surface Objects)**:
- Describes the actual mechanism: Writing to D3D12 tiled textures via CUDA surfaces
- Clear technical distinction from linear buffers
- Indicates GPU-direct write capability
**Upload (CPU Upload Buffers)**:
- Describes the actual mechanism: CPU writes to upload heaps → GPU copy
- Familiar concept in graphics programming
- Indicates CPU involvement in data transfer
**Direct (Direct GPU Rendering)**:
- Describes the actual mechanism: GPU renders directly without format conversion
- Future-proof naming for hardware-decoded NV12
- Indicates zero-copy GPU pipeline
---
## 📐 Architecture Diagram
```
┌─────────────────────────────────────────────────────────────┐
│ IVideoRenderer │
│ (Public API - unchanged) │
└─────────────────────────────────────────────────────────────┘
│ implements
┌─────────────────────────────────────────────────────────────┐
│ D3D12VideoRenderer │
│ (Orchestrator - format-agnostic) │
│ │
│ Responsibilities: │
│ - D3D12 device, command queue, swap chain │
│ - Backend selection by color_space │
│ - Delegation to active backend │
│ - ~300 lines │
└─────────────────────────────────────────────────────────────┘
│ delegates to
┌───────────────────┴───────────────────────────┐
│ │ │
┌───────▼─────────┐ ┌──────▼────────────┐ ┌──────▼──────────┐
│ RGBASurface │ │ YUV420PUpload │ │ NV12Direct │
│ Backend │ │ Backend │ │ Backend │
│ │ │ │ │ │
│ Format: RGB32 │ │ Format: YUV420P │ │ Format: NV12 │
│ Method: Surface │ │ Method: Upload │ │ Method: Direct │
│ │ │ │ │ │
│ Source: │ │ Source: │ │ Source: │
│ SimpleGPU │ │ D3D12Video │ │ Future │
│ Renderer │ │ Renderer (old) │ │ │
│ RGBA path │ │ │ │ │
│ │ │ │ │ │
│ Pipeline: │ │ Pipeline: │ │ Pipeline: │
│ NVDEC NV12 → │ │ dav1d YUV → │ │ NVDEC NV12 → │
│ CUDA RGBA → │ │ CPU upload → │ │ D3D12 NV12 → │
│ surf2Dwrite() → │ │ GPU YUV→RGB → │ │ Direct render → │
│ D3D12 RGBA → │ │ Render │ │ Present │
│ Sampling │ │ │ │ │
│ │ │ │ │ │
│ ~400 lines │ │ ~2000 lines │ │ TBD │
└─────────────────┘ └───────────────────┘ └─────────────────┘
```
---
## 📂 Final File Structure
```
src/Rendering/
├── IVideoRenderer.h # Public interface
├── D3D12VideoRenderer.h/.cpp # Orchestrator (~300 lines)
├── IVideoBackend.h # Internal backend interface
├── RGBASurfaceBackend.h/.cpp # RGBA Surface backend (~400 lines)
│ │ Extracted from: SimpleGPURenderer RGBA path
│ │ Handles: VAVCORE_COLOR_SPACE_RGB32
│ │ Method: CUDA Surface Objects (surf2Dwrite)
│ │ Pipeline: NVDEC → CUDA RGBA → surf2Dwrite() → D3D12 RGBA → sampling
├── YUV420PUploadBackend.h/.cpp # YUV420P Upload backend (~2000 lines)
│ │ Renamed from: D3D12VideoRenderer (old)
│ │ Handles: VAVCORE_COLOR_SPACE_YUV420P
│ │ Method: CPU upload buffers + GPU shader
│ │ Pipeline: dav1d → CPU upload → GPU YUV→RGB shader → render
└── NV12DirectBackend.h/.cpp # NV12 Direct backend (future)
│ Handles: VAVCORE_COLOR_SPACE_NV12
│ Method: Direct GPU rendering (zero-copy)
│ Pipeline: NVDEC → D3D12 NV12 → Direct render → present
Legacy/ (archived)
└── SimpleGPURenderer_Legacy.h/.cpp # Old mixed-format renderer
```
---
## 🎯 Backend Responsibilities
### IVideoBackend Interface
```cpp
class IVideoBackend {
public:
virtual ~IVideoBackend() = default;
// Lifecycle
virtual HRESULT Initialize(
ID3D12Device* device,
ID3D12CommandQueue* commandQueue,
uint32_t width, uint32_t height) = 0;
virtual void Shutdown() = 0;
virtual bool IsInitialized() const = 0;
// Video texture for CUDA interop (nullptr if not applicable)
virtual HRESULT CreateVideoTexture(uint32_t width, uint32_t height) = 0;
virtual ID3D12Resource* GetVideoTexture() const = 0;
// Render frame to back buffer
virtual HRESULT RenderToBackBuffer(
const VavCoreVideoFrame& frame,
ID3D12Resource* backBuffer,
ID3D12GraphicsCommandList* commandList) = 0;
// Format this backend handles
virtual VavCoreColorSpace GetSupportedFormat() const = 0;
};
```
---
### RGBASurfaceBackend
**Handles**: `VAVCORE_COLOR_SPACE_RGB32`
**Method**: CUDA Surface Objects (surf2Dwrite)
```cpp
class RGBASurfaceBackend : public IVideoBackend {
public:
VavCoreColorSpace GetSupportedFormat() const override {
return VAVCORE_COLOR_SPACE_RGB32;
}
HRESULT CreateVideoTexture(uint32_t width, uint32_t height) override;
// Creates: DXGI_FORMAT_R8G8B8A8_UNORM texture with D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS
// Enables CUDA Surface Object creation via cudaExternalMemoryGetMappedMipmappedArray
HRESULT RenderToBackBuffer(...) override;
// Pipeline: Simple RGBA texture sampling (no YUV conversion needed)
private:
ComPtr<ID3D12Resource> m_rgbaTexture; // Tiled RGBA texture
ComPtr<ID3D12PipelineState> m_pipelineState;
ComPtr<ID3D12RootSignature> m_rootSignature;
// Simple texture sampling shader (no YUV conversion)
};
```
**Source**: Extracted from `SimpleGPURenderer` RGBA path
**Size**: ~400 lines
**Key Feature**: Uses CUDA Surface Objects for tiled texture write (surf2Dwrite)
---
### YUV420PUploadBackend
**Handles**: `VAVCORE_COLOR_SPACE_YUV420P`
**Method**: CPU upload buffers + GPU shader
```cpp
class YUV420PUploadBackend : public IVideoBackend {
public:
VavCoreColorSpace GetSupportedFormat() const override {
return VAVCORE_COLOR_SPACE_YUV420P;
}
HRESULT CreateVideoTexture(uint32_t width, uint32_t height) override;
// Creates: Separate Y/U/V textures + CPU upload buffers (ring buffer system)
HRESULT RenderToBackBuffer(...) override;
// Pipeline:
// 1. CPU writes to upload buffers (ring buffer system, persistent mapped memory)
// 2. GPU copies upload → textures (CopyTextureRegion)
// 3. YUV→RGB compute shader (GPU conversion)
// 4. Render to back buffer
// Legacy D3D12VideoRenderer methods (preserved for compatibility)
uint8_t* GetYMappedBuffer(uint32_t bufferIndex) const;
uint8_t* GetUMappedBuffer(uint32_t bufferIndex) const;
uint8_t* GetVMappedBuffer(uint32_t bufferIndex) const;
private:
// Ring buffer system (from old D3D12VideoRenderer)
struct RingBufferSlot {
ComPtr<ID3D12Resource> yUploadBuffer; // D3D12_HEAP_TYPE_UPLOAD
ComPtr<ID3D12Resource> uUploadBuffer;
ComPtr<ID3D12Resource> vUploadBuffer;
uint8_t* yMappedData; // Persistent CPU mapping
uint8_t* uMappedData;
uint8_t* vMappedData;
};
std::vector<RingBufferSlot> m_ringBuffers;
ComPtr<ID3D12Resource> m_yTexture; // GPU textures (D3D12_HEAP_TYPE_DEFAULT)
ComPtr<ID3D12Resource> m_uTexture;
ComPtr<ID3D12Resource> m_vTexture;
ComPtr<ID3D12PipelineState> m_yuvToRgbPipeline; // YUV→RGB compute shader
};
```
**Source**: Renamed from `D3D12VideoRenderer` (old)
**Size**: ~2000 lines (preserves all existing logic)
**Key Feature**: Persistent CPU mapped upload buffers with ring buffer system
---
### NV12DirectBackend (Future)
**Handles**: `VAVCORE_COLOR_SPACE_NV12`
**Method**: Direct GPU rendering (zero-copy)
```cpp
class NV12DirectBackend : public IVideoBackend {
public:
VavCoreColorSpace GetSupportedFormat() const override {
return VAVCORE_COLOR_SPACE_NV12;
}
HRESULT CreateVideoTexture(uint32_t width, uint32_t height) override;
// Creates: DXGI_FORMAT_NV12 texture (when D3D12 tiled NV12 is viable)
// Zero-copy: NVDEC writes directly to D3D12 texture
HRESULT RenderToBackBuffer(...) override;
// Pipeline: NVDEC → D3D12 NV12 → Direct YUV→RGB shader → Render
// No CPU involvement and no intermediate copy or format conversion; the only conversion is the YUV→RGB shader at render time (pure GPU path)
private:
ComPtr<ID3D12Resource> m_nv12Texture; // Tiled NV12 texture
ComPtr<ID3D12PipelineState> m_nv12ToRgbPipeline; // Direct YUV→RGB shader
};
```
**Status**: Not implemented yet (requires D3D12 tiled NV12 support resolution)
**Key Feature**: Zero-copy GPU pipeline (NVDEC → D3D12 direct write)
---
## 🔄 Backend Selection Logic
```cpp
class D3D12VideoRenderer : public IVideoRenderer {
public:
HRESULT RenderVideoFrame(const VavCoreVideoFrame& frame) override {
// Select backend based on frame color space
IVideoBackend* backend = SelectBackend(frame.color_space);
if (!backend) {
return E_FAIL;
}
// Get current back buffer
ID3D12Resource* backBuffer = m_renderTargets[m_frameIndex].Get();
// Delegate rendering to backend
return backend->RenderToBackBuffer(frame, backBuffer, m_commandList.Get());
}
private:
IVideoBackend* SelectBackend(VavCoreColorSpace colorSpace) {
switch (colorSpace) {
case VAVCORE_COLOR_SPACE_RGB32:
if (!m_rgbaSurfaceBackend) {
m_rgbaSurfaceBackend = std::make_unique<RGBASurfaceBackend>();
m_rgbaSurfaceBackend->Initialize(m_device.Get(), m_commandQueue.Get(),
m_width, m_height);
}
return m_rgbaSurfaceBackend.get();
case VAVCORE_COLOR_SPACE_YUV420P:
if (!m_yuv420pUploadBackend) {
m_yuv420pUploadBackend = std::make_unique<YUV420PUploadBackend>();
m_yuv420pUploadBackend->Initialize(m_device.Get(), m_commandQueue.Get(),
m_width, m_height);
}
return m_yuv420pUploadBackend.get();
case VAVCORE_COLOR_SPACE_NV12:
// Future: NV12DirectBackend
if (!m_nv12DirectBackend) {
m_nv12DirectBackend = std::make_unique<NV12DirectBackend>();
m_nv12DirectBackend->Initialize(m_device.Get(), m_commandQueue.Get(),
m_width, m_height);
}
return m_nv12DirectBackend.get();
default:
return nullptr;
}
}
std::unique_ptr<RGBASurfaceBackend> m_rgbaSurfaceBackend; // Surface method
std::unique_ptr<YUV420PUploadBackend> m_yuv420pUploadBackend; // Upload method
std::unique_ptr<NV12DirectBackend> m_nv12DirectBackend; // Direct method (future)
};
```
---
## 📊 Naming Consistency Table
| Backend Class | Format Enum | Method | Pixel Layout | Pipeline | File Origin |
|---------------|-------------|--------|--------------|----------|-------------|
| `RGBASurfaceBackend` | `VAVCORE_COLOR_SPACE_RGB32` | Surface | RGBA (4 bytes/pixel) | NVDEC → CUDA surf2Dwrite() → D3D12 | SimpleGPURenderer |
| `YUV420PUploadBackend` | `VAVCORE_COLOR_SPACE_YUV420P` | Upload | Planar YUV 4:2:0 | dav1d → CPU upload → GPU shader | D3D12VideoRenderer (old) |
| `NV12DirectBackend` | `VAVCORE_COLOR_SPACE_NV12` | Direct | Semi-planar NV12 | NVDEC → D3D12 direct → Render | Future |
**Naming Rule**: `{PixelFormat}{Method}Backend`
- **Format-first**: Clear pixel format (RGBA, YUV420P, NV12)
- **Method-second**: Rendering method (Surface, Upload, Direct)
- **Direct 1:1 mapping**: VavCoreColorSpace enum → backend class
- **No ambiguity**: Method names describe actual mechanism, not implementation details
---
## 📝 Implementation Plan
### Phase 1: Create Backend Infrastructure
**Goal**: Establish base interfaces and RGBA Surface backend
**Tasks**:
1. Create `IVideoBackend.h` interface
2. Create `RGBASurfaceBackend.h/.cpp`
3. Extract RGBA Surface logic from SimpleGPURenderer
4. Test RGBASurfaceBackend independently
**Estimated Time**: 2 hours
---
### Phase 2: Transform D3D12VideoRenderer → YUV420PUploadBackend
**Goal**: Repurpose existing code as Upload backend
**Tasks**:
1. Rename files: `D3D12VideoRenderer.*` → `YUV420PUploadBackend.*`
2. Rename class: `D3D12VideoRenderer` → `YUV420PUploadBackend`
3. Implement `IVideoBackend` interface
4. Remove swap chain ownership (delegate to orchestrator)
5. Test YUV420PUploadBackend independently
**Estimated Time**: 1.5 hours
---
### Phase 3: Create New D3D12VideoRenderer Orchestrator
**Goal**: Build thin orchestrator from scratch
**Tasks**:
1. Create new `D3D12VideoRenderer.h/.cpp`
2. Implement IVideoRenderer interface
3. Implement backend selection logic
4. Test with RGBASurfaceBackend
5. Test with YUV420PUploadBackend
6. Test dynamic backend switching
**Estimated Time**: 1.5 hours
---
### Phase 4: Archive Legacy Code
**Goal**: Clean up old SimpleGPURenderer
**Tasks**:
1. Create `src/Rendering/Legacy/` directory
2. Move `SimpleGPURenderer` → `SimpleGPURenderer_Legacy`
3. Update all references to new `D3D12VideoRenderer`
4. Verify all tests pass
5. Update documentation
**Estimated Time**: 1 hour
**Total Estimated Time**: 6 hours
---
## ✅ Success Criteria
### Functional
- ✅ NVDEC RGBA rendering works (via RGBASurfaceBackend)
- ✅ CPU YUV rendering works (via YUV420PUploadBackend)
- ✅ Backend auto-selection by color_space
- ✅ No visual regressions
- ✅ All existing tests pass
### Code Quality
- ✅ D3D12VideoRenderer < 400 lines
- ✅ Each backend handles exactly 1 format with 1 method
- ✅ Consistent format+method naming (Surface/Upload/Direct)
- ✅ No format-specific if/else in orchestrator
### Maintainability
- ✅ Adding new format = add `{Format}{Method}Backend` class only
- ✅ Each backend independently testable
- ✅ Clear mapping: `VavCoreColorSpace` → Backend class → Rendering method
---
## 🎯 Why This Design Wins
### 1. Naming Clarity
```cpp
// Clear from class name what format AND method it uses:
// RGBASurfaceBackend   → RGB32 format + CUDA Surface write
// YUV420PUploadBackend → YUV420P format + CPU upload buffers
// NV12DirectBackend    → NV12 format + Direct GPU rendering
```
### 2. Code Reuse
```cpp
// Zero rewrite of proven code:
// D3D12VideoRenderer (old, 2581 lines) → YUV420PUploadBackend (~2000 lines, same logic)
```
### 3. Extensibility
```cpp
// Adding new format+method is trivial:
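case VAVCORE_COLOR_SPACE_YUV444P: // hypothetical new pixel format (VP9 is a codec, not a color space)
return m_yuv444pUploadBackend.get(); // One new case plus one new {Format}{Method}Backend class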
```
### 4. Testability
```cpp
// Each backend tests independently:
TEST(RGBASurfaceBackend, RenderFrame) {
VavCoreVideoFrame frame;
frame.color_space = VAVCORE_COLOR_SPACE_RGB32;
// Test RGBA Surface rendering in isolation
}
```
---
## 📚 References
- **VavCore Color Space**: `VavCore/VavCore.h` → `VavCoreColorSpace` enum
- **Old Code**: `D3D12VideoRenderer.cpp` (2581 lines, YUV420P)
- **Old Code**: `SimpleGPURenderer.cpp` (2105 lines, mixed RGBA/YUV)
- **Previous Design**: `SimpleGPURenderer_Layered_Architecture_Design_v2.md`
---
**Status**: ✅ **FINAL DESIGN APPROVED (v3)**
**Key Decision**: Format + Method naming (`{PixelFormat}{Method}Backend`)
**Approved Methods**: Surface, Upload, Direct (NO Hardware/Software)
**Next Step**: Begin Phase 1 - Create IVideoBackend + RGBASurfaceBackend
**Total Estimated Time**: 6 hours (4 phases)
---
**Document Revision History**:
- **v1**: Initial format-based naming (CPUVideoBackend - rejected)
- **v2**: Reuse D3D12VideoRenderer as backend (approved structure)
- **v3**: Final naming with Surface/Upload/Direct methods (current) ✅

View File

@@ -2,7 +2,8 @@
#include "MainWindow.xaml.h"
#include "MainVideoPage.xaml.h"
#include "MultiVideoPage.xaml.h"
#include "LayeredVideoPage.xaml.h"
// Temporarily excluded - old VideoPlayerControl
// #include "LayeredVideoPage.xaml.h"
#include "SettingsPage.xaml.h"
#include "src/Logger/LogManager.h"
#include <microsoft.ui.xaml.window.h>
@@ -88,10 +89,15 @@ namespace winrt::Vav2Player::implementation
void MainWindow::SwitchToLayeredVideoView_Click(winrt::Windows::Foundation::IInspectable const&, winrt::Microsoft::UI::Xaml::RoutedEventArgs const&)
{
// Intentionally a no-op: LayeredVideoPage still depends on the old
// VideoPlayerControl and is temporarily excluded from the build.
// TODO: Update LayeredVideoPage to use VideoPlayerControl2
//
// Navigation code to restore once the page has been ported:
//   TypeName pageTypeName;
//   pageTypeName.Name = winrt::name_of<Vav2Player::LayeredVideoPage>();
//   pageTypeName.Kind = TypeKind::Metadata;
//   ContentFrame().Navigate(pageTypeName);
}
void MainWindow::Exit_Click(winrt::Windows::Foundation::IInspectable const&, winrt::Microsoft::UI::Xaml::RoutedEventArgs const&)

View File

@@ -205,7 +205,7 @@ namespace winrt::Vav2Player::implementation
{
for (int c = 0; c < cols; ++c)
{
auto videoPlayer = winrt::make<winrt::Vav2Player::implementation::VideoPlayerControl>();
auto videoPlayer = winrt::make<winrt::Vav2Player::implementation::VideoPlayerControl2>();
// Set grid position
videoPlayer.SetValue(Grid::RowProperty(), winrt::box_value(r));

View File

@@ -1,7 +1,7 @@
#pragma once
#include "MultiVideoPage.g.h"
#include "VideoPlayerControl.xaml.h"
#include "VideoPlayerControl2.xaml.h"
#include <vector>
#include <memory>
@@ -21,7 +21,7 @@ namespace winrt::Vav2Player::implementation
void UseHardwareRenderingCheckBox_Unchecked(winrt::Windows::Foundation::IInspectable const& sender, winrt::Microsoft::UI::Xaml::RoutedEventArgs const& e);
private:
std::vector<winrt::Vav2Player::VideoPlayerControl> m_videoPlayers;
std::vector<winrt::Vav2Player::VideoPlayerControl2> m_videoPlayers;
winrt::hstring m_currentVideoPath;
enum class LayoutType

View File

@@ -141,18 +141,24 @@
</ClInclude>
<ClInclude Include="src\Rendering\d3dx12.h" />
<ClInclude Include="src\Rendering\IVideoRenderer.h" />
<!-- Temporarily excluded - old VideoPlayerControl, replaced by VideoPlayerControl2 -->
<!--
<ClInclude Include="VideoPlayerControl.xaml.h">
<DependentUpon>VideoPlayerControl.xaml</DependentUpon>
</ClInclude>
-->
<ClInclude Include="MainVideoPage.xaml.h">
<DependentUpon>MainVideoPage.xaml</DependentUpon>
</ClInclude>
<ClInclude Include="MultiVideoPage.xaml.h">
<DependentUpon>MultiVideoPage.xaml</DependentUpon>
</ClInclude>
<!-- Temporarily excluded - depends on old VideoPlayerControl -->
<!--
<ClInclude Include="LayeredVideoPage.xaml.h">
<DependentUpon>LayeredVideoPage.xaml</DependentUpon>
</ClInclude>
-->
<ClInclude Include="LogMessagePage.xaml.h">
<DependentUpon>LogMessagePage.xaml</DependentUpon>
</ClInclude>
@@ -175,8 +181,11 @@
<!-- <ClInclude Include="src\Decoder\NVDECAV1Decoder.h" /> -->
<!-- <ClInclude Include="src\Decoder\AdaptiveNVDECDecoder.h" /> -->
<!-- <ClInclude Include="src\FileIO\WebMFileReader.h" /> -->
<!-- New Layered Backend Architecture -->
<ClInclude Include="src\Rendering\IVideoBackend.h" />
<ClInclude Include="src\Rendering\D3D12VideoRenderer.h" />
<ClInclude Include="src\Rendering\SimpleGPURenderer.h" />
<ClInclude Include="src\Rendering\RGBASurfaceBackend.h" />
<ClInclude Include="src\Rendering\YUV420PUploadBackend.h" />
<ClInclude Include="src\Rendering\GlobalD3D12SyncManager.h" />
</ItemGroup>
<!-- VideoPlayerControl2 Headers -->
@@ -190,10 +199,12 @@
<ItemGroup>
<ApplicationDefinition Include="App.xaml" />
<Page Include="MainWindow.xaml" />
<Page Include="VideoPlayerControl.xaml" />
<!-- Temporarily excluded - old VideoPlayerControl, replaced by VideoPlayerControl2 -->
<!-- <Page Include="VideoPlayerControl.xaml" /> -->
<Page Include="MainVideoPage.xaml" />
<Page Include="MultiVideoPage.xaml" />
<Page Include="LayeredVideoPage.xaml" />
<!-- Temporarily excluded - depends on old VideoPlayerControl -->
<!-- <Page Include="LayeredVideoPage.xaml" /> -->
<Page Include="LogMessagePage.xaml" />
<Page Include="SettingsPage.xaml" />
</ItemGroup>
@@ -211,21 +222,29 @@
<ClCompile Include="MainWindow.xaml.cpp">
<DependentUpon>MainWindow.xaml</DependentUpon>
</ClCompile>
<!-- New Layered Backend Architecture -->
<ClCompile Include="src\Rendering\D3D12VideoRenderer.cpp" />
<ClCompile Include="src\Rendering\RGBASurfaceBackend.cpp" />
<ClCompile Include="src\Rendering\YUV420PUploadBackend.cpp" />
<ClCompile Include="src\Rendering\GlobalD3D12SyncManager.cpp" />
<ClCompile Include="src\Rendering\SimpleGPURenderer.cpp" />
<!-- Temporarily excluded - old VideoPlayerControl, replaced by VideoPlayerControl2 -->
<!--
<ClCompile Include="VideoPlayerControl.xaml.cpp">
<DependentUpon>VideoPlayerControl.xaml</DependentUpon>
</ClCompile>
-->
<ClCompile Include="MainVideoPage.xaml.cpp">
<DependentUpon>MainVideoPage.xaml</DependentUpon>
</ClCompile>
<ClCompile Include="MultiVideoPage.xaml.cpp">
<DependentUpon>MultiVideoPage.xaml</DependentUpon>
</ClCompile>
<!-- Temporarily excluded - depends on old VideoPlayerControl -->
<!--
<ClCompile Include="LayeredVideoPage.xaml.cpp">
<DependentUpon>LayeredVideoPage.xaml</DependentUpon>
</ClCompile>
-->
<ClCompile Include="LogMessagePage.xaml.cpp">
<DependentUpon>LogMessagePage.xaml</DependentUpon>
</ClCompile>
@@ -261,10 +280,13 @@
<SubType>Code</SubType>
<DependentUpon>MainWindow.xaml</DependentUpon>
</Midl>
<!-- Temporarily excluded - old VideoPlayerControl, replaced by VideoPlayerControl2 -->
<!--
<Midl Include="VideoPlayerControl.idl">
<SubType>Code</SubType>
<DependentUpon>VideoPlayerControl.xaml</DependentUpon>
</Midl>
-->
<Midl Include="MainVideoPage.idl">
<SubType>Code</SubType>
<DependentUpon>MainVideoPage.xaml</DependentUpon>
@@ -273,10 +295,13 @@
<SubType>Code</SubType>
<DependentUpon>MultiVideoPage.xaml</DependentUpon>
</Midl>
<!-- Temporarily excluded - depends on old VideoPlayerControl -->
<!--
<Midl Include="LayeredVideoPage.idl">
<SubType>Code</SubType>
<DependentUpon>LayeredVideoPage.xaml</DependentUpon>
</Midl>
-->
<Midl Include="LogMessagePage.idl">
<SubType>Code</SubType>
<DependentUpon>LogMessagePage.xaml</DependentUpon>

View File

@@ -1,81 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<UserControl
x:Class="Vav2Player.VideoPlayerControl"
xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
xmlns:local="using:Vav2Player"
xmlns:d="http://schemas.microsoft.com/expression/blend/2008"
xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
mc:Ignorable="d"
Loaded="UserControl_Loaded"
Unloaded="UserControl_Unloaded"
SizeChanged="UserControl_SizeChanged">
<Grid x:Name="RootGrid" Background="Black">
<!-- Main video rendering area -->
<Border x:Name="VideoContainer"
Background="Black"
BorderBrush="Gray"
BorderThickness="1">
<Grid x:Name="VideoDisplayArea" Background="Black">
<!-- Hardware D3D12 rendering (Phase 1) -->
<SwapChainPanel x:Name="VideoSwapChainPanel"
Visibility="Collapsed"
HorizontalAlignment="Center"
VerticalAlignment="Center"/>
<!-- Software CPU rendering (fallback) -->
<Image x:Name="VideoImage"
Stretch="Fill"
HorizontalAlignment="Center"
VerticalAlignment="Center"/>
<!-- Placeholder text when no video -->
<TextBlock x:Name="PlaceholderText"
Text="Video Player"
Foreground="LightGray"
HorizontalAlignment="Left"
VerticalAlignment="Top"
FontSize="12"
Opacity="0.8"
Margin="8,8,0,0"/>
</Grid>
</Border>
<!-- UI overlay -->
<Grid x:Name="OverlayGrid" Background="Transparent">
<!-- Loading indicator -->
<ProgressRing x:Name="LoadingRing"
Width="40" Height="40"
HorizontalAlignment="Center"
VerticalAlignment="Center"
IsActive="False"
Visibility="Collapsed"/>
<!-- Status text overlay -->
<Border x:Name="StatusOverlay"
Background="#80000000"
CornerRadius="4"
Padding="8,4"
HorizontalAlignment="Center"
VerticalAlignment="Top"
Margin="0,10,0,0"
Visibility="Collapsed">
<TextBlock x:Name="StatusText"
Text="Status"
Foreground="White"
FontSize="12"
TextAlignment="Center"/>
</Border>
<!-- Video control bar (disabled for now) -->
<!-- Show controls on mouse hover -->
<Border x:Name="HoverDetector"
Background="Transparent"
PointerEntered="HoverDetector_PointerEntered"
PointerExited="HoverDetector_PointerExited"/>
</Grid>
</Grid>
</UserControl>

View File

@@ -1,219 +0,0 @@
#pragma once
#include "VideoPlayerControl.g.h"
#include "VavCore/VavCore.h" // VavCore C API
// GPU rendering headers (re-enabled after the VavCore migration)
#include "src/Rendering/IVideoRenderer.h"
#include "src/Rendering/SimpleGPURenderer.h"
using namespace Vav2Player;
#include <memory>
#include <string>
#include <atomic>
#include <queue>
#include <mutex>
#include <chrono>
namespace winrt::Vav2Player::implementation
{
struct VideoPlayerControl : VideoPlayerControlT<VideoPlayerControl>
{
VideoPlayerControl();
~VideoPlayerControl();
// Events
void UserControl_Loaded(winrt::Windows::Foundation::IInspectable const& sender, winrt::Microsoft::UI::Xaml::RoutedEventArgs const& e);
void UserControl_Unloaded(winrt::Windows::Foundation::IInspectable const& sender, winrt::Microsoft::UI::Xaml::RoutedEventArgs const& e);
void UserControl_SizeChanged(winrt::Windows::Foundation::IInspectable const& sender, winrt::Microsoft::UI::Xaml::SizeChangedEventArgs const& e);
void HoverDetector_PointerEntered(winrt::Windows::Foundation::IInspectable const& sender, winrt::Microsoft::UI::Xaml::Input::PointerRoutedEventArgs const& e);
void HoverDetector_PointerExited(winrt::Windows::Foundation::IInspectable const& sender, winrt::Microsoft::UI::Xaml::Input::PointerRoutedEventArgs const& e);
// Public Properties
winrt::hstring VideoSource();
void VideoSource(winrt::hstring const& value);
bool ShowControls();
void ShowControls(bool value);
bool AutoPlay();
void AutoPlay(bool value);
Vav2Player::VideoDecoderType DecoderType();
void DecoderType(Vav2Player::VideoDecoderType value);
bool UseHardwareRendering();
void UseHardwareRendering(bool value);
VavCoreDecoderType GetInternalDecoderType();
void SetInternalDecoderType(VavCoreDecoderType value);
// Public Methods
void LoadVideo(winrt::hstring const& filePath);
void Play();
void Pause();
void Stop();
void Seek(double timeSeconds);
void RefreshDecoderSettings();
// Status Properties
bool IsVideoPlaying();
bool IsVideoLoaded();
double CurrentTime();
double Duration();
winrt::hstring Status();
private:
// VavCore components (replaces direct decoder usage)
// Default-initialized to nullptr, like the other members of this struct, so the
// handle is never indeterminate before a player has been created.
VavCorePlayer* m_vavCorePlayer = nullptr;
// Video rendering components (optimized)
winrt::Microsoft::UI::Xaml::Media::Imaging::WriteableBitmap m_renderBitmap{ nullptr };
std::vector<uint8_t> m_bgraBuffer; // Reusable buffer for BGRA conversion
uint32_t m_lastFrameWidth = 0;
uint32_t m_lastFrameHeight = 0;
// GPU rendering re-enabled for VavCore
std::unique_ptr<IVideoRenderer> m_gpuRenderer;
bool m_useHardwareRendering = true; // GPU rendering enabled
// Playback timer for continuous frame processing
winrt::Microsoft::UI::Xaml::DispatcherTimer m_playbackTimer;
// High-resolution timer for accurate frame timing
std::unique_ptr<std::thread> m_timingThread;
std::atomic<bool> m_shouldStopTiming{false};
// Video dimensions
uint32_t m_videoWidth = 0;
uint32_t m_videoHeight = 0;
// AspectFit state
bool m_hasValidVideoSize = false;
// Configuration
winrt::hstring m_videoSource;
bool m_showControls = true;
bool m_autoPlay = false;
VavCoreDecoderType m_decoderType = VAVCORE_DECODER_AUTO;
// Playback state
std::atomic<bool> m_isPlaying{ false };
std::atomic<bool> m_isLoaded{ false };
std::atomic<bool> m_isInitialized{ false };
std::atomic<bool> m_frameProcessing{ false }; // Prevents dispatcher queue overflow
uint64_t m_currentFrame = 0;
uint64_t m_totalFrames = 0;
double m_frameRate = 30.0;
double m_currentTime = 0.0;
double m_duration = 0.0;
winrt::hstring m_status = L"Ready";
// Basic timing and error tracking
std::chrono::high_resolution_clock::time_point m_playbackStartTime;
uint64_t m_framesDecodeErrors = 0;
// D3D Surface support
bool m_useD3DSurfaces = false;
VavCoreSurfaceType m_supportedSurfaceType = VAVCORE_SURFACE_CPU;
void* m_d3dDevice = nullptr; // ID3D11Device* or ID3D12Device*
// Phase 2 Optimization: Memory Pool System
// Pool of reusable WriteableBitmap objects and byte buffers with hit/miss
// counters. Only the declaration is visible here; the reuse policy is defined
// out of line.
class MemoryPool
{
private:
// Pooled objects waiting to be handed out again.
std::queue<winrt::Microsoft::UI::Xaml::Media::Imaging::WriteableBitmap> _bitmapPool;
std::queue<std::vector<uint8_t>> _bufferPool;
// NOTE(review): presumably guards both queues and the counters - confirm in the implementation.
std::mutex _poolMutex;
// NOTE(review): presumably the upper bound on entries kept per queue - confirm in the implementation.
const size_t MAX_POOL_SIZE = 10;
// Statistics tracking
int _bitmapPoolHits = 0;
int _bitmapPoolMisses = 0;
int _bufferPoolHits = 0;
int _bufferPoolMisses = 0;
public:
// Obtains a bitmap for the requested dimensions.
winrt::Microsoft::UI::Xaml::Media::Imaging::WriteableBitmap GetBitmap(uint32_t width, uint32_t height);
// Hands a bitmap back to the pool.
void ReturnBitmap(winrt::Microsoft::UI::Xaml::Media::Imaging::WriteableBitmap bitmap);
// Obtains a byte buffer for the requested size.
std::vector<uint8_t> GetBuffer(size_t size);
// Hands a byte buffer back to the pool.
void ReturnBuffer(std::vector<uint8_t> buffer);
// Reports the hit/miss counters (output destination is defined in the implementation).
void PrintStats();
};
// Phase 2 Optimization: Advanced Performance Monitor
// Collects per-frame decode / render / total timings and tracks whether
// playback quality should be reduced or restored. Only the declaration is
// visible here; the measurement logic is defined out of line.
class AdvancedPerformanceMonitor
{
private:
    // Recorded timing samples for each pipeline stage.
    std::queue<double> _decodingTimes;
    std::queue<double> _renderingTimes;
    std::queue<double> _totalFrameTimes;

    // Start timestamps captured by the Record*Start() calls.
    std::chrono::high_resolution_clock::time_point _frameStartTime;
    std::chrono::high_resolution_clock::time_point _decodeStartTime;
    std::chrono::high_resolution_clock::time_point _renderStartTime;

    // Adaptive quality control
    int _consecutiveSlowFrames = 0;
    int _consecutiveFastFrames = 0;
    bool _qualityReductionActive = false;

    const int SLOW_FRAME_THRESHOLD = 3;
    const int FAST_FRAME_THRESHOLD = 8;
    const size_t SAMPLE_SIZE = 30; // 30 frame rolling average

public:
    void RecordFrameStart();
    void RecordDecodeStart();
    void RecordDecodeEnd();
    void RecordRenderStart();
    void RecordRenderEnd();
    void RecordFrameEnd();

    // Snapshot returned by GetStats(). Every field carries a default so a
    // default-constructed snapshot never exposes indeterminate values.
    struct PerformanceStats {
        double AverageDecodeTime = 0.0;
        double AverageRenderTime = 0.0;
        double AverageTotalTime = 0.0;
        double CurrentFPS = 0.0;
        bool QualityReductionActive = false;
    };

    PerformanceStats GetStats();
    bool ShouldReduceQuality();
    bool ShouldRestoreQuality();
    void CheckForQualityAdjustment(double frameTime);
};
// Phase 2 Optimization instances
std::unique_ptr<MemoryPool> m_memoryPool;
std::unique_ptr<AdvancedPerformanceMonitor> m_performanceMonitor;
// Helper methods
void InitializeVideoRenderer();
// GPU rendering methods re-enabled for VavCore
bool TryInitializeGPURenderer();
void SetRenderingMode(bool useGPU);
void ProcessSingleFrame();
void RenderFrameToScreen(const VavCoreVideoFrame& frame);
void RenderFrameSoftware(const VavCoreVideoFrame& frame);
void ProcessSingleFrameLegacy();
void ConvertYUVToBGRA(const VavCoreVideoFrame& yuv_frame, uint8_t* bgra_buffer, uint32_t width, uint32_t height);
void UpdateVideoImageAspectFit(int videoWidth, int videoHeight);
void ApplyAspectFitIfReady();
void UpdateStatus(winrt::hstring const& message);
void ResetVideoState();
void LoadDecoderSettings();
// D3D Surface methods
bool InitializeD3DSurfaceSupport();
bool CreateD3D11Device();
void ReleaseD3D11Device();
void ProcessSingleFrameWithSurfaces();
bool CreateD3DTexture(uint32_t width, uint32_t height, void** texture);
void RenderD3DSurfaceToScreen(void* d3dTexture, const VavCoreVideoFrame& frame);
};
}
namespace winrt::Vav2Player::factory_implementation
{
struct VideoPlayerControl : VideoPlayerControlT<VideoPlayerControl, implementation::VideoPlayerControl>
{
};
}

View File

@@ -319,30 +319,6 @@ namespace winrt::Vav2Player::implementation
LogMgr::GetInstance().LogInfo(L"VideoPlayerControl2",
L"Video loaded: " + std::to_wstring(videoWidth) + L"x" + std::to_wstring(videoHeight));
// 4. Create the NV12 texture for zero-copy, now that we have the dimensions.
if (m_gpuRenderer) {
HRESULT hr = m_gpuRenderer->CreateNV12TextureR8Layout(videoWidth, videoHeight);
if (SUCCEEDED(hr)) {
LogMgr::GetInstance().LogInfo(L"VideoPlayerControl2", L"NV12 texture created");
} else {
LogMgr::GetInstance().LogError(L"VideoPlayerControl2", L"Failed to create NV12 texture");
}
}
// 5. Get D3D12 fence from VavCore and pass it to SimpleGPURenderer for GPU synchronization
VavCorePlayer* player = m_playbackController->GetVavCorePlayer();
if (player && m_gpuRenderer) {
void* syncFence = vavcore_get_sync_fence(player);
if (syncFence) {
m_gpuRenderer->SetSyncFence(syncFence);
LogMgr::GetInstance().LogInfo(L"VideoPlayerControl2", L"D3D12 fence set for GPU synchronization");
} else {
LogMgr::GetInstance().LogWarning(L"VideoPlayerControl2", L"No sync fence available from VavCore");
}
}
// --- End of Corrected Initialization Order ---
// Update AspectFit
UpdateVideoImageAspectFit(videoWidth, videoHeight);
@@ -354,12 +330,6 @@ namespace winrt::Vav2Player::implementation
}
} else {
LogMgr::GetInstance().LogError(L"VideoPlayerControl2", L"Failed to load video");
// Cleanup partial initialization on failure
if (m_gpuRenderer) {
m_gpuRenderer->ReleaseNV12Texture();
}
UpdateStatus(L"Load failed");
}
}
@@ -453,7 +423,7 @@ namespace winrt::Vav2Player::implementation
LogMgr::GetInstance().LogInfo(L"VideoPlayerControl2", L"Initializing renderer");
m_gpuRenderer = std::make_unique<::Vav2Player::SimpleGPURenderer>();
m_gpuRenderer = std::make_unique<::Vav2Player::D3D12VideoRenderer>();
// Get SwapChainPanel size
auto panelSize = VideoSwapChainPanel().ActualSize();

View File

@@ -4,7 +4,7 @@
#include "VavCore/VavCore.h"
#include "src/Playback/PlaybackController.h"
#include "src/Playback/FrameProcessor.h"
#include "src/Rendering/SimpleGPURenderer.h"
#include "src/Rendering/D3D12VideoRenderer.h"
#include <memory>
#include <string>
@@ -59,7 +59,7 @@ namespace winrt::Vav2Player::implementation
// Core components (composition)
std::unique_ptr<::Vav2Player::PlaybackController> m_playbackController;
std::unique_ptr<::Vav2Player::FrameProcessor> m_frameProcessor;
std::unique_ptr<::Vav2Player::SimpleGPURenderer> m_gpuRenderer;
std::unique_ptr<::Vav2Player::D3D12VideoRenderer> m_gpuRenderer;
// UI state
winrt::hstring m_videoSource;

View File

@@ -49,6 +49,9 @@
#include <directxmath.h>
#include <wrl/client.h>
// D3D12 helper header (Microsoft sample code)
#include "src/Rendering/d3dx12.h"
// D3D12 helper functions - temporarily disabled for VavCore migration
// #include "src/Common/D3D12Helpers.h"

View File

@@ -18,7 +18,7 @@ FrameProcessor::~FrameProcessor()
}
}
void FrameProcessor::SetRenderer(SimpleGPURenderer* renderer)
void FrameProcessor::SetRenderer(D3D12VideoRenderer* renderer)
{
m_renderer = renderer;
}
@@ -48,16 +48,16 @@ bool FrameProcessor::ProcessFrame(VavCorePlayer* player,
LOGF_INFO("[FrameProcessor] ProcessFrame START (decoded: %llu, dropped: %llu)",
m_framesDecoded.load(), m_framesDropped.load());
// Get NV12 texture from renderer
LOGF_INFO("[FrameProcessor] Getting NV12 texture...");
ID3D12Resource* nv12Texture = m_renderer->GetNV12TextureForCUDAInterop();
if (!nv12Texture) {
LOGF_ERROR("[FrameProcessor] Failed to get NV12 texture - clearing flag");
// Get RGBA texture from renderer
LOGF_INFO("[FrameProcessor] Getting RGBA texture...");
ID3D12Resource* rgbaTexture = m_renderer->GetRGBATextureForCUDAInterop();
if (!rgbaTexture) {
LOGF_ERROR("[FrameProcessor] Failed to get RGBA texture - clearing flag");
m_frameProcessing.store(false);
if (onComplete) onComplete(false);
return false;
}
LOGF_INFO("[FrameProcessor] NV12 texture acquired");
LOGF_INFO("[FrameProcessor] RGBA texture acquired");
// Decode frame to D3D12 surface (blocking)
LOGF_INFO("[FrameProcessor] Starting vavcore_decode_to_surface (BLOCKING)...");
@@ -65,7 +65,7 @@ bool FrameProcessor::ProcessFrame(VavCorePlayer* player,
VavCoreResult result = vavcore_decode_to_surface(
player,
VAVCORE_SURFACE_D3D12_RESOURCE,
nv12Texture,
rgbaTexture,
&vavFrame
);
LOGF_INFO("[FrameProcessor] vavcore_decode_to_surface COMPLETED");
@@ -85,13 +85,12 @@ bool FrameProcessor::ProcessFrame(VavCorePlayer* player,
m_framesDecoded++;
LOGF_INFO("[FrameProcessor] Decode SUCCESS - frame decoded");
// Enqueue render on UI thread with fence value for GPU sync
uint64_t fenceValue = vavFrame.sync_fence_value;
LOGF_INFO("[FrameProcessor] Attempting to enqueue render (fenceValue=%llu)...", fenceValue);
// Enqueue render on UI thread
LOGF_INFO("[FrameProcessor] Attempting to enqueue render...");
bool enqueued = m_dispatcherQueue.TryEnqueue([this, fenceValue, onComplete]() {
bool enqueued = m_dispatcherQueue.TryEnqueue([this, vavFrame, onComplete]() {
LOGF_INFO("[FrameProcessor] *** UI THREAD CALLBACK STARTED ***");
HRESULT hr = m_renderer->RenderNV12TextureToBackBuffer(fenceValue);
HRESULT hr = m_renderer->RenderVideoFrame(vavFrame);
bool renderSuccess = SUCCEEDED(hr);
if (!renderSuccess) {
@@ -101,6 +100,15 @@ bool FrameProcessor::ProcessFrame(VavCorePlayer* player,
LOGF_INFO("[FrameProcessor] Render succeeded");
}
// Present to screen
if (renderSuccess) {
hr = m_renderer->Present();
if (FAILED(hr)) {
LOGF_ERROR("[FrameProcessor] Present error: HRESULT = 0x%08X", hr);
renderSuccess = false;
}
}
// Mark frame processing complete
LOGF_INFO("[FrameProcessor] CLEARING m_frameProcessing flag");
m_frameProcessing.store(false);

View File

@@ -1,7 +1,7 @@
#pragma once
#include "VavCore/VavCore.h"
#include "src/Rendering/SimpleGPURenderer.h"
#include "src/Rendering/D3D12VideoRenderer.h"
#include <atomic>
#include <functional>
#include <winrt/Microsoft.UI.Dispatching.h>
@@ -21,7 +21,7 @@ public:
~FrameProcessor();
// Set renderer for frame output
void SetRenderer(SimpleGPURenderer* renderer);
void SetRenderer(D3D12VideoRenderer* renderer);
// Set dispatcher queue for UI thread callbacks
void SetDispatcherQueue(winrt::Microsoft::UI::Dispatching::DispatcherQueue const& queue);
@@ -42,7 +42,7 @@ public:
uint64_t GetRenderErrors() const { return m_renderErrors; }
private:
SimpleGPURenderer* m_renderer = nullptr; // Non-owning pointer
D3D12VideoRenderer* m_renderer = nullptr; // Non-owning pointer
winrt::Microsoft::UI::Dispatching::DispatcherQueue m_dispatcherQueue{ nullptr };
// Processing state (prevents NVDEC surface queue overflow)

View File

@@ -1,182 +1,105 @@
#pragma once
#include "IVideoRenderer.h"
#include "IVideoBackend.h"
#include "RGBASurfaceBackend.h"
#include "YUV420PUploadBackend.h"
#include <d3d12.h>
#include <dxgi1_6.h>
#include <memory>
#include <winrt/Microsoft.UI.Xaml.Controls.h>
#include <DirectXMath.h>
#include <wrl/client.h>
#include "VavCore/VavCore.h" // For VavCoreVideoFrame
// Note: YUVRenderer.h removed during Phase 1 simplification
using Microsoft::WRL::ComPtr;
#include <winrt/Microsoft.UI.Xaml.Controls.h>
#include <memory>
namespace Vav2Player {
// Note: DirectTextureAllocator removed during Phase 1 simplification
using Microsoft::WRL::ComPtr;
class D3D12VideoRenderer
{
/**
* D3D12VideoRenderer - Thin orchestrator for video rendering backends
*
* Architecture:
* - Manages D3D12 device, command queue, swap chain (core infrastructure)
* - Owns and manages multiple backends (RGBASurfaceBackend, YUV420PUploadBackend)
* - Delegates rendering to the appropriate backend based on frame color space
* - Handles command list recording and presentation
*
* Design Goal: ~300 lines, simple delegation logic, no format-specific code
*/
class D3D12VideoRenderer : public IVideoRenderer {
public:
D3D12VideoRenderer();
~D3D12VideoRenderer();
~D3D12VideoRenderer() override;
// Initialization and cleanup
HRESULT Initialize(winrt::Microsoft::UI::Xaml::Controls::SwapChainPanel const& panel,
uint32_t width, uint32_t height);
void Shutdown();
// IVideoRenderer interface
HRESULT Initialize(uint32_t width, uint32_t height) override;
void Shutdown() override;
bool IsInitialized() const override { return m_initialized; }
// Rendering
HRESULT RenderFrame(const VavCoreVideoFrame& frame);
HRESULT RenderSolidColor(float r, float g, float b, float a = 1.0f);
HRESULT RenderYUVFrame();
HRESULT RenderVideoFrame(const VavCoreVideoFrame& frame) override;
bool TryRenderFrame(const VavCoreVideoFrame& frame) override;
HRESULT Present() override;
// Frame presentation (essential for displaying frames)
HRESULT PresentFrame(bool enableVSync = true);
HRESULT Resize(uint32_t width, uint32_t height) override;
uint32_t GetWidth() const override { return m_width; }
uint32_t GetHeight() const override { return m_height; }
// Zero-copy direct rendering
HRESULT RenderFrameZeroCopy(uint32_t videoWidth, uint32_t videoHeight);
// WinUI integration
HRESULT InitializeWithSwapChain(
winrt::Microsoft::UI::Xaml::Controls::SwapChainPanel const& panel,
uint32_t width,
uint32_t height);
// Ring Buffer system for zero-copy decoding
HRESULT CreateRingBuffers(uint32_t yWidth, uint32_t yHeight, uint32_t uvWidth, uint32_t uvHeight);
uint32_t AcquireNextBuffer(); // Get next available buffer index
void ReleaseBuffer(uint32_t bufferIndex); // Mark buffer as available
void SetSwapChainPanel(winrt::Microsoft::UI::Xaml::Controls::SwapChainPanel const& panel);
// D3D12 device access for VavCore integration
ID3D12Device* GetD3D12Device() const { return m_device.Get(); }
ID3D12CommandQueue* GetCommandQueue() const { return m_commandQueue.Get(); }
// Backend-specific texture access for CUDA interop
ID3D12Resource* GetRGBATextureForCUDAInterop() const;
ID3D12Resource* GetNV12TextureForCUDAInterop() const { return nullptr; } // Future: NV12DirectBackend
// Legacy YUV420P upload buffer access (for backward compatibility)
uint8_t* GetYMappedBuffer(uint32_t bufferIndex) const;
uint8_t* GetUMappedBuffer(uint32_t bufferIndex) const;
uint8_t* GetVMappedBuffer(uint32_t bufferIndex) const;
HRESULT RenderFrameFromBuffer(uint32_t bufferIndex, uint32_t videoWidth, uint32_t videoHeight);
// GPU Compute Copy methods for zero-copy optimization
HRESULT CopyYUVPlanesGPU(uint32_t bufferIndex, uint32_t videoWidth, uint32_t videoHeight);
HRESULT ExecuteComputeCopy(ID3D12GraphicsCommandList* commandList, ID3D12Resource* srcBuffer, ID3D12Resource* dstBuffer,
uint32_t width, uint32_t height, uint32_t srcPitch, uint32_t dstPitch);
// Note: Direct texture mapping methods removed during Phase 1 simplification
// Legacy single buffer access (for backward compatibility)
uint8_t* GetYMappedBuffer() const;
uint8_t* GetUMappedBuffer() const;
uint8_t* GetVMappedBuffer() const;
uint32_t GetYRowPitch() const { return m_yRowPitch; }
uint32_t GetURowPitch() const { return m_uRowPitch; }
uint32_t GetVRowPitch() const { return m_vRowPitch; }
// Status check
bool IsInitialized() const { return m_isInitialized; }
uint32_t GetWidth() const { return m_width; }
uint32_t GetHeight() const { return m_height; }
// D3D12 object access for integration with pipeline components
ID3D12Device* GetDevice() const { return m_device.Get(); }
ID3D12CommandQueue* GetCommandQueue() const { return m_commandQueue.Get(); }
// Resize
HRESULT Resize(uint32_t width, uint32_t height);
private:
// D3D12 Core Objects
// D3D12 core objects (owned by orchestrator)
ComPtr<ID3D12Device> m_device;
ComPtr<ID3D12CommandQueue> m_commandQueue;
ComPtr<IDXGISwapChain3> m_swapChain;
ComPtr<ID3D12DescriptorHeap> m_rtvHeap;
// Render Targets
// Render targets
static const UINT FrameCount = 2;
ComPtr<ID3D12Resource> m_renderTargets[FrameCount];
UINT m_frameIndex = 0;
// Command Objects - Per-frame allocators for proper synchronization
// Command objects
ComPtr<ID3D12CommandAllocator> m_commandAllocators[FrameCount];
ComPtr<ID3D12GraphicsCommandList> m_commandList;
ComPtr<ID3D12Resource> m_renderTargets[FrameCount];
UINT m_frameIndex;
// Synchronization
ComPtr<ID3D12Fence> m_fence;
UINT64 m_fenceValues[FrameCount];
HANDLE m_fenceEvent;
UINT64 m_fenceValue = 0;
UINT64 m_frameCompletionValues[FrameCount] = {};
HANDLE m_fenceEvent = nullptr;
// YUV Texture Resources
ComPtr<ID3D12Resource> m_yTexture;
ComPtr<ID3D12Resource> m_uTexture;
ComPtr<ID3D12Resource> m_vTexture;
ComPtr<ID3D12DescriptorHeap> m_srvHeap;
// Dynamic Ring Buffer System for zero-copy optimization
static const UINT MIN_RING_BUFFER_COUNT = 3; // Minimum buffers for stability
static const UINT MAX_RING_BUFFER_COUNT = 8; // Maximum buffers for memory limit
UINT m_dynamicRingBufferCount = MIN_RING_BUFFER_COUNT; // Current buffer count
struct RingBufferFrame {
ComPtr<ID3D12Resource> yUploadBuffer;
ComPtr<ID3D12Resource> uUploadBuffer;
ComPtr<ID3D12Resource> vUploadBuffer;
uint8_t* yMappedData;
uint8_t* uMappedData;
uint8_t* vMappedData;
// GPU Compute resources for each buffer
ComPtr<ID3D12Resource> yStructuredBuffer; // For compute shader input
ComPtr<ID3D12Resource> uStructuredBuffer;
ComPtr<ID3D12Resource> vStructuredBuffer;
ComPtr<ID3D12Resource> yOutputBuffer; // For compute shader output
ComPtr<ID3D12Resource> uOutputBuffer;
ComPtr<ID3D12Resource> vOutputBuffer;
// Per-buffer command allocator and command list for independent GPU operations
ComPtr<ID3D12CommandAllocator> commandAllocator;
ComPtr<ID3D12GraphicsCommandList> commandList;
ComPtr<ID3D12Fence> fence;
UINT64 fenceValue;
bool isInUse;
};
std::vector<RingBufferFrame> m_ringBuffers; // Dynamic array for variable buffer count
UINT m_currentBufferIndex;
UINT64 m_currentFenceValue;
// Shared row pitch values
uint32_t m_yRowPitch;
uint32_t m_uRowPitch;
uint32_t m_vRowPitch;
// Shader Resources
ComPtr<ID3D12RootSignature> m_rootSignature;
ComPtr<ID3D12PipelineState> m_pipelineState;
ComPtr<ID3D12Resource> m_vertexBuffer;
D3D12_VERTEX_BUFFER_VIEW m_vertexBufferView;
ComPtr<ID3DBlob> m_vertexShader;
ComPtr<ID3DBlob> m_pixelShader;
// YUV-to-RGB Conversion Resources
ComPtr<ID3D12Resource> m_colorConversionConstantBuffer;
struct ColorConversionConstants {
DirectX::XMMATRIX colorMatrix; // BT.709 or BT.2020 color conversion matrix
DirectX::XMFLOAT4 yuvOffsets; // Y, U, V offsets for different formats
DirectX::XMFLOAT4 yuvRanges; // Y, U, V ranges for normalization
DirectX::XMINT2 videoSize; // Video width and height
DirectX::XMINT2 padding; // Padding for 16-byte alignment
};
ColorConversionConstants m_colorConversionData;
// Compute Shader Resources for GPU Copy
ComPtr<ID3D12RootSignature> m_computeRootSignature;
ComPtr<ID3D12PipelineState> m_computePipelineState;
ComPtr<ID3DBlob> m_computeShader;
ComPtr<ID3D12Resource> m_computeConstantBuffer;
ComPtr<ID3D12DescriptorHeap> m_computeDescriptorHeap;
UINT m_computeDescriptorSize;
// Note: DirectTextureAllocator removed during Phase 1 simplification
// Video rendering backends
std::unique_ptr<RGBASurfaceBackend> m_rgbaSurfaceBackend;
std::unique_ptr<YUV420PUploadBackend> m_yuv420pUploadBackend;
// std::unique_ptr<NV12DirectBackend> m_nv12DirectBackend; // Future
// State
bool m_isInitialized;
uint32_t m_width;
uint32_t m_height;
uint32_t m_videoWidth;
uint32_t m_videoHeight;
UINT m_rtvDescriptorSize;
UINT m_srvDescriptorSize;
bool m_initialized = false;
uint32_t m_width = 0;
uint32_t m_height = 0;
uint32_t m_videoWidth = 0;
uint32_t m_videoHeight = 0;
// Note: YUVRenderer removed during Phase 1 simplification
// WinUI integration
winrt::Microsoft::UI::Xaml::Controls::SwapChainPanel m_swapChainPanel{ nullptr };
// Helper methods
HRESULT CreateDevice();
@@ -184,58 +107,15 @@ private:
HRESULT CreateSwapChain(winrt::Microsoft::UI::Xaml::Controls::SwapChainPanel const& panel);
HRESULT CreateDescriptorHeaps();
HRESULT CreateRenderTargets();
HRESULT CreateFenceAndEvent();
// Note: InitializeYUVRenderer removed during Phase 1 simplification
HRESULT WaitForPreviousFrame();
HRESULT PopulateCommandList();
HRESULT CreateCommandObjects();
HRESULT CreateSynchronizationObjects();
// YUV texture methods
HRESULT CreateYUVTextures(uint32_t videoWidth, uint32_t videoHeight);
HRESULT CreateSRVDescriptorHeap();
HRESULT CreateYUVShaderResourceViews();
HRESULT CreateShaderResources();
HRESULT CreateVertexBuffer();
HRESULT UpdateYUVTextures(const VavCoreVideoFrame& frame);
HRESULT UploadTextureData(const void* srcData, uint32_t srcRowPitch,
uint32_t width, uint32_t height,
ID3D12Resource* uploadBuffer,
ID3D12Resource* destTexture,
uint32_t subresourceIndex);
HRESULT CreateRootSignature();
HRESULT CompileShaders();
HRESULT CreatePipelineState();
HRESULT InitializeBackends();
IVideoBackend* SelectBackend(const VavCoreVideoFrame& frame);
HRESULT EnsureVideoTexture(const VavCoreVideoFrame& frame);
// YUV-to-RGB conversion methods
HRESULT CreateColorConversionConstantBuffer();
void UpdateColorConversionConstants(uint32_t videoWidth, uint32_t videoHeight,
const VavCoreVideoFrame& frame);
void SetupBT709ColorMatrix();
void SetupBT2020ColorMatrix();
// Compute Shader management
HRESULT CreateComputeShaderResources();
HRESULT CreateComputeRootSignature();
HRESULT CompileComputeShader();
HRESULT CreateComputePipelineState();
HRESULT CreateComputeDescriptorHeap();
HRESULT CreateStructuredBuffers(RingBufferFrame& frame, uint32_t yWidth, uint32_t yHeight, uint32_t uvWidth, uint32_t uvHeight);
// Dynamic Ring Buffer management
void DestroyRingBuffers();
HRESULT CreateSingleRingBuffer(RingBufferFrame& frame, uint32_t yWidth, uint32_t yHeight, uint32_t uvWidth, uint32_t uvHeight);
void WaitForBuffer(uint32_t bufferIndex);
bool IsBufferAvailable(uint32_t bufferIndex);
HRESULT ExecuteRingBufferTextureUpdate(uint32_t bufferIndex);
// Dynamic buffer sizing methods
UINT CalculateOptimalBufferCount(uint32_t videoWidth, uint32_t videoHeight);
UINT64 GetAvailableVideoMemory();
HRESULT ResizeRingBuffers(UINT newBufferCount, uint32_t yWidth, uint32_t yHeight, uint32_t uvWidth, uint32_t uvHeight);
// Legacy single buffer methods (deprecated)
HRESULT SetupPersistentMapping(uint32_t yWidth, uint32_t yHeight, uint32_t uvWidth, uint32_t uvHeight);
HRESULT ExecuteZeroCopyTextureUpdate();
void SetupVideoRenderingPipeline();
HRESULT WaitForGPU();
void WaitForFrameCompletion(UINT frameIndex);
};
} // namespace Vav2Player
} // namespace Vav2Player

View File

@@ -0,0 +1,59 @@
#pragma once
#include "pch.h"
#include "VavCore/VavCore.h"
#include <d3d12.h>
#include <wrl/client.h>
namespace Vav2Player {
using Microsoft::WRL::ComPtr;
/**
* IVideoBackend - Internal interface for format-specific video rendering backends
*
* Each backend handles a specific pixel format (RGB32, YUV420P, NV12) and
* rendering method (Surface, Upload, Direct).
*
* Backends are managed by D3D12VideoRenderer orchestrator.
*/
class IVideoBackend {
public:
    virtual ~IVideoBackend() = default;

    // ---- Lifecycle ----

    // Prepares the backend for use with the given device and command queue.
    // NOTE(review): width/height are presumably the output (render target)
    // size rather than the video size - confirm against the orchestrator.
    virtual HRESULT Initialize(
        ID3D12Device* device,
        ID3D12CommandQueue* commandQueue,
        uint32_t width,
        uint32_t height) = 0;

    // Releases everything acquired by Initialize().
    virtual void Shutdown() = 0;

    virtual bool IsInitialized() const = 0;

    // ---- Video texture management ----

    // Creates (or recreates) the backend's video texture for the given
    // video dimensions.
    virtual HRESULT CreateVideoTexture(uint32_t width, uint32_t height) = 0;

    // Returns the D3D12 texture resource for CUDA/external interop.
    // Returns nullptr if the backend doesn't use external textures.
    virtual ID3D12Resource* GetVideoTexture() const = 0;

    // ---- Frame rendering ----

    // Renders the given frame to the specified back buffer, recording the
    // work on the supplied command list.
    virtual HRESULT RenderToBackBuffer(
        const VavCoreVideoFrame& frame,
        ID3D12Resource* backBuffer,
        ID3D12GraphicsCommandList* commandList) = 0;

    // ---- Format information ----

    // Returns an identifier for the format this backend supports.
    // For internal use only - not critical for rendering. The default is 0.
    virtual int GetSupportedFormatId() const { return 0; }

    // Optional: frame update for backends that need a CPU upload.
    // The default implementation does nothing (GPU-only backends); the
    // parameter is left unnamed to avoid an unreferenced-parameter warning.
    virtual HRESULT UpdateFrame(const VavCoreVideoFrame& /*frame*/) {
        return S_OK;
    }
};
} // namespace Vav2Player

View File

@@ -0,0 +1,491 @@
#include "pch.h"
#include "RGBASurfaceBackend.h"
#include <stdexcept>
namespace Vav2Player {
// Nothing to do at construction time; all setup happens in Initialize().
RGBASurfaceBackend::RGBASurfaceBackend() = default;
// Destructor: delegates to Shutdown() so resources are released when the
// backend is destroyed.
RGBASurfaceBackend::~RGBASurfaceBackend() {
Shutdown();
}
// Stores the (non-owned) device and command queue plus the given dimensions,
// then builds the graphics resources.
//
// Returns S_OK when already initialized or on success, E_INVALIDARG when
// either pointer is null, otherwise the failure code from
// CreateGraphicsResources() (after rolling back via Shutdown()).
HRESULT RGBASurfaceBackend::Initialize(
    ID3D12Device* device,
    ID3D12CommandQueue* commandQueue,
    uint32_t width,
    uint32_t height)
{
    // A repeated call is a no-op; this check deliberately precedes argument
    // validation.
    if (m_initialized) {
        return S_OK;
    }

    if (device == nullptr || commandQueue == nullptr) {
        return E_INVALIDARG;
    }

    m_device = device;
    m_commandQueue = commandQueue;
    m_width = width;
    m_height = height;

    const HRESULT createResult = CreateGraphicsResources();
    if (SUCCEEDED(createResult)) {
        m_initialized = true;
        return S_OK;
    }

    // Undo the partial setup so the backend is left in its pristine state.
    Shutdown();
    return createResult;
}
// Returns the backend to its uninitialized state. Called from the destructor
// and from Initialize() when resource creation fails.
void RGBASurfaceBackend::Shutdown() {
// Release resources
m_constantBuffer.Reset();
m_pixelShaderBlob.Reset();
m_vertexShaderBlob.Reset();
m_srvHeap.Reset();
m_pipelineState.Reset();
m_rootSignature.Reset();
m_rgbaTexture.Reset();
// Clear references (not owned)
m_device = nullptr;
m_commandQueue = nullptr;
// Allows Initialize() to run again.
m_initialized = false;
}
// Creates the shared RGBA texture that receives decoded video, then refreshes
// the SRV heap and the constant buffer for the new video size.
//
// Returns E_NOT_VALID_STATE when no device is bound (Initialize() has not run
// or Shutdown() was called), E_INVALIDARG for a zero dimension, otherwise the
// first failing HRESULT from resource/SRV/constant-buffer creation, or S_OK.
HRESULT RGBASurfaceBackend::CreateVideoTexture(uint32_t width, uint32_t height) {
    // Guard against a null device: dereferencing it below would crash.
    if (!m_device) {
        return E_NOT_VALID_STATE;
    }

    // A zero-sized texture is never valid; reject it before touching state.
    if (width == 0 || height == 0) {
        return E_INVALIDARG;
    }

    m_videoWidth = width;
    m_videoHeight = height;

    // Create RGBA texture for CUDA Surface Object write
    // Format: DXGI_FORMAT_R8G8B8A8_UNORM (4 bytes per pixel)
    // Flags: D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS (enables CUDA Surface Object creation)
    // Layout: D3D12_TEXTURE_LAYOUT_UNKNOWN (tiled, CUDA Surface Objects handle this automatically)
    D3D12_RESOURCE_DESC rgbaTextureDesc = {};
    rgbaTextureDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
    rgbaTextureDesc.Width = width;
    rgbaTextureDesc.Height = height;
    rgbaTextureDesc.DepthOrArraySize = 1;
    rgbaTextureDesc.MipLevels = 1;
    rgbaTextureDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
    rgbaTextureDesc.SampleDesc.Count = 1;
    rgbaTextureDesc.SampleDesc.Quality = 0;
    rgbaTextureDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; // Tiled layout
    rgbaTextureDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; // Enable CUDA write

    D3D12_HEAP_PROPERTIES defaultHeapProps = {};
    defaultHeapProps.Type = D3D12_HEAP_TYPE_DEFAULT;
    defaultHeapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
    defaultHeapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;

    HRESULT hr = m_device->CreateCommittedResource(
        &defaultHeapProps,
        D3D12_HEAP_FLAG_SHARED, // Required for CUDA interop
        &rgbaTextureDesc,
        D3D12_RESOURCE_STATE_COMMON, // CUDA will transition as needed
        nullptr,
        IID_PPV_ARGS(&m_rgbaTexture)
    );
    if (FAILED(hr)) {
        return hr;
    }

    // Create SRV for RGBA texture
    hr = CreateSrvHeap();
    if (FAILED(hr)) {
        return hr;
    }

    // Update constant buffer with new aspect ratio
    hr = UpdateConstantBuffer();
    if (FAILED(hr)) {
        return hr;
    }

    return S_OK;
}
// Records the commands that draw the shared RGBA texture into `backBuffer`.
//
// Parameters:
//   frame       - not read by this backend; the pixels come from the RGBA texture
//   backBuffer  - target buffer; transitioned PRESENT -> RENDER_TARGET -> PRESENT
//   commandList - open graphics command list that receives the commands
// Returns:
//   E_NOT_VALID_STATE if the backend, the video texture or any pipeline object
//   is missing, E_INVALIDARG for a null argument, the failure code of
//   CreateDescriptorHeap, otherwise S_OK.
HRESULT RGBASurfaceBackend::RenderToBackBuffer(
    const VavCoreVideoFrame& frame,
    ID3D12Resource* backBuffer,
    ID3D12GraphicsCommandList* commandList)
{
    if (!m_initialized || !m_rgbaTexture) {
        return E_NOT_VALID_STATE;
    }
    // The texture can exist without its SRV heap when CreateVideoTexture()
    // failed part-way; refuse to record instead of dereferencing null below.
    if (!m_srvHeap || !m_constantBuffer || !m_pipelineState || !m_rootSignature) {
        return E_NOT_VALID_STATE;
    }
    if (!backBuffer || !commandList) {
        return E_INVALIDARG;
    }

    // Do the only fallible step before anything is recorded, so a failure
    // cannot leave unmatched state transitions in the command list.
    // NOTE(review): a descriptor heap is allocated on every call and released
    // on return; presumably fine because RTV descriptors are consumed at record
    // time - confirm, and consider caching the heap in the class.
    ComPtr<ID3D12DescriptorHeap> rtvHeap;
    D3D12_DESCRIPTOR_HEAP_DESC rtvHeapDesc = {};
    rtvHeapDesc.NumDescriptors = 1;
    rtvHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV;
    rtvHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
    HRESULT hr = m_device->CreateDescriptorHeap(&rtvHeapDesc, IID_PPV_ARGS(&rtvHeap));
    if (FAILED(hr)) {
        return hr;
    }

    // The view format matches the pipeline state's RTVFormats[0].
    D3D12_RENDER_TARGET_VIEW_DESC rtvDesc = {};
    rtvDesc.Format = DXGI_FORMAT_B8G8R8A8_UNORM;
    rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
    const D3D12_CPU_DESCRIPTOR_HANDLE rtvHandle = rtvHeap->GetCPUDescriptorHandleForHeapStart();
    m_device->CreateRenderTargetView(backBuffer, &rtvDesc, rtvHandle);

    // Builds a whole-resource transition barrier.
    const auto makeTransition = [](ID3D12Resource* resource,
                                   D3D12_RESOURCE_STATES before,
                                   D3D12_RESOURCE_STATES after) {
        D3D12_RESOURCE_BARRIER barrier = {};
        barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
        barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
        barrier.Transition.pResource = resource;
        barrier.Transition.StateBefore = before;
        barrier.Transition.StateAfter = after;
        barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
        return barrier;
    };

    // Video texture -> shader resource, back buffer -> render target (one batched call).
    const D3D12_RESOURCE_BARRIER beginBarriers[2] = {
        makeTransition(m_rgbaTexture.Get(),
                       D3D12_RESOURCE_STATE_COMMON,
                       D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE),
        makeTransition(backBuffer,
                       D3D12_RESOURCE_STATE_PRESENT,
                       D3D12_RESOURCE_STATE_RENDER_TARGET),
    };
    commandList->ResourceBarrier(2, beginBarriers);

    // Set graphics pipeline
    commandList->SetPipelineState(m_pipelineState.Get());
    commandList->SetGraphicsRootSignature(m_rootSignature.Get());

    // Root parameter 0 = SRV table (RGBA texture), 1 = CBV (AspectFit constants)
    ID3D12DescriptorHeap* heaps[] = { m_srvHeap.Get() };
    commandList->SetDescriptorHeaps(1, heaps);
    commandList->SetGraphicsRootDescriptorTable(0, m_srvHeap->GetGPUDescriptorHandleForHeapStart());
    commandList->SetGraphicsRootConstantBufferView(1, m_constantBuffer->GetGPUVirtualAddress());

    // Set render target
    commandList->OMSetRenderTargets(1, &rtvHandle, FALSE, nullptr);

    // Viewport and scissor cover the whole container
    D3D12_VIEWPORT viewport = {};
    viewport.Width = static_cast<FLOAT>(m_width);
    viewport.Height = static_cast<FLOAT>(m_height);
    viewport.MinDepth = 0.0f;
    viewport.MaxDepth = 1.0f;
    commandList->RSSetViewports(1, &viewport);

    D3D12_RECT scissorRect = {};
    scissorRect.right = static_cast<LONG>(m_width);
    scissorRect.bottom = static_cast<LONG>(m_height);
    commandList->RSSetScissorRects(1, &scissorRect);

    // Clear back buffer to opaque black
    const float clearColor[] = { 0.0f, 0.0f, 0.0f, 1.0f };
    commandList->ClearRenderTargetView(rtvHandle, clearColor, 0, nullptr);

    // Draw fullscreen quad (2 triangles, vertices generated from SV_VertexID)
    commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
    commandList->DrawInstanced(6, 1, 0, 0);

    // Back buffer -> present, video texture -> common (one batched call).
    const D3D12_RESOURCE_BARRIER endBarriers[2] = {
        makeTransition(backBuffer,
                       D3D12_RESOURCE_STATE_RENDER_TARGET,
                       D3D12_RESOURCE_STATE_PRESENT),
        makeTransition(m_rgbaTexture.Get(),
                       D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE,
                       D3D12_RESOURCE_STATE_COMMON),
    };
    commandList->ResourceBarrier(2, endBarriers);

    return S_OK;
}
// Builds the video-size-independent graphics objects: shaders, root signature,
// pipeline state and the upload-heap constant buffer for AspectFitConstants.
// Returns the HRESULT of the first step that fails, or of the buffer creation.
HRESULT RGBASurfaceBackend::CreateGraphicsResources() {
    HRESULT status = CompileShaders();
    if (SUCCEEDED(status)) {
        status = CreateRootSignature();
    }
    if (SUCCEEDED(status)) {
        status = CreatePipelineState();
    }
    if (FAILED(status)) {
        return status;
    }

    // Buffer size is the constants struct rounded up to a multiple of 256 bytes.
    D3D12_RESOURCE_DESC cbDesc = {};
    cbDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
    cbDesc.Width = (sizeof(AspectFitConstants) + 255) & ~static_cast<size_t>(255);
    cbDesc.Height = 1;
    cbDesc.DepthOrArraySize = 1;
    cbDesc.MipLevels = 1;
    cbDesc.Format = DXGI_FORMAT_UNKNOWN;
    cbDesc.SampleDesc.Count = 1;
    cbDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;

    // Upload heap so UpdateConstantBuffer() can Map and write it from the CPU.
    D3D12_HEAP_PROPERTIES heapProps = {};
    heapProps.Type = D3D12_HEAP_TYPE_UPLOAD;

    return m_device->CreateCommittedResource(
        &heapProps,
        D3D12_HEAP_FLAG_NONE,
        &cbDesc,
        D3D12_RESOURCE_STATE_GENERIC_READ,
        nullptr,
        IID_PPV_ARGS(&m_constantBuffer));
}
// Compiles the embedded HLSL vertex and pixel shaders into m_vertexShaderBlob
// and m_pixelShaderBlob. Compiler diagnostics of a failed compile are sent to
// the debugger output. Returns the HRESULT of the first failing compile, or S_OK.
HRESULT RGBASurfaceBackend::CompileShaders() {
    // Simple vertex shader - generates fullscreen quad
    // NOTE(review): UpdateConstantBuffer() sets offset = (1 - scale) / 2, so the
    // `+ offset` term below shifts the already centred sample window - confirm
    // that this is the intended AspectFit mapping.
    const char* vertexShaderSource = R"(
struct VSOutput {
float4 position : SV_POSITION;
float2 uv : TEXCOORD0;
};
cbuffer AspectFitConstants : register(b0) {
float videoAspectRatio;
float containerAspectRatio;
float uvScaleX;
float uvScaleY;
float uvOffsetX;
float uvOffsetY;
};
VSOutput main(uint vertexID : SV_VertexID) {
VSOutput output;
// Generate fullscreen quad vertices
float2 positions[6] = {
float2(-1.0, -1.0), float2(-1.0, 1.0), float2(1.0, 1.0),
float2(-1.0, -1.0), float2(1.0, 1.0), float2(1.0, -1.0)
};
float2 uvs[6] = {
float2(0.0, 1.0), float2(0.0, 0.0), float2(1.0, 0.0),
float2(0.0, 1.0), float2(1.0, 0.0), float2(1.0, 1.0)
};
output.position = float4(positions[vertexID], 0.0, 1.0);
// Apply AspectFit UV transformation
float2 uv = uvs[vertexID];
uv = (uv - 0.5) * float2(uvScaleX, uvScaleY) + 0.5 + float2(uvOffsetX, uvOffsetY);
output.uv = uv;
return output;
}
)";
    // Simple pixel shader - sample RGBA texture
    const char* pixelShaderSource = R"(
Texture2D<float4> rgbaTexture : register(t0);
SamplerState linearSampler : register(s0);
struct PSInput {
float4 position : SV_POSITION;
float2 uv : TEXCOORD0;
};
float4 main(PSInput input) : SV_TARGET {
return rgbaTexture.Sample(linearSampler, input.uv);
}
)";

    // Debug information and disabled optimization are only useful while
    // debugging; other builds get optimized bytecode, like the compute shader
    // of the YUV420P backend.
#if defined(_DEBUG)
    const UINT compileFlags = D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION;
#else
    const UINT compileFlags = D3DCOMPILE_OPTIMIZATION_LEVEL3;
#endif

    // Compiles one entry point named "main" and logs diagnostics on failure.
    const auto compile = [compileFlags](const char* source,
                                        const char* target,
                                        ComPtr<ID3DBlob>& bytecode) -> HRESULT {
        ComPtr<ID3DBlob> errorBlob;
        const HRESULT result = D3DCompile(
            source,
            strlen(source),
            nullptr,
            nullptr,
            nullptr,
            "main",
            target,
            compileFlags,
            0,
            &bytecode,
            &errorBlob);
        if (FAILED(result) && errorBlob) {
            OutputDebugStringA(static_cast<const char*>(errorBlob->GetBufferPointer()));
        }
        return result;
    };

    HRESULT hr = compile(vertexShaderSource, "vs_5_0", m_vertexShaderBlob);
    if (FAILED(hr)) {
        return hr;
    }

    hr = compile(pixelShaderSource, "ps_5_0", m_pixelShaderBlob);
    if (FAILED(hr)) {
        return hr;
    }

    return S_OK;
}
// Builds the root signature used for drawing the RGBA texture:
//   parameter 0 - descriptor table with one SRV (t0), pixel stage
//   parameter 1 - root CBV (b0), vertex stage
//   static sampler s0 - linear filtering, clamp addressing, pixel stage
// Serializer errors are sent to the debugger output.
HRESULT RGBASurfaceBackend::CreateRootSignature() {
    CD3DX12_DESCRIPTOR_RANGE textureRange;
    textureRange.Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 0); // t0

    CD3DX12_ROOT_PARAMETER parameters[2];
    parameters[0].InitAsDescriptorTable(1, &textureRange, D3D12_SHADER_VISIBILITY_PIXEL);
    parameters[1].InitAsConstantBufferView(0, 0, D3D12_SHADER_VISIBILITY_VERTEX); // b0

    D3D12_STATIC_SAMPLER_DESC linearClamp = {};
    linearClamp.ShaderRegister = 0; // s0
    linearClamp.RegisterSpace = 0;
    linearClamp.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;
    linearClamp.Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR;
    linearClamp.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
    linearClamp.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
    linearClamp.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
    linearClamp.MipLODBias = 0;
    linearClamp.MaxAnisotropy = 0;
    linearClamp.ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER;
    linearClamp.BorderColor = D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK;
    linearClamp.MinLOD = 0.0f;
    linearClamp.MaxLOD = D3D12_FLOAT32_MAX;

    CD3DX12_ROOT_SIGNATURE_DESC description;
    description.Init(
        2, parameters,
        1, &linearClamp,
        D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT);

    ComPtr<ID3DBlob> serialized;
    ComPtr<ID3DBlob> diagnostics;
    const HRESULT serializeResult = D3D12SerializeRootSignature(
        &description, D3D_ROOT_SIGNATURE_VERSION_1, &serialized, &diagnostics);
    if (FAILED(serializeResult)) {
        if (diagnostics) {
            OutputDebugStringA((char*)diagnostics->GetBufferPointer());
        }
        return serializeResult;
    }

    return m_device->CreateRootSignature(
        0,
        serialized->GetBufferPointer(),
        serialized->GetBufferSize(),
        IID_PPV_ARGS(&m_rootSignature));
}
// Creates the graphics pipeline state from the compiled shaders and the root
// signature: default blend/rasterizer state, depth and stencil disabled, one
// B8G8R8A8_UNORM render target, no multisampling.
HRESULT RGBASurfaceBackend::CreatePipelineState() {
    D3D12_GRAPHICS_PIPELINE_STATE_DESC description = {};
    description.pRootSignature = m_rootSignature.Get();

    description.VS.pShaderBytecode = m_vertexShaderBlob->GetBufferPointer();
    description.VS.BytecodeLength = m_vertexShaderBlob->GetBufferSize();
    description.PS.pShaderBytecode = m_pixelShaderBlob->GetBufferPointer();
    description.PS.BytecodeLength = m_pixelShaderBlob->GetBufferSize();

    description.RasterizerState = CD3DX12_RASTERIZER_DESC(D3D12_DEFAULT);
    description.BlendState = CD3DX12_BLEND_DESC(D3D12_DEFAULT);
    description.SampleMask = UINT_MAX;
    description.DepthStencilState.DepthEnable = FALSE;
    description.DepthStencilState.StencilEnable = FALSE;

    description.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
    description.NumRenderTargets = 1;
    description.RTVFormats[0] = DXGI_FORMAT_B8G8R8A8_UNORM;
    description.SampleDesc.Count = 1;

    return m_device->CreateGraphicsPipelineState(&description, IID_PPV_ARGS(&m_pipelineState));
}
// Creates a one-entry shader-visible descriptor heap and writes the SRV of the
// RGBA video texture into its first slot. Returns the failure code of the heap
// creation, otherwise S_OK.
HRESULT RGBASurfaceBackend::CreateSrvHeap() {
    D3D12_DESCRIPTOR_HEAP_DESC heapDescription = {};
    heapDescription.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
    heapDescription.NumDescriptors = 1;
    heapDescription.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;

    const HRESULT heapResult =
        m_device->CreateDescriptorHeap(&heapDescription, IID_PPV_ARGS(&m_srvHeap));
    if (FAILED(heapResult)) {
        return heapResult;
    }

    // View of the single mip level, in the texture's own format.
    D3D12_SHADER_RESOURCE_VIEW_DESC textureView = {};
    textureView.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
    textureView.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
    textureView.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
    textureView.Texture2D.MipLevels = 1;

    const D3D12_CPU_DESCRIPTOR_HANDLE firstSlot = m_srvHeap->GetCPUDescriptorHandleForHeapStart();
    m_device->CreateShaderResourceView(m_rgbaTexture.Get(), &textureView, firstSlot);

    return S_OK;
}
// Recomputes the AspectFit constants from the video and container sizes and
// copies them into the mapped constant buffer.
//
// Returns E_NOT_VALID_STATE when the constant buffer does not exist, the
// failure code of Map(), otherwise S_OK.
HRESULT RGBASurfaceBackend::UpdateConstantBuffer() {
    if (!m_constantBuffer) {
        return E_NOT_VALID_STATE;
    }

    // Start from the identity UV transform (scale 1, offset 0).
    AspectFitConstants constants = {};
    constants.uvScaleX = 1.0f;
    constants.uvScaleY = 1.0f;

    // With any dimension at zero the ratios below would be inf or NaN and be
    // uploaded to the shader; keep the identity transform in that case.
    const bool sizesKnown =
        m_videoWidth != 0 && m_videoHeight != 0 && m_width != 0 && m_height != 0;

    if (sizesKnown) {
        constants.videoAspectRatio = static_cast<float>(m_videoWidth) / m_videoHeight;
        constants.containerAspectRatio = static_cast<float>(m_width) / m_height;

        if (constants.videoAspectRatio > constants.containerAspectRatio) {
            // Video is wider - fit to width
            constants.uvScaleX = 1.0f;
            constants.uvScaleY = constants.containerAspectRatio / constants.videoAspectRatio;
            constants.uvOffsetX = 0.0f;
            constants.uvOffsetY = (1.0f - constants.uvScaleY) * 0.5f;
        } else {
            // Video is taller - fit to height
            constants.uvScaleX = constants.videoAspectRatio / constants.containerAspectRatio;
            constants.uvScaleY = 1.0f;
            constants.uvOffsetX = (1.0f - constants.uvScaleX) * 0.5f;
            constants.uvOffsetY = 0.0f;
        }
    }

    // Upload to GPU. The empty read range tells the runtime the CPU will not
    // read the buffer's current contents.
    void* mappedData = nullptr;
    D3D12_RANGE readRange = { 0, 0 };
    HRESULT hr = m_constantBuffer->Map(0, &readRange, &mappedData);
    if (FAILED(hr)) {
        return hr;
    }
    memcpy(mappedData, &constants, sizeof(AspectFitConstants));
    m_constantBuffer->Unmap(0, nullptr);

    return S_OK;
}
} // namespace Vav2Player

View File

@@ -0,0 +1,100 @@
#pragma once
#include "IVideoBackend.h"
#include <d3dcompiler.h>
namespace Vav2Player {
/**
 * RGBASurfaceBackend - RGBA format rendering using CUDA Surface Objects
 *
 * Format: RGBA, 4 bytes per pixel (reported as format id 1 by GetSupportedFormatId())
 * Method: CUDA Surface Objects (surf2Dwrite)
 *
 * Pipeline:
 * 1. NVDEC decodes AV1 → NV12 (CUDA memory)
 * 2. CUDA kernel converts NV12 → RGBA (CUDA memory)
 * 3. surf2Dwrite() writes RGBA to D3D12 tiled texture (via CUDA Surface Object)
 * 4. D3D12 graphics pipeline samples RGBA texture → renders to back buffer
 *
 * Key Features:
 * - Zero-copy GPU pipeline (no CPU involvement)
 * - CUDA Surface Objects handle tiled texture layout automatically
 * - Simple texture sampling (no YUV→RGB conversion needed)
 *
 * Lifetime:
 * - The device and command queue are held as raw, non-owning pointers; the
 *   caller must keep them alive until Shutdown() or destruction.
 */
class RGBASurfaceBackend : public IVideoBackend {
public:
RGBASurfaceBackend();
// Calls Shutdown().
~RGBASurfaceBackend() override;
// IVideoBackend interface
// Stores device/queue and the container size, then builds shaders, root
// signature, pipeline state and the constant buffer.
HRESULT Initialize(
ID3D12Device* device,
ID3D12CommandQueue* commandQueue,
uint32_t width,
uint32_t height) override;
// Drops all resource references and clears the initialized flag.
void Shutdown() override;
bool IsInitialized() const override { return m_initialized; }
// Creates the shared RGBA texture and its SRV for the given video size.
HRESULT CreateVideoTexture(uint32_t width, uint32_t height) override;
// Returns the RGBA texture, or nullptr before CreateVideoTexture() succeeded.
ID3D12Resource* GetVideoTexture() const override { return m_rgbaTexture.Get(); }
// Records the draw of the RGBA texture into backBuffer on commandList.
HRESULT RenderToBackBuffer(
const VavCoreVideoFrame& frame,
ID3D12Resource* backBuffer,
ID3D12GraphicsCommandList* commandList) override;
// Format identifier of this backend: 1.
// NOTE(review): presumably stands for the former VAVCORE_COLOR_SPACE_RGB32
// value - confirm and consider a named constant.
int GetSupportedFormatId() const override {
return 1;
}
private:
// D3D12 core objects (not owned - passed from orchestrator)
ID3D12Device* m_device = nullptr;
ID3D12CommandQueue* m_commandQueue = nullptr;
// RGBA video texture
// Format: DXGI_FORMAT_R8G8B8A8_UNORM
// Flags: D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS (for CUDA Surface Object)
// Layout: D3D12_TEXTURE_LAYOUT_UNKNOWN (tiled, handled by CUDA Surface Objects)
ComPtr<ID3D12Resource> m_rgbaTexture;
// Graphics pipeline for simple RGBA texture sampling
ComPtr<ID3D12RootSignature> m_rootSignature;
ComPtr<ID3D12PipelineState> m_pipelineState;
// Shader-visible heap holding the single SRV of m_rgbaTexture
ComPtr<ID3D12DescriptorHeap> m_srvHeap;
// Shaders (compiled bytecode)
ComPtr<ID3DBlob> m_vertexShaderBlob;
ComPtr<ID3DBlob> m_pixelShaderBlob;
// Constant buffer for AspectFit rendering
// The first six fields mirror, in order, the cbuffer declared in the vertex shader.
struct AspectFitConstants {
float videoAspectRatio; // Video width / height
float containerAspectRatio; // Container width / height
float uvScaleX; // UV scaling factor X
float uvScaleY; // UV scaling factor Y
float uvOffsetX; // UV offset X for centering
float uvOffsetY; // UV offset Y for centering
float padding[2]; // Pads the struct from 24 to 32 bytes (a multiple of 16)
};
// Upload-heap buffer that receives an AspectFitConstants value
ComPtr<ID3D12Resource> m_constantBuffer;
// State
bool m_initialized = false;
uint32_t m_width = 0; // Container width
uint32_t m_height = 0; // Container height
uint32_t m_videoWidth = 0; // Video width passed to CreateVideoTexture()
uint32_t m_videoHeight = 0; // Video height passed to CreateVideoTexture()
// Helper methods
HRESULT CreateGraphicsResources();
HRESULT CompileShaders();
HRESULT CreateRootSignature();
HRESULT CreatePipelineState();
HRESULT CreateSrvHeap();
HRESULT UpdateConstantBuffer();
};
} // namespace Vav2Player

View File

@@ -1,194 +0,0 @@
#pragma once
#include <d3d12.h>
#include <dxgi1_6.h>
#include <d3dcompiler.h>
#include <d3dx12.h>
#include <wrl/client.h>
#include <winrt/Microsoft.UI.Xaml.Controls.h>
#include <microsoft.ui.xaml.media.dxinterop.h>
#include "IVideoRenderer.h"
#include "GlobalD3D12SyncManager.h"
using Microsoft::WRL::ComPtr;
namespace Vav2Player {
// Constant buffer structure for AspectFit UV coordinate calculation.
// Six float parameters followed by two floats of padding (eight floats in total).
// NOTE(review): presumably mirrors a cbuffer declared in the renderer's
// shaders - confirm before changing field order or size.
struct AspectFitConstants
{
float videoAspectRatio; // Video width / height
float containerAspectRatio; // Container width / height
float uvScaleX; // UV scaling factor X
float uvScaleY; // UV scaling factor Y
float uvOffsetX; // UV offset X for centering
float uvOffsetY; // UV offset Y for centering
float padding[2]; // Pads the struct from 24 to 32 bytes (a multiple of 16)
};
// Simple, clean GPU renderer for AV1 video playback
// Phase 3: Designed from scratch with proper architecture
//
// Implements IVideoRenderer and additionally exposes swap-chain setup for a
// WinUI SwapChainPanel plus an NV12 texture path for CUDA interop.
class SimpleGPURenderer : public IVideoRenderer
{
public:
SimpleGPURenderer();
~SimpleGPURenderer();
// IVideoRenderer interface implementation
HRESULT Initialize(uint32_t width, uint32_t height) override;
void Shutdown() override;
bool IsInitialized() const override { return m_initialized; }
HRESULT RenderVideoFrame(const VavCoreVideoFrame& frame) override;
bool TryRenderFrame(const VavCoreVideoFrame& frame) override;
HRESULT Present() override;
HRESULT Resize(uint32_t width, uint32_t height) override;
uint32_t GetWidth() const override { return m_width; }
uint32_t GetHeight() const override { return m_height; }
// SimpleGPURenderer specific methods
HRESULT InitializeWithSwapChain(winrt::Microsoft::UI::Xaml::Controls::SwapChainPanel const& panel,
uint32_t width, uint32_t height);
void SetSwapChainPanel(winrt::Microsoft::UI::Xaml::Controls::SwapChainPanel const& panel);
// Get D3D12 device for VavCore integration
// Returns a non-owning pointer (no AddRef); nullptr while no device is held.
ID3D12Device* GetD3D12Device() const { return m_device.Get(); }
// Get NV12 texture for VavCore zero-copy decode
// Returns NV12 texture - native DXGI_FORMAT_NV12 for proper CUDA interop
// Non-owning pointer; nullptr while no NV12 texture is held.
ID3D12Resource* GetNV12TextureForCUDAInterop() const { return m_nv12Texture.Get(); }
// Set the shared fence for CUDA-D3D12 synchronization
// Takes ownership of the fence pointer (caller should not delete it)
void SetSyncFence(void* fence);
// Release NV12 texture (cleanup partial initialization)
void ReleaseNV12Texture();
// Create NV12 texture for CUDA-D3D12 interop
// Format: DXGI_FORMAT_NV12 (native 2-plane YUV format)
// NOTE(review): the "R8Layout" suffix does not match the NV12 format named
// above - confirm which of the two is current.
HRESULT CreateNV12TextureR8Layout(uint32_t videoWidth, uint32_t videoHeight);
// Render NV12 texture to back buffer (YUV to RGB conversion on GPU)
HRESULT RenderNV12TextureToBackBuffer(uint64_t fenceValue);
private:
// D3D12 core objects
ComPtr<ID3D12Device> m_device;
ComPtr<ID3D12CommandQueue> m_commandQueue;
ComPtr<IDXGISwapChain3> m_swapChain;
ComPtr<ID3D12DescriptorHeap> m_rtvHeap;
// Synchronization with VavCore (CUDA)
void* m_syncFence = nullptr; // ID3D12Fence*
// Command objects - Double buffering (changed from 3 for performance test)
static const UINT FrameCount = 2;
ComPtr<ID3D12CommandAllocator> m_commandAllocators[FrameCount];
ComPtr<ID3D12GraphicsCommandList> m_commandList;
ComPtr<ID3D12Resource> m_renderTargets[FrameCount];
// NOTE(review): m_frameIndex, m_fenceValue, m_frameCompletionValues and
// m_fenceEvent have no in-class initializer - presumably set by the
// constructor, which is not visible here.
UINT m_frameIndex;
// Simple synchronization
ComPtr<ID3D12Fence> m_fence;
UINT64 m_fenceValue; // Single incrementing counter
UINT64 m_frameCompletionValues[FrameCount]; // Per-frame completion tracking
HANDLE m_fenceEvent;
// YUV-to-RGB compute shader resources
ComPtr<ID3D12RootSignature> m_computeRootSignature;
ComPtr<ID3D12PipelineState> m_computePipelineState;
ComPtr<ID3D12DescriptorHeap> m_srvUavHeap;
ComPtr<ID3DBlob> m_computeShaderBlob;
// Graphics pipeline for AspectFit rendering (YUV-to-RGB with UV coordinates)
ComPtr<ID3D12RootSignature> m_graphicsRootSignature;
ComPtr<ID3D12PipelineState> m_graphicsPipelineState;
ComPtr<ID3D12DescriptorHeap> m_graphicsSrvHeap;
ComPtr<ID3DBlob> m_vertexShaderBlob;
ComPtr<ID3DBlob> m_pixelShaderBlob;
ComPtr<ID3D12Resource> m_aspectFitConstantBuffer;
// Video textures - one set per frame slot (FrameCount entries, currently 2)
ComPtr<ID3D12Resource> m_yTextures[FrameCount];
ComPtr<ID3D12Resource> m_uTextures[FrameCount];
ComPtr<ID3D12Resource> m_vTextures[FrameCount];
ComPtr<ID3D12Resource> m_rgbTextures[FrameCount];
// NV12 texture for VavCore zero-copy decode
// Format: DXGI_FORMAT_NV12 (native 2-plane YUV format)
// Shared with CUDA via cudaExternalMemory API
ComPtr<ID3D12Resource> m_nv12Texture;
// NV12 to RGB graphics pipeline resources
ComPtr<ID3D12RootSignature> m_nv12RootSignature;
ComPtr<ID3D12PipelineState> m_nv12PipelineState;
ComPtr<ID3D12DescriptorHeap> m_nv12SrvHeap;
ComPtr<ID3DBlob> m_nv12VertexShaderBlob;
ComPtr<ID3DBlob> m_nv12PixelShaderBlob;
ComPtr<ID3D12Resource> m_nv12ConstantBuffer;
// Upload resources for CPU->GPU transfer - one set per frame slot (FrameCount entries, currently 2)
ComPtr<ID3D12Resource> m_yUploadBuffers[FrameCount];
ComPtr<ID3D12Resource> m_uUploadBuffers[FrameCount];
ComPtr<ID3D12Resource> m_vUploadBuffers[FrameCount];
// Constant buffer for shader
ComPtr<ID3D12Resource> m_constantBuffer;
// State
bool m_initialized = false;
uint32_t m_width = 0; // Returned by GetWidth()
uint32_t m_height = 0; // Returned by GetHeight()
uint32_t m_videoWidth = 0;
uint32_t m_videoHeight = 0;
UINT m_rtvDescriptorSize = 0;
UINT m_srvUavDescriptorSize = 0;
uint64_t m_totalFramesRendered = 0;
// WinUI integration
winrt::Microsoft::UI::Xaml::Controls::SwapChainPanel m_swapChainPanel{ nullptr };
// Helper methods
HRESULT CreateDevice();
HRESULT CreateCommandQueue();
HRESULT CreateSwapChain(winrt::Microsoft::UI::Xaml::Controls::SwapChainPanel const& panel);
HRESULT CreateDescriptorHeaps();
HRESULT CreateRenderTargets();
HRESULT CreateSynchronizationObjects();
HRESULT CreateComputeShaderResources();
HRESULT CreateVideoTextures(uint32_t videoWidth, uint32_t videoHeight);
HRESULT ExecuteGPUPipeline(const VavCoreVideoFrame& frame); // Consolidated GPU pipeline
// NOTE(review): the three methods below are labelled "public interface" but
// are declared in the private section.
HRESULT UpdateVideoTextures(const VavCoreVideoFrame& frame); // Legacy public interface
HRESULT ExecuteComputeShader(); // Legacy public interface
HRESULT CopyToBackBuffer(); // Legacy public interface
// Internal methods (no command list management)
HRESULT UpdateVideoTexturesInternal(const VavCoreVideoFrame& frame);
HRESULT ExecuteComputeShaderInternal();
HRESULT CopyToBackBufferInternal();
HRESULT WaitForGPU();
void WaitForFrameCompletion(UINT frameIndex); // Simple frame-specific wait
// Shader compilation
HRESULT CompileComputeShader();
HRESULT CreateComputeRootSignature();
HRESULT CreateComputePipelineState();
// Graphics pipeline for AspectFit rendering
HRESULT CreateGraphicsShaderResources();
HRESULT CreateGraphicsRootSignature();
HRESULT CreateGraphicsPipelineState();
HRESULT CompileGraphicsShaders();
HRESULT RenderWithAspectFitInternal(); // New AspectFit rendering method
// NV12 graphics pipeline
HRESULT CreateNV12GraphicsPipeline();
HRESULT CompileNV12Shaders();
HRESULT CreateNV12RootSignature();
HRESULT CreateNV12PipelineState();
HRESULT CreateNV12SrvHeap();
};
} // namespace Vav2Player

View File

@@ -0,0 +1,918 @@
#include "pch.h"
#include "YUV420PUploadBackend.h"
namespace Vav2Player {
// Construction performs no work of its own; resources are created later by
// Initialize() and CreateVideoTexture().
YUV420PUploadBackend::YUV420PUploadBackend() = default;
// Destruction delegates to Shutdown(), which unmaps and releases everything.
YUV420PUploadBackend::~YUV420PUploadBackend()
{
    Shutdown();
}
// Stores the caller's device, queue and container size. No GPU resources are
// created here; that happens in CreateVideoTexture().
//
// Returns S_OK on success or when already initialized, E_INVALIDARG when
// either pointer is null.
HRESULT YUV420PUploadBackend::Initialize(
    ID3D12Device* device,
    ID3D12CommandQueue* commandQueue,
    uint32_t width,
    uint32_t height)
{
    if (!m_initialized) {
        if (device == nullptr || commandQueue == nullptr) {
            return E_INVALIDARG;
        }

        m_width = width;
        m_height = height;
        m_device = device;
        m_commandQueue = commandQueue;
        m_initialized = true;
    }

    // A repeated call keeps the arguments of the first one.
    return S_OK;
}
// Unmaps every persistently mapped upload buffer, discards the ring buffer
// slots, releases all D3D12 objects and marks the backend as uninitialized.
void YUV420PUploadBackend::Shutdown()
{
    // Undo the persistent mappings first; a plane is only unmapped when both
    // its buffer and its mapped pointer are present.
    for (size_t index = 0; index < m_ringBuffers.size(); ++index) {
        auto& ring = m_ringBuffers[index];

        if (ring.yUploadBuffer && ring.yMappedData) {
            ring.yUploadBuffer->Unmap(0, nullptr);
            ring.yMappedData = nullptr;
        }
        if (ring.uUploadBuffer && ring.uMappedData) {
            ring.uUploadBuffer->Unmap(0, nullptr);
            ring.uMappedData = nullptr;
        }
        if (ring.vUploadBuffer && ring.vMappedData) {
            ring.vUploadBuffer->Unmap(0, nullptr);
            ring.vMappedData = nullptr;
        }
    }
    m_ringBuffers.clear();

    // Plane textures and the intermediate RGB texture.
    m_yTexture.Reset();
    m_uTexture.Reset();
    m_vTexture.Reset();
    m_rgbTexture.Reset();

    // Compute (YUV -> RGB) pipeline objects.
    m_computeRootSignature.Reset();
    m_computePipelineState.Reset();
    m_srvUavHeap.Reset();
    m_computeShaderBlob.Reset();

    // Graphics pipeline objects.
    m_graphicsRootSignature.Reset();
    m_graphicsPipelineState.Reset();
    m_graphicsSrvHeap.Reset();
    m_vertexShaderBlob.Reset();
    m_pixelShaderBlob.Reset();
    m_constantBuffer.Reset();

    // Borrowed pointers are forgotten, not released.
    m_commandQueue = nullptr;
    m_device = nullptr;
    m_initialized = false;
}
// Creates everything that depends on the video size: upload ring buffers, GPU
// plane textures, the compute (YUV -> RGB) resources and the graphics resources.
//
// Parameters:
//   width, height - video size in pixels; both must be non-zero
// Returns:
//   E_NOT_VALID_STATE if no device has been set (Initialize() not called or
//   Shutdown() already run), E_INVALIDARG for a zero dimension, the HRESULT of
//   the first failing step, otherwise S_OK.
HRESULT YUV420PUploadBackend::CreateVideoTexture(uint32_t width, uint32_t height) {
    // Every step below creates resources through m_device; without this guard
    // a call before Initialize() would dereference a null pointer.
    if (!m_device) {
        return E_NOT_VALID_STATE;
    }
    // Reject empty sizes up front instead of relying on the runtime to do so.
    if (width == 0 || height == 0) {
        return E_INVALIDARG;
    }

    m_videoWidth = width;
    m_videoHeight = height;

    HRESULT hr = CreateRingBuffers(width, height);
    if (FAILED(hr)) {
        return hr;
    }

    hr = CreateGPUTextures(width, height);
    if (FAILED(hr)) {
        return hr;
    }

    hr = CreateComputeShaderResources();
    if (FAILED(hr)) {
        return hr;
    }

    hr = CreateGraphicsResources();
    if (FAILED(hr)) {
        return hr;
    }

    return S_OK;
}
// Creates BufferCount ring-buffer slots, each with persistently mapped upload
// buffers for the Y, U and V planes. Row pitches are rounded up to a multiple
// of 256 bytes; U and V use half the width and height, rounded up.
//
// Returns E_NOT_VALID_STATE without a device, E_INVALIDARG for a zero
// dimension, the HRESULT of the first failing create or map call, otherwise
// S_OK. Slots created before a failure are left in m_ringBuffers.
HRESULT YUV420PUploadBackend::CreateRingBuffers(uint32_t videoWidth, uint32_t videoHeight) {
    if (!m_device) {
        return E_NOT_VALID_STATE;
    }
    if (videoWidth == 0 || videoHeight == 0) {
        return E_INVALIDARG;
    }

    m_ringBuffers.resize(BufferCount);

    const uint32_t yRowPitch = (videoWidth + 255) & ~255u; // 256-byte alignment
    const uint32_t uvWidth = (videoWidth + 1) / 2;
    const uint32_t uvHeight = (videoHeight + 1) / 2;
    const uint32_t uvRowPitch = (uvWidth + 255) & ~255u;

    // Widen before multiplying so large frames cannot wrap around in 32 bits.
    const UINT64 yBufferBytes = static_cast<UINT64>(yRowPitch) * videoHeight;
    const UINT64 uvBufferBytes = static_cast<UINT64>(uvRowPitch) * uvHeight;

    // These descriptions are identical for every slot, so build them once.
    const D3D12_HEAP_PROPERTIES uploadHeapProps = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD);
    const D3D12_RESOURCE_DESC yBufferDesc = CD3DX12_RESOURCE_DESC::Buffer(yBufferBytes);
    const D3D12_RESOURCE_DESC uvBufferDesc = CD3DX12_RESOURCE_DESC::Buffer(uvBufferBytes);

    for (UINT i = 0; i < BufferCount; ++i) {
        auto& slot = m_ringBuffers[i];
        slot.yRowPitch = yRowPitch;
        slot.uRowPitch = uvRowPitch;
        slot.vRowPitch = uvRowPitch;

        // Create Y upload buffer
        HRESULT hr = m_device->CreateCommittedResource(
            &uploadHeapProps,
            D3D12_HEAP_FLAG_NONE,
            &yBufferDesc,
            D3D12_RESOURCE_STATE_GENERIC_READ,
            nullptr,
            IID_PPV_ARGS(&slot.yUploadBuffer));
        if (FAILED(hr)) {
            return hr;
        }

        // Create U upload buffer
        hr = m_device->CreateCommittedResource(
            &uploadHeapProps,
            D3D12_HEAP_FLAG_NONE,
            &uvBufferDesc,
            D3D12_RESOURCE_STATE_GENERIC_READ,
            nullptr,
            IID_PPV_ARGS(&slot.uUploadBuffer));
        if (FAILED(hr)) {
            return hr;
        }

        // Create V upload buffer
        hr = m_device->CreateCommittedResource(
            &uploadHeapProps,
            D3D12_HEAP_FLAG_NONE,
            &uvBufferDesc,
            D3D12_RESOURCE_STATE_GENERIC_READ,
            nullptr,
            IID_PPV_ARGS(&slot.vUploadBuffer));
        if (FAILED(hr)) {
            return hr;
        }

        // Persistent mapping: the pointers stay valid until Shutdown() unmaps them.
        hr = slot.yUploadBuffer->Map(0, nullptr, reinterpret_cast<void**>(&slot.yMappedData));
        if (FAILED(hr)) {
            return hr;
        }
        hr = slot.uUploadBuffer->Map(0, nullptr, reinterpret_cast<void**>(&slot.uMappedData));
        if (FAILED(hr)) {
            return hr;
        }
        hr = slot.vUploadBuffer->Map(0, nullptr, reinterpret_cast<void**>(&slot.vMappedData));
        if (FAILED(hr)) {
            return hr;
        }
    }

    return S_OK;
}
// Creates the default-heap textures for one frame: R8 planes for Y (full size),
// U and V (half size, rounded up) in COPY_DEST state, plus a full-size RGBA
// texture in UNORDERED_ACCESS state. All four allow unordered access.
// Returns the HRESULT of the first failing creation, or of the last one.
HRESULT YUV420PUploadBackend::CreateGPUTextures(uint32_t videoWidth, uint32_t videoHeight) {
    const uint32_t chromaWidth = (videoWidth + 1) / 2;
    const uint32_t chromaHeight = (videoHeight + 1) / 2;

    // Describe all textures up front; the creation calls follow in Y, U, V, RGB order.
    D3D12_RESOURCE_DESC lumaDesc = CD3DX12_RESOURCE_DESC::Tex2D(
        DXGI_FORMAT_R8_UNORM, videoWidth, videoHeight, 1, 1);
    lumaDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;

    D3D12_RESOURCE_DESC chromaDesc = CD3DX12_RESOURCE_DESC::Tex2D(
        DXGI_FORMAT_R8_UNORM, chromaWidth, chromaHeight, 1, 1);
    chromaDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;

    D3D12_RESOURCE_DESC rgbDesc = CD3DX12_RESOURCE_DESC::Tex2D(
        DXGI_FORMAT_R8G8B8A8_UNORM, videoWidth, videoHeight, 1, 1);
    rgbDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;

    D3D12_HEAP_PROPERTIES gpuHeap = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT);

    // Y plane
    HRESULT result = m_device->CreateCommittedResource(
        &gpuHeap,
        D3D12_HEAP_FLAG_NONE,
        &lumaDesc,
        D3D12_RESOURCE_STATE_COPY_DEST,
        nullptr,
        IID_PPV_ARGS(&m_yTexture));
    if (FAILED(result)) {
        return result;
    }

    // U plane
    result = m_device->CreateCommittedResource(
        &gpuHeap,
        D3D12_HEAP_FLAG_NONE,
        &chromaDesc,
        D3D12_RESOURCE_STATE_COPY_DEST,
        nullptr,
        IID_PPV_ARGS(&m_uTexture));
    if (FAILED(result)) {
        return result;
    }

    // V plane
    result = m_device->CreateCommittedResource(
        &gpuHeap,
        D3D12_HEAP_FLAG_NONE,
        &chromaDesc,
        D3D12_RESOURCE_STATE_COPY_DEST,
        nullptr,
        IID_PPV_ARGS(&m_vTexture));
    if (FAILED(result)) {
        return result;
    }

    // Intermediate RGB texture
    return m_device->CreateCommittedResource(
        &gpuHeap,
        D3D12_HEAP_FLAG_NONE,
        &rgbDesc,
        D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
        nullptr,
        IID_PPV_ARGS(&m_rgbTexture));
}
// Builds the compute pipeline in dependency order: shader bytecode, root
// signature, pipeline state, then the SRV/UAV descriptor heap. Stops at the
// first failure and returns its HRESULT.
HRESULT YUV420PUploadBackend::CreateComputeShaderResources() {
    HRESULT status = CompileComputeShader();
    if (SUCCEEDED(status)) {
        status = CreateComputeRootSignature();
    }
    if (SUCCEEDED(status)) {
        status = CreateComputePipelineState();
    }
    if (SUCCEEDED(status)) {
        status = CreateSrvUavHeap();
    }
    return status;
}
// Compiles the embedded YUV -> RGB compute shader (cs_5_0, 8x8 thread groups)
// into m_computeShaderBlob. On failure the compiler diagnostics, when present,
// are sent to the debugger output. Returns the HRESULT of D3DCompile.
HRESULT YUV420PUploadBackend::CompileComputeShader() {
    // YUV to RGB compute shader (BT.709 color space)
    const char* hlsl = R"(
Texture2D<float> g_yTexture : register(t0);
Texture2D<float> g_uTexture : register(t1);
Texture2D<float> g_vTexture : register(t2);
RWTexture2D<float4> g_rgbTexture : register(u0);
[numthreads(8, 8, 1)]
void main(uint3 id : SV_DispatchThreadID)
{
float y = g_yTexture[id.xy].r;
float u = g_uTexture[id.xy / 2].r;
float v = g_vTexture[id.xy / 2].r;
// BT.709 YUV to RGB conversion
y = (y * 255.0f - 16.0f) / 219.0f;
u = (u * 255.0f - 128.0f) / 224.0f;
v = (v * 255.0f - 128.0f) / 224.0f;
float3 rgb;
rgb.r = y + 1.5748f * v;
rgb.g = y - 0.1873f * u - 0.4681f * v;
rgb.b = y + 1.8556f * u;
g_rgbTexture[id.xy] = float4(saturate(rgb), 1.0f);
}
)";

    ComPtr<ID3DBlob> diagnostics;
    const HRESULT result = D3DCompile(
        hlsl,
        strlen(hlsl),
        nullptr,
        nullptr,
        nullptr,
        "main",
        "cs_5_0",
        D3DCOMPILE_OPTIMIZATION_LEVEL3,
        0,
        &m_computeShaderBlob,
        &diagnostics);

    if (SUCCEEDED(result)) {
        return result;
    }

    if (diagnostics) {
        OutputDebugStringA((char*)diagnostics->GetBufferPointer());
    }
    return result;
}
// Builds the compute root signature:
//   parameter 0 - descriptor table with three SRVs (t0-t2: Y, U, V planes)
//   parameter 1 - descriptor table with one UAV (u0: RGB output)
// Serializer errors are sent to the debugger output. Returns the HRESULT of
// the serialization or of CreateRootSignature.
HRESULT YUV420PUploadBackend::CreateComputeRootSignature() {
    // Root signature: 3 SRVs (Y, U, V) + 1 UAV (RGB output)
    CD3DX12_DESCRIPTOR_RANGE ranges[2];
    ranges[0].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 3, 0); // t0-t2
    ranges[1].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0); // u0

    CD3DX12_ROOT_PARAMETER rootParams[2];
    rootParams[0].InitAsDescriptorTable(1, &ranges[0]);
    rootParams[1].InitAsDescriptorTable(1, &ranges[1]);

    D3D12_ROOT_SIGNATURE_DESC rootSigDesc = {};
    rootSigDesc.NumParameters = 2;
    rootSigDesc.pParameters = rootParams;
    rootSigDesc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE;

    ComPtr<ID3DBlob> signatureBlob, errorBlob;
    HRESULT hr = D3D12SerializeRootSignature(
        &rootSigDesc,
        D3D_ROOT_SIGNATURE_VERSION_1,
        &signatureBlob,
        &errorBlob);
    if (FAILED(hr)) {
        // Surface the serializer's message instead of discarding it, matching
        // how shader compile errors are reported elsewhere in this file.
        if (errorBlob) {
            OutputDebugStringA(static_cast<const char*>(errorBlob->GetBufferPointer()));
        }
        return hr;
    }

    hr = m_device->CreateRootSignature(
        0,
        signatureBlob->GetBufferPointer(),
        signatureBlob->GetBufferSize(),
        IID_PPV_ARGS(&m_computeRootSignature));
    return hr;
}
// Creates the compute pipeline state from the compiled shader bytecode and the
// compute root signature; both must already exist.
HRESULT YUV420PUploadBackend::CreateComputePipelineState() {
    D3D12_SHADER_BYTECODE bytecode = {};
    bytecode.pShaderBytecode = m_computeShaderBlob->GetBufferPointer();
    bytecode.BytecodeLength = m_computeShaderBlob->GetBufferSize();

    D3D12_COMPUTE_PIPELINE_STATE_DESC description = {};
    description.pRootSignature = m_computeRootSignature.Get();
    description.CS = bytecode;

    return m_device->CreateComputePipelineState(&description, IID_PPV_ARGS(&m_computePipelineState));
}
// Creates the shader-visible heap used by the compute pass and fills it:
// slots 0-2 hold R8 SRVs of the Y, U and V textures, slot 3 holds the UAV of
// the RGB texture. Returns the failure code of the heap creation, otherwise S_OK.
HRESULT YUV420PUploadBackend::CreateSrvUavHeap() {
    D3D12_DESCRIPTOR_HEAP_DESC heapDescription = {};
    heapDescription.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
    heapDescription.NumDescriptors = 4; // 3 SRVs + 1 UAV
    heapDescription.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;

    const HRESULT heapResult =
        m_device->CreateDescriptorHeap(&heapDescription, IID_PPV_ARGS(&m_srvUavHeap));
    if (FAILED(heapResult)) {
        return heapResult;
    }

    const UINT stride =
        m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
    CD3DX12_CPU_DESCRIPTOR_HANDLE cursor(m_srvUavHeap->GetCPUDescriptorHandleForHeapStart());

    // All three planes share the same single-channel view description.
    D3D12_SHADER_RESOURCE_VIEW_DESC planeView = {};
    planeView.Format = DXGI_FORMAT_R8_UNORM;
    planeView.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
    planeView.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
    planeView.Texture2D.MipLevels = 1;

    ID3D12Resource* const planes[] = { m_yTexture.Get(), m_uTexture.Get(), m_vTexture.Get() };
    for (ID3D12Resource* plane : planes) {
        m_device->CreateShaderResourceView(plane, &planeView, cursor);
        cursor.Offset(stride);
    }

    // The cursor now points at the fourth slot, which receives the RGB output UAV.
    D3D12_UNORDERED_ACCESS_VIEW_DESC outputView = {};
    outputView.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
    outputView.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D;
    m_device->CreateUnorderedAccessView(m_rgbTexture.Get(), nullptr, &outputView, cursor);

    return S_OK;
}
/**
 * Creates everything the graphics (RGB -> back buffer) pass needs: shaders,
 * root signature, pipeline state, SRV heap and the constant buffer.
 *
 * The setup steps run in a fixed order and the first failure is returned
 * immediately without running the remaining steps.
 *
 * @return S_OK on success, otherwise the first failing HRESULT.
 */
HRESULT YUV420PUploadBackend::CreateGraphicsResources() {
    using SetupStep = HRESULT (YUV420PUploadBackend::*)();
    const SetupStep setupSteps[] = {
        &YUV420PUploadBackend::CompileGraphicsShaders,
        &YUV420PUploadBackend::CreateGraphicsRootSignature,
        &YUV420PUploadBackend::CreateGraphicsPipelineState,
        &YUV420PUploadBackend::CreateGraphicsSrvHeap,
    };
    for (SetupStep step : setupSteps) {
        const HRESULT stepResult = (this->*step)();
        if (FAILED(stepResult)) {
            return stepResult;
        }
    }

    // Constant buffer: a 256-byte buffer on the upload heap.
    const D3D12_HEAP_PROPERTIES heapProps = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD);
    const D3D12_RESOURCE_DESC bufferDesc = CD3DX12_RESOURCE_DESC::Buffer(256);
    return m_device->CreateCommittedResource(
        &heapProps,
        D3D12_HEAP_FLAG_NONE,
        &bufferDesc,
        D3D12_RESOURCE_STATE_GENERIC_READ,
        nullptr,
        IID_PPV_ARGS(&m_constantBuffer));
}
/**
 * Compiles the vertex and pixel shaders of the graphics pass into
 * m_vertexShaderBlob and m_pixelShaderBlob.
 *
 * - Vertex shader: generates a fullscreen triangle from SV_VertexID
 *   (no vertex buffer); texcoord spans [0,1] across the viewport.
 * - Pixel shader: applies the scale/offset from the AspectFitConstants
 *   cbuffer (b0) to the texcoord and samples t0 with s0. Pixels whose
 *   resulting UV lies outside [0,1] are written as opaque black so an
 *   out-of-range mapping produces bars rather than clamped edge texels.
 *
 * Compiler diagnostics are forwarded to the debugger output on failure.
 *
 * @return S_OK on success, otherwise the failing D3DCompile HRESULT.
 */
HRESULT YUV420PUploadBackend::CompileGraphicsShaders() {
    // Vertex shader (fullscreen triangle)
    const char* vsSource = R"(
struct VSOutput {
float4 position : SV_POSITION;
float2 texcoord : TEXCOORD0;
};
VSOutput main(uint id : SV_VertexID)
{
VSOutput output;
output.texcoord = float2((id << 1) & 2, id & 2);
output.position = float4(output.texcoord * float2(2, -2) + float2(-1, 1), 0, 1);
return output;
}
)";
    // Pixel shader (AspectFit)
    const char* psSource = R"(
Texture2D g_texture : register(t0);
SamplerState g_sampler : register(s0);
cbuffer AspectFitConstants : register(b0)
{
float videoAspectRatio;
float containerAspectRatio;
float uvScaleX;
float uvScaleY;
float uvOffsetX;
float uvOffsetY;
};
float4 main(float4 position : SV_POSITION, float2 texcoord : TEXCOORD0) : SV_TARGET
{
float2 uv = texcoord * float2(uvScaleX, uvScaleY) + float2(uvOffsetX, uvOffsetY);
float4 color = g_texture.Sample(g_sampler, uv);
bool insideVideo = all(uv >= 0.0f) && all(uv <= 1.0f);
return insideVideo ? color : float4(0.0f, 0.0f, 0.0f, 1.0f);
}
)";

    // Compiles one "main" entry point and reports compiler output on failure.
    auto compileStage = [](const char* source, const char* target, ID3DBlob** bytecode) -> HRESULT {
        ComPtr<ID3DBlob> errorBlob;
        const HRESULT result = D3DCompile(
            source,
            strlen(source),
            nullptr, nullptr, nullptr,
            "main", target,
            D3DCOMPILE_OPTIMIZATION_LEVEL3, 0,
            bytecode, &errorBlob);
        if (FAILED(result) && errorBlob) {
            OutputDebugStringA(static_cast<const char*>(errorBlob->GetBufferPointer()));
        }
        return result;
    };

    HRESULT hr = compileStage(vsSource, "vs_5_0", &m_vertexShaderBlob);
    if (FAILED(hr)) {
        return hr;
    }
    return compileStage(psSource, "ps_5_0", &m_pixelShaderBlob);
}
/**
 * Builds the root signature used by the graphics (RGB -> back buffer) pass.
 *
 * Layout:
 *   - root parameter 0: descriptor table with 1 SRV (t0)
 *   - root parameter 1: root constant buffer view (b0)
 *   - static sampler s0: linear filtering, clamp addressing, pixel-shader only
 *
 * @return S_OK on success, otherwise the HRESULT reported by
 *         D3D12SerializeRootSignature or ID3D12Device::CreateRootSignature.
 */
HRESULT YUV420PUploadBackend::CreateGraphicsRootSignature() {
    CD3DX12_DESCRIPTOR_RANGE srvRange;
    srvRange.Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 0);

    CD3DX12_ROOT_PARAMETER rootParams[2];
    rootParams[0].InitAsDescriptorTable(1, &srvRange);
    rootParams[1].InitAsConstantBufferView(0);

    D3D12_STATIC_SAMPLER_DESC samplerDesc = {};
    samplerDesc.Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR;
    samplerDesc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
    samplerDesc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
    samplerDesc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
    samplerDesc.ShaderRegister = 0;
    samplerDesc.RegisterSpace = 0;
    samplerDesc.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;

    D3D12_ROOT_SIGNATURE_DESC rootSigDesc = {};
    rootSigDesc.NumParameters = 2;
    rootSigDesc.pParameters = rootParams;
    rootSigDesc.NumStaticSamplers = 1;
    rootSigDesc.pStaticSamplers = &samplerDesc;
    rootSigDesc.Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT;

    ComPtr<ID3DBlob> signatureBlob, errorBlob;
    HRESULT hr = D3D12SerializeRootSignature(
        &rootSigDesc,
        D3D_ROOT_SIGNATURE_VERSION_1,
        &signatureBlob,
        &errorBlob);
    if (FAILED(hr)) {
        // Report the serializer's diagnostic text rather than discarding it.
        if (errorBlob) {
            OutputDebugStringA(static_cast<const char*>(errorBlob->GetBufferPointer()));
        }
        return hr;
    }

    return m_device->CreateRootSignature(
        0,
        signatureBlob->GetBufferPointer(),
        signatureBlob->GetBufferSize(),
        IID_PPV_ARGS(&m_graphicsRootSignature));
}
/**
 * Creates the graphics pipeline state object for drawing the RGB texture.
 *
 * Uses the compiled vertex/pixel shader blobs and the graphics root
 * signature, default blend and rasterizer state, no depth/stencil testing,
 * and a single B8G8R8A8_UNORM render target without multisampling.
 *
 * @return The HRESULT of ID3D12Device::CreateGraphicsPipelineState.
 */
HRESULT YUV420PUploadBackend::CreateGraphicsPipelineState() {
    D3D12_GRAPHICS_PIPELINE_STATE_DESC desc = {};

    // Binding layout and shader stages.
    desc.pRootSignature = m_graphicsRootSignature.Get();
    desc.VS = { m_vertexShaderBlob->GetBufferPointer(), m_vertexShaderBlob->GetBufferSize() };
    desc.PS = { m_pixelShaderBlob->GetBufferPointer(), m_pixelShaderBlob->GetBufferSize() };

    // Fixed-function state.
    desc.RasterizerState = CD3DX12_RASTERIZER_DESC(D3D12_DEFAULT);
    desc.BlendState = CD3DX12_BLEND_DESC(D3D12_DEFAULT);
    desc.DepthStencilState.DepthEnable = FALSE;
    desc.DepthStencilState.StencilEnable = FALSE;
    desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;

    // Output target description.
    desc.SampleMask = UINT_MAX;
    desc.SampleDesc.Count = 1;
    desc.NumRenderTargets = 1;
    desc.RTVFormats[0] = DXGI_FORMAT_B8G8R8A8_UNORM;

    const HRESULT created = m_device->CreateGraphicsPipelineState(
        &desc,
        IID_PPV_ARGS(&m_graphicsPipelineState));
    return created;
}
/**
 * Creates the one-entry shader-visible descriptor heap used by the graphics
 * pass and writes an R8G8B8A8_UNORM SRV of the RGB texture into it.
 *
 * @return S_OK on success, or the failure code of CreateDescriptorHeap.
 */
HRESULT YUV420PUploadBackend::CreateGraphicsSrvHeap() {
    D3D12_DESCRIPTOR_HEAP_DESC desc = {};
    desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
    desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
    desc.NumDescriptors = 1;

    const HRESULT heapResult = m_device->CreateDescriptorHeap(&desc, IID_PPV_ARGS(&m_graphicsSrvHeap));
    if (FAILED(heapResult)) {
        return heapResult;
    }

    D3D12_SHADER_RESOURCE_VIEW_DESC rgbView = {};
    rgbView.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
    rgbView.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
    rgbView.Texture2D.MipLevels = 1;
    rgbView.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;

    const D3D12_CPU_DESCRIPTOR_HANDLE firstSlot = m_graphicsSrvHeap->GetCPUDescriptorHandleForHeapStart();
    m_device->CreateShaderResourceView(m_rgbTexture.Get(), &rgbView, firstSlot);
    return S_OK;
}
/**
 * Copies one decoded YUV420P frame into the current ring-buffer slot.
 *
 * On the first frame (no Y texture yet) the video resources are created via
 * CreateVideoTexture using the frame's dimensions. The Y plane is then copied
 * row by row (frame.width bytes per row), followed by the U and V planes
 * ((width+1)/2 bytes per row, (height+1)/2 rows each), honouring the source
 * strides and the destination row pitches.
 *
 * @param frame Decoded frame; y/u/v plane pointers and strides must be valid.
 * @return S_OK on success;
 *         E_NOT_VALID_STATE if the backend is not initialized or the current
 *         ring-buffer slot is missing/unmapped;
 *         E_INVALIDARG if the frame is empty, has a null plane, does not match
 *         the allocated video size, or a row would not fit the slot's pitch;
 *         otherwise the failure code of CreateVideoTexture.
 */
HRESULT YUV420PUploadBackend::UpdateFrame(const VavCoreVideoFrame& frame) {
    if (!m_initialized) {
        return E_NOT_VALID_STATE;
    }

    // Reject frames that carry nothing to copy or that would be dereferenced as null.
    if (!(frame.width > 0 && frame.height > 0) ||
        !frame.y_plane || !frame.u_plane || !frame.v_plane) {
        return E_INVALIDARG;
    }

    // Create textures on first frame
    if (!m_yTexture) {
        HRESULT hr = CreateVideoTexture(frame.width, frame.height);
        if (FAILED(hr)) {
            return hr;
        }
    }

    const uint32_t lumaWidth = static_cast<uint32_t>(frame.width);
    const uint32_t lumaHeight = static_cast<uint32_t>(frame.height);
    const uint32_t uvWidth = (lumaWidth + 1) / 2;
    const uint32_t uvHeight = (lumaHeight + 1) / 2;

    // The upload buffers were sized for one resolution; a frame of a different
    // size would write past them. Presumably CreateVideoTexture records that
    // size in m_videoWidth/m_videoHeight (they drive the GPU copy footprints);
    // the check is skipped while they are still zero.
    if (m_videoWidth != 0 && m_videoHeight != 0 &&
        (lumaWidth != m_videoWidth || lumaHeight != m_videoHeight)) {
        return E_INVALIDARG;
    }

    if (m_currentBufferIndex >= m_ringBuffers.size()) {
        return E_NOT_VALID_STATE;
    }
    auto& slot = m_ringBuffers[m_currentBufferIndex];
    if (!slot.yMappedData || !slot.uMappedData || !slot.vMappedData) {
        return E_NOT_VALID_STATE;
    }

    // A row wider than the destination pitch would spill into the next row.
    if (lumaWidth > slot.yRowPitch || uvWidth > slot.uRowPitch || uvWidth > slot.vRowPitch) {
        return E_INVALIDARG;
    }

    // Copy Y plane (offsets computed in size_t to avoid 32-bit wrap-around)
    for (uint32_t row = 0; row < lumaHeight; ++row) {
        memcpy(
            slot.yMappedData + static_cast<size_t>(row) * slot.yRowPitch,
            frame.y_plane + static_cast<size_t>(row) * frame.y_stride,
            lumaWidth);
    }

    // Copy U plane
    for (uint32_t row = 0; row < uvHeight; ++row) {
        memcpy(
            slot.uMappedData + static_cast<size_t>(row) * slot.uRowPitch,
            frame.u_plane + static_cast<size_t>(row) * frame.u_stride,
            uvWidth);
    }

    // Copy V plane
    for (uint32_t row = 0; row < uvHeight; ++row) {
        memcpy(
            slot.vMappedData + static_cast<size_t>(row) * slot.vRowPitch,
            frame.v_plane + static_cast<size_t>(row) * frame.v_stride,
            uvWidth);
    }

    return S_OK;
}
/**
 * Records the full per-frame GPU work onto the given command list:
 *   1. copy the current upload-buffer slot into the Y/U/V textures,
 *   2. run the YUV->RGB compute conversion,
 *   3. draw the RGB texture to the back buffer.
 * After all three stages succeed, the ring-buffer index advances by one
 * (modulo BufferCount). The frame argument is not read here; the pixel data
 * comes from the slot filled earlier.
 *
 * @return S_OK on success, E_NOT_VALID_STATE if the backend is not
 *         initialized or has no Y texture yet, otherwise the HRESULT of the
 *         first failing stage.
 */
HRESULT YUV420PUploadBackend::RenderToBackBuffer(
    const VavCoreVideoFrame& frame,
    ID3D12Resource* backBuffer,
    ID3D12GraphicsCommandList* commandList)
{
    if (!m_initialized) {
        return E_NOT_VALID_STATE;
    }
    if (!m_yTexture) {
        return E_NOT_VALID_STATE;
    }

    // Each stage only runs if every previous stage succeeded.
    HRESULT status = CopyUploadToGPU(commandList, m_currentBufferIndex);
    if (!FAILED(status)) {
        status = ExecuteYUVToRGBConversion(commandList);
    }
    if (!FAILED(status)) {
        status = RenderRGBToBackBuffer(commandList, backBuffer);
    }
    if (FAILED(status)) {
        return status;
    }

    // Move on to the next slot for the following frame.
    const uint32_t nextIndex = m_currentBufferIndex + 1;
    m_currentBufferIndex = nextIndex % BufferCount;
    return S_OK;
}
/**
 * Records copies of the Y, U and V upload buffers of one ring-buffer slot
 * into the corresponding GPU textures, then transitions the three textures
 * from COPY_DEST to NON_PIXEL_SHADER_RESOURCE for the compute pass.
 *
 * Each plane is described as a placed R8_UNORM footprint that uses that
 * plane's own row pitch (Y: full size, U/V: (w+1)/2 x (h+1)/2).
 *
 * @param commandList Command list to record into; must not be null.
 * @param bufferIndex Ring-buffer slot to read from.
 * @return S_OK on success, E_INVALIDARG for a null command list or an
 *         out-of-range slot index.
 */
HRESULT YUV420PUploadBackend::CopyUploadToGPU(ID3D12GraphicsCommandList* commandList, uint32_t bufferIndex) {
    if (!commandList || bufferIndex >= m_ringBuffers.size()) {
        return E_INVALIDARG;
    }
    auto& slot = m_ringBuffers[bufferIndex];

    // Records one upload-buffer -> texture copy for a single 8-bit plane.
    auto recordPlaneCopy = [commandList](
        ID3D12Resource* uploadBuffer,
        ID3D12Resource* texture,
        uint32_t width,
        uint32_t height,
        uint32_t rowPitch) {
        D3D12_TEXTURE_COPY_LOCATION src = {};
        src.pResource = uploadBuffer;
        src.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
        src.PlacedFootprint.Footprint.Format = DXGI_FORMAT_R8_UNORM;
        src.PlacedFootprint.Footprint.Width = width;
        src.PlacedFootprint.Footprint.Height = height;
        src.PlacedFootprint.Footprint.Depth = 1;
        src.PlacedFootprint.Footprint.RowPitch = rowPitch;

        D3D12_TEXTURE_COPY_LOCATION dst = {};
        dst.pResource = texture;
        dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
        dst.SubresourceIndex = 0;

        commandList->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr);
    };

    const uint32_t uvWidth = (m_videoWidth + 1) / 2;
    const uint32_t uvHeight = (m_videoHeight + 1) / 2;

    recordPlaneCopy(slot.yUploadBuffer.Get(), m_yTexture.Get(), m_videoWidth, m_videoHeight, slot.yRowPitch);
    recordPlaneCopy(slot.uUploadBuffer.Get(), m_uTexture.Get(), uvWidth, uvHeight, slot.uRowPitch);
    // The V plane is written with vRowPitch on the CPU side, so it must be
    // read back with vRowPitch as well (not the U plane's pitch).
    recordPlaneCopy(slot.vUploadBuffer.Get(), m_vTexture.Get(), uvWidth, uvHeight, slot.vRowPitch);

    // Transition to SRV
    ID3D12Resource* const planes[3] = { m_yTexture.Get(), m_uTexture.Get(), m_vTexture.Get() };
    D3D12_RESOURCE_BARRIER barriers[3];
    for (int i = 0; i < 3; ++i) {
        barriers[i] = CD3DX12_RESOURCE_BARRIER::CreateTransition(
            planes[i],
            D3D12_RESOURCE_STATE_COPY_DEST,
            D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE);
    }
    commandList->ResourceBarrier(3, barriers);

    return S_OK;
}
/**
 * Records the YUV->RGB compute dispatch and the state transitions that
 * follow it.
 *
 * Binds the compute root signature, pipeline state and the SRV/UAV heap
 * (table 0 = heap start, the three plane SRVs; table 1 = descriptor 3, the
 * RGB UAV), dispatches ceil(width/8) x ceil(height/8) groups, and then in a
 * single batched barrier call:
 *   - RGB texture: UNORDERED_ACCESS -> PIXEL_SHADER_RESOURCE (for the draw)
 *   - Y/U/V textures: NON_PIXEL_SHADER_RESOURCE -> COPY_DEST (for next frame)
 *
 * @param commandList Command list to record into.
 * @return S_OK.
 */
HRESULT YUV420PUploadBackend::ExecuteYUVToRGBConversion(ID3D12GraphicsCommandList* commandList) {
    commandList->SetComputeRootSignature(m_computeRootSignature.Get());
    commandList->SetPipelineState(m_computePipelineState.Get());

    ID3D12DescriptorHeap* heaps[] = { m_srvUavHeap.Get() };
    commandList->SetDescriptorHeaps(1, heaps);

    commandList->SetComputeRootDescriptorTable(0, m_srvUavHeap->GetGPUDescriptorHandleForHeapStart());
    UINT descriptorSize = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
    CD3DX12_GPU_DESCRIPTOR_HANDLE uavHandle(m_srvUavHeap->GetGPUDescriptorHandleForHeapStart(), 3, descriptorSize);
    commandList->SetComputeRootDescriptorTable(1, uavHandle);

    // Round up so partially covered 8x8 tiles are still processed.
    // NOTE(review): assumes the compute shader uses 8x8 thread groups - the
    // shader source is outside this function; confirm against it.
    uint32_t dispatchX = (m_videoWidth + 7) / 8;
    uint32_t dispatchY = (m_videoHeight + 7) / 8;
    commandList->Dispatch(dispatchX, dispatchY, 1);

    // All four transitions touch different resources, so they are submitted
    // together in one ResourceBarrier call instead of two.
    D3D12_RESOURCE_BARRIER barriers[4];
    // RGB texture to SRV for the graphics pipeline
    barriers[0] = CD3DX12_RESOURCE_BARRIER::CreateTransition(
        m_rgbTexture.Get(),
        D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
        D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
    // YUV planes back to COPY_DEST for the next frame
    barriers[1] = CD3DX12_RESOURCE_BARRIER::CreateTransition(
        m_yTexture.Get(),
        D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE,
        D3D12_RESOURCE_STATE_COPY_DEST);
    barriers[2] = CD3DX12_RESOURCE_BARRIER::CreateTransition(
        m_uTexture.Get(),
        D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE,
        D3D12_RESOURCE_STATE_COPY_DEST);
    barriers[3] = CD3DX12_RESOURCE_BARRIER::CreateTransition(
        m_vTexture.Get(),
        D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE,
        D3D12_RESOURCE_STATE_COPY_DEST);
    commandList->ResourceBarrier(4, barriers);

    return S_OK;
}
/**
 * Records the draw that presents the converted RGB texture on the back
 * buffer.
 *
 * Steps: refresh the AspectFit constant buffer, transition the back buffer
 * PRESENT -> RENDER_TARGET, bind the graphics root signature / pipeline /
 * SRV heap / constant buffer, set a viewport and scissor covering the whole
 * back buffer, draw a 3-vertex fullscreen triangle, then transition the RGB
 * texture back to UNORDERED_ACCESS and the back buffer back to PRESENT.
 *
 * NOTE(review): this function does not bind a render target; the
 * OMSetRenderTargets call below is commented out because no RTV handle is
 * available here. Unless the caller binds the back buffer's RTV on this
 * command list beforehand, the draw has no target - confirm and wire up an
 * RTV.
 *
 * @param commandList Command list to record into; must not be null.
 * @param backBuffer  Swap-chain buffer to draw to; must not be null.
 * @return S_OK on success, E_INVALIDARG for a null argument, or the failure
 *         code of UpdateConstantBuffer (nothing is recorded in that case).
 */
HRESULT YUV420PUploadBackend::RenderRGBToBackBuffer(ID3D12GraphicsCommandList* commandList, ID3D12Resource* backBuffer) {
    if (!commandList || !backBuffer) {
        return E_INVALIDARG;
    }

    // Update constant buffer for AspectFit. A failure here includes the case
    // where m_constantBuffer is null, which would otherwise be dereferenced
    // below when its GPU address is bound.
    HRESULT hr = UpdateConstantBuffer();
    if (FAILED(hr)) {
        return hr;
    }

    // Get back buffer description
    D3D12_RESOURCE_DESC backBufferDesc = backBuffer->GetDesc();

    // Transition back buffer to render target
    D3D12_RESOURCE_BARRIER barrier = CD3DX12_RESOURCE_BARRIER::CreateTransition(
        backBuffer,
        D3D12_RESOURCE_STATE_PRESENT,
        D3D12_RESOURCE_STATE_RENDER_TARGET);
    commandList->ResourceBarrier(1, &barrier);

    // Set render target (need RTV - this is simplified, real implementation needs RTV heap)
    // commandList->OMSetRenderTargets(1, &rtvHandle, FALSE, nullptr);

    // Set graphics pipeline
    commandList->SetGraphicsRootSignature(m_graphicsRootSignature.Get());
    commandList->SetPipelineState(m_graphicsPipelineState.Get());
    ID3D12DescriptorHeap* heaps[] = { m_graphicsSrvHeap.Get() };
    commandList->SetDescriptorHeaps(1, heaps);
    commandList->SetGraphicsRootDescriptorTable(0, m_graphicsSrvHeap->GetGPUDescriptorHandleForHeapStart());
    commandList->SetGraphicsRootConstantBufferView(1, m_constantBuffer->GetGPUVirtualAddress());

    // Viewport and scissor span the entire back buffer.
    D3D12_VIEWPORT viewport = {};
    viewport.Width = static_cast<float>(backBufferDesc.Width);
    viewport.Height = static_cast<float>(backBufferDesc.Height);
    viewport.MaxDepth = 1.0f;
    D3D12_RECT scissor = {};
    scissor.right = static_cast<LONG>(backBufferDesc.Width);
    scissor.bottom = static_cast<LONG>(backBufferDesc.Height);
    commandList->RSSetViewports(1, &viewport);
    commandList->RSSetScissorRects(1, &scissor);

    // Fullscreen triangle: three vertices generated in the vertex shader.
    commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
    commandList->DrawInstanced(3, 1, 0, 0);

    // Transition RGB back to UAV and the back buffer back to PRESENT
    D3D12_RESOURCE_BARRIER barriers[2];
    barriers[0] = CD3DX12_RESOURCE_BARRIER::CreateTransition(
        m_rgbTexture.Get(),
        D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE,
        D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
    barriers[1] = CD3DX12_RESOURCE_BARRIER::CreateTransition(
        backBuffer,
        D3D12_RESOURCE_STATE_RENDER_TARGET,
        D3D12_RESOURCE_STATE_PRESENT);
    commandList->ResourceBarrier(2, barriers);

    return S_OK;
}
/**
 * Recomputes the AspectFit constants and writes them into m_constantBuffer.
 *
 * The pixel shader maps a viewport texcoord t in [0,1] to a texture
 * coordinate with uv = t * uvScale + uvOffset. To show the whole video
 * centred inside the container without distortion, the axis along which the
 * video does not fill the container must be scaled by a factor greater than
 * 1, so that the [0,1] texture range covers only the central part of the
 * viewport:
 *   - video wider than container:  uvScaleY = video / container
 *   - video taller than container: uvScaleX = container / video
 * with uvOffset = (1 - uvScale) / 2 to centre it. Example: 16:9 video in a
 * 1:1 container gives uvScaleY = 1.778, uvOffsetY = -0.389, so the video
 * occupies the middle 56.25% of the height. UVs outside [0,1] (the bar
 * areas) are resolved by the sampler's address mode.
 *
 * If any of the video/container dimensions is zero, an identity mapping
 * (scale 1, offset 0) is written instead of dividing by zero; the aspect
 * ratio fields are left at 0 in that case.
 *
 * @return S_OK on success, E_NOT_VALID_STATE if the constant buffer does not
 *         exist, or the failure code of ID3D12Resource::Map.
 */
HRESULT YUV420PUploadBackend::UpdateConstantBuffer() {
    if (!m_constantBuffer) {
        return E_NOT_VALID_STATE;
    }

    AspectFitConstants constants = {};
    constants.uvScaleX = 1.0f;
    constants.uvScaleY = 1.0f;
    constants.uvOffsetX = 0.0f;
    constants.uvOffsetY = 0.0f;

    const bool sizesKnown =
        m_videoWidth != 0 && m_videoHeight != 0 && m_width != 0 && m_height != 0;
    if (sizesKnown) {
        constants.videoAspectRatio = static_cast<float>(m_videoWidth) / static_cast<float>(m_videoHeight);
        constants.containerAspectRatio = static_cast<float>(m_width) / static_cast<float>(m_height);

        // Calculate AspectFit scaling
        if (constants.videoAspectRatio > constants.containerAspectRatio) {
            // Video is wider - fit to width, bars above and below
            constants.uvScaleY = constants.videoAspectRatio / constants.containerAspectRatio;
            constants.uvOffsetY = (1.0f - constants.uvScaleY) * 0.5f;
        } else {
            // Video is taller (or equal) - fit to height, bars left and right
            constants.uvScaleX = constants.containerAspectRatio / constants.videoAspectRatio;
            constants.uvOffsetX = (1.0f - constants.uvScaleX) * 0.5f;
        }
    }

    void* mappedData = nullptr;
    HRESULT hr = m_constantBuffer->Map(0, nullptr, &mappedData);
    if (FAILED(hr)) {
        return hr;
    }
    memcpy(mappedData, &constants, sizeof(AspectFitConstants));
    m_constantBuffer->Unmap(0, nullptr);

    return S_OK;
}
// Legacy public API implementation
/**
 * Returns the CPU pointer stored for the Y upload buffer of a ring slot.
 *
 * @param bufferIndex Index of the ring-buffer slot.
 * @return The slot's yMappedData, or nullptr if the index is out of range.
 */
uint8_t* YUV420PUploadBackend::GetYMappedBuffer(uint32_t bufferIndex) const {
    const bool inRange = bufferIndex < m_ringBuffers.size();
    return inRange ? m_ringBuffers[bufferIndex].yMappedData : nullptr;
}
/**
 * Returns the CPU pointer stored for the U upload buffer of a ring slot.
 *
 * @param bufferIndex Index of the ring-buffer slot.
 * @return The slot's uMappedData, or nullptr if the index is out of range.
 */
uint8_t* YUV420PUploadBackend::GetUMappedBuffer(uint32_t bufferIndex) const {
    const bool inRange = bufferIndex < m_ringBuffers.size();
    return inRange ? m_ringBuffers[bufferIndex].uMappedData : nullptr;
}
/**
 * Returns the CPU pointer stored for the V upload buffer of a ring slot.
 *
 * @param bufferIndex Index of the ring-buffer slot.
 * @return The slot's vMappedData, or nullptr if the index is out of range.
 */
uint8_t* YUV420PUploadBackend::GetVMappedBuffer(uint32_t bufferIndex) const {
    const bool inRange = bufferIndex < m_ringBuffers.size();
    return inRange ? m_ringBuffers[bufferIndex].vMappedData : nullptr;
}
} // namespace Vav2Player

View File

@@ -0,0 +1,146 @@
#pragma once
#include "IVideoBackend.h"
#include <d3dcompiler.h>
#include <DirectXMath.h>
#include <vector>
namespace Vav2Player {
/**
 * YUV420PUploadBackend - YUV420P format rendering using CPU upload buffers
 *
 * Format: 2 (Planar YUV 4:2:0)
 * Method: CPU upload buffers + GPU compute shader
 *
 * Pipeline:
 * 1. dav1d decodes AV1 → YUV420P (CPU memory)
 * 2. CPU writes to persistent mapped upload buffers (ring buffer system)
 * 3. GPU copies upload buffers → GPU textures (CopyTextureRegion)
 * 4. GPU YUV→RGB compute shader converts to RGB
 * 5. Graphics pipeline renders RGB to back buffer with AspectFit
 *
 * Key Features:
 * - Persistent CPU mapped upload buffers (zero map/unmap overhead)
 * - Ring buffer system for multi-frame buffering
 * - GPU YUV→RGB conversion (no CPU color space conversion)
 * - AspectFit rendering (maintains video aspect ratio)
 *
 * Typical per-frame call order: UpdateFrame() copies the decoded planes into
 * the current ring-buffer slot, then RenderToBackBuffer() records the GPU
 * copy, conversion and draw for that slot and advances to the next slot.
 *
 * Source: Renamed from D3D12VideoRenderer (2581 lines, proven code)
 */
class YUV420PUploadBackend : public IVideoBackend {
public:
YUV420PUploadBackend();
~YUV420PUploadBackend() override;
// IVideoBackend interface
// width/height passed to Initialize are presumably the container (output)
// size stored in m_width/m_height - TODO confirm in the implementation.
HRESULT Initialize(
ID3D12Device* device,
ID3D12CommandQueue* commandQueue,
uint32_t width,
uint32_t height) override;
void Shutdown() override;
bool IsInitialized() const override { return m_initialized; }
// Called by UpdateFrame with the first frame's dimensions when no Y texture
// exists yet.
HRESULT CreateVideoTexture(uint32_t width, uint32_t height) override;
ID3D12Resource* GetVideoTexture() const override { return nullptr; } // No single texture
// Records copy -> YUV→RGB compute -> draw for the current slot, then advances
// the ring-buffer index. Returns E_NOT_VALID_STATE before initialization or
// before the first frame created the textures.
HRESULT RenderToBackBuffer(
const VavCoreVideoFrame& frame,
ID3D12Resource* backBuffer,
ID3D12GraphicsCommandList* commandList) override;
// Copies the frame's Y, U and V planes into the current ring-buffer slot.
HRESULT UpdateFrame(const VavCoreVideoFrame& frame) override;
// Format identifier handled by this backend (2 = planar YUV 4:2:0, see above).
int GetSupportedFormatId() const override {
return 2;
}
// Legacy public API for backward compatibility
// Each getter returns the stored CPU pointer of the given slot's plane
// buffer, or nullptr when bufferIndex is out of range.
uint8_t* GetYMappedBuffer(uint32_t bufferIndex) const;
uint8_t* GetUMappedBuffer(uint32_t bufferIndex) const;
uint8_t* GetVMappedBuffer(uint32_t bufferIndex) const;
private:
// D3D12 core objects (not owned - passed from orchestrator)
ID3D12Device* m_device = nullptr;
ID3D12CommandQueue* m_commandQueue = nullptr;
// Ring buffer of CPU-writable upload buffers. UpdateFrame memcpy's each
// plane into the current slot; the GPU later copies the slot into textures.
static const UINT BufferCount = 3; // Triple buffering
struct RingBufferSlot {
ComPtr<ID3D12Resource> yUploadBuffer; // D3D12_HEAP_TYPE_UPLOAD
ComPtr<ID3D12Resource> uUploadBuffer;
ComPtr<ID3D12Resource> vUploadBuffer;
uint8_t* yMappedData = nullptr; // Persistent CPU mapping
uint8_t* uMappedData = nullptr;
uint8_t* vMappedData = nullptr;
// Bytes between consecutive rows in each upload buffer; used both for the
// CPU row copies and as the RowPitch of the GPU copy footprint.
uint32_t yRowPitch = 0;
uint32_t uRowPitch = 0;
uint32_t vRowPitch = 0;
};
std::vector<RingBufferSlot> m_ringBuffers;
// Slot written by UpdateFrame and consumed by RenderToBackBuffer; advanced
// modulo BufferCount after each successful render.
uint32_t m_currentBufferIndex = 0;
// GPU textures (D3D12_HEAP_TYPE_DEFAULT)
// Y/U/V are viewed as R8_UNORM by the compute pass; the RGB texture is
// viewed as R8G8B8A8_UNORM (UAV for compute output, SRV for the draw).
ComPtr<ID3D12Resource> m_yTexture;
ComPtr<ID3D12Resource> m_uTexture;
ComPtr<ID3D12Resource> m_vTexture;
ComPtr<ID3D12Resource> m_rgbTexture; // Intermediate RGB texture
// YUV→RGB compute shader pipeline
ComPtr<ID3D12RootSignature> m_computeRootSignature;
ComPtr<ID3D12PipelineState> m_computePipelineState;
// Shader-visible heap with 4 descriptors: Y, U, V SRVs followed by the RGB UAV.
ComPtr<ID3D12DescriptorHeap> m_srvUavHeap;
ComPtr<ID3DBlob> m_computeShaderBlob;
// Graphics pipeline for rendering RGB to back buffer
ComPtr<ID3D12RootSignature> m_graphicsRootSignature;
ComPtr<ID3D12PipelineState> m_graphicsPipelineState;
// Shader-visible heap with a single SRV of m_rgbTexture.
ComPtr<ID3D12DescriptorHeap> m_graphicsSrvHeap;
ComPtr<ID3DBlob> m_vertexShaderBlob;
ComPtr<ID3DBlob> m_pixelShaderBlob;
// Constant buffer for AspectFit rendering
// The first six floats mirror, in order, the pixel shader's
// AspectFitConstants cbuffer (register b0); padding rounds the struct up to
// 32 bytes. The struct is memcpy'd into m_constantBuffer as-is.
struct AspectFitConstants {
float videoAspectRatio;
float containerAspectRatio;
float uvScaleX;
float uvScaleY;
float uvOffsetX;
float uvOffsetY;
float padding[2];
};
// 256-byte upload-heap buffer bound as the root CBV of the graphics pass.
ComPtr<ID3D12Resource> m_constantBuffer;
// State
bool m_initialized = false;
uint32_t m_width = 0; // Container width
uint32_t m_height = 0; // Container height
// Video (texture) size; drives the copy footprints, the compute dispatch
// size and the video aspect ratio.
uint32_t m_videoWidth = 0;
uint32_t m_videoHeight = 0;
// Helper methods
HRESULT CreateRingBuffers(uint32_t videoWidth, uint32_t videoHeight);
HRESULT CreateGPUTextures(uint32_t videoWidth, uint32_t videoHeight);
HRESULT CreateComputeShaderResources();
HRESULT CreateGraphicsResources();
// Per-frame recording stages, invoked in this order by RenderToBackBuffer.
HRESULT CopyUploadToGPU(ID3D12GraphicsCommandList* commandList, uint32_t bufferIndex);
HRESULT ExecuteYUVToRGBConversion(ID3D12GraphicsCommandList* commandList);
HRESULT RenderRGBToBackBuffer(ID3D12GraphicsCommandList* commandList, ID3D12Resource* backBuffer);
// Shader compilation
HRESULT CompileComputeShader();
HRESULT CompileGraphicsShaders();
// Root signature / pipeline state / descriptor heap creation
HRESULT CreateComputeRootSignature();
HRESULT CreateComputePipelineState();
HRESULT CreateGraphicsRootSignature();
HRESULT CreateGraphicsPipelineState();
HRESULT CreateSrvUavHeap();
HRESULT CreateGraphicsSrvHeap();
// Recomputes the AspectFit constants and writes them into m_constantBuffer.
HRESULT UpdateConstantBuffer();
};
} // namespace Vav2Player

View File

@@ -1,9 +1,15 @@
#pragma once
// Minimal D3D12 helper classes
// Full version available at: https://github.com/Microsoft/DirectX-Graphics-Samples
// D3D12 helper classes
// Based on Microsoft DirectX-Graphics-Samples
// https://github.com/Microsoft/DirectX-Graphics-Samples
#include <d3d12.h>
#include <cstring>
// Tag type: passing D3D12_DEFAULT to a CD3DX12_* constructor selects the
// overload that fills the descriptor with its default values.
struct CD3DX12_DEFAULT
{
};
static const CD3DX12_DEFAULT D3D12_DEFAULT{};
struct CD3DX12_RESOURCE_BARRIER : public D3D12_RESOURCE_BARRIER
{
@@ -59,4 +65,317 @@ struct CD3DX12_CPU_DESCRIPTOR_HANDLE : public D3D12_CPU_DESCRIPTOR_HANDLE
{
return ptr != other.ptr;
}
};
// CD3DX12_GPU_DESCRIPTOR_HANDLE
struct CD3DX12_GPU_DESCRIPTOR_HANDLE : public D3D12_GPU_DESCRIPTOR_HANDLE
{
CD3DX12_GPU_DESCRIPTOR_HANDLE() = default;
explicit CD3DX12_GPU_DESCRIPTOR_HANDLE(const D3D12_GPU_DESCRIPTOR_HANDLE& o) : D3D12_GPU_DESCRIPTOR_HANDLE(o) {}
CD3DX12_GPU_DESCRIPTOR_HANDLE(D3D12_GPU_DESCRIPTOR_HANDLE other, INT offsetScaledByIncrementSize)
{
ptr = other.ptr + offsetScaledByIncrementSize;
}
CD3DX12_GPU_DESCRIPTOR_HANDLE(D3D12_GPU_DESCRIPTOR_HANDLE other, INT offsetInDescriptors, UINT descriptorIncrementSize)
{
ptr = other.ptr + offsetInDescriptors * descriptorIncrementSize;
}
CD3DX12_GPU_DESCRIPTOR_HANDLE& Offset(INT offsetInDescriptors, UINT descriptorIncrementSize)
{
ptr += offsetInDescriptors * descriptorIncrementSize;
return *this;
}
CD3DX12_GPU_DESCRIPTOR_HANDLE& Offset(INT offsetScaledByIncrementSize)
{
ptr += offsetScaledByIncrementSize;
return *this;
}
bool operator==(const D3D12_GPU_DESCRIPTOR_HANDLE& other) const
{
return ptr == other.ptr;
}
bool operator!=(const D3D12_GPU_DESCRIPTOR_HANDLE& other) const
{
return ptr != other.ptr;
}
};
// CD3DX12_DESCRIPTOR_RANGE
struct CD3DX12_DESCRIPTOR_RANGE : public D3D12_DESCRIPTOR_RANGE
{
CD3DX12_DESCRIPTOR_RANGE() = default;
explicit CD3DX12_DESCRIPTOR_RANGE(const D3D12_DESCRIPTOR_RANGE& o) : D3D12_DESCRIPTOR_RANGE(o) {}
CD3DX12_DESCRIPTOR_RANGE(
D3D12_DESCRIPTOR_RANGE_TYPE rangeType,
UINT numDescriptors,
UINT baseShaderRegister,
UINT registerSpace = 0,
UINT offsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND)
{
Init(rangeType, numDescriptors, baseShaderRegister, registerSpace, offsetInDescriptorsFromTableStart);
}
inline void Init(
D3D12_DESCRIPTOR_RANGE_TYPE rangeType,
UINT numDescriptors,
UINT baseShaderRegister,
UINT registerSpace = 0,
UINT offsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND)
{
RangeType = rangeType;
NumDescriptors = numDescriptors;
BaseShaderRegister = baseShaderRegister;
RegisterSpace = registerSpace;
OffsetInDescriptorsFromTableStart = offsetInDescriptorsFromTableStart;
}
};
// CD3DX12_ROOT_PARAMETER
// Convenience wrapper over D3D12_ROOT_PARAMETER. Each InitAs* helper exists
// twice: a static form that fills any D3D12_ROOT_PARAMETER, and a member form
// that applies the static form to *this.
struct CD3DX12_ROOT_PARAMETER : public D3D12_ROOT_PARAMETER
{
    CD3DX12_ROOT_PARAMETER() = default;

    explicit CD3DX12_ROOT_PARAMETER(const D3D12_ROOT_PARAMETER& o) : D3D12_ROOT_PARAMETER(o) {}

    // --- Descriptor table --------------------------------------------------

    // Makes rootParam a descriptor table referencing the given ranges.
    // The range array is referenced by pointer, not copied.
    static inline void InitAsDescriptorTable(
        D3D12_ROOT_PARAMETER& rootParam,
        UINT numDescriptorRanges,
        const D3D12_DESCRIPTOR_RANGE* pDescriptorRanges,
        D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL)
    {
        auto& table = rootParam.DescriptorTable;
        table.pDescriptorRanges = pDescriptorRanges;
        table.NumDescriptorRanges = numDescriptorRanges;
        rootParam.ShaderVisibility = visibility;
        rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
    }

    inline void InitAsDescriptorTable(
        UINT numDescriptorRanges,
        const D3D12_DESCRIPTOR_RANGE* pDescriptorRanges,
        D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL)
    {
        InitAsDescriptorTable(*this, numDescriptorRanges, pDescriptorRanges, visibility);
    }

    // --- 32-bit root constants ---------------------------------------------

    // Makes rootParam a block of 32-bit root constants at the given register.
    static inline void InitAsConstants(
        D3D12_ROOT_PARAMETER& rootParam,
        UINT num32BitValues,
        UINT shaderRegister,
        UINT registerSpace = 0,
        D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL)
    {
        auto& constants = rootParam.Constants;
        constants.ShaderRegister = shaderRegister;
        constants.RegisterSpace = registerSpace;
        constants.Num32BitValues = num32BitValues;
        rootParam.ShaderVisibility = visibility;
        rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
    }

    inline void InitAsConstants(
        UINT num32BitValues,
        UINT shaderRegister,
        UINT registerSpace = 0,
        D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL)
    {
        InitAsConstants(*this, num32BitValues, shaderRegister, registerSpace, visibility);
    }

    // --- Root constant buffer view -----------------------------------------

    // Makes rootParam a root CBV bound to the given register.
    static inline void InitAsConstantBufferView(
        D3D12_ROOT_PARAMETER& rootParam,
        UINT shaderRegister,
        UINT registerSpace = 0,
        D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL)
    {
        auto& descriptor = rootParam.Descriptor;
        descriptor.ShaderRegister = shaderRegister;
        descriptor.RegisterSpace = registerSpace;
        rootParam.ShaderVisibility = visibility;
        rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
    }

    inline void InitAsConstantBufferView(
        UINT shaderRegister,
        UINT registerSpace = 0,
        D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL)
    {
        InitAsConstantBufferView(*this, shaderRegister, registerSpace, visibility);
    }
};
// CD3DX12_ROOT_SIGNATURE_DESC
struct CD3DX12_ROOT_SIGNATURE_DESC : public D3D12_ROOT_SIGNATURE_DESC
{
CD3DX12_ROOT_SIGNATURE_DESC() = default;
explicit CD3DX12_ROOT_SIGNATURE_DESC(const D3D12_ROOT_SIGNATURE_DESC& o) : D3D12_ROOT_SIGNATURE_DESC(o) {}
CD3DX12_ROOT_SIGNATURE_DESC(
UINT numParameters,
const D3D12_ROOT_PARAMETER* _pParameters,
UINT numStaticSamplers = 0,
const D3D12_STATIC_SAMPLER_DESC* _pStaticSamplers = nullptr,
D3D12_ROOT_SIGNATURE_FLAGS flags = D3D12_ROOT_SIGNATURE_FLAG_NONE)
{
Init(numParameters, _pParameters, numStaticSamplers, _pStaticSamplers, flags);
}
inline void Init(
UINT numParameters,
const D3D12_ROOT_PARAMETER* _pParameters,
UINT numStaticSamplers = 0,
const D3D12_STATIC_SAMPLER_DESC* _pStaticSamplers = nullptr,
D3D12_ROOT_SIGNATURE_FLAGS flags = D3D12_ROOT_SIGNATURE_FLAG_NONE)
{
NumParameters = numParameters;
pParameters = _pParameters;
NumStaticSamplers = numStaticSamplers;
pStaticSamplers = _pStaticSamplers;
Flags = flags;
}
};
// CD3DX12_BLEND_DESC
// Convenience wrapper over D3D12_BLEND_DESC with a "default state"
// constructor selected by passing D3D12_DEFAULT.
struct CD3DX12_BLEND_DESC : public D3D12_BLEND_DESC
{
    CD3DX12_BLEND_DESC() = default;

    explicit CD3DX12_BLEND_DESC(const D3D12_BLEND_DESC& o) : D3D12_BLEND_DESC(o) {}

    // Default blend state: blending and logic ops disabled on every render
    // target, source colour/alpha passed through (ONE/ZERO/ADD), all colour
    // channels writable.
    explicit CD3DX12_BLEND_DESC(CD3DX12_DEFAULT)
    {
        D3D12_RENDER_TARGET_BLEND_DESC passThrough = {};
        passThrough.BlendEnable = FALSE;
        passThrough.LogicOpEnable = FALSE;
        passThrough.SrcBlend = D3D12_BLEND_ONE;
        passThrough.DestBlend = D3D12_BLEND_ZERO;
        passThrough.BlendOp = D3D12_BLEND_OP_ADD;
        passThrough.SrcBlendAlpha = D3D12_BLEND_ONE;
        passThrough.DestBlendAlpha = D3D12_BLEND_ZERO;
        passThrough.BlendOpAlpha = D3D12_BLEND_OP_ADD;
        passThrough.LogicOp = D3D12_LOGIC_OP_NOOP;
        passThrough.RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL;

        AlphaToCoverageEnable = FALSE;
        IndependentBlendEnable = FALSE;
        // Same settings for every render-target slot.
        for (auto& target : RenderTarget)
        {
            target = passThrough;
        }
    }
};
// CD3DX12_RASTERIZER_DESC
// Convenience wrapper over D3D12_RASTERIZER_DESC with a "default state"
// constructor selected by passing D3D12_DEFAULT.
struct CD3DX12_RASTERIZER_DESC : public D3D12_RASTERIZER_DESC
{
    CD3DX12_RASTERIZER_DESC() = default;

    explicit CD3DX12_RASTERIZER_DESC(const D3D12_RASTERIZER_DESC& o) : D3D12_RASTERIZER_DESC(o) {}

    // Default rasterizer state: solid fill, back-face culling with clockwise
    // front faces, default depth bias values, depth clipping on, no
    // multisampling / line antialiasing / conservative rasterization.
    explicit CD3DX12_RASTERIZER_DESC(CD3DX12_DEFAULT)
    {
        // Fill and culling.
        this->FillMode = D3D12_FILL_MODE_SOLID;
        this->CullMode = D3D12_CULL_MODE_BACK;
        this->FrontCounterClockwise = FALSE;

        // Depth bias and clipping.
        this->DepthBias = D3D12_DEFAULT_DEPTH_BIAS;
        this->DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP;
        this->SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS;
        this->DepthClipEnable = TRUE;

        // Sampling behaviour.
        this->MultisampleEnable = FALSE;
        this->AntialiasedLineEnable = FALSE;
        this->ForcedSampleCount = 0;
        this->ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF;
    }
};
// CD3DX12_DEPTH_STENCIL_DESC
// Convenience wrapper over D3D12_DEPTH_STENCIL_DESC with a "default state"
// constructor selected by passing D3D12_DEFAULT.
struct CD3DX12_DEPTH_STENCIL_DESC : public D3D12_DEPTH_STENCIL_DESC
{
    CD3DX12_DEPTH_STENCIL_DESC() = default;

    explicit CD3DX12_DEPTH_STENCIL_DESC(const D3D12_DEPTH_STENCIL_DESC& o) : D3D12_DEPTH_STENCIL_DESC(o) {}

    // Default depth/stencil state: depth test enabled (LESS) with depth
    // writes, stencil disabled, default stencil masks, and "keep / always"
    // stencil operations on both faces.
    explicit CD3DX12_DEPTH_STENCIL_DESC(CD3DX12_DEFAULT)
    {
        D3D12_DEPTH_STENCILOP_DESC keepAlways = {};
        keepAlways.StencilFailOp = D3D12_STENCIL_OP_KEEP;
        keepAlways.StencilDepthFailOp = D3D12_STENCIL_OP_KEEP;
        keepAlways.StencilPassOp = D3D12_STENCIL_OP_KEEP;
        keepAlways.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS;

        // Depth.
        this->DepthEnable = TRUE;
        this->DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL;
        this->DepthFunc = D3D12_COMPARISON_FUNC_LESS;

        // Stencil.
        this->StencilEnable = FALSE;
        this->StencilReadMask = D3D12_DEFAULT_STENCIL_READ_MASK;
        this->StencilWriteMask = D3D12_DEFAULT_STENCIL_WRITE_MASK;
        this->FrontFace = keepAlways;
        this->BackFace = keepAlways;
    }
};
// CD3DX12_HEAP_PROPERTIES
// Convenience wrapper over D3D12_HEAP_PROPERTIES with two ways to build it:
// from explicit CPU-page/memory-pool settings (custom heap) or from a
// predefined heap type.
struct CD3DX12_HEAP_PROPERTIES : public D3D12_HEAP_PROPERTIES
{
    CD3DX12_HEAP_PROPERTIES() = default;

    explicit CD3DX12_HEAP_PROPERTIES(const D3D12_HEAP_PROPERTIES& o) : D3D12_HEAP_PROPERTIES(o) {}

    // Custom heap: Type is D3D12_HEAP_TYPE_CUSTOM and the caller supplies the
    // CPU page property and memory pool explicitly.
    CD3DX12_HEAP_PROPERTIES(
        D3D12_CPU_PAGE_PROPERTY cpuPageProperty,
        D3D12_MEMORY_POOL memoryPoolPreference,
        UINT creationNodeMask = 1,
        UINT nodeMask = 1)
    {
        this->Type = D3D12_HEAP_TYPE_CUSTOM;
        this->MemoryPoolPreference = memoryPoolPreference;
        this->CPUPageProperty = cpuPageProperty;
        this->VisibleNodeMask = nodeMask;
        this->CreationNodeMask = creationNodeMask;
    }

    // Predefined heap type: page property and memory pool are left UNKNOWN.
    explicit CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE type,
        UINT creationNodeMask = 1,
        UINT nodeMask = 1)
    {
        this->Type = type;
        this->MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
        this->CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
        this->VisibleNodeMask = nodeMask;
        this->CreationNodeMask = creationNodeMask;
    }
};
// CD3DX12_RESOURCE_DESC
// Convenience wrapper over D3D12_RESOURCE_DESC with a full constructor and
// factory helpers for buffers and 2D textures.
struct CD3DX12_RESOURCE_DESC : public D3D12_RESOURCE_DESC
{
    CD3DX12_RESOURCE_DESC() = default;

    explicit CD3DX12_RESOURCE_DESC(const D3D12_RESOURCE_DESC& o) : D3D12_RESOURCE_DESC(o) {}

    // Sets every field of the description from the matching argument.
    CD3DX12_RESOURCE_DESC(
        D3D12_RESOURCE_DIMENSION dimension,
        UINT64 alignment,
        UINT64 width,
        UINT height,
        UINT16 depthOrArraySize,
        UINT16 mipLevels,
        DXGI_FORMAT format,
        UINT sampleCount,
        UINT sampleQuality,
        D3D12_TEXTURE_LAYOUT layout,
        D3D12_RESOURCE_FLAGS flags)
    {
        // Shape.
        this->Dimension = dimension;
        this->Width = width;
        this->Height = height;
        this->DepthOrArraySize = depthOrArraySize;
        this->MipLevels = mipLevels;

        // Texel format and multisampling.
        this->Format = format;
        this->SampleDesc.Count = sampleCount;
        this->SampleDesc.Quality = sampleQuality;

        // Memory placement and usage.
        this->Alignment = alignment;
        this->Layout = layout;
        this->Flags = flags;
    }

    // Describes a buffer of `width` bytes: height/depth/mips = 1, unknown
    // format, single sample, row-major layout.
    static inline CD3DX12_RESOURCE_DESC Buffer(
        UINT64 width,
        D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE,
        UINT64 alignment = 0)
    {
        const CD3DX12_RESOURCE_DESC bufferDesc(
            D3D12_RESOURCE_DIMENSION_BUFFER,
            alignment,
            width,
            1,                              // height
            1,                              // depthOrArraySize
            1,                              // mipLevels
            DXGI_FORMAT_UNKNOWN,
            1,                              // sampleCount
            0,                              // sampleQuality
            D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
            flags);
        return bufferDesc;
    }

    // Describes a 2D texture (or texture array). Note that mipLevels
    // defaults to 0, not 1.
    static inline CD3DX12_RESOURCE_DESC Tex2D(
        DXGI_FORMAT format,
        UINT64 width,
        UINT height,
        UINT16 arraySize = 1,
        UINT16 mipLevels = 0,
        UINT sampleCount = 1,
        UINT sampleQuality = 0,
        D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE,
        D3D12_TEXTURE_LAYOUT layout = D3D12_TEXTURE_LAYOUT_UNKNOWN,
        UINT64 alignment = 0)
    {
        const CD3DX12_RESOURCE_DESC textureDesc(
            D3D12_RESOURCE_DIMENSION_TEXTURE2D,
            alignment,
            width,
            height,
            arraySize,
            mipLevels,
            format,
            sampleCount,
            sampleQuality,
            layout,
            flags);
        return textureDesc;
    }
};