Validate surface using shader code and write BMP file for each frame

This commit is contained in:
2025-10-06 03:13:02 +09:00
parent b4efc1be82
commit 1fa499013f
10 changed files with 691 additions and 216 deletions

View File

@@ -106,7 +106,9 @@
"Read(//c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v13.0/lib/x64/**)",
"Bash(./compile_kernel.bat)",
"Bash(\"./compile_validation_kernel.bat\")",
"Bash(./compile_validation_kernel.bat)"
"Bash(./compile_validation_kernel.bat)",
"Bash(\"./bin/Debug/RedSurfaceNVDECTest.exe\" \"D:/Project/video-av1/sample/test_720p_stripe.webm\")",
"Bash(\"./bin/Debug/RedSurfaceNVDECTest.exe\" \"D:/Project/video-av1/sample/test_1080p_stripe.webm\")"
],
"deny": [],
"ask": []

View File

@@ -42,7 +42,7 @@
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalLibraryDirectories>$(ProjectDir)..\..\vavcore\lib</AdditionalLibraryDirectories>
<AdditionalDependencies>VavCore-debug.lib;d3d12.lib;dxgi.lib;kernel32.lib;user32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>VavCore-debug.lib;d3d12.lib;dxgi.lib;d3dcompiler.lib;kernel32.lib;user32.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
<PostBuildEvent>
<Command>echo Copying VavCore DLL...
@@ -53,11 +53,11 @@ echo Done.</Command>
<ItemGroup>
<ClCompile Include="src\main.cpp" />
<ClCompile Include="src\D3D12Manager.cpp" />
<ClCompile Include="src\PixelVerifier.cpp" />
<ClCompile Include="src\D3D12PixelValidator.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="src\D3D12Manager.h" />
<ClInclude Include="src\PixelVerifier.h" />
<ClInclude Include="src\D3D12PixelValidator.h" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
</Project>

View File

@@ -0,0 +1,52 @@
// D3D12 compute shader for GPU-based stripe pattern validation.
// Pixel data never leaves the GPU - only the 4-byte error counter is copied back.

// Input RGBA texture (from NVDEC CUDA conversion)
Texture2D<float4> RGBATexture : register(t0);

// Output error counter: one 32-bit uint stored at byte offset 0.
// Declared as a raw buffer because the host-side UAV for u0 is created with
// DXGI_FORMAT_R32_TYPELESS + D3D12_BUFFER_UAV_FLAG_RAW; a raw view has to be
// bound to a (RW)ByteAddressBuffer, not to a RWStructuredBuffer.
RWByteAddressBuffer ErrorCount : register(u0);

// Validation parameters (set as four 32-bit root constants)
cbuffer ValidationParams : register(b0)
{
    uint Width;        // Frame width in pixels
    uint Height;       // Frame height in pixels
    uint StripeWidth;  // Width of each color stripe in pixels (must be non-zero)
    uint Padding;      // Unused, keeps the block at 16 bytes
};

// One thread per pixel. Each pixel that does not match the expected
// red/black vertical stripe atomically increments the error counter.
[numthreads(16, 16, 1)]
void ValidateStripePattern(uint3 DTid : SV_DispatchThreadID)
{
    // The dispatch is rounded up to whole 16x16 groups; ignore threads outside the frame.
    if (DTid.x >= Width || DTid.y >= Height)
        return;

    // Load the texel directly (no sampler / filtering).
    float3 rgb = RGBATexture[DTid.xy].rgb;

    // Stripes alternate along X only: even stripe index = red, odd = black.
    uint stripe_index = DTid.x / StripeWidth;
    bool is_red_stripe = (stripe_index % 2 == 0);

    bool is_valid;
    if (is_red_stripe)
    {
        // Red stripe: R above ~200/255 (0.78), G and B below ~50/255 (0.2)
        is_valid = (rgb.r > 0.78f && rgb.g < 0.2f && rgb.b < 0.2f);
    }
    else
    {
        // Black stripe: all channels below ~50/255 (0.2)
        is_valid = (rgb.r < 0.2f && rgb.g < 0.2f && rgb.b < 0.2f);
    }

    // Count the mismatch; the previous counter value is not needed.
    if (!is_valid)
    {
        uint previous;
        ErrorCount.InterlockedAdd(0, 1, previous);
    }
}

View File

@@ -177,6 +177,91 @@ void D3D12Manager::WaitForGPU()
}
}
void D3D12Manager::ExecuteCommandListAndWait()
{
// Close command list
m_command_list->Close();
// Execute command list
ID3D12CommandList* cmdLists[] = { m_command_list };
m_command_queue->ExecuteCommandLists(1, cmdLists);
// Wait for GPU to finish
WaitForGPU();
// Reset command list for next use
m_command_allocator->Reset();
m_command_list->Reset(m_command_allocator, nullptr);
}
// Reads an RGBA texture back to the CPU and writes it as an uncompressed
// 32-bit bottom-up BMP.
//
// texture   - texture to save; passed to ReadbackTexture()
// width     - width in pixels (must be non-zero)
// height    - height in pixels (must be non-zero)
// filename  - output path
//
// Returns true only if the whole file was written successfully.
bool D3D12Manager::SaveTextureToBMP(ID3D12Resource* texture, uint32_t width, uint32_t height, const char* filename)
{
    if (!texture || !filename || width == 0 || height == 0) {
        printf("[D3D12Manager] Invalid parameters for SaveTextureToBMP\n");
        return false;
    }

    // File header (14 bytes) + BITMAPINFOHEADER (40 bytes)
    const uint32_t kHeaderSize = 54;

    // BMP stores sizes in 32-bit fields; reject images whose byte size would
    // overflow them (this also bounds width*height*4 for the arithmetic below).
    if (width > (0xFFFFFFFFu - kHeaderSize) / 4u / height) {
        printf("[D3D12Manager] Texture too large for BMP: %ux%u\n", width, height);
        return false;
    }
    const uint32_t image_size = width * height * 4;
    const uint32_t file_size = kHeaderSize + image_size;
    const size_t row_bytes = static_cast<size_t>(width) * 4;

    // Readback texture to CPU (buffer is owned by us and freed with delete[])
    uint8_t* rgba_data = ReadbackTexture(texture, width, height);
    if (!rgba_data) {
        printf("[D3D12Manager] Failed to readback texture\n");
        return false;
    }

    // Build the header; every multi-byte field is little-endian.
    uint8_t bmp_header[kHeaderSize] = {};
    auto put_le32 = [&bmp_header](size_t offset, uint32_t value) {
        bmp_header[offset + 0] = static_cast<uint8_t>(value);
        bmp_header[offset + 1] = static_cast<uint8_t>(value >> 8);
        bmp_header[offset + 2] = static_cast<uint8_t>(value >> 16);
        bmp_header[offset + 3] = static_cast<uint8_t>(value >> 24);
    };
    bmp_header[0] = 'B';            // Signature
    bmp_header[1] = 'M';
    put_le32(2, file_size);         // File size
                                    // 6..9: reserved (0)
    put_le32(10, kHeaderSize);      // Offset of pixel data
    put_le32(14, 40);               // Info header size
    put_le32(18, width);            // Width
    put_le32(22, height);           // Height (positive = bottom-up rows)
    bmp_header[26] = 1;             // Planes
    bmp_header[28] = 32;            // Bits per pixel
                                    // 30..33: compression (0 = none)
    put_le32(34, image_size);       // Image size
                                    // 38..53: resolution / palette fields (0)

    // Open file
    FILE* file = nullptr;
    fopen_s(&file, filename, "wb");
    if (!file) {
        printf("[D3D12Manager] Failed to open file: %s\n", filename);
        delete[] rgba_data;
        return false;
    }

    bool ok = (fwrite(bmp_header, 1, kHeaderSize, file) == kHeaderSize);

    // BMP rows are stored bottom-up in BGRA order. Swap R and B in place (we
    // own the buffer) and write one whole row per fwrite call.
    for (uint32_t row = height; ok && row-- > 0; ) {
        uint8_t* line = rgba_data + static_cast<size_t>(row) * row_bytes;
        for (uint32_t x = 0; x < width; ++x) {
            uint8_t* px = line + static_cast<size_t>(x) * 4;
            const uint8_t red = px[0];
            px[0] = px[2];
            px[2] = red;
        }
        ok = (fwrite(line, 1, row_bytes, file) == row_bytes);
    }

    // fclose flushes buffered data, so its result is part of the write status.
    if (fclose(file) != 0) {
        ok = false;
    }
    delete[] rgba_data;

    if (!ok) {
        printf("[D3D12Manager] Failed to write file: %s\n", filename);
        return false;
    }

    printf("[D3D12Manager] Saved texture to: %s\n", filename);
    return true;
}
ID3D12Resource* D3D12Manager::CreateNV12Texture(uint32_t width, uint32_t height)
{
D3D12_RESOURCE_DESC desc = {};
@@ -252,13 +337,13 @@ uint8_t* D3D12Manager::ReadbackTexture(ID3D12Resource* texture, uint32_t width,
// Get texture description
D3D12_RESOURCE_DESC desc = texture->GetDesc();
// Calculate layout for both Y and UV planes
D3D12_PLACED_SUBRESOURCE_FOOTPRINT layouts[2];
UINT num_rows[2] = { 0 };
UINT64 row_sizes[2] = { 0 };
// RGBA format has only 1 subresource
D3D12_PLACED_SUBRESOURCE_FOOTPRINT layout;
UINT num_rows = 0;
UINT64 row_size = 0;
UINT64 total_bytes = 0;
m_device->GetCopyableFootprints(&desc, 0, 2, 0, layouts, num_rows, row_sizes, &total_bytes);
m_device->GetCopyableFootprints(&desc, 0, 1, 0, &layout, &num_rows, &row_size, &total_bytes);
// Create readback buffer
D3D12_HEAP_PROPERTIES readback_heap_props = {};
@@ -301,31 +386,18 @@ uint8_t* D3D12Manager::ReadbackTexture(ID3D12Resource* texture, uint32_t width,
barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
m_command_list->ResourceBarrier(1, &barrier);
// Copy Y plane (subresource 0)
D3D12_TEXTURE_COPY_LOCATION src_y = {};
src_y.pResource = texture;
src_y.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
src_y.SubresourceIndex = 0;
// Copy RGBA texture (single subresource)
D3D12_TEXTURE_COPY_LOCATION src = {};
src.pResource = texture;
src.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
src.SubresourceIndex = 0;
D3D12_TEXTURE_COPY_LOCATION dst_y = {};
dst_y.pResource = readback_buffer;
dst_y.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
dst_y.PlacedFootprint = layouts[0];
D3D12_TEXTURE_COPY_LOCATION dst = {};
dst.pResource = readback_buffer;
dst.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
dst.PlacedFootprint = layout;
m_command_list->CopyTextureRegion(&dst_y, 0, 0, 0, &src_y, nullptr);
// Copy UV plane (subresource 1)
D3D12_TEXTURE_COPY_LOCATION src_uv = {};
src_uv.pResource = texture;
src_uv.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
src_uv.SubresourceIndex = 1;
D3D12_TEXTURE_COPY_LOCATION dst_uv = {};
dst_uv.pResource = readback_buffer;
dst_uv.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
dst_uv.PlacedFootprint = layouts[1];
m_command_list->CopyTextureRegion(&dst_uv, 0, 0, 0, &src_uv, nullptr);
m_command_list->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr);
// Transition texture back to COMMON
barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE;
@@ -350,28 +422,20 @@ uint8_t* D3D12Manager::ReadbackTexture(ID3D12Resource* texture, uint32_t width,
return nullptr;
}
// Allocate CPU buffer (NV12 format: Y plane + UV plane)
uint32_t y_size = width * height;
uint32_t uv_size = width * (height / 2);
uint8_t* cpu_buffer = new uint8_t[y_size + uv_size];
// Allocate CPU buffer for RGBA data
uint8_t* rgba_data = new uint8_t[width * height * 4];
// Copy Y plane
uint8_t* src_y_data = static_cast<uint8_t*>(mapped_data) + layouts[0].Offset;
uint8_t* dst_y_data = cpu_buffer;
for (UINT row = 0; row < height; ++row) {
memcpy(dst_y_data + row * width, src_y_data + row * layouts[0].Footprint.RowPitch, width);
}
// Copy UV plane
uint8_t* src_uv_data = static_cast<uint8_t*>(mapped_data) + layouts[1].Offset;
uint8_t* dst_uv_data = cpu_buffer + y_size;
for (UINT row = 0; row < height / 2; ++row) {
memcpy(dst_uv_data + row * width, src_uv_data + row * layouts[1].Footprint.RowPitch, width);
// Copy RGBA data (considering row pitch alignment)
uint8_t* src_ptr = static_cast<uint8_t*>(mapped_data);
for (uint32_t y = 0; y < height; y++) {
memcpy(rgba_data + y * width * 4,
src_ptr + y * layout.Footprint.RowPitch,
width * 4);
}
// Unmap and release readback buffer
readback_buffer->Unmap(0, nullptr);
readback_buffer->Release();
return cpu_buffer;
return rgba_data;
}

View File

@@ -26,6 +26,15 @@ public:
// Get D3D12 device
ID3D12Device* GetDevice() const { return m_device; }
// Get D3D12 command list (for GPU validation)
ID3D12GraphicsCommandList* GetCommandList() const { return m_command_list; }
// Execute command list and wait for completion
void ExecuteCommandListAndWait();
// Save RGBA texture to BMP file
bool SaveTextureToBMP(ID3D12Resource* texture, uint32_t width, uint32_t height, const char* filename);
private:
ID3D12Device* m_device;
ID3D12CommandQueue* m_command_queue;

View File

@@ -0,0 +1,386 @@
#include "D3D12PixelValidator.h"
#include "D3D12Manager.h"
#include <d3dcompiler.h>
#include <stdio.h>
#pragma comment(lib, "d3dcompiler.lib")
// Constructs an empty validator; no D3D12 objects are created until Initialize().
D3D12PixelValidator::D3D12PixelValidator()
{
}
// Releases every D3D12 object still held by the validator.
D3D12PixelValidator::~D3D12PixelValidator()
{
// Cleanup() null-checks each member, so this is safe after an explicit Cleanup() call.
Cleanup();
}
// Binds the validator to a device and builds everything needed for validation:
// the compiled compute shader, root signature + pipeline state, and the
// error-count / readback buffers.
//
// device - D3D12 device; the validator takes its own reference (AddRef).
//
// Returns false on any failure, in which case the validator is left empty.
// Calling Initialize() again releases the previous state first.
bool D3D12PixelValidator::Initialize(ID3D12Device* device)
{
    if (!device) {
        printf("[D3D12PixelValidator] Invalid device pointer\n");
        return false;
    }

    // Take the new reference before dropping old state so that re-initializing
    // with the same device cannot release it to zero in between.
    device->AddRef();
    Cleanup();
    m_device = device;

    // Compile shader (path is relative to the current working directory)
    if (!CompileShader(L"shaders\\StripeValidator.hlsl")) {
        printf("[D3D12PixelValidator] Failed to compile shader\n");
        Cleanup();
        return false;
    }

    // Create pipeline state
    if (!CreatePipelineState()) {
        printf("[D3D12PixelValidator] Failed to create pipeline state\n");
        Cleanup();
        return false;
    }

    // Create buffers
    if (!CreateBuffers()) {
        printf("[D3D12PixelValidator] Failed to create buffers\n");
        Cleanup();
        return false;
    }

    printf("[D3D12PixelValidator] Initialized successfully\n");
    return true;
}
bool D3D12PixelValidator::CompileShader(const wchar_t* shader_path)
{
UINT compileFlags = 0;
#ifdef _DEBUG
compileFlags = D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION;
#endif
ID3DBlob* errorBlob = nullptr;
HRESULT hr = D3DCompileFromFile(
shader_path,
nullptr,
D3D_COMPILE_STANDARD_FILE_INCLUDE,
"ValidateStripePattern",
"cs_5_0",
compileFlags,
0,
&m_shaderBlob,
&errorBlob
);
if (FAILED(hr)) {
if (errorBlob) {
printf("[D3D12PixelValidator] Shader compilation failed: %s\n",
(const char*)errorBlob->GetBufferPointer());
errorBlob->Release();
} else {
printf("[D3D12PixelValidator] Shader compilation failed: 0x%08X\n", hr);
}
return false;
}
if (errorBlob) {
errorBlob->Release();
}
return true;
}
// Builds the root signature and compute pipeline state for the validator.
//
// Root signature layout:
//   param 0: descriptor table with 1 SRV  (t0, input RGBA texture)
//   param 1: descriptor table with 1 UAV  (u0, error counter)
//   param 2: four 32-bit root constants   (b0: Width, Height, StripeWidth, Padding)
//
// Requires m_device and m_shaderBlob to be set. Returns false on failure.
bool D3D12PixelValidator::CreatePipelineState()
{
    if (!m_device || !m_shaderBlob) {
        printf("[D3D12PixelValidator] Cannot create pipeline state: shader not compiled\n");
        return false;
    }

    // Create root signature
    D3D12_ROOT_PARAMETER rootParams[3] = {};

    // t0: Input RGBA texture
    D3D12_DESCRIPTOR_RANGE srvRange = {};
    srvRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
    srvRange.NumDescriptors = 1;
    srvRange.BaseShaderRegister = 0;
    srvRange.RegisterSpace = 0;
    srvRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND;

    rootParams[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
    rootParams[0].DescriptorTable.NumDescriptorRanges = 1;
    rootParams[0].DescriptorTable.pDescriptorRanges = &srvRange;
    rootParams[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;

    // u0: Output error count buffer
    D3D12_DESCRIPTOR_RANGE uavRange = {};
    uavRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
    uavRange.NumDescriptors = 1;
    uavRange.BaseShaderRegister = 0;
    uavRange.RegisterSpace = 0;
    uavRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND;

    rootParams[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
    rootParams[1].DescriptorTable.NumDescriptorRanges = 1;
    rootParams[1].DescriptorTable.pDescriptorRanges = &uavRange;
    rootParams[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;

    // b0: Validation parameters (inline constants)
    rootParams[2].ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
    rootParams[2].Constants.ShaderRegister = 0;
    rootParams[2].Constants.RegisterSpace = 0;
    rootParams[2].Constants.Num32BitValues = 4; // Width, Height, StripeWidth, Padding
    rootParams[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;

    D3D12_ROOT_SIGNATURE_DESC rootSigDesc = {};
    rootSigDesc.NumParameters = 3;
    rootSigDesc.pParameters = rootParams;
    rootSigDesc.NumStaticSamplers = 0;
    rootSigDesc.pStaticSamplers = nullptr;
    rootSigDesc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE;

    ID3DBlob* signatureBlob = nullptr;
    ID3DBlob* errorBlob = nullptr;
    HRESULT hr = D3D12SerializeRootSignature(
        &rootSigDesc,
        D3D_ROOT_SIGNATURE_VERSION_1,
        &signatureBlob,
        &errorBlob
    );

    if (FAILED(hr)) {
        if (errorBlob) {
            printf("[D3D12PixelValidator] Root signature serialization failed: %s\n",
                   (const char*)errorBlob->GetBufferPointer());
            errorBlob->Release();
        } else {
            // No diagnostic text available; report the HRESULT instead of failing silently.
            printf("[D3D12PixelValidator] Root signature serialization failed: 0x%08X\n",
                   static_cast<unsigned int>(hr));
        }
        if (signatureBlob) {
            signatureBlob->Release();
        }
        return false;
    }

    // The error blob is not needed on success; release it if one was returned.
    if (errorBlob) {
        errorBlob->Release();
    }

    hr = m_device->CreateRootSignature(
        0,
        signatureBlob->GetBufferPointer(),
        signatureBlob->GetBufferSize(),
        IID_PPV_ARGS(&m_rootSignature)
    );

    signatureBlob->Release();

    if (FAILED(hr)) {
        printf("[D3D12PixelValidator] Failed to create root signature: 0x%08X\n",
               static_cast<unsigned int>(hr));
        return false;
    }

    // Create compute pipeline state
    D3D12_COMPUTE_PIPELINE_STATE_DESC psoDesc = {};
    psoDesc.pRootSignature = m_rootSignature;
    psoDesc.CS.pShaderBytecode = m_shaderBlob->GetBufferPointer();
    psoDesc.CS.BytecodeLength = m_shaderBlob->GetBufferSize();

    hr = m_device->CreateComputePipelineState(&psoDesc, IID_PPV_ARGS(&m_pipelineState));
    if (FAILED(hr)) {
        printf("[D3D12PixelValidator] Failed to create pipeline state: 0x%08X\n",
               static_cast<unsigned int>(hr));
        return false;
    }

    return true;
}
bool D3D12PixelValidator::CreateBuffers()
{
// Create GPU buffer for error count (UAV)
D3D12_HEAP_PROPERTIES heapProps = {};
heapProps.Type = D3D12_HEAP_TYPE_DEFAULT;
D3D12_RESOURCE_DESC bufferDesc = {};
bufferDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
bufferDesc.Width = sizeof(uint32_t);
bufferDesc.Height = 1;
bufferDesc.DepthOrArraySize = 1;
bufferDesc.MipLevels = 1;
bufferDesc.Format = DXGI_FORMAT_UNKNOWN;
bufferDesc.SampleDesc.Count = 1;
bufferDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
bufferDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
HRESULT hr = m_device->CreateCommittedResource(
&heapProps,
D3D12_HEAP_FLAG_NONE,
&bufferDesc,
D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
nullptr,
IID_PPV_ARGS(&m_errorCountBuffer)
);
if (FAILED(hr)) {
printf("[D3D12PixelValidator] Failed to create error count buffer: 0x%08X\n", hr);
return false;
}
// Create readback buffer (CPU accessible)
heapProps.Type = D3D12_HEAP_TYPE_READBACK;
bufferDesc.Flags = D3D12_RESOURCE_FLAG_NONE;
hr = m_device->CreateCommittedResource(
&heapProps,
D3D12_HEAP_FLAG_NONE,
&bufferDesc,
D3D12_RESOURCE_STATE_COPY_DEST,
nullptr,
IID_PPV_ARGS(&m_readbackBuffer)
);
if (FAILED(hr)) {
printf("[D3D12PixelValidator] Failed to create readback buffer: 0x%08X\n", hr);
return false;
}
return true;
}
// Runs the stripe-validation compute shader over an RGBA texture and returns
// the number of mismatching pixels.
//
// d3dManager       - supplies the command list and executes it (blocking)
// rgbaTexture      - texture to validate; viewed as DXGI_FORMAT_R8G8B8A8_UNORM
// width, height    - frame size in pixels
// stripe_width     - width of one stripe in pixels (must be non-zero)
// error_count_host - receives the mismatch count on success
//
// Returns true when the validation ran and *error_count_host was written;
// false on invalid arguments, an uninitialized validator or a D3D12 failure.
bool D3D12PixelValidator::ValidateStripePattern(
    D3D12Manager* d3dManager,
    ID3D12Resource* rgbaTexture,
    uint32_t width,
    uint32_t height,
    uint32_t stripe_width,
    unsigned int* error_count_host)
{
    // stripe_width == 0 would make the shader divide by zero.
    if (!d3dManager || !rgbaTexture || !error_count_host || stripe_width == 0) {
        printf("[D3D12PixelValidator] Invalid parameters\n");
        return false;
    }

    if (!m_device || !m_rootSignature || !m_pipelineState || !m_errorCountBuffer || !m_readbackBuffer) {
        printf("[D3D12PixelValidator] Validator is not initialized\n");
        return false;
    }

    ID3D12GraphicsCommandList* commandList = d3dManager->GetCommandList();
    if (!commandList) {
        printf("[D3D12PixelValidator] Invalid parameters\n");
        return false;
    }

    // Shader-visible heap: slot 0 = SRV (RGBA), slot 1 = UAV (error count)
    D3D12_DESCRIPTOR_HEAP_DESC heapDesc = {};
    heapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
    heapDesc.NumDescriptors = 2;
    heapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;

    ID3D12DescriptorHeap* descriptorHeap = nullptr;
    HRESULT hr = m_device->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&descriptorHeap));
    if (FAILED(hr)) {
        printf("[D3D12PixelValidator] Failed to create descriptor heap: 0x%08X\n",
               static_cast<unsigned int>(hr));
        return false;
    }

    // ClearUnorderedAccessViewUint additionally needs a CPU handle that lives
    // in a NON-shader-visible heap, so keep a second one-entry heap for it.
    D3D12_DESCRIPTOR_HEAP_DESC clearHeapDesc = {};
    clearHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
    clearHeapDesc.NumDescriptors = 1;
    clearHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;

    ID3D12DescriptorHeap* clearHeap = nullptr;
    hr = m_device->CreateDescriptorHeap(&clearHeapDesc, IID_PPV_ARGS(&clearHeap));
    if (FAILED(hr)) {
        printf("[D3D12PixelValidator] Failed to create CPU descriptor heap: 0x%08X\n",
               static_cast<unsigned int>(hr));
        descriptorHeap->Release();
        return false;
    }

    const UINT descriptorSize = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);

    const D3D12_CPU_DESCRIPTOR_HANDLE srvCpuHandle = descriptorHeap->GetCPUDescriptorHandleForHeapStart();
    D3D12_CPU_DESCRIPTOR_HANDLE uavCpuHandle = srvCpuHandle;
    uavCpuHandle.ptr += descriptorSize;
    const D3D12_CPU_DESCRIPTOR_HANDLE clearCpuHandle = clearHeap->GetCPUDescriptorHandleForHeapStart();

    const D3D12_GPU_DESCRIPTOR_HANDLE srvGpuHandle = descriptorHeap->GetGPUDescriptorHandleForHeapStart();
    D3D12_GPU_DESCRIPTOR_HANDLE uavGpuHandle = srvGpuHandle;
    uavGpuHandle.ptr += descriptorSize;

    // Create SRV for RGBA texture
    D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
    srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
    srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
    srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
    srvDesc.Texture2D.MipLevels = 1;
    m_device->CreateShaderResourceView(rgbaTexture, &srvDesc, srvCpuHandle);

    // Create the error-count UAV in both heaps (identical description).
    // NOTE(review): this is a raw (byte-address) view; the HLSL declaration
    // bound to u0 should be a RWByteAddressBuffer - verify against the shader.
    D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {};
    uavDesc.Format = DXGI_FORMAT_R32_TYPELESS;
    uavDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
    uavDesc.Buffer.FirstElement = 0;
    uavDesc.Buffer.NumElements = 1;
    uavDesc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW;
    m_device->CreateUnorderedAccessView(m_errorCountBuffer, nullptr, &uavDesc, uavCpuHandle);
    m_device->CreateUnorderedAccessView(m_errorCountBuffer, nullptr, &uavDesc, clearCpuHandle);

    // The GPU handle passed to the clear must belong to the heap currently
    // set on the command list, so bind the heap before recording the clear.
    ID3D12DescriptorHeap* heaps[] = {descriptorHeap};
    commandList->SetDescriptorHeaps(1, heaps);

    // Clear error count to 0
    const UINT clearValues[4] = {0, 0, 0, 0};
    commandList->ClearUnorderedAccessViewUint(
        uavGpuHandle,
        clearCpuHandle,
        m_errorCountBuffer,
        clearValues,
        0,
        nullptr
    );

    // Make sure the clear has landed before the shader starts its atomic adds.
    D3D12_RESOURCE_BARRIER uavBarrier = {};
    uavBarrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
    uavBarrier.UAV.pResource = m_errorCountBuffer;
    commandList->ResourceBarrier(1, &uavBarrier);

    // Set pipeline state and root signature
    commandList->SetComputeRootSignature(m_rootSignature);
    commandList->SetPipelineState(m_pipelineState);

    // Bind descriptor tables
    commandList->SetComputeRootDescriptorTable(0, srvGpuHandle); // SRV (RGBA)
    commandList->SetComputeRootDescriptorTable(1, uavGpuHandle); // UAV (error count)

    // Set validation parameters as root constants
    const UINT params[4] = {width, height, stripe_width, 0};
    commandList->SetComputeRoot32BitConstants(2, 4, params, 0);

    // Dispatch compute shader (16x16 thread groups, rounded up)
    const UINT dispatchX = (width + 15) / 16;
    const UINT dispatchY = (height + 15) / 16;
    commandList->Dispatch(dispatchX, dispatchY, 1);

    // The buffer lives in UNORDERED_ACCESS; CopyResource needs COPY_SOURCE.
    // The transition also orders the copy after the shader's writes.
    D3D12_RESOURCE_BARRIER transition = {};
    transition.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
    transition.Transition.pResource = m_errorCountBuffer;
    transition.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
    transition.Transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
    transition.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE;
    commandList->ResourceBarrier(1, &transition);

    // Copy error count from GPU buffer to readback buffer
    commandList->CopyResource(m_readbackBuffer, m_errorCountBuffer);

    // Return the buffer to UNORDERED_ACCESS so the next call starts from the
    // same state it was created in.
    transition.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE;
    transition.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
    commandList->ResourceBarrier(1, &transition);

    // Execute command list and wait for GPU to finish
    d3dManager->ExecuteCommandListAndWait();

    // The GPU is idle now, so the per-call descriptor heaps can be released.
    descriptorHeap->Release();
    clearHeap->Release();

    // Map readback buffer and copy the 4-byte result to the host
    const D3D12_RANGE readRange = {0, sizeof(uint32_t)};
    void* mappedData = nullptr;
    hr = m_readbackBuffer->Map(0, &readRange, &mappedData);
    if (FAILED(hr)) {
        printf("[D3D12PixelValidator] Failed to map readback buffer: 0x%08X\n",
               static_cast<unsigned int>(hr));
        return false;
    }

    *error_count_host = *reinterpret_cast<uint32_t*>(mappedData);

    // Empty written range: the CPU did not modify the buffer.
    const D3D12_RANGE noWrite = {0, 0};
    m_readbackBuffer->Unmap(0, &noWrite);

    printf("[D3D12PixelValidator] Validation complete: %u pixel errors\n", *error_count_host);
    return true;
}
namespace {

// Releases a COM interface pointer if it is set and resets it to nullptr.
template <typename Interface>
void ReleaseAndNull(Interface*& ptr)
{
    if (ptr) {
        ptr->Release();
        ptr = nullptr;
    }
}

}  // namespace

// Releases every D3D12 object owned by the validator and nulls the members.
// Null members are skipped, so calling this more than once is harmless.
void D3D12PixelValidator::Cleanup()
{
    // Buffers first, then pipeline objects, then bytecode, and the device last.
    ReleaseAndNull(m_readbackBuffer);
    ReleaseAndNull(m_errorCountBuffer);
    ReleaseAndNull(m_pipelineState);
    ReleaseAndNull(m_rootSignature);
    ReleaseAndNull(m_shaderBlob);
    ReleaseAndNull(m_device);
}

View File

@@ -0,0 +1,68 @@
#pragma once
#include <d3d12.h>
#include <cstdint>
// Forward declaration
class D3D12Manager;
// D3D12 compute-shader based stripe pattern validator.
// The frame is checked on the GPU; only a single uint32_t error count is
// copied back to the CPU.
class D3D12PixelValidator {
public:
D3D12PixelValidator();
// Calls Cleanup().
~D3D12PixelValidator();
// Prevent copying (the class holds raw COM pointers it releases in Cleanup)
D3D12PixelValidator(const D3D12PixelValidator&) = delete;
D3D12PixelValidator& operator=(const D3D12PixelValidator&) = delete;
// Initialize validator with a D3D12 device: compiles the shader, then
// creates the pipeline state and buffers. The device is AddRef'ed.
// Returns false if the device is null or any step fails.
bool Initialize(ID3D12Device* device);
// Validate stripe pattern on GPU (RGBA texture input)
// d3dManager: provides the command list and executes it (blocks until done)
// stripe_width: width of one stripe in pixels
// Returns true if validation executed successfully
// error_count_host: Output - number of mismatched pixels
bool ValidateStripePattern(
D3D12Manager* d3dManager,
ID3D12Resource* rgbaTexture,
uint32_t width,
uint32_t height,
uint32_t stripe_width,
unsigned int* error_count_host
);
// Release all owned resources and reset the members to nullptr
void Cleanup();
private:
// Compile the HLSL compute shader (entry "ValidateStripePattern", target
// cs_5_0) into m_shaderBlob
bool CompileShader(const wchar_t* shader_path);
// Create root signature and compute pipeline state
bool CreatePipelineState();
// Create the error count buffer and its readback buffer
bool CreateBuffers();
// Device the validator was initialized with (holds one reference)
ID3D12Device* m_device = nullptr;
ID3D12RootSignature* m_rootSignature = nullptr;
ID3D12PipelineState* m_pipelineState = nullptr;
// GPU buffer for error count (single uint32_t)
ID3D12Resource* m_errorCountBuffer = nullptr;
// Readback buffer for error count (CPU accessible)
ID3D12Resource* m_readbackBuffer = nullptr;
// Compiled shader bytecode
ID3DBlob* m_shaderBlob = nullptr;
// Layout of the shader's ValidationParams constant block (four uint32_t
// values, passed as root constants)
struct ValidationParams {
uint32_t Width;
uint32_t Height;
uint32_t StripeWidth;
uint32_t Padding;
};
};

View File

@@ -1,103 +0,0 @@
#include "PixelVerifier.h"
#include <stdio.h>
#include <algorithm>
// Nothing to set up; the verifier keeps no state.
PixelVerifier::PixelVerifier()
{
}
// Nothing to release.
PixelVerifier::~PixelVerifier()
{
}
// Converts one Y/U/V sample triple to 8-bit RGB using 8.8 fixed-point
// arithmetic (coefficients scaled by 256, +128 for rounding, >> 8 to scale
// back). Each channel is clamped to [0, 255].
//
// NOTE(review): the original comment labelled this conversion ITU-R BT.709,
// but 298/409/100/208/516 look like the usual BT.601 limited-range integer
// coefficients - confirm which matrix is intended.
RGB PixelVerifier::ConvertNV12ToRGB(uint8_t y, uint8_t u, uint8_t v)
{
    // Remove the luma offset and center the chroma samples.
    const int luma = y - 16;
    const int cb = u - 128;
    const int cr = v - 128;

    const int red = (298 * luma + 409 * cr + 128) >> 8;
    const int green = (298 * luma - 100 * cb - 208 * cr + 128) >> 8;
    const int blue = (298 * luma + 516 * cb + 128) >> 8;

    RGB out;
    out.r = static_cast<uint8_t>(std::clamp(red, 0, 255));
    out.g = static_cast<uint8_t>(std::clamp(green, 0, 255));
    out.b = static_cast<uint8_t>(std::clamp(blue, 0, 255));
    return out;
}
// Returns true when every channel is at most `tolerance`, i.e. the pixel is
// close enough to pure black (0, 0, 0).
bool PixelVerifier::IsBlackPixel(const RGB& pixel, int tolerance)
{
    if (pixel.r > tolerance) {
        return false;
    }
    if (pixel.g > tolerance) {
        return false;
    }
    return pixel.b <= tolerance;
}
// Returns true when the pixel is close enough to pure red (255, 0, 0):
// red at least 255 - tolerance, green and blue at most tolerance.
bool PixelVerifier::IsRedPixel(const RGB& pixel, int tolerance)
{
    const int min_red = 255 - tolerance;
    if (pixel.r < min_red) {
        return false;
    }
    if (pixel.g > tolerance) {
        return false;
    }
    return pixel.b <= tolerance;
}
// Checks the first row of an NV12 frame against an 8-pixel wide
// black/red stripe pattern (even stripes black, odd stripes red).
//
// nv12_data - tightly packed NV12 frame: width*height Y bytes followed by the
//             interleaved UV plane
// width     - frame width in pixels
// height    - frame height in pixels (only used to locate the UV plane)
//
// Prints up to five mismatches and returns true when the row has none.
// Returns false when nv12_data is null.
bool PixelVerifier::VerifyStripePattern(const uint8_t* nv12_data, uint32_t width, uint32_t height)
{
    if (!nv12_data) {
        return false;
    }

    // NV12 format:
    // - Y plane: width * height bytes
    // - UV plane: width * (height/2) bytes (interleaved U and V)
    const uint8_t* y_plane = nv12_data;
    const uint8_t* uv_plane = nv12_data + (width * height);

    // Check first row (representative sample)
    int error_count = 0;
    const int max_errors_to_print = 5;

    for (uint32_t x = 0; x < width; ++x) {
        // Calculate stripe index (8 pixels per stripe)
        int stripe_index = x / 8;
        bool should_be_red = (stripe_index % 2) == 1;

        // Get Y value for this pixel
        uint8_t y = y_plane[x];

        // Get U and V values (UV is subsampled 2x horizontally, so two
        // neighbouring pixels share one U/V byte pair)
        uint32_t uv_x = (x / 2) * 2;
        uint8_t u = uv_plane[uv_x];
        uint8_t v = uv_plane[uv_x + 1];

        // Convert to RGB
        RGB pixel = ConvertNV12ToRGB(y, u, v);

        // Verify pixel color (x is unsigned, hence %u)
        if (should_be_red) {
            if (!IsRedPixel(pixel)) {
                if (error_count < max_errors_to_print) {
                    printf(" FAIL at X=%u: Expected red, got RGB(%d,%d,%d)\n",
                           x, pixel.r, pixel.g, pixel.b);
                }
                error_count++;
            }
        }
        else {
            if (!IsBlackPixel(pixel)) {
                if (error_count < max_errors_to_print) {
                    printf(" FAIL at X=%u: Expected black, got RGB(%d,%d,%d)\n",
                           x, pixel.r, pixel.g, pixel.b);
                }
                error_count++;
            }
        }
    }

    if (error_count > max_errors_to_print) {
        printf(" ... and %d more errors\n", error_count - max_errors_to_print);
    }

    return (error_count == 0);
}

View File

@@ -1,29 +0,0 @@
#pragma once
#include <stdint.h>
// One 8-bit-per-channel RGB pixel.
struct RGB
{
uint8_t r; // red
uint8_t g; // green
uint8_t b; // blue
};
// CPU-side checker for NV12 frames that should contain a black/red stripe pattern.
class PixelVerifier
{
public:
PixelVerifier();
~PixelVerifier();
// Verify 8-pixel stripe pattern (black-red-black-red...)
// nv12_data: Y plane (width * height bytes) followed by the interleaved UV plane
// Only the first row of the frame is examined.
// Returns true if all examined pixels match the expected pattern
bool VerifyStripePattern(const uint8_t* nv12_data, uint32_t width, uint32_t height);
private:
// Convert NV12 to RGB (single pixel)
RGB ConvertNV12ToRGB(uint8_t y, uint8_t u, uint8_t v);
// Check if pixel is within tolerance of expected color
// (black = 0,0,0; red = 255,0,0; tolerance is applied per channel)
bool IsBlackPixel(const RGB& pixel, int tolerance = 50);
bool IsRedPixel(const RGB& pixel, int tolerance = 50);
};

View File

@@ -4,14 +4,13 @@
#include <vector>
#include <VavCore/VavCore.h>
#include "D3D12Manager.h"
#include "PixelVerifier.h"
#include "D3D12PixelValidator.h"
// Per-frame bookkeeping for the decode / verify loops in main().
struct FrameTask {
// Index of the frame in decode order
int frame_index;
// D3D12 texture the frame is decoded into; released during cleanup
ID3D12Resource* texture;
// Frame descriptor passed to the decoder (presumably filled in by it - TODO confirm)
VavCoreVideoFrame frame;
// Result of the decode call; frames that are not VAVCORE_SUCCESS are skipped
VavCoreResult result;
// CPU copy returned by ReadbackTexture(); freed with delete[]
uint8_t* cpu_buffer;
};
int main(int argc, char* argv[])
@@ -112,11 +111,21 @@ int main(int argc, char* argv[])
printf("[RedSurfaceNVDECTest] Decoder: NVDEC (explicit)\n");
printf("[RedSurfaceNVDECTest] Surface: D3D12\n");
// Step 7: Create PixelVerifier
PixelVerifier verifier;
// Step 7: Create D3D12PixelValidator
printf("\n[Step 7] Creating D3D12 GPU validator...\n");
D3D12PixelValidator validator;
if (!validator.Initialize(d3d12.GetDevice())) {
printf("[ERROR] Failed to initialize D3D12PixelValidator\n");
vavcore_close_file(player);
vavcore_destroy_player(player);
d3d12.Cleanup();
vavcore_cleanup();
return 1;
}
printf("[OK] D3D12PixelValidator initialized\n");
// Step 8: Continuous frame decoding (pipeline priming approach)
printf("\n[Step 7] Decoding and verifying frames (continuous decode calls)...\n\n");
printf("\n[Step 8] Decoding and verifying frames (GPU validation)...\n\n");
printf("[INFO] NVDEC pipeline priming: Calling decode continuously\n");
printf("[INFO] First 2 frames will be skipped for verification (priming frames)\n\n");
@@ -124,14 +133,14 @@ int main(int argc, char* argv[])
std::vector<FrameTask> frame_tasks(MAX_FRAMES);
int decode_errors = 0;
const uint32_t STRIPE_WIDTH = 4; // 4-pixel wide stripes (Red/Black alternating)
// Continuous decode loop - keep calling DecodeToSurface
// NVDEC will pipeline internally and return frames in order
// Process frames one-by-one to avoid memory pressure
for (int i = 0; i < MAX_FRAMES && !vavcore_is_end_of_file(player); i++) {
FrameTask& task = frame_tasks[i];
task.frame_index = i;
// Create RGBA texture for CUDA interop
// Create RGBA texture for NVDEC output (CUDA NV12ToRGBA conversion)
task.texture = d3d12.CreateRGBATexture(metadata.width, metadata.height);
if (!task.texture) {
printf("[ERROR] Failed to create texture for frame %d\n", i);
@@ -140,8 +149,7 @@ int main(int argc, char* argv[])
continue;
}
// Decode frame to D3D12 surface
// This will block and wait (pending) until the frame is ready
// Decode frame to D3D12 surface (RGBA format after CUDA conversion)
task.result = vavcore_decode_to_surface(
player,
VAVCORE_SURFACE_D3D12_RESOURCE,
@@ -157,22 +165,32 @@ int main(int argc, char* argv[])
continue;
}
// Readback D3D12 texture to CPU memory
task.cpu_buffer = d3d12.ReadbackTexture(task.texture, metadata.width, metadata.height);
if (!task.cpu_buffer) {
printf("Frame %3d: Readback failed\n", i);
task.texture->Release();
task.texture = nullptr;
decode_errors++;
continue;
}
printf("Frame %3d: Decoded successfully (RGBA)\n", i);
printf("Frame %3d: Decoded successfully\n", i);
// Save first 5 frames as BMP for inspection (one at a time to reduce memory pressure)
if (i < 5) {
char filename[256];
sprintf_s(filename, sizeof(filename), "frame_%03d.bmp", i);
bool saved = d3d12.SaveTextureToBMP(task.texture, metadata.width, metadata.height, filename);
// If save failed, try waiting a bit and retry once
if (!saved) {
printf("Frame %3d: BMP save failed, waiting 500ms and retrying...\n", i);
Sleep(500);
saved = d3d12.SaveTextureToBMP(task.texture, metadata.width, metadata.height, filename);
if (!saved) {
printf("Frame %3d: BMP save failed after retry\n", i);
}
}
// Wait a bit before next frame to avoid memory pressure
Sleep(100);
}
}
printf("\n[Step 8] Verifying decoded frames...\n\n");
printf("\n[Step 9] Verifying decoded frames (GPU compute shader)...\n\n");
// Verify frames in order
// Verify frames in order using GPU compute shader
int passed_frames = 0;
int failed_frames = 0;
int total_verified = 0;
@@ -181,31 +199,39 @@ int main(int argc, char* argv[])
FrameTask& task = frame_tasks[i];
// Skip if decode failed
if (task.result != VAVCORE_SUCCESS || !task.cpu_buffer) {
if (task.result != VAVCORE_SUCCESS || !task.texture) {
continue;
}
// Skip first 2 frames (NVDEC priming)
if (i < 2) {
printf("Frame %3d: PRIMING (skipped verification)\n", i);
delete[] task.cpu_buffer;
task.cpu_buffer = nullptr;
continue;
}
// Verify stripe pattern
bool passed = verifier.VerifyStripePattern(task.cpu_buffer, metadata.width, metadata.height);
if (passed) {
printf("Frame %3d: PASS\n", i);
// GPU validation (only 4 bytes copied to CPU!)
unsigned int error_count = 0;
bool validation_success = validator.ValidateStripePattern(
&d3d12,
task.texture,
metadata.width,
metadata.height,
STRIPE_WIDTH,
&error_count
);
if (!validation_success) {
printf("Frame %3d: GPU validation failed\n", i);
failed_frames++;
} else if (error_count == 0) {
printf("Frame %3d: PASS (0 pixel errors)\n", i);
passed_frames++;
} else {
printf("Frame %3d: FAIL\n", i);
printf("Frame %3d: FAIL (%u pixel errors)\n", i, error_count);
failed_frames++;
}
total_verified++;
delete[] task.cpu_buffer;
task.cpu_buffer = nullptr;
}
// Cleanup textures
@@ -213,11 +239,11 @@ int main(int argc, char* argv[])
if (task.texture) {
task.texture->Release();
}
if (task.cpu_buffer) {
delete[] task.cpu_buffer;
}
}
// Cleanup validator
validator.Cleanup();
// Step 9: Print results
printf("\n[RedSurfaceNVDECTest] Results:\n");
printf(" Total frames decoded: %d\n", MAX_FRAMES - decode_errors);