diff --git a/.claude/settings.local.json b/.claude/settings.local.json
index f44c25a..9c659a1 100644
--- a/.claude/settings.local.json
+++ b/.claude/settings.local.json
@@ -106,7 +106,9 @@
"Read(//c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v13.0/lib/x64/**)",
"Bash(./compile_kernel.bat)",
"Bash(\"./compile_validation_kernel.bat\")",
- "Bash(./compile_validation_kernel.bat)"
+ "Bash(./compile_validation_kernel.bat)",
+ "Bash(\"./bin/Debug/RedSurfaceNVDECTest.exe\" \"D:/Project/video-av1/sample/test_720p_stripe.webm\")",
+ "Bash(\"./bin/Debug/RedSurfaceNVDECTest.exe\" \"D:/Project/video-av1/sample/test_1080p_stripe.webm\")"
],
"deny": [],
"ask": []
diff --git a/vav2/platforms/windows/tests/red-surface-nvdec/RedSurfaceNVDECTest.vcxproj b/vav2/platforms/windows/tests/red-surface-nvdec/RedSurfaceNVDECTest.vcxproj
index b7ce7db..8b8ab80 100644
--- a/vav2/platforms/windows/tests/red-surface-nvdec/RedSurfaceNVDECTest.vcxproj
+++ b/vav2/platforms/windows/tests/red-surface-nvdec/RedSurfaceNVDECTest.vcxproj
@@ -42,7 +42,7 @@
Console
true
$(ProjectDir)..\..\vavcore\lib
- VavCore-debug.lib;d3d12.lib;dxgi.lib;kernel32.lib;user32.lib;%(AdditionalDependencies)
+ VavCore-debug.lib;d3d12.lib;dxgi.lib;d3dcompiler.lib;kernel32.lib;user32.lib;%(AdditionalDependencies)
echo Copying VavCore DLL...
@@ -53,11 +53,11 @@ echo Done.
-
+
-
+
diff --git a/vav2/platforms/windows/tests/red-surface-nvdec/shaders/StripeValidator.hlsl b/vav2/platforms/windows/tests/red-surface-nvdec/shaders/StripeValidator.hlsl
new file mode 100644
index 0000000..7e2c0f8
--- /dev/null
+++ b/vav2/platforms/windows/tests/red-surface-nvdec/shaders/StripeValidator.hlsl
@@ -0,0 +1,52 @@
+// D3D12 compute shader for GPU-side validation of a vertical stripe pattern.
+// Each thread checks one pixel and bumps a shared counter on mismatch, so the
+// host only has to copy the 32-bit error count back instead of the whole frame.
+
+// Input RGBA texture (from NVDEC CUDA conversion)
+Texture2D<float4> RGBATexture : register(t0);
+
+// Output error count: a single 32-bit counter stored at byte offset 0.
+// Declared as a byte-address buffer because the host binds u0 through a raw
+// view (DXGI_FORMAT_R32_TYPELESS with D3D12_BUFFER_UAV_FLAG_RAW); the view type
+// and the shader declaration have to agree.
+RWByteAddressBuffer ErrorCount : register(u0);
+
+// Validation parameters (the host supplies them as four 32-bit root constants)
+cbuffer ValidationParams : register(b0)
+{
+    uint Width;        // Frame width in pixels
+    uint Height;       // Frame height in pixels
+    uint StripeWidth;  // Width of each color stripe in pixels
+    uint Padding;      // Unused; keeps the block at four 32-bit values
+};
+
+[numthreads(16, 16, 1)]
+void ValidateStripePattern(uint3 DTid : SV_DispatchThreadID)
+{
+    // The dispatch is rounded up to whole 16x16 groups, so edge threads can
+    // land outside the frame and must not touch the texture or the counter.
+    if (DTid.x >= Width || DTid.y >= Height)
+        return;
+
+    // Direct texel load (no sampler involved); alpha is not part of the check.
+    float3 rgb = RGBATexture[DTid.xy].rgb;
+
+    // Stripes alternate Red/Black along X, starting with red at x = 0.
+    // max() keeps a zero StripeWidth from turning into a divide by zero.
+    uint stripe_index = DTid.x / max(StripeWidth, 1u);
+    bool is_red_stripe = ((stripe_index & 1u) == 0u);
+
+    // Green and blue must be dark in both stripe colors (0.2 is roughly 50/255).
+    bool is_valid = (rgb.g < 0.2f && rgb.b < 0.2f);
+
+    if (is_red_stripe)
+    {
+        // Red stripe: red channel must be bright (0.78 is roughly 200/255).
+        is_valid = is_valid && (rgb.r > 0.78f);
+    }
+    else
+    {
+        // Black stripe: red channel must be dark as well.
+        is_valid = is_valid && (rgb.r < 0.2f);
+    }
+
+    // Count the pixel as an error if it does not match the expected pattern.
+    if (!is_valid)
+    {
+        uint previous_count;  // required out parameter, value not needed
+        ErrorCount.InterlockedAdd(0, 1, previous_count);
+    }
+}
diff --git a/vav2/platforms/windows/tests/red-surface-nvdec/src/D3D12Manager.cpp b/vav2/platforms/windows/tests/red-surface-nvdec/src/D3D12Manager.cpp
index 8ec7362..cfaf10e 100644
--- a/vav2/platforms/windows/tests/red-surface-nvdec/src/D3D12Manager.cpp
+++ b/vav2/platforms/windows/tests/red-surface-nvdec/src/D3D12Manager.cpp
@@ -177,6 +177,91 @@ void D3D12Manager::WaitForGPU()
}
}
+// Closes the manager's command list, submits it to the command queue, blocks
+// until the GPU has finished, then resets the allocator and the list so that
+// recording can continue. Failures are logged; the function has no return
+// value, so callers cannot observe them directly.
+void D3D12Manager::ExecuteCommandListAndWait()
+{
+    // A list that failed to close contains invalid commands and must not be
+    // submitted; it also cannot be reset afterwards, so stop here.
+    HRESULT hr = m_command_list->Close();
+    if (FAILED(hr)) {
+        printf("[D3D12Manager] Failed to close command list: 0x%08X\n", static_cast<unsigned int>(hr));
+        return;
+    }
+
+    // Execute command list
+    ID3D12CommandList* cmdLists[] = { m_command_list };
+    m_command_queue->ExecuteCommandLists(1, cmdLists);
+
+    // Wait for GPU to finish (the allocator may only be reset once the GPU
+    // no longer executes commands recorded through it)
+    WaitForGPU();
+
+    // Reset allocator and command list for next use
+    hr = m_command_allocator->Reset();
+    if (FAILED(hr)) {
+        printf("[D3D12Manager] Failed to reset command allocator: 0x%08X\n", static_cast<unsigned int>(hr));
+    }
+
+    hr = m_command_list->Reset(m_command_allocator, nullptr);
+    if (FAILED(hr)) {
+        printf("[D3D12Manager] Failed to reset command list: 0x%08X\n", static_cast<unsigned int>(hr));
+    }
+}
+
+// Reads an RGBA texture back to the CPU and writes it as an uncompressed
+// 32-bit BMP file.
+//
+// texture  - texture to save; read through ReadbackTexture()
+// width    - image width in pixels (must be non-zero)
+// height   - image height in pixels (must be non-zero)
+// filename - destination path, opened with fopen_s in "wb" mode
+//
+// Returns true only if the readback succeeded and every byte was written.
+bool D3D12Manager::SaveTextureToBMP(ID3D12Resource* texture, uint32_t width, uint32_t height, const char* filename)
+{
+    if (!texture || !filename) {
+        printf("[D3D12Manager] Invalid parameters for SaveTextureToBMP\n");
+        return false;
+    }
+
+    // BMP size fields are 32 bits wide: compute in 64 bits and reject images
+    // that are empty or whose file size would not fit.
+    const uint64_t pixel_bytes = static_cast<uint64_t>(width) * height * 4;
+    if (pixel_bytes == 0 || pixel_bytes > 0xFFFFFFFFull - 54) {
+        printf("[D3D12Manager] Unsupported image size for BMP: %ux%u\n", width, height);
+        return false;
+    }
+    const uint32_t image_size = static_cast<uint32_t>(pixel_bytes);
+    const uint32_t file_size = 54 + image_size;
+
+    // Readback texture to CPU (tightly packed RGBA, owned by this function)
+    uint8_t* rgba_data = ReadbackTexture(texture, width, height);
+    if (!rgba_data) {
+        printf("[D3D12Manager] Failed to readback texture\n");
+        return false;
+    }
+
+    // 14-byte file header followed by the 40-byte info header (54 bytes total),
+    // all fields little-endian
+    uint8_t bmp_header[54] = {
+        'B', 'M',                                   // Signature
+        (uint8_t)(file_size), (uint8_t)(file_size >> 8), (uint8_t)(file_size >> 16), (uint8_t)(file_size >> 24), // File size
+        0, 0, 0, 0,                                 // Reserved
+        54, 0, 0, 0,                                // Data offset
+        40, 0, 0, 0,                                // Info header size
+        (uint8_t)(width), (uint8_t)(width >> 8), (uint8_t)(width >> 16), (uint8_t)(width >> 24),     // Width
+        (uint8_t)(height), (uint8_t)(height >> 8), (uint8_t)(height >> 16), (uint8_t)(height >> 24), // Height
+        1, 0,                                       // Planes
+        32, 0,                                      // Bits per pixel (32-bit RGBA)
+        0, 0, 0, 0,                                 // Compression (none)
+        (uint8_t)(image_size), (uint8_t)(image_size >> 8),
+        (uint8_t)(image_size >> 16), (uint8_t)(image_size >> 24), // Image size
+        0, 0, 0, 0,                                 // X pixels per meter
+        0, 0, 0, 0,                                 // Y pixels per meter
+        0, 0, 0, 0,                                 // Colors used
+        0, 0, 0, 0                                  // Important colors
+    };
+
+    // Open file
+    FILE* file = nullptr;
+    fopen_s(&file, filename, "wb");
+    if (!file) {
+        printf("[D3D12Manager] Failed to open file: %s\n", filename);
+        delete[] rgba_data;
+        return false;
+    }
+
+    // Write BMP header
+    bool ok = (fwrite(bmp_header, 1, sizeof(bmp_header), file) == sizeof(bmp_header));
+
+    // A positive BMP height means rows are stored bottom-up, so emit the last
+    // source row first. Each row is converted RGBA -> BGRA in place (the
+    // buffer is ours) and written with a single fwrite.
+    const size_t row_bytes = static_cast<size_t>(width) * 4;
+    uint32_t rows_left = height;
+    while (ok && rows_left > 0) {
+        --rows_left;
+        uint8_t* row = rgba_data + static_cast<size_t>(rows_left) * row_bytes;
+        for (size_t i = 0; i < row_bytes; i += 4) {
+            const uint8_t red = row[i];
+            row[i] = row[i + 2];   // B
+            row[i + 2] = red;      // R
+        }
+        ok = (fwrite(row, 1, row_bytes, file) == row_bytes);
+    }
+
+    // fclose flushes buffered data, so its result is part of the write status
+    if (fclose(file) != 0) {
+        ok = false;
+    }
+    delete[] rgba_data;
+
+    if (!ok) {
+        printf("[D3D12Manager] Failed to write file: %s\n", filename);
+        return false;
+    }
+
+    printf("[D3D12Manager] Saved texture to: %s\n", filename);
+    return true;
+}
+
ID3D12Resource* D3D12Manager::CreateNV12Texture(uint32_t width, uint32_t height)
{
D3D12_RESOURCE_DESC desc = {};
@@ -252,13 +337,13 @@ uint8_t* D3D12Manager::ReadbackTexture(ID3D12Resource* texture, uint32_t width,
// Get texture description
D3D12_RESOURCE_DESC desc = texture->GetDesc();
- // Calculate layout for both Y and UV planes
- D3D12_PLACED_SUBRESOURCE_FOOTPRINT layouts[2];
- UINT num_rows[2] = { 0 };
- UINT64 row_sizes[2] = { 0 };
+ // RGBA format has only 1 subresource
+ D3D12_PLACED_SUBRESOURCE_FOOTPRINT layout;
+ UINT num_rows = 0;
+ UINT64 row_size = 0;
UINT64 total_bytes = 0;
- m_device->GetCopyableFootprints(&desc, 0, 2, 0, layouts, num_rows, row_sizes, &total_bytes);
+ m_device->GetCopyableFootprints(&desc, 0, 1, 0, &layout, &num_rows, &row_size, &total_bytes);
// Create readback buffer
D3D12_HEAP_PROPERTIES readback_heap_props = {};
@@ -301,31 +386,18 @@ uint8_t* D3D12Manager::ReadbackTexture(ID3D12Resource* texture, uint32_t width,
barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
m_command_list->ResourceBarrier(1, &barrier);
- // Copy Y plane (subresource 0)
- D3D12_TEXTURE_COPY_LOCATION src_y = {};
- src_y.pResource = texture;
- src_y.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
- src_y.SubresourceIndex = 0;
+ // Copy RGBA texture (single subresource)
+ D3D12_TEXTURE_COPY_LOCATION src = {};
+ src.pResource = texture;
+ src.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
+ src.SubresourceIndex = 0;
- D3D12_TEXTURE_COPY_LOCATION dst_y = {};
- dst_y.pResource = readback_buffer;
- dst_y.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
- dst_y.PlacedFootprint = layouts[0];
+ D3D12_TEXTURE_COPY_LOCATION dst = {};
+ dst.pResource = readback_buffer;
+ dst.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
+ dst.PlacedFootprint = layout;
- m_command_list->CopyTextureRegion(&dst_y, 0, 0, 0, &src_y, nullptr);
-
- // Copy UV plane (subresource 1)
- D3D12_TEXTURE_COPY_LOCATION src_uv = {};
- src_uv.pResource = texture;
- src_uv.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
- src_uv.SubresourceIndex = 1;
-
- D3D12_TEXTURE_COPY_LOCATION dst_uv = {};
- dst_uv.pResource = readback_buffer;
- dst_uv.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
- dst_uv.PlacedFootprint = layouts[1];
-
- m_command_list->CopyTextureRegion(&dst_uv, 0, 0, 0, &src_uv, nullptr);
+ m_command_list->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr);
// Transition texture back to COMMON
barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE;
@@ -350,28 +422,20 @@ uint8_t* D3D12Manager::ReadbackTexture(ID3D12Resource* texture, uint32_t width,
return nullptr;
}
- // Allocate CPU buffer (NV12 format: Y plane + UV plane)
- uint32_t y_size = width * height;
- uint32_t uv_size = width * (height / 2);
- uint8_t* cpu_buffer = new uint8_t[y_size + uv_size];
+ // Allocate CPU buffer for RGBA data
+ uint8_t* rgba_data = new uint8_t[width * height * 4];
- // Copy Y plane
- uint8_t* src_y_data = static_cast(mapped_data) + layouts[0].Offset;
- uint8_t* dst_y_data = cpu_buffer;
- for (UINT row = 0; row < height; ++row) {
- memcpy(dst_y_data + row * width, src_y_data + row * layouts[0].Footprint.RowPitch, width);
- }
-
- // Copy UV plane
- uint8_t* src_uv_data = static_cast(mapped_data) + layouts[1].Offset;
- uint8_t* dst_uv_data = cpu_buffer + y_size;
- for (UINT row = 0; row < height / 2; ++row) {
- memcpy(dst_uv_data + row * width, src_uv_data + row * layouts[1].Footprint.RowPitch, width);
+ // Copy RGBA data (considering row pitch alignment)
+ uint8_t* src_ptr = static_cast(mapped_data);
+ for (uint32_t y = 0; y < height; y++) {
+ memcpy(rgba_data + y * width * 4,
+ src_ptr + y * layout.Footprint.RowPitch,
+ width * 4);
}
// Unmap and release readback buffer
readback_buffer->Unmap(0, nullptr);
readback_buffer->Release();
- return cpu_buffer;
+ return rgba_data;
}
diff --git a/vav2/platforms/windows/tests/red-surface-nvdec/src/D3D12Manager.h b/vav2/platforms/windows/tests/red-surface-nvdec/src/D3D12Manager.h
index 54eb541..041ed5b 100644
--- a/vav2/platforms/windows/tests/red-surface-nvdec/src/D3D12Manager.h
+++ b/vav2/platforms/windows/tests/red-surface-nvdec/src/D3D12Manager.h
@@ -26,6 +26,15 @@ public:
// Get D3D12 device
ID3D12Device* GetDevice() const { return m_device; }
+ // Get D3D12 command list (for GPU validation)
+ ID3D12GraphicsCommandList* GetCommandList() const { return m_command_list; }
+
+ // Execute command list and wait for completion
+ void ExecuteCommandListAndWait();
+
+ // Save RGBA texture to BMP file
+ bool SaveTextureToBMP(ID3D12Resource* texture, uint32_t width, uint32_t height, const char* filename);
+
private:
ID3D12Device* m_device;
ID3D12CommandQueue* m_command_queue;
diff --git a/vav2/platforms/windows/tests/red-surface-nvdec/src/D3D12PixelValidator.cpp b/vav2/platforms/windows/tests/red-surface-nvdec/src/D3D12PixelValidator.cpp
new file mode 100644
index 0000000..f556ec9
--- /dev/null
+++ b/vav2/platforms/windows/tests/red-surface-nvdec/src/D3D12PixelValidator.cpp
@@ -0,0 +1,386 @@
+#include "D3D12PixelValidator.h"
+#include "D3D12Manager.h"
+#include
+#include
+
+#pragma comment(lib, "d3dcompiler.lib")
+
+// Nothing to set up here: every member has an in-class initializer and real
+// setup happens in Initialize().
+D3D12PixelValidator::D3D12PixelValidator() = default;
+
+// Releases whatever the validator still holds by delegating to Cleanup().
+D3D12PixelValidator::~D3D12PixelValidator()
+{
+    Cleanup();
+}
+
+// Binds the validator to a device and builds everything validation needs:
+// the compiled shader, the root signature / pipeline state, and the counter
+// buffers.
+//
+// device - D3D12 device to use; the validator holds its own reference to it.
+//
+// Returns false if the device is null or any setup step fails. Objects created
+// before a failing step stay owned by the validator and are released by
+// Cleanup() (also run by the destructor and by the next Initialize()).
+bool D3D12PixelValidator::Initialize(ID3D12Device* device)
+{
+    if (!device) {
+        printf("[D3D12PixelValidator] Invalid device pointer\n");
+        return false;
+    }
+
+    // Take the new reference first, then drop anything a previous Initialize()
+    // left behind. Without this a second call would leak the old device
+    // reference and every resource created for it. The order also keeps the
+    // device alive when the same device is passed in again.
+    device->AddRef();
+    Cleanup();
+    m_device = device;
+
+    // Compile shader
+    // NOTE(review): the path is relative, so it presumably resolves against
+    // the process working directory - confirm how the test is launched.
+    if (!CompileShader(L"shaders\\StripeValidator.hlsl")) {
+        printf("[D3D12PixelValidator] Failed to compile shader\n");
+        return false;
+    }
+
+    // Create pipeline state
+    if (!CreatePipelineState()) {
+        printf("[D3D12PixelValidator] Failed to create pipeline state\n");
+        return false;
+    }
+
+    // Create buffers
+    if (!CreateBuffers()) {
+        printf("[D3D12PixelValidator] Failed to create buffers\n");
+        return false;
+    }
+
+    printf("[D3D12PixelValidator] Initialized successfully\n");
+    return true;
+}
+
+// Compiles the "ValidateStripePattern" entry point of the given HLSL file for
+// the cs_5_0 target and stores the bytecode in m_shaderBlob.
+//
+// shader_path - path of the HLSL source file.
+//
+// Returns false if the path is null or compilation fails; compiler messages
+// are printed in both the failure and the success (warnings) case.
+bool D3D12PixelValidator::CompileShader(const wchar_t* shader_path)
+{
+    if (!shader_path) {
+        printf("[D3D12PixelValidator] Invalid shader path\n");
+        return false;
+    }
+
+    // Drop bytecode from an earlier compile so the pointer is not overwritten
+    // (and leaked) below.
+    if (m_shaderBlob) {
+        m_shaderBlob->Release();
+        m_shaderBlob = nullptr;
+    }
+
+    UINT compileFlags = 0;
+#ifdef _DEBUG
+    // Debug builds keep debug info and skip optimization
+    compileFlags = D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION;
+#endif
+
+    ID3DBlob* errorBlob = nullptr;
+
+    HRESULT hr = D3DCompileFromFile(
+        shader_path,
+        nullptr,
+        D3D_COMPILE_STANDARD_FILE_INCLUDE,
+        "ValidateStripePattern",
+        "cs_5_0",
+        compileFlags,
+        0,
+        &m_shaderBlob,
+        &errorBlob
+    );
+
+    if (FAILED(hr)) {
+        if (errorBlob) {
+            // Length-bounded print: do not rely on the blob being terminated
+            printf("[D3D12PixelValidator] Shader compilation failed: %.*s\n",
+                   static_cast<int>(errorBlob->GetBufferSize()),
+                   static_cast<const char*>(errorBlob->GetBufferPointer()));
+            errorBlob->Release();
+        } else {
+            printf("[D3D12PixelValidator] Shader compilation failed: 0x%08X\n", static_cast<unsigned int>(hr));
+        }
+        return false;
+    }
+
+    // On success the message blob, if present, carries compiler warnings
+    if (errorBlob) {
+        printf("[D3D12PixelValidator] Shader compilation warnings: %.*s\n",
+               static_cast<int>(errorBlob->GetBufferSize()),
+               static_cast<const char*>(errorBlob->GetBufferPointer()));
+        errorBlob->Release();
+    }
+
+    return true;
+}
+
+// Builds the root signature and the compute pipeline state for the validation
+// shader. Root parameter layout:
+//   0 - descriptor table with one SRV (t0, input RGBA texture)
+//   1 - descriptor table with one UAV (u0, error count buffer)
+//   2 - four 32-bit root constants (b0: Width, Height, StripeWidth, Padding)
+//
+// Requires m_device and m_shaderBlob to be set. Returns false on any failure.
+bool D3D12PixelValidator::CreatePipelineState()
+{
+    if (!m_device || !m_shaderBlob) {
+        printf("[D3D12PixelValidator] Cannot create pipeline state before device and shader are ready\n");
+        return false;
+    }
+
+    // Create root signature
+    D3D12_ROOT_PARAMETER rootParams[3] = {};
+
+    // t0: Input RGBA texture
+    D3D12_DESCRIPTOR_RANGE srvRange = {};
+    srvRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
+    srvRange.NumDescriptors = 1;
+    srvRange.BaseShaderRegister = 0;
+    srvRange.RegisterSpace = 0;
+    srvRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND;
+
+    rootParams[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
+    rootParams[0].DescriptorTable.NumDescriptorRanges = 1;
+    rootParams[0].DescriptorTable.pDescriptorRanges = &srvRange;
+    rootParams[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
+
+    // u0: Output error count buffer
+    D3D12_DESCRIPTOR_RANGE uavRange = {};
+    uavRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
+    uavRange.NumDescriptors = 1;
+    uavRange.BaseShaderRegister = 0;
+    uavRange.RegisterSpace = 0;
+    uavRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND;
+
+    rootParams[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
+    rootParams[1].DescriptorTable.NumDescriptorRanges = 1;
+    rootParams[1].DescriptorTable.pDescriptorRanges = &uavRange;
+    rootParams[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
+
+    // b0: Validation parameters (inline constants)
+    rootParams[2].ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
+    rootParams[2].Constants.ShaderRegister = 0;
+    rootParams[2].Constants.RegisterSpace = 0;
+    rootParams[2].Constants.Num32BitValues = 4;  // Width, Height, StripeWidth, Padding
+    rootParams[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
+
+    D3D12_ROOT_SIGNATURE_DESC rootSigDesc = {};
+    rootSigDesc.NumParameters = 3;
+    rootSigDesc.pParameters = rootParams;
+    rootSigDesc.NumStaticSamplers = 0;
+    rootSigDesc.pStaticSamplers = nullptr;
+    rootSigDesc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE;
+
+    ID3DBlob* signatureBlob = nullptr;
+    ID3DBlob* errorBlob = nullptr;
+
+    HRESULT hr = D3D12SerializeRootSignature(
+        &rootSigDesc,
+        D3D_ROOT_SIGNATURE_VERSION_1,
+        &signatureBlob,
+        &errorBlob
+    );
+
+    if (FAILED(hr)) {
+        if (errorBlob) {
+            printf("[D3D12PixelValidator] Root signature serialization failed: %s\n",
+                   (const char*)errorBlob->GetBufferPointer());
+            errorBlob->Release();
+        } else {
+            // Report the failure even when no message blob was produced
+            printf("[D3D12PixelValidator] Root signature serialization failed: 0x%08X\n", static_cast<unsigned int>(hr));
+        }
+        if (signatureBlob) {
+            signatureBlob->Release();
+        }
+        return false;
+    }
+
+    // The message blob is no longer needed; release it so it cannot leak when
+    // serialization succeeded but still returned diagnostics.
+    if (errorBlob) {
+        errorBlob->Release();
+        errorBlob = nullptr;
+    }
+
+    hr = m_device->CreateRootSignature(
+        0,
+        signatureBlob->GetBufferPointer(),
+        signatureBlob->GetBufferSize(),
+        IID_PPV_ARGS(&m_rootSignature)
+    );
+
+    signatureBlob->Release();
+
+    if (FAILED(hr)) {
+        printf("[D3D12PixelValidator] Failed to create root signature: 0x%08X\n", static_cast<unsigned int>(hr));
+        return false;
+    }
+
+    // Create compute pipeline state
+    D3D12_COMPUTE_PIPELINE_STATE_DESC psoDesc = {};
+    psoDesc.pRootSignature = m_rootSignature;
+    psoDesc.CS.pShaderBytecode = m_shaderBlob->GetBufferPointer();
+    psoDesc.CS.BytecodeLength = m_shaderBlob->GetBufferSize();
+
+    hr = m_device->CreateComputePipelineState(&psoDesc, IID_PPV_ARGS(&m_pipelineState));
+
+    if (FAILED(hr)) {
+        printf("[D3D12PixelValidator] Failed to create pipeline state: 0x%08X\n", static_cast<unsigned int>(hr));
+        return false;
+    }
+
+    return true;
+}
+
+bool D3D12PixelValidator::CreateBuffers()
+{
+ // Create GPU buffer for error count (UAV)
+ D3D12_HEAP_PROPERTIES heapProps = {};
+ heapProps.Type = D3D12_HEAP_TYPE_DEFAULT;
+
+ D3D12_RESOURCE_DESC bufferDesc = {};
+ bufferDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
+ bufferDesc.Width = sizeof(uint32_t);
+ bufferDesc.Height = 1;
+ bufferDesc.DepthOrArraySize = 1;
+ bufferDesc.MipLevels = 1;
+ bufferDesc.Format = DXGI_FORMAT_UNKNOWN;
+ bufferDesc.SampleDesc.Count = 1;
+ bufferDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
+ bufferDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
+
+ HRESULT hr = m_device->CreateCommittedResource(
+ &heapProps,
+ D3D12_HEAP_FLAG_NONE,
+ &bufferDesc,
+ D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
+ nullptr,
+ IID_PPV_ARGS(&m_errorCountBuffer)
+ );
+
+ if (FAILED(hr)) {
+ printf("[D3D12PixelValidator] Failed to create error count buffer: 0x%08X\n", hr);
+ return false;
+ }
+
+ // Create readback buffer (CPU accessible)
+ heapProps.Type = D3D12_HEAP_TYPE_READBACK;
+ bufferDesc.Flags = D3D12_RESOURCE_FLAG_NONE;
+
+ hr = m_device->CreateCommittedResource(
+ &heapProps,
+ D3D12_HEAP_FLAG_NONE,
+ &bufferDesc,
+ D3D12_RESOURCE_STATE_COPY_DEST,
+ nullptr,
+ IID_PPV_ARGS(&m_readbackBuffer)
+ );
+
+ if (FAILED(hr)) {
+ printf("[D3D12PixelValidator] Failed to create readback buffer: 0x%08X\n", hr);
+ return false;
+ }
+
+ return true;
+}
+
+// Runs the stripe-validation compute shader over an RGBA texture and returns
+// the number of mismatching pixels.
+//
+// d3dManager       - supplies the command list and executes it synchronously
+// rgbaTexture      - texture to validate; viewed as DXGI_FORMAT_R8G8B8A8_UNORM
+//                    NOTE(review): assumes the texture was created with a
+//                    compatible format and is in a state that allows
+//                    compute-shader reads - confirm against CreateRGBATexture.
+// width, height    - frame size in pixels (must be non-zero)
+// stripe_width     - stripe width in pixels (must be non-zero)
+// error_count_host - receives the mismatch count on success
+//
+// Returns true if the validation ran and the count was read back; false on
+// invalid arguments, an uninitialized validator, or a D3D12 failure.
+bool D3D12PixelValidator::ValidateStripePattern(
+    D3D12Manager* d3dManager,
+    ID3D12Resource* rgbaTexture,
+    uint32_t width,
+    uint32_t height,
+    uint32_t stripe_width,
+    unsigned int* error_count_host)
+{
+    if (!d3dManager || !rgbaTexture || !error_count_host) {
+        printf("[D3D12PixelValidator] Invalid parameters\n");
+        return false;
+    }
+
+    if (!m_device || !m_rootSignature || !m_pipelineState || !m_errorCountBuffer || !m_readbackBuffer) {
+        printf("[D3D12PixelValidator] Validator is not initialized\n");
+        return false;
+    }
+
+    // A zero stripe width would make the shader divide by zero, and an empty
+    // frame has nothing to validate.
+    if (width == 0 || height == 0 || stripe_width == 0) {
+        printf("[D3D12PixelValidator] Invalid frame or stripe dimensions\n");
+        return false;
+    }
+
+    ID3D12GraphicsCommandList* commandList = d3dManager->GetCommandList();
+    if (!commandList) {
+        printf("[D3D12PixelValidator] Command list is not available\n");
+        return false;
+    }
+
+    // Shader-visible heap: slot 0 = SRV (RGBA), slot 1 = UAV (error count)
+    D3D12_DESCRIPTOR_HEAP_DESC heapDesc = {};
+    heapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
+    heapDesc.NumDescriptors = 2;
+    heapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
+
+    ID3D12DescriptorHeap* descriptorHeap = nullptr;
+    HRESULT hr = m_device->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&descriptorHeap));
+    if (FAILED(hr)) {
+        printf("[D3D12PixelValidator] Failed to create descriptor heap: 0x%08X\n", static_cast<unsigned int>(hr));
+        return false;
+    }
+
+    // CPU-only heap: ClearUnorderedAccessViewUint requires its CPU handle to
+    // come from a heap that is NOT shader-visible.
+    D3D12_DESCRIPTOR_HEAP_DESC clearHeapDesc = {};
+    clearHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
+    clearHeapDesc.NumDescriptors = 1;
+    clearHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
+
+    ID3D12DescriptorHeap* clearHeap = nullptr;
+    hr = m_device->CreateDescriptorHeap(&clearHeapDesc, IID_PPV_ARGS(&clearHeap));
+    if (FAILED(hr)) {
+        printf("[D3D12PixelValidator] Failed to create descriptor heap: 0x%08X\n", static_cast<unsigned int>(hr));
+        descriptorHeap->Release();
+        return false;
+    }
+
+    const UINT descriptorSize = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
+
+    const D3D12_CPU_DESCRIPTOR_HANDLE srvCpuHandle = descriptorHeap->GetCPUDescriptorHandleForHeapStart();
+    D3D12_CPU_DESCRIPTOR_HANDLE uavCpuHandle = srvCpuHandle;
+    uavCpuHandle.ptr += descriptorSize;
+
+    const D3D12_GPU_DESCRIPTOR_HANDLE srvGpuHandle = descriptorHeap->GetGPUDescriptorHandleForHeapStart();
+    D3D12_GPU_DESCRIPTOR_HANDLE uavGpuHandle = srvGpuHandle;
+    uavGpuHandle.ptr += descriptorSize;
+
+    const D3D12_CPU_DESCRIPTOR_HANDLE clearCpuHandle = clearHeap->GetCPUDescriptorHandleForHeapStart();
+
+    // Create SRV for RGBA texture
+    D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
+    srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
+    srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
+    srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
+    srvDesc.Texture2D.MipLevels = 1;
+
+    m_device->CreateShaderResourceView(rgbaTexture, &srvDesc, srvCpuHandle);
+
+    // Create the error count UAV (raw view over the single 32-bit counter) in
+    // both heaps: one descriptor for the shader, one for the clear.
+    // NOTE(review): a raw view pairs with a byte-address buffer declaration at
+    // u0 in the shader - confirm against shaders/StripeValidator.hlsl.
+    D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {};
+    uavDesc.Format = DXGI_FORMAT_R32_TYPELESS;
+    uavDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
+    uavDesc.Buffer.FirstElement = 0;
+    uavDesc.Buffer.NumElements = 1;
+    uavDesc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW;
+
+    m_device->CreateUnorderedAccessView(m_errorCountBuffer, nullptr, &uavDesc, uavCpuHandle);
+    m_device->CreateUnorderedAccessView(m_errorCountBuffer, nullptr, &uavDesc, clearCpuHandle);
+
+    // The heap must be bound before the clear: the GPU handle passed to
+    // ClearUnorderedAccessViewUint has to live in the currently set heap.
+    ID3D12DescriptorHeap* heaps[] = {descriptorHeap};
+    commandList->SetDescriptorHeaps(1, heaps);
+
+    // Clear error count to 0
+    const UINT clearValues[4] = {0, 0, 0, 0};
+    commandList->ClearUnorderedAccessViewUint(
+        uavGpuHandle,
+        clearCpuHandle,
+        m_errorCountBuffer,
+        clearValues,
+        0,
+        nullptr
+    );
+
+    // Make sure the clear has landed before the shader starts counting
+    D3D12_RESOURCE_BARRIER clearBarrier = {};
+    clearBarrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
+    clearBarrier.UAV.pResource = m_errorCountBuffer;
+    commandList->ResourceBarrier(1, &clearBarrier);
+
+    // Set pipeline state and root signature
+    commandList->SetComputeRootSignature(m_rootSignature);
+    commandList->SetPipelineState(m_pipelineState);
+
+    // Bind descriptor tables
+    commandList->SetComputeRootDescriptorTable(0, srvGpuHandle);  // SRV (RGBA)
+    commandList->SetComputeRootDescriptorTable(1, uavGpuHandle);  // UAV (error count)
+
+    // Set validation parameters as root constants
+    const UINT params[4] = {width, height, stripe_width, 0};
+    commandList->SetComputeRoot32BitConstants(2, 4, params, 0);
+
+    // Dispatch compute shader (16x16 thread groups, rounded up to cover the frame)
+    const UINT dispatchX = (width + 15) / 16;
+    const UINT dispatchY = (height + 15) / 16;
+    commandList->Dispatch(dispatchX, dispatchY, 1);
+
+    // CopyResource needs the source in COPY_SOURCE state; the transition also
+    // orders the copy after all shader writes to the counter.
+    D3D12_RESOURCE_BARRIER transition = {};
+    transition.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
+    transition.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
+    transition.Transition.pResource = m_errorCountBuffer;
+    transition.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
+    transition.Transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
+    transition.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE;
+    commandList->ResourceBarrier(1, &transition);
+
+    // Copy error count from GPU buffer to readback buffer
+    commandList->CopyResource(m_readbackBuffer, m_errorCountBuffer);
+
+    // Return the counter to the state it was created in
+    transition.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE;
+    transition.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
+    commandList->ResourceBarrier(1, &transition);
+
+    // Execute command list and wait for GPU to finish
+    d3dManager->ExecuteCommandListAndWait();
+
+    // The GPU is idle now, so the per-call descriptor heaps can go
+    clearHeap->Release();
+    descriptorHeap->Release();
+
+    // Map readback buffer and copy the counter to the host
+    const D3D12_RANGE readRange = {0, sizeof(uint32_t)};
+    void* mappedData = nullptr;
+    hr = m_readbackBuffer->Map(0, &readRange, &mappedData);
+    if (FAILED(hr) || !mappedData) {
+        printf("[D3D12PixelValidator] Failed to map readback buffer: 0x%08X\n", static_cast<unsigned int>(hr));
+        return false;
+    }
+
+    *error_count_host = *static_cast<const uint32_t*>(mappedData);
+
+    // Empty written range: the CPU only read from the buffer
+    const D3D12_RANGE writtenRange = {0, 0};
+    m_readbackBuffer->Unmap(0, &writtenRange);
+
+    printf("[D3D12PixelValidator] Validation complete: %u pixel errors\n", *error_count_host);
+    return true;
+}
+
+// Releases every COM object the validator owns and nulls the pointers, so the
+// call is safe to repeat (the destructor calls it as well).
+void D3D12PixelValidator::Cleanup()
+{
+    // Release a held interface, if any, and forget the pointer.
+    const auto releaseAndClear = [](auto*& object) {
+        if (object) {
+            object->Release();
+            object = nullptr;
+        }
+    };
+
+    // Buffers first, then pipeline objects, shader bytecode, and the device last.
+    releaseAndClear(m_readbackBuffer);
+    releaseAndClear(m_errorCountBuffer);
+    releaseAndClear(m_pipelineState);
+    releaseAndClear(m_rootSignature);
+    releaseAndClear(m_shaderBlob);
+    releaseAndClear(m_device);
+}
diff --git a/vav2/platforms/windows/tests/red-surface-nvdec/src/D3D12PixelValidator.h b/vav2/platforms/windows/tests/red-surface-nvdec/src/D3D12PixelValidator.h
new file mode 100644
index 0000000..78e45ab
--- /dev/null
+++ b/vav2/platforms/windows/tests/red-surface-nvdec/src/D3D12PixelValidator.h
@@ -0,0 +1,68 @@
+#pragma once
+
+#include
+#include
+
+// Forward declaration
+class D3D12Manager;
+
+// D3D12 Compute Shader-based pixel validator
+// GPU-only validation without CPU readback
+class D3D12PixelValidator {
+public:
+    D3D12PixelValidator();
+    ~D3D12PixelValidator();
+
+    // Non-copyable: the object holds COM references that are released exactly
+    // once in Cleanup().
+    D3D12PixelValidator(const D3D12PixelValidator&) = delete;
+    D3D12PixelValidator& operator=(const D3D12PixelValidator&) = delete;
+
+    // Binds the validator to a D3D12 device and creates the shader, pipeline
+    // and buffers. Returns false on failure.
+    bool Initialize(ID3D12Device* device);
+
+    // Runs the stripe-pattern check on the GPU for an RGBA texture; only the
+    // resulting error count is copied back to the CPU.
+    // Returns true if the validation executed successfully.
+    // error_count_host: output - number of mismatched pixels
+    bool ValidateStripePattern(
+        D3D12Manager* d3dManager,
+        ID3D12Resource* rgbaTexture,
+        uint32_t width,
+        uint32_t height,
+        uint32_t stripe_width,
+        unsigned int* error_count_host
+    );
+
+    // Releases all owned resources; also invoked by the destructor.
+    void Cleanup();
+
+private:
+    // Compiles the HLSL compute shader at shader_path into m_shaderBlob
+    bool CompileShader(const wchar_t* shader_path);
+
+    // Creates the root signature and the compute pipeline state
+    bool CreatePipelineState();
+
+    // Creates the error count buffer and its readback counterpart
+    bool CreateBuffers();
+
+    ID3D12Device* m_device{nullptr};
+    ID3D12RootSignature* m_rootSignature{nullptr};
+    ID3D12PipelineState* m_pipelineState{nullptr};
+
+    // GPU buffer holding the error count (a single uint32_t)
+    ID3D12Resource* m_errorCountBuffer{nullptr};
+
+    // CPU-mappable buffer the error count is copied into
+    ID3D12Resource* m_readbackBuffer{nullptr};
+
+    // Compiled shader bytecode
+    ID3DBlob* m_shaderBlob{nullptr};
+
+    // Layout of the shader's validation parameters (four 32-bit values)
+    struct ValidationParams {
+        uint32_t Width;
+        uint32_t Height;
+        uint32_t StripeWidth;
+        uint32_t Padding;
+    };
+};
diff --git a/vav2/platforms/windows/tests/red-surface-nvdec/src/PixelVerifier.cpp b/vav2/platforms/windows/tests/red-surface-nvdec/src/PixelVerifier.cpp
deleted file mode 100644
index 68f846c..0000000
--- a/vav2/platforms/windows/tests/red-surface-nvdec/src/PixelVerifier.cpp
+++ /dev/null
@@ -1,103 +0,0 @@
-#include "PixelVerifier.h"
-#include
-#include
-
-PixelVerifier::PixelVerifier()
-{
-}
-
-PixelVerifier::~PixelVerifier()
-{
-}
-
-RGB PixelVerifier::ConvertNV12ToRGB(uint8_t y, uint8_t u, uint8_t v)
-{
- // YUV to RGB conversion (ITU-R BT.709)
- // R = Y + 1.5748 * (V - 128)
- // G = Y - 0.1873 * (U - 128) - 0.4681 * (V - 128)
- // B = Y + 1.8556 * (U - 128)
-
- int c = y - 16;
- int d = u - 128;
- int e = v - 128;
-
- int r = (298 * c + 409 * e + 128) >> 8;
- int g = (298 * c - 100 * d - 208 * e + 128) >> 8;
- int b = (298 * c + 516 * d + 128) >> 8;
-
- RGB result;
- result.r = static_cast(std::clamp(r, 0, 255));
- result.g = static_cast(std::clamp(g, 0, 255));
- result.b = static_cast(std::clamp(b, 0, 255));
-
- return result;
-}
-
-bool PixelVerifier::IsBlackPixel(const RGB& pixel, int tolerance)
-{
- // Black: R=0, G=0, B=0 (with tolerance)
- return (pixel.r <= tolerance && pixel.g <= tolerance && pixel.b <= tolerance);
-}
-
-bool PixelVerifier::IsRedPixel(const RGB& pixel, int tolerance)
-{
- // Red: R=255, G=0, B=0 (with tolerance)
- return (pixel.r >= (255 - tolerance) && pixel.g <= tolerance && pixel.b <= tolerance);
-}
-
-bool PixelVerifier::VerifyStripePattern(const uint8_t* nv12_data, uint32_t width, uint32_t height)
-{
- // NV12 format:
- // - Y plane: width * height bytes
- // - UV plane: width * (height/2) bytes (interleaved U and V)
-
- const uint8_t* y_plane = nv12_data;
- const uint8_t* uv_plane = nv12_data + (width * height);
-
- // Check first row (representative sample)
- int error_count = 0;
- const int max_errors_to_print = 5;
-
- for (uint32_t x = 0; x < width; ++x) {
- // Calculate stripe index (8 pixels per stripe)
- int stripe_index = x / 8;
- bool should_be_red = (stripe_index % 2) == 1;
-
- // Get Y value for this pixel
- uint8_t y = y_plane[x];
-
- // Get U and V values (UV is subsampled 2x horizontally)
- uint32_t uv_x = (x / 2) * 2;
- uint8_t u = uv_plane[uv_x];
- uint8_t v = uv_plane[uv_x + 1];
-
- // Convert to RGB
- RGB pixel = ConvertNV12ToRGB(y, u, v);
-
- // Verify pixel color
- if (should_be_red) {
- if (!IsRedPixel(pixel)) {
- if (error_count < max_errors_to_print) {
- printf(" FAIL at X=%d: Expected red, got RGB(%d,%d,%d)\n",
- x, pixel.r, pixel.g, pixel.b);
- }
- error_count++;
- }
- }
- else {
- if (!IsBlackPixel(pixel)) {
- if (error_count < max_errors_to_print) {
- printf(" FAIL at X=%d: Expected black, got RGB(%d,%d,%d)\n",
- x, pixel.r, pixel.g, pixel.b);
- }
- error_count++;
- }
- }
- }
-
- if (error_count > max_errors_to_print) {
- printf(" ... and %d more errors\n", error_count - max_errors_to_print);
- }
-
- return (error_count == 0);
-}
diff --git a/vav2/platforms/windows/tests/red-surface-nvdec/src/PixelVerifier.h b/vav2/platforms/windows/tests/red-surface-nvdec/src/PixelVerifier.h
deleted file mode 100644
index 6b30a7e..0000000
--- a/vav2/platforms/windows/tests/red-surface-nvdec/src/PixelVerifier.h
+++ /dev/null
@@ -1,29 +0,0 @@
-#pragma once
-
-#include
-
-struct RGB
-{
- uint8_t r;
- uint8_t g;
- uint8_t b;
-};
-
-class PixelVerifier
-{
-public:
- PixelVerifier();
- ~PixelVerifier();
-
- // Verify 8-pixel stripe pattern (black-red-black-red...)
- // Returns true if all pixels match expected pattern
- bool VerifyStripePattern(const uint8_t* nv12_data, uint32_t width, uint32_t height);
-
-private:
- // Convert NV12 to RGB (single pixel)
- RGB ConvertNV12ToRGB(uint8_t y, uint8_t u, uint8_t v);
-
- // Check if pixel is within tolerance of expected color
- bool IsBlackPixel(const RGB& pixel, int tolerance = 50);
- bool IsRedPixel(const RGB& pixel, int tolerance = 50);
-};
diff --git a/vav2/platforms/windows/tests/red-surface-nvdec/src/main.cpp b/vav2/platforms/windows/tests/red-surface-nvdec/src/main.cpp
index 3ccbc1c..2988166 100644
--- a/vav2/platforms/windows/tests/red-surface-nvdec/src/main.cpp
+++ b/vav2/platforms/windows/tests/red-surface-nvdec/src/main.cpp
@@ -4,14 +4,13 @@
#include
#include
#include "D3D12Manager.h"
-#include "PixelVerifier.h"
+#include "D3D12PixelValidator.h"
struct FrameTask {
int frame_index;
ID3D12Resource* texture;
VavCoreVideoFrame frame;
VavCoreResult result;
- uint8_t* cpu_buffer;
};
int main(int argc, char* argv[])
@@ -112,11 +111,21 @@ int main(int argc, char* argv[])
printf("[RedSurfaceNVDECTest] Decoder: NVDEC (explicit)\n");
printf("[RedSurfaceNVDECTest] Surface: D3D12\n");
- // Step 7: Create PixelVerifier
- PixelVerifier verifier;
+ // Step 7: Create D3D12PixelValidator
+ printf("\n[Step 7] Creating D3D12 GPU validator...\n");
+ D3D12PixelValidator validator;
+ if (!validator.Initialize(d3d12.GetDevice())) {
+ printf("[ERROR] Failed to initialize D3D12PixelValidator\n");
+ vavcore_close_file(player);
+ vavcore_destroy_player(player);
+ d3d12.Cleanup();
+ vavcore_cleanup();
+ return 1;
+ }
+ printf("[OK] D3D12PixelValidator initialized\n");
// Step 8: Continuous frame decoding (pipeline priming approach)
- printf("\n[Step 7] Decoding and verifying frames (continuous decode calls)...\n\n");
+ printf("\n[Step 8] Decoding and verifying frames (GPU validation)...\n\n");
printf("[INFO] NVDEC pipeline priming: Calling decode continuously\n");
printf("[INFO] First 2 frames will be skipped for verification (priming frames)\n\n");
@@ -124,14 +133,14 @@ int main(int argc, char* argv[])
std::vector frame_tasks(MAX_FRAMES);
int decode_errors = 0;
+ const uint32_t STRIPE_WIDTH = 4; // 4-pixel wide stripes (Red/Black alternating)
- // Continuous decode loop - keep calling DecodeToSurface
- // NVDEC will pipeline internally and return frames in order
+ // Process frames one-by-one to avoid memory pressure
for (int i = 0; i < MAX_FRAMES && !vavcore_is_end_of_file(player); i++) {
FrameTask& task = frame_tasks[i];
task.frame_index = i;
- // Create RGBA texture for CUDA interop
+ // Create RGBA texture for NVDEC output (CUDA NV12ToRGBA conversion)
task.texture = d3d12.CreateRGBATexture(metadata.width, metadata.height);
if (!task.texture) {
printf("[ERROR] Failed to create texture for frame %d\n", i);
@@ -140,8 +149,7 @@ int main(int argc, char* argv[])
continue;
}
- // Decode frame to D3D12 surface
- // This will block and wait (pending) until the frame is ready
+ // Decode frame to D3D12 surface (RGBA format after CUDA conversion)
task.result = vavcore_decode_to_surface(
player,
VAVCORE_SURFACE_D3D12_RESOURCE,
@@ -157,22 +165,32 @@ int main(int argc, char* argv[])
continue;
}
- // Readback D3D12 texture to CPU memory
- task.cpu_buffer = d3d12.ReadbackTexture(task.texture, metadata.width, metadata.height);
- if (!task.cpu_buffer) {
- printf("Frame %3d: Readback failed\n", i);
- task.texture->Release();
- task.texture = nullptr;
- decode_errors++;
- continue;
- }
+ printf("Frame %3d: Decoded successfully (RGBA)\n", i);
- printf("Frame %3d: Decoded successfully\n", i);
+ // Save first 5 frames as BMP for inspection (one at a time to reduce memory pressure)
+ if (i < 5) {
+ char filename[256];
+ sprintf_s(filename, sizeof(filename), "frame_%03d.bmp", i);
+ bool saved = d3d12.SaveTextureToBMP(task.texture, metadata.width, metadata.height, filename);
+
+ // If save failed, try waiting a bit and retry once
+ if (!saved) {
+ printf("Frame %3d: BMP save failed, waiting 500ms and retrying...\n", i);
+ Sleep(500);
+ saved = d3d12.SaveTextureToBMP(task.texture, metadata.width, metadata.height, filename);
+ if (!saved) {
+ printf("Frame %3d: BMP save failed after retry\n", i);
+ }
+ }
+
+ // Wait a bit before next frame to avoid memory pressure
+ Sleep(100);
+ }
}
- printf("\n[Step 8] Verifying decoded frames...\n\n");
+ printf("\n[Step 9] Verifying decoded frames (GPU compute shader)...\n\n");
- // Verify frames in order
+ // Verify frames in order using GPU compute shader
int passed_frames = 0;
int failed_frames = 0;
int total_verified = 0;
@@ -181,31 +199,39 @@ int main(int argc, char* argv[])
FrameTask& task = frame_tasks[i];
// Skip if decode failed
- if (task.result != VAVCORE_SUCCESS || !task.cpu_buffer) {
+ if (task.result != VAVCORE_SUCCESS || !task.texture) {
continue;
}
// Skip first 2 frames (NVDEC priming)
if (i < 2) {
printf("Frame %3d: PRIMING (skipped verification)\n", i);
- delete[] task.cpu_buffer;
- task.cpu_buffer = nullptr;
continue;
}
- // Verify stripe pattern
- bool passed = verifier.VerifyStripePattern(task.cpu_buffer, metadata.width, metadata.height);
- if (passed) {
- printf("Frame %3d: PASS\n", i);
+ // GPU validation (only 4 bytes copied to CPU!)
+ unsigned int error_count = 0;
+ bool validation_success = validator.ValidateStripePattern(
+ &d3d12,
+ task.texture,
+ metadata.width,
+ metadata.height,
+ STRIPE_WIDTH,
+ &error_count
+ );
+
+ if (!validation_success) {
+ printf("Frame %3d: GPU validation failed\n", i);
+ failed_frames++;
+ } else if (error_count == 0) {
+ printf("Frame %3d: PASS (0 pixel errors)\n", i);
passed_frames++;
} else {
- printf("Frame %3d: FAIL\n", i);
+ printf("Frame %3d: FAIL (%u pixel errors)\n", i, error_count);
failed_frames++;
}
total_verified++;
- delete[] task.cpu_buffer;
- task.cpu_buffer = nullptr;
}
// Cleanup textures
@@ -213,11 +239,11 @@ int main(int argc, char* argv[])
if (task.texture) {
task.texture->Release();
}
- if (task.cpu_buffer) {
- delete[] task.cpu_buffer;
- }
}
+ // Cleanup validator
+ validator.Cleanup();
+
// Step 9: Print results
printf("\n[RedSurfaceNVDECTest] Results:\n");
printf(" Total frames decoded: %d\n", MAX_FRAMES - decode_errors);