diff --git a/.claude/settings.local.json b/.claude/settings.local.json index f44c25a..9c659a1 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -106,7 +106,9 @@ "Read(//c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v13.0/lib/x64/**)", "Bash(./compile_kernel.bat)", "Bash(\"./compile_validation_kernel.bat\")", - "Bash(./compile_validation_kernel.bat)" + "Bash(./compile_validation_kernel.bat)", + "Bash(\"./bin/Debug/RedSurfaceNVDECTest.exe\" \"D:/Project/video-av1/sample/test_720p_stripe.webm\")", + "Bash(\"./bin/Debug/RedSurfaceNVDECTest.exe\" \"D:/Project/video-av1/sample/test_1080p_stripe.webm\")" ], "deny": [], "ask": [] diff --git a/vav2/platforms/windows/tests/red-surface-nvdec/RedSurfaceNVDECTest.vcxproj b/vav2/platforms/windows/tests/red-surface-nvdec/RedSurfaceNVDECTest.vcxproj index b7ce7db..8b8ab80 100644 --- a/vav2/platforms/windows/tests/red-surface-nvdec/RedSurfaceNVDECTest.vcxproj +++ b/vav2/platforms/windows/tests/red-surface-nvdec/RedSurfaceNVDECTest.vcxproj @@ -42,7 +42,7 @@ Console true $(ProjectDir)..\..\vavcore\lib - VavCore-debug.lib;d3d12.lib;dxgi.lib;kernel32.lib;user32.lib;%(AdditionalDependencies) + VavCore-debug.lib;d3d12.lib;dxgi.lib;d3dcompiler.lib;kernel32.lib;user32.lib;%(AdditionalDependencies) echo Copying VavCore DLL... @@ -53,11 +53,11 @@ echo Done. - + - + diff --git a/vav2/platforms/windows/tests/red-surface-nvdec/shaders/StripeValidator.hlsl b/vav2/platforms/windows/tests/red-surface-nvdec/shaders/StripeValidator.hlsl new file mode 100644 index 0000000..7e2c0f8 --- /dev/null +++ b/vav2/platforms/windows/tests/red-surface-nvdec/shaders/StripeValidator.hlsl @@ -0,0 +1,52 @@ +// D3D12 Compute Shader for GPU-based stripe pattern validation +// No CPU readback needed - only error count is copied back + +// Input RGBA texture (from NVDEC CUDA conversion) +Texture2D RGBATexture : register(t0); + +// Output error count buffer +RWStructuredBuffer ErrorCount : register(u0); + +// Validation parameters +cbuffer ValidationParams : register(b0) +{ + uint Width; // Frame width + uint Height; // Frame height + uint StripeWidth; // Width of each color stripe + uint Padding; // Padding for alignment +}; + +[numthreads(16, 16, 1)] +void ValidateStripePattern(uint3 DTid : SV_DispatchThreadID) +{ + // Bounds check + if (DTid.x >= Width || DTid.y >= Height) + return; + + // Sample RGBA texture + float4 rgba = RGBATexture[DTid.xy]; + float3 rgb = rgba.rgb; + + // Calculate expected color based on 4-pixel stripe pattern (Red/Black alternating) + uint stripe_index = DTid.x / StripeWidth; + bool is_red_stripe = (stripe_index % 2 == 0); + + bool is_valid = false; + + if (is_red_stripe) + { + // Red stripe: R > 200/255 (0.78), G < 50/255 (0.2), B < 50/255 (0.2) + is_valid = (rgb.r > 0.78f && rgb.g < 0.2f && rgb.b < 0.2f); + } + else + { + // Black stripe: ALL < 50/255 (0.2) + is_valid = (rgb.r < 0.2f && rgb.g < 0.2f && rgb.b < 0.2f); + } + + // Increment error count if pixel doesn't match expected pattern + if (!is_valid) + { + InterlockedAdd(ErrorCount[0], 1); + } +} diff --git a/vav2/platforms/windows/tests/red-surface-nvdec/src/D3D12Manager.cpp b/vav2/platforms/windows/tests/red-surface-nvdec/src/D3D12Manager.cpp index 8ec7362..cfaf10e 100644 --- a/vav2/platforms/windows/tests/red-surface-nvdec/src/D3D12Manager.cpp +++ b/vav2/platforms/windows/tests/red-surface-nvdec/src/D3D12Manager.cpp @@ -177,6 +177,91 @@ void D3D12Manager::WaitForGPU() } } +void D3D12Manager::ExecuteCommandListAndWait() +{ + // Close command list + m_command_list->Close(); + + // Execute command list + ID3D12CommandList* cmdLists[] = { m_command_list }; + m_command_queue->ExecuteCommandLists(1, cmdLists); + + // Wait for GPU to finish + WaitForGPU(); + + // Reset command list for next use + m_command_allocator->Reset(); + m_command_list->Reset(m_command_allocator, nullptr); +} + +bool D3D12Manager::SaveTextureToBMP(ID3D12Resource* texture, uint32_t width, uint32_t height, const char* filename) +{ + if (!texture || !filename) { + printf("[D3D12Manager] Invalid parameters for SaveTextureToBMP\n"); + return false; + } + + // Readback texture to CPU + uint8_t* rgba_data = ReadbackTexture(texture, width, height); + if (!rgba_data) { + printf("[D3D12Manager] Failed to readback texture\n"); + return false; + } + + // BMP file header (14 bytes) + uint32_t file_size = 54 + (width * height * 4); + uint8_t bmp_header[54] = { + 'B', 'M', // Signature + (uint8_t)(file_size), (uint8_t)(file_size >> 8), (uint8_t)(file_size >> 16), (uint8_t)(file_size >> 24), // File size + 0, 0, 0, 0, // Reserved + 54, 0, 0, 0, // Data offset + 40, 0, 0, 0, // Info header size + (uint8_t)(width), (uint8_t)(width >> 8), (uint8_t)(width >> 16), (uint8_t)(width >> 24), // Width + (uint8_t)(height), (uint8_t)(height >> 8), (uint8_t)(height >> 16), (uint8_t)(height >> 24), // Height + 1, 0, // Planes + 32, 0, // Bits per pixel (32-bit RGBA) + 0, 0, 0, 0, // Compression (none) + (uint8_t)(width * height * 4), (uint8_t)((width * height * 4) >> 8), + (uint8_t)((width * height * 4) >> 16), (uint8_t)((width * height * 4) >> 24), // Image size + 0, 0, 0, 0, // X pixels per meter + 0, 0, 0, 0, // Y pixels per meter + 0, 0, 0, 0, // Colors used + 0, 0, 0, 0 // Important colors + }; + + // Open file + FILE* file = nullptr; + fopen_s(&file, filename, "wb"); + if (!file) { + printf("[D3D12Manager] Failed to open file: %s\n", filename); + delete[] rgba_data; + return false; + } + + // Write BMP header + fwrite(bmp_header, 1, 54, file); + + // Convert RGBA to BGRA for BMP and flip vertically + for (int y = height - 1; y >= 0; y--) { + for (uint32_t x = 0; x < width; x++) { + uint32_t src_idx = (y * width + x) * 4; + uint8_t bgra[4] = { + rgba_data[src_idx + 2], // B + rgba_data[src_idx + 1], // G + rgba_data[src_idx + 0], // R + rgba_data[src_idx + 3] // A + }; + fwrite(bgra, 1, 4, file); + } + } + + fclose(file); + delete[] rgba_data; + + printf("[D3D12Manager] Saved texture to: %s\n", filename); + return true; +} + ID3D12Resource* D3D12Manager::CreateNV12Texture(uint32_t width, uint32_t height) { D3D12_RESOURCE_DESC desc = {}; @@ -252,13 +337,13 @@ uint8_t* D3D12Manager::ReadbackTexture(ID3D12Resource* texture, uint32_t width, // Get texture description D3D12_RESOURCE_DESC desc = texture->GetDesc(); - // Calculate layout for both Y and UV planes - D3D12_PLACED_SUBRESOURCE_FOOTPRINT layouts[2]; - UINT num_rows[2] = { 0 }; - UINT64 row_sizes[2] = { 0 }; + // RGBA format has only 1 subresource + D3D12_PLACED_SUBRESOURCE_FOOTPRINT layout; + UINT num_rows = 0; + UINT64 row_size = 0; UINT64 total_bytes = 0; - m_device->GetCopyableFootprints(&desc, 0, 2, 0, layouts, num_rows, row_sizes, &total_bytes); + m_device->GetCopyableFootprints(&desc, 0, 1, 0, &layout, &num_rows, &row_size, &total_bytes); // Create readback buffer D3D12_HEAP_PROPERTIES readback_heap_props = {}; @@ -301,31 +386,18 @@ uint8_t* D3D12Manager::ReadbackTexture(ID3D12Resource* texture, uint32_t width, barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; m_command_list->ResourceBarrier(1, &barrier); - // Copy Y plane (subresource 0) - D3D12_TEXTURE_COPY_LOCATION src_y = {}; - src_y.pResource = texture; - src_y.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - src_y.SubresourceIndex = 0; + // Copy RGBA texture (single subresource) + D3D12_TEXTURE_COPY_LOCATION src = {}; + src.pResource = texture; + src.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + src.SubresourceIndex = 0; - D3D12_TEXTURE_COPY_LOCATION dst_y = {}; - dst_y.pResource = readback_buffer; - dst_y.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; - dst_y.PlacedFootprint = layouts[0]; + D3D12_TEXTURE_COPY_LOCATION dst = {}; + dst.pResource = readback_buffer; + dst.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + dst.PlacedFootprint = layout; - m_command_list->CopyTextureRegion(&dst_y, 0, 0, 0, &src_y, nullptr); - - // Copy UV plane (subresource 1) - D3D12_TEXTURE_COPY_LOCATION src_uv = {}; - src_uv.pResource = texture; - src_uv.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - src_uv.SubresourceIndex = 1; - - D3D12_TEXTURE_COPY_LOCATION dst_uv = {}; - dst_uv.pResource = readback_buffer; - dst_uv.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; - dst_uv.PlacedFootprint = layouts[1]; - - m_command_list->CopyTextureRegion(&dst_uv, 0, 0, 0, &src_uv, nullptr); + m_command_list->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); // Transition texture back to COMMON barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE; @@ -350,28 +422,20 @@ uint8_t* D3D12Manager::ReadbackTexture(ID3D12Resource* texture, uint32_t width, return nullptr; } - // Allocate CPU buffer (NV12 format: Y plane + UV plane) - uint32_t y_size = width * height; - uint32_t uv_size = width * (height / 2); - uint8_t* cpu_buffer = new uint8_t[y_size + uv_size]; + // Allocate CPU buffer for RGBA data + uint8_t* rgba_data = new uint8_t[width * height * 4]; - // Copy Y plane - uint8_t* src_y_data = static_cast(mapped_data) + layouts[0].Offset; - uint8_t* dst_y_data = cpu_buffer; - for (UINT row = 0; row < height; ++row) { - memcpy(dst_y_data + row * width, src_y_data + row * layouts[0].Footprint.RowPitch, width); - } - - // Copy UV plane - uint8_t* src_uv_data = static_cast(mapped_data) + layouts[1].Offset; - uint8_t* dst_uv_data = cpu_buffer + y_size; - for (UINT row = 0; row < height / 2; ++row) { - memcpy(dst_uv_data + row * width, src_uv_data + row * layouts[1].Footprint.RowPitch, width); + // Copy RGBA data (considering row pitch alignment) + uint8_t* src_ptr = static_cast(mapped_data); + for (uint32_t y = 0; y < height; y++) { + memcpy(rgba_data + y * width * 4, + src_ptr + y * layout.Footprint.RowPitch, + width * 4); } // Unmap and release readback buffer readback_buffer->Unmap(0, nullptr); readback_buffer->Release(); - return cpu_buffer; + return rgba_data; } diff --git a/vav2/platforms/windows/tests/red-surface-nvdec/src/D3D12Manager.h b/vav2/platforms/windows/tests/red-surface-nvdec/src/D3D12Manager.h index 54eb541..041ed5b 100644 --- a/vav2/platforms/windows/tests/red-surface-nvdec/src/D3D12Manager.h +++ b/vav2/platforms/windows/tests/red-surface-nvdec/src/D3D12Manager.h @@ -26,6 +26,15 @@ public: // Get D3D12 device ID3D12Device* GetDevice() const { return m_device; } + // Get D3D12 command list (for GPU validation) + ID3D12GraphicsCommandList* GetCommandList() const { return m_command_list; } + + // Execute command list and wait for completion + void ExecuteCommandListAndWait(); + + // Save RGBA texture to BMP file + bool SaveTextureToBMP(ID3D12Resource* texture, uint32_t width, uint32_t height, const char* filename); + private: ID3D12Device* m_device; ID3D12CommandQueue* m_command_queue; diff --git a/vav2/platforms/windows/tests/red-surface-nvdec/src/D3D12PixelValidator.cpp b/vav2/platforms/windows/tests/red-surface-nvdec/src/D3D12PixelValidator.cpp new file mode 100644 index 0000000..f556ec9 --- /dev/null +++ b/vav2/platforms/windows/tests/red-surface-nvdec/src/D3D12PixelValidator.cpp @@ -0,0 +1,386 @@ +#include "D3D12PixelValidator.h" +#include "D3D12Manager.h" +#include +#include + +#pragma comment(lib, "d3dcompiler.lib") + +D3D12PixelValidator::D3D12PixelValidator() +{ +} + +D3D12PixelValidator::~D3D12PixelValidator() +{ + Cleanup(); +} + +bool D3D12PixelValidator::Initialize(ID3D12Device* device) +{ + if (!device) { + printf("[D3D12PixelValidator] Invalid device pointer\n"); + return false; + } + + m_device = device; + m_device->AddRef(); + + // Compile shader + if (!CompileShader(L"shaders\\StripeValidator.hlsl")) { + printf("[D3D12PixelValidator] Failed to compile shader\n"); + return false; + } + + // Create pipeline state + if (!CreatePipelineState()) { + printf("[D3D12PixelValidator] Failed to create pipeline state\n"); + return false; + } + + // Create buffers + if (!CreateBuffers()) { + printf("[D3D12PixelValidator] Failed to create buffers\n"); + return false; + } + + printf("[D3D12PixelValidator] Initialized successfully\n"); + return true; +} + +bool D3D12PixelValidator::CompileShader(const wchar_t* shader_path) +{ + UINT compileFlags = 0; +#ifdef _DEBUG + compileFlags = D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION; +#endif + + ID3DBlob* errorBlob = nullptr; + + HRESULT hr = D3DCompileFromFile( + shader_path, + nullptr, + D3D_COMPILE_STANDARD_FILE_INCLUDE, + "ValidateStripePattern", + "cs_5_0", + compileFlags, + 0, + &m_shaderBlob, + &errorBlob + ); + + if (FAILED(hr)) { + if (errorBlob) { + printf("[D3D12PixelValidator] Shader compilation failed: %s\n", + (const char*)errorBlob->GetBufferPointer()); + errorBlob->Release(); + } else { + printf("[D3D12PixelValidator] Shader compilation failed: 0x%08X\n", hr); + } + return false; + } + + if (errorBlob) { + errorBlob->Release(); + } + + return true; +} + +bool D3D12PixelValidator::CreatePipelineState() +{ + // Create root signature + D3D12_ROOT_PARAMETER rootParams[3] = {}; + + // t0: Input RGBA texture + D3D12_DESCRIPTOR_RANGE srvRange = {}; + srvRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + srvRange.NumDescriptors = 1; + srvRange.BaseShaderRegister = 0; + srvRange.RegisterSpace = 0; + srvRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; + + rootParams[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + rootParams[0].DescriptorTable.NumDescriptorRanges = 1; + rootParams[0].DescriptorTable.pDescriptorRanges = &srvRange; + rootParams[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + + // u0: Output error count buffer + D3D12_DESCRIPTOR_RANGE uavRange = {}; + uavRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; + uavRange.NumDescriptors = 1; + uavRange.BaseShaderRegister = 0; + uavRange.RegisterSpace = 0; + uavRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; + + rootParams[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + rootParams[1].DescriptorTable.NumDescriptorRanges = 1; + rootParams[1].DescriptorTable.pDescriptorRanges = &uavRange; + rootParams[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + + // b0: Validation parameters (inline constants) + rootParams[2].ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; + rootParams[2].Constants.ShaderRegister = 0; + rootParams[2].Constants.RegisterSpace = 0; + rootParams[2].Constants.Num32BitValues = 4; // Width, Height, StripeWidth, Padding + rootParams[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + + D3D12_ROOT_SIGNATURE_DESC rootSigDesc = {}; + rootSigDesc.NumParameters = 3; + rootSigDesc.pParameters = rootParams; + rootSigDesc.NumStaticSamplers = 0; + rootSigDesc.pStaticSamplers = nullptr; + rootSigDesc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE; + + ID3DBlob* signatureBlob = nullptr; + ID3DBlob* errorBlob = nullptr; + + HRESULT hr = D3D12SerializeRootSignature( + &rootSigDesc, + D3D_ROOT_SIGNATURE_VERSION_1, + &signatureBlob, + &errorBlob + ); + + if (FAILED(hr)) { + if (errorBlob) { + printf("[D3D12PixelValidator] Root signature serialization failed: %s\n", + (const char*)errorBlob->GetBufferPointer()); + errorBlob->Release(); + } + return false; + } + + hr = m_device->CreateRootSignature( + 0, + signatureBlob->GetBufferPointer(), + signatureBlob->GetBufferSize(), + IID_PPV_ARGS(&m_rootSignature) + ); + + signatureBlob->Release(); + + if (FAILED(hr)) { + printf("[D3D12PixelValidator] Failed to create root signature: 0x%08X\n", hr); + return false; + } + + // Create compute pipeline state + D3D12_COMPUTE_PIPELINE_STATE_DESC psoDesc = {}; + psoDesc.pRootSignature = m_rootSignature; + psoDesc.CS.pShaderBytecode = m_shaderBlob->GetBufferPointer(); + psoDesc.CS.BytecodeLength = m_shaderBlob->GetBufferSize(); + + hr = m_device->CreateComputePipelineState(&psoDesc, IID_PPV_ARGS(&m_pipelineState)); + + if (FAILED(hr)) { + printf("[D3D12PixelValidator] Failed to create pipeline state: 0x%08X\n", hr); + return false; + } + + return true; +} + +bool D3D12PixelValidator::CreateBuffers() +{ + // Create GPU buffer for error count (UAV) + D3D12_HEAP_PROPERTIES heapProps = {}; + heapProps.Type = D3D12_HEAP_TYPE_DEFAULT; + + D3D12_RESOURCE_DESC bufferDesc = {}; + bufferDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + bufferDesc.Width = sizeof(uint32_t); + bufferDesc.Height = 1; + bufferDesc.DepthOrArraySize = 1; + bufferDesc.MipLevels = 1; + bufferDesc.Format = DXGI_FORMAT_UNKNOWN; + bufferDesc.SampleDesc.Count = 1; + bufferDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + bufferDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + + HRESULT hr = m_device->CreateCommittedResource( + &heapProps, + D3D12_HEAP_FLAG_NONE, + &bufferDesc, + D3D12_RESOURCE_STATE_UNORDERED_ACCESS, + nullptr, + IID_PPV_ARGS(&m_errorCountBuffer) + ); + + if (FAILED(hr)) { + printf("[D3D12PixelValidator] Failed to create error count buffer: 0x%08X\n", hr); + return false; + } + + // Create readback buffer (CPU accessible) + heapProps.Type = D3D12_HEAP_TYPE_READBACK; + bufferDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + + hr = m_device->CreateCommittedResource( + &heapProps, + D3D12_HEAP_FLAG_NONE, + &bufferDesc, + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(&m_readbackBuffer) + ); + + if (FAILED(hr)) { + printf("[D3D12PixelValidator] Failed to create readback buffer: 0x%08X\n", hr); + return false; + } + + return true; +} + +bool D3D12PixelValidator::ValidateStripePattern( + D3D12Manager* d3dManager, + ID3D12Resource* rgbaTexture, + uint32_t width, + uint32_t height, + uint32_t stripe_width, + unsigned int* error_count_host) +{ + if (!d3dManager || !rgbaTexture || !error_count_host) { + printf("[D3D12PixelValidator] Invalid parameters\n"); + return false; + } + + ID3D12GraphicsCommandList* commandList = d3dManager->GetCommandList(); + + // Create descriptor heap for SRV and UAV + D3D12_DESCRIPTOR_HEAP_DESC heapDesc = {}; + heapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + heapDesc.NumDescriptors = 2; // 1 SRV (RGBA) + 1 UAV (error count) + heapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + + ID3D12DescriptorHeap* descriptorHeap = nullptr; + HRESULT hr = m_device->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&descriptorHeap)); + if (FAILED(hr)) { + printf("[D3D12PixelValidator] Failed to create descriptor heap: 0x%08X\n", hr); + return false; + } + + UINT descriptorSize = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = descriptorHeap->GetCPUDescriptorHandleForHeapStart(); + D3D12_GPU_DESCRIPTOR_HANDLE gpuHandle = descriptorHeap->GetGPUDescriptorHandleForHeapStart(); + + // Create SRV for RGBA texture + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srvDesc.Texture2D.MipLevels = 1; + + m_device->CreateShaderResourceView(rgbaTexture, &srvDesc, cpuHandle); + + // Create UAV for error count buffer + cpuHandle.ptr += descriptorSize; + D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.Format = DXGI_FORMAT_R32_TYPELESS; + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; + uavDesc.Buffer.FirstElement = 0; + uavDesc.Buffer.NumElements = 1; + uavDesc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW; + + m_device->CreateUnorderedAccessView(m_errorCountBuffer, nullptr, &uavDesc, cpuHandle); + + // Clear error count to 0 + UINT clearValues[4] = {0, 0, 0, 0}; + D3D12_GPU_DESCRIPTOR_HANDLE uavGpuHandle = gpuHandle; + uavGpuHandle.ptr += descriptorSize * 1; // Skip 1 SRV + + commandList->ClearUnorderedAccessViewUint( + uavGpuHandle, + cpuHandle, + m_errorCountBuffer, + clearValues, + 0, + nullptr + ); + + // Set pipeline state and root signature + ID3D12DescriptorHeap* heaps[] = {descriptorHeap}; + commandList->SetDescriptorHeaps(1, heaps); + commandList->SetComputeRootSignature(m_rootSignature); + commandList->SetPipelineState(m_pipelineState); + + // Bind descriptor tables + D3D12_GPU_DESCRIPTOR_HANDLE srvTableHandle = gpuHandle; + D3D12_GPU_DESCRIPTOR_HANDLE uavTableHandle = gpuHandle; + uavTableHandle.ptr += descriptorSize * 1; + + commandList->SetComputeRootDescriptorTable(0, srvTableHandle); // SRV (RGBA) + commandList->SetComputeRootDescriptorTable(1, uavTableHandle); // UAV (error count) + + // Set validation parameters as root constants + UINT params[4] = {width, height, stripe_width, 0}; + commandList->SetComputeRoot32BitConstants(2, 4, params, 0); + + // Dispatch compute shader (16x16 thread groups) + UINT dispatchX = (width + 15) / 16; + UINT dispatchY = (height + 15) / 16; + commandList->Dispatch(dispatchX, dispatchY, 1); + + // Barrier: Wait for compute shader to finish + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; + barrier.UAV.pResource = m_errorCountBuffer; + commandList->ResourceBarrier(1, &barrier); + + // Copy error count from GPU buffer to readback buffer + commandList->CopyResource(m_readbackBuffer, m_errorCountBuffer); + + // Execute command list and wait for GPU to finish + d3dManager->ExecuteCommandListAndWait(); + + // Map readback buffer and copy to host + void* mappedData = nullptr; + hr = m_readbackBuffer->Map(0, nullptr, &mappedData); + if (FAILED(hr)) { + printf("[D3D12PixelValidator] Failed to map readback buffer: 0x%08X\n", hr); + descriptorHeap->Release(); + return false; + } + + *error_count_host = *reinterpret_cast(mappedData); + + m_readbackBuffer->Unmap(0, nullptr); + + // Cleanup descriptor heap + descriptorHeap->Release(); + + printf("[D3D12PixelValidator] Validation complete: %u pixel errors\n", *error_count_host); + return true; +} + +void D3D12PixelValidator::Cleanup() +{ + if (m_readbackBuffer) { + m_readbackBuffer->Release(); + m_readbackBuffer = nullptr; + } + + if (m_errorCountBuffer) { + m_errorCountBuffer->Release(); + m_errorCountBuffer = nullptr; + } + + if (m_pipelineState) { + m_pipelineState->Release(); + m_pipelineState = nullptr; + } + + if (m_rootSignature) { + m_rootSignature->Release(); + m_rootSignature = nullptr; + } + + if (m_shaderBlob) { + m_shaderBlob->Release(); + m_shaderBlob = nullptr; + } + + if (m_device) { + m_device->Release(); + m_device = nullptr; + } +} diff --git a/vav2/platforms/windows/tests/red-surface-nvdec/src/D3D12PixelValidator.h b/vav2/platforms/windows/tests/red-surface-nvdec/src/D3D12PixelValidator.h new file mode 100644 index 0000000..78e45ab --- /dev/null +++ b/vav2/platforms/windows/tests/red-surface-nvdec/src/D3D12PixelValidator.h @@ -0,0 +1,68 @@ +#pragma once + +#include +#include + +// Forward declaration +class D3D12Manager; + +// D3D12 Compute Shader-based pixel validator +// GPU-only validation without CPU readback +class D3D12PixelValidator { +public: + D3D12PixelValidator(); + ~D3D12PixelValidator(); + + // Prevent copying + D3D12PixelValidator(const D3D12PixelValidator&) = delete; + D3D12PixelValidator& operator=(const D3D12PixelValidator&) = delete; + + // Initialize validator with D3D12 device + bool Initialize(ID3D12Device* device); + + // Validate stripe pattern on GPU (RGBA texture input) + // Returns true if validation executed successfully + // error_count_host: Output - number of mismatched pixels + bool ValidateStripePattern( + D3D12Manager* d3dManager, + ID3D12Resource* rgbaTexture, + uint32_t width, + uint32_t height, + uint32_t stripe_width, + unsigned int* error_count_host + ); + + // Cleanup resources + void Cleanup(); + +private: + // Compile HLSL shader to DXIL bytecode + bool CompileShader(const wchar_t* shader_path); + + // Create compute pipeline state + bool CreatePipelineState(); + + // Create GPU buffers + bool CreateBuffers(); + + ID3D12Device* m_device = nullptr; + ID3D12RootSignature* m_rootSignature = nullptr; + ID3D12PipelineState* m_pipelineState = nullptr; + + // GPU buffer for error count (single uint32_t) + ID3D12Resource* m_errorCountBuffer = nullptr; + + // Readback buffer for error count (CPU accessible) + ID3D12Resource* m_readbackBuffer = nullptr; + + // Compiled shader bytecode + ID3DBlob* m_shaderBlob = nullptr; + + // Constants + struct ValidationParams { + uint32_t Width; + uint32_t Height; + uint32_t StripeWidth; + uint32_t Padding; + }; +}; diff --git a/vav2/platforms/windows/tests/red-surface-nvdec/src/PixelVerifier.cpp b/vav2/platforms/windows/tests/red-surface-nvdec/src/PixelVerifier.cpp deleted file mode 100644 index 68f846c..0000000 --- a/vav2/platforms/windows/tests/red-surface-nvdec/src/PixelVerifier.cpp +++ /dev/null @@ -1,103 +0,0 @@ -#include "PixelVerifier.h" -#include -#include - -PixelVerifier::PixelVerifier() -{ -} - -PixelVerifier::~PixelVerifier() -{ -} - -RGB PixelVerifier::ConvertNV12ToRGB(uint8_t y, uint8_t u, uint8_t v) -{ - // YUV to RGB conversion (ITU-R BT.709) - // R = Y + 1.5748 * (V - 128) - // G = Y - 0.1873 * (U - 128) - 0.4681 * (V - 128) - // B = Y + 1.8556 * (U - 128) - - int c = y - 16; - int d = u - 128; - int e = v - 128; - - int r = (298 * c + 409 * e + 128) >> 8; - int g = (298 * c - 100 * d - 208 * e + 128) >> 8; - int b = (298 * c + 516 * d + 128) >> 8; - - RGB result; - result.r = static_cast(std::clamp(r, 0, 255)); - result.g = static_cast(std::clamp(g, 0, 255)); - result.b = static_cast(std::clamp(b, 0, 255)); - - return result; -} - -bool PixelVerifier::IsBlackPixel(const RGB& pixel, int tolerance) -{ - // Black: R=0, G=0, B=0 (with tolerance) - return (pixel.r <= tolerance && pixel.g <= tolerance && pixel.b <= tolerance); -} - -bool PixelVerifier::IsRedPixel(const RGB& pixel, int tolerance) -{ - // Red: R=255, G=0, B=0 (with tolerance) - return (pixel.r >= (255 - tolerance) && pixel.g <= tolerance && pixel.b <= tolerance); -} - -bool PixelVerifier::VerifyStripePattern(const uint8_t* nv12_data, uint32_t width, uint32_t height) -{ - // NV12 format: - // - Y plane: width * height bytes - // - UV plane: width * (height/2) bytes (interleaved U and V) - - const uint8_t* y_plane = nv12_data; - const uint8_t* uv_plane = nv12_data + (width * height); - - // Check first row (representative sample) - int error_count = 0; - const int max_errors_to_print = 5; - - for (uint32_t x = 0; x < width; ++x) { - // Calculate stripe index (8 pixels per stripe) - int stripe_index = x / 8; - bool should_be_red = (stripe_index % 2) == 1; - - // Get Y value for this pixel - uint8_t y = y_plane[x]; - - // Get U and V values (UV is subsampled 2x horizontally) - uint32_t uv_x = (x / 2) * 2; - uint8_t u = uv_plane[uv_x]; - uint8_t v = uv_plane[uv_x + 1]; - - // Convert to RGB - RGB pixel = ConvertNV12ToRGB(y, u, v); - - // Verify pixel color - if (should_be_red) { - if (!IsRedPixel(pixel)) { - if (error_count < max_errors_to_print) { - printf(" FAIL at X=%d: Expected red, got RGB(%d,%d,%d)\n", - x, pixel.r, pixel.g, pixel.b); - } - error_count++; - } - } - else { - if (!IsBlackPixel(pixel)) { - if (error_count < max_errors_to_print) { - printf(" FAIL at X=%d: Expected black, got RGB(%d,%d,%d)\n", - x, pixel.r, pixel.g, pixel.b); - } - error_count++; - } - } - } - - if (error_count > max_errors_to_print) { - printf(" ... and %d more errors\n", error_count - max_errors_to_print); - } - - return (error_count == 0); -} diff --git a/vav2/platforms/windows/tests/red-surface-nvdec/src/PixelVerifier.h b/vav2/platforms/windows/tests/red-surface-nvdec/src/PixelVerifier.h deleted file mode 100644 index 6b30a7e..0000000 --- a/vav2/platforms/windows/tests/red-surface-nvdec/src/PixelVerifier.h +++ /dev/null @@ -1,29 +0,0 @@ -#pragma once - -#include - -struct RGB -{ - uint8_t r; - uint8_t g; - uint8_t b; -}; - -class PixelVerifier -{ -public: - PixelVerifier(); - ~PixelVerifier(); - - // Verify 8-pixel stripe pattern (black-red-black-red...) - // Returns true if all pixels match expected pattern - bool VerifyStripePattern(const uint8_t* nv12_data, uint32_t width, uint32_t height); - -private: - // Convert NV12 to RGB (single pixel) - RGB ConvertNV12ToRGB(uint8_t y, uint8_t u, uint8_t v); - - // Check if pixel is within tolerance of expected color - bool IsBlackPixel(const RGB& pixel, int tolerance = 50); - bool IsRedPixel(const RGB& pixel, int tolerance = 50); -}; diff --git a/vav2/platforms/windows/tests/red-surface-nvdec/src/main.cpp b/vav2/platforms/windows/tests/red-surface-nvdec/src/main.cpp index 3ccbc1c..2988166 100644 --- a/vav2/platforms/windows/tests/red-surface-nvdec/src/main.cpp +++ b/vav2/platforms/windows/tests/red-surface-nvdec/src/main.cpp @@ -4,14 +4,13 @@ #include #include #include "D3D12Manager.h" -#include "PixelVerifier.h" +#include "D3D12PixelValidator.h" struct FrameTask { int frame_index; ID3D12Resource* texture; VavCoreVideoFrame frame; VavCoreResult result; - uint8_t* cpu_buffer; }; int main(int argc, char* argv[]) @@ -112,11 +111,21 @@ int main(int argc, char* argv[]) printf("[RedSurfaceNVDECTest] Decoder: NVDEC (explicit)\n"); printf("[RedSurfaceNVDECTest] Surface: D3D12\n"); - // Step 7: Create PixelVerifier - PixelVerifier verifier; + // Step 7: Create D3D12PixelValidator + printf("\n[Step 7] Creating D3D12 GPU validator...\n"); + D3D12PixelValidator validator; + if (!validator.Initialize(d3d12.GetDevice())) { + printf("[ERROR] Failed to initialize D3D12PixelValidator\n"); + vavcore_close_file(player); + vavcore_destroy_player(player); + d3d12.Cleanup(); + vavcore_cleanup(); + return 1; + } + printf("[OK] D3D12PixelValidator initialized\n"); // Step 8: Continuous frame decoding (pipeline priming approach) - printf("\n[Step 7] Decoding and verifying frames (continuous decode calls)...\n\n"); + printf("\n[Step 8] Decoding and verifying frames (GPU validation)...\n\n"); printf("[INFO] NVDEC pipeline priming: Calling decode continuously\n"); printf("[INFO] First 2 frames will be skipped for verification (priming frames)\n\n"); @@ -124,14 +133,14 @@ int main(int argc, char* argv[]) std::vector frame_tasks(MAX_FRAMES); int decode_errors = 0; + const uint32_t STRIPE_WIDTH = 4; // 4-pixel wide stripes (Red/Black alternating) - // Continuous decode loop - keep calling DecodeToSurface - // NVDEC will pipeline internally and return frames in order + // Process frames one-by-one to avoid memory pressure for (int i = 0; i < MAX_FRAMES && !vavcore_is_end_of_file(player); i++) { FrameTask& task = frame_tasks[i]; task.frame_index = i; - // Create RGBA texture for CUDA interop + // Create RGBA texture for NVDEC output (CUDA NV12ToRGBA conversion) task.texture = d3d12.CreateRGBATexture(metadata.width, metadata.height); if (!task.texture) { printf("[ERROR] Failed to create texture for frame %d\n", i); @@ -140,8 +149,7 @@ int main(int argc, char* argv[]) continue; } - // Decode frame to D3D12 surface - // This will block and wait (pending) until the frame is ready + // Decode frame to D3D12 surface (RGBA format after CUDA conversion) task.result = vavcore_decode_to_surface( player, VAVCORE_SURFACE_D3D12_RESOURCE, @@ -157,22 +165,32 @@ int main(int argc, char* argv[]) continue; } - // Readback D3D12 texture to CPU memory - task.cpu_buffer = d3d12.ReadbackTexture(task.texture, metadata.width, metadata.height); - if (!task.cpu_buffer) { - printf("Frame %3d: Readback failed\n", i); - task.texture->Release(); - task.texture = nullptr; - decode_errors++; - continue; - } + printf("Frame %3d: Decoded successfully (RGBA)\n", i); - printf("Frame %3d: Decoded successfully\n", i); + // Save first 5 frames as BMP for inspection (one at a time to reduce memory pressure) + if (i < 5) { + char filename[256]; + sprintf_s(filename, sizeof(filename), "frame_%03d.bmp", i); + bool saved = d3d12.SaveTextureToBMP(task.texture, metadata.width, metadata.height, filename); + + // If save failed, try waiting a bit and retry once + if (!saved) { + printf("Frame %3d: BMP save failed, waiting 500ms and retrying...\n", i); + Sleep(500); + saved = d3d12.SaveTextureToBMP(task.texture, metadata.width, metadata.height, filename); + if (!saved) { + printf("Frame %3d: BMP save failed after retry\n", i); + } + } + + // Wait a bit before next frame to avoid memory pressure + Sleep(100); + } } - printf("\n[Step 8] Verifying decoded frames...\n\n"); + printf("\n[Step 9] Verifying decoded frames (GPU compute shader)...\n\n"); - // Verify frames in order + // Verify frames in order using GPU compute shader int passed_frames = 0; int failed_frames = 0; int total_verified = 0; @@ -181,31 +199,39 @@ int main(int argc, char* argv[]) FrameTask& task = frame_tasks[i]; // Skip if decode failed - if (task.result != VAVCORE_SUCCESS || !task.cpu_buffer) { + if (task.result != VAVCORE_SUCCESS || !task.texture) { continue; } // Skip first 2 frames (NVDEC priming) if (i < 2) { printf("Frame %3d: PRIMING (skipped verification)\n", i); - delete[] task.cpu_buffer; - task.cpu_buffer = nullptr; continue; } - // Verify stripe pattern - bool passed = verifier.VerifyStripePattern(task.cpu_buffer, metadata.width, metadata.height); - if (passed) { - printf("Frame %3d: PASS\n", i); + // GPU validation (only 4 bytes copied to CPU!) + unsigned int error_count = 0; + bool validation_success = validator.ValidateStripePattern( + &d3d12, + task.texture, + metadata.width, + metadata.height, + STRIPE_WIDTH, + &error_count + ); + + if (!validation_success) { + printf("Frame %3d: GPU validation failed\n", i); + failed_frames++; + } else if (error_count == 0) { + printf("Frame %3d: PASS (0 pixel errors)\n", i); passed_frames++; } else { - printf("Frame %3d: FAIL\n", i); + printf("Frame %3d: FAIL (%u pixel errors)\n", i, error_count); failed_frames++; } total_verified++; - delete[] task.cpu_buffer; - task.cpu_buffer = nullptr; } // Cleanup textures @@ -213,11 +239,11 @@ int main(int argc, char* argv[]) if (task.texture) { task.texture->Release(); } - if (task.cpu_buffer) { - delete[] task.cpu_buffer; - } } + // Cleanup validator + validator.Cleanup(); + // Step 9: Print results printf("\n[RedSurfaceNVDECTest] Results:\n"); printf(" Total frames decoded: %d\n", MAX_FRAMES - decode_errors);