This commit is contained in:
2025-10-06 07:53:14 +09:00
parent 1fa499013f
commit e75d565ba2
4 changed files with 154 additions and 1 deletions

View File

@@ -108,7 +108,13 @@
"Bash(\"./compile_validation_kernel.bat\")",
"Bash(./compile_validation_kernel.bat)",
"Bash(\"./bin/Debug/RedSurfaceNVDECTest.exe\" \"D:/Project/video-av1/sample/test_720p_stripe.webm\")",
"Bash(\"./bin/Debug/RedSurfaceNVDECTest.exe\" \"D:/Project/video-av1/sample/test_1080p_stripe.webm\")"
"Bash(\"./bin/Debug/RedSurfaceNVDECTest.exe\" \"D:/Project/video-av1/sample/test_1080p_stripe.webm\")",
"Bash(\"./RedSurfaceNVDECTest.exe\" \"D:/Project/video-av1/sample/test_720p_stripe.webm\")",
"Bash(ffmpeg:*)",
"Bash(\"/c/Program Files/Microsoft Visual Studio/2022/Community/MSBuild/Current/Bin/MSBuild.exe\" RedSurfaceNVDECTest.vcxproj //p:Configuration=Debug //p:Platform=x64 //v:minimal)",
"Bash(\"./bin/Debug/RedSurfaceNVDECTest.exe\" \"D:/Project/video-av1/sample/test_4px_stripe_av1.webm\")",
"Bash(\"./bin/Debug/RedSurfaceNVDECTest.exe\" \"D:/Project/video-av1/sample/test_4px_stripe_720p_av1.webm\")",
"Bash(ffprobe:*)"
],
"deny": [],
"ask": []

View File

@@ -253,6 +253,14 @@ bool D3D12SurfaceHandler::CopyRGBAFrame(CUdeviceptr src_rgba,
UINT dst_pitch = layout.Footprint.RowPitch;
uint32_t src_pitch = width * 4; // RGBA: 4 bytes per pixel
LOGF_DEBUG("[D3D12SurfaceHandler] CopyRGBAFrame DEBUG: width=%u, height=%u", width, height);
LOGF_DEBUG(" src_pitch=%u, dst_pitch=%u", src_pitch, dst_pitch);
LOGF_DEBUG(" D3D12 texture desc: Width=%llu, Height=%u, Format=%d",
desc.Width, desc.Height, desc.Format);
LOGF_DEBUG(" Footprint: Width=%u, Height=%u, Depth=%u, RowPitch=%u",
layout.Footprint.Width, layout.Footprint.Height,
layout.Footprint.Depth, layout.Footprint.RowPitch);
// Direct RGBA copy using cudaMemcpy2D (zero extra conversion needed)
cudaError_t err = cudaMemcpy2D(
(void*)dst_ptr, dst_pitch, // Destination: D3D12 texture with pitch
@@ -268,6 +276,41 @@ bool D3D12SurfaceHandler::CopyRGBAFrame(CUdeviceptr src_rgba,
}
LOGF_DEBUG("[D3D12SurfaceHandler] RGBA frame copied to D3D12 texture (%ux%u)", width, height);
// DEBUG: Read back D3D12 texture via CUDA to verify orientation
static int d3d12_debug_count = 0;
if (d3d12_debug_count < 1) {
// Sample first 32x32 pixels from D3D12 texture
const int sample_size = 32;
uint8_t* cpu_sample = new uint8_t[sample_size * sample_size * 4];
cudaError_t err = cudaMemcpy2D(
cpu_sample, sample_size * 4,
(void*)dst_ptr, dst_pitch,
sample_size * 4, sample_size,
cudaMemcpyDeviceToHost
);
if (err == cudaSuccess) {
LOGF_DEBUG("[D3D12SurfaceHandler] D3D12 texture sample (32x32) via CUDA:");
for (int y = 0; y < 4; y++) {
char line[1024];
int pos = 0;
pos += snprintf(line + pos, sizeof(line) - pos, " y=%d: ", y);
for (int x = 0; x < 16; x++) {
uint8_t r = cpu_sample[(y * sample_size + x) * 4 + 0];
pos += snprintf(line + pos, sizeof(line) - pos, "%3d ", r);
}
LOGF_DEBUG("%s", line);
}
} else {
LOGF_ERROR("[D3D12SurfaceHandler] Failed to sample D3D12 texture: %s", cudaGetErrorString(err));
}
delete[] cpu_sample;
d3d12_debug_count++;
}
return true;
}

View File

@@ -115,6 +115,7 @@ $L__BB0_2:
ret;
}
)PTX";
namespace VavCore {
@@ -223,6 +224,43 @@ bool NV12ToRGBAConverter::ConvertNV12ToRGBA(CUdeviceptr nv12_ptr, uint32_t nv12_
dim3 gridDim((m_width + blockDim.x - 1) / blockDim.x,
(m_height + blockDim.y - 1) / blockDim.y);
// DEBUG: Sample Y plane data to check orientation
static int debug_frame_count = 0;
if (debug_frame_count < 3) {
// Copy first 32x32 pixels of Y plane to CPU
const int sample_size = 32;
uint8_t* cpu_buffer = new uint8_t[sample_size * sample_size];
cudaError_t err = cudaMemcpy2D(
cpu_buffer, sample_size,
(void*)y_plane, nv12_pitch,
sample_size, sample_size,
cudaMemcpyDeviceToHost
);
if (err == cudaSuccess) {
LOGF_DEBUG("[NV12ToRGBAConverter] Frame %d - Y plane sample (32x32):", debug_frame_count);
LOGF_DEBUG(" m_width=%u, m_height=%u, nv12_pitch=%u", m_width, m_height, nv12_pitch);
// Print first 32 pixels of first 4 rows
for (int y = 0; y < 4; y++) {
char line[1024];
int pos = 0;
pos += snprintf(line + pos, sizeof(line) - pos, " y=%d: ", y);
for (int x = 0; x < 32; x++) {
uint8_t val = cpu_buffer[y * sample_size + x];
pos += snprintf(line + pos, sizeof(line) - pos, "%3d ", val);
}
LOGF_DEBUG("%s", line);
}
} else {
LOGF_ERROR("[NV12ToRGBAConverter] Failed to copy Y plane sample: %s", cudaGetErrorString(err));
}
delete[] cpu_buffer;
debug_frame_count++;
}
// Setup kernel parameters
void* args[] = {
&y_plane,
@@ -251,6 +289,71 @@ bool NV12ToRGBAConverter::ConvertNV12ToRGBA(CUdeviceptr nv12_ptr, uint32_t nv12_
return false;
}
// DEBUG: Save RGBA buffer directly to BMP to verify CUDA kernel output
static int rgba_debug_count = 0;
if (rgba_debug_count < 1) {
// Copy entire RGBA buffer to CPU
size_t full_size = m_width * m_height * 4;
uint8_t* cpu_rgba = new uint8_t[full_size];
cudaError_t err = cudaMemcpy2D(
cpu_rgba, m_width * 4,
(void*)m_rgbaBuffer, rgba_pitch,
m_width * 4, m_height,
cudaMemcpyDeviceToHost
);
if (err == cudaSuccess) {
// Save directly to BMP
FILE* f = nullptr;
fopen_s(&f, "cuda_kernel_output.bmp", "wb");
if (f) {
// BMP header
uint32_t file_size = 54 + full_size;
uint8_t header[54] = {
'B', 'M',
(uint8_t)(file_size), (uint8_t)(file_size >> 8), (uint8_t)(file_size >> 16), (uint8_t)(file_size >> 24),
0, 0, 0, 0,
54, 0, 0, 0,
40, 0, 0, 0,
(uint8_t)(m_width), (uint8_t)(m_width >> 8), (uint8_t)(m_width >> 16), (uint8_t)(m_width >> 24),
(uint8_t)(m_height), (uint8_t)(m_height >> 8), (uint8_t)(m_height >> 16), (uint8_t)(m_height >> 24),
1, 0,
32, 0,
0, 0, 0, 0,
(uint8_t)(full_size), (uint8_t)(full_size >> 8), (uint8_t)(full_size >> 16), (uint8_t)(full_size >> 24),
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0
};
fwrite(header, 1, 54, f);
// Write pixel data (bottom-up, BMP format is BGRA)
for (int y = m_height - 1; y >= 0; y--) {
for (uint32_t x = 0; x < m_width; x++) {
uint32_t idx = (y * m_width + x) * 4;
// CUDA kernel outputs RGBA, BMP expects BGRA
uint8_t bgra[4] = {
cpu_rgba[idx + 2], // B from RGBA[2]
cpu_rgba[idx + 1], // G from RGBA[1]
cpu_rgba[idx + 0], // R from RGBA[0]
cpu_rgba[idx + 3] // A from RGBA[3]
};
fwrite(bgra, 1, 4, f);
}
}
fclose(f);
LOGF_DEBUG("[NV12ToRGBAConverter] Saved CUDA kernel output to cuda_kernel_output.bmp");
}
} else {
LOGF_ERROR("[NV12ToRGBAConverter] Failed to copy RGBA for BMP: %s", cudaGetErrorString(err));
}
delete[] cpu_rgba;
rgba_debug_count++;
}
// Return pointer to RGBA buffer
*out_rgba_ptr = m_rgbaBuffer;

View File

@@ -1391,6 +1391,7 @@ bool NVDECAV1Decoder::DecodeToSurface(const uint8_t* packet_data, size_t packet_
}
// Copy RGBA to D3D12 texture
LOGF_DEBUG("[DecodeToSurface] Calling CopyRGBAFrame with m_width=%u, m_height=%u", m_width, m_height);
copySuccess = m_d3d12Handler->CopyRGBAFrame(
rgbaPtr,
d3d12Resource,