WIP
This commit is contained in:
@@ -108,7 +108,13 @@
|
||||
"Bash(\"./compile_validation_kernel.bat\")",
|
||||
"Bash(./compile_validation_kernel.bat)",
|
||||
"Bash(\"./bin/Debug/RedSurfaceNVDECTest.exe\" \"D:/Project/video-av1/sample/test_720p_stripe.webm\")",
|
||||
"Bash(\"./bin/Debug/RedSurfaceNVDECTest.exe\" \"D:/Project/video-av1/sample/test_1080p_stripe.webm\")"
|
||||
"Bash(\"./bin/Debug/RedSurfaceNVDECTest.exe\" \"D:/Project/video-av1/sample/test_1080p_stripe.webm\")",
|
||||
"Bash(\"./RedSurfaceNVDECTest.exe\" \"D:/Project/video-av1/sample/test_720p_stripe.webm\")",
|
||||
"Bash(ffmpeg:*)",
|
||||
"Bash(\"/c/Program Files/Microsoft Visual Studio/2022/Community/MSBuild/Current/Bin/MSBuild.exe\" RedSurfaceNVDECTest.vcxproj //p:Configuration=Debug //p:Platform=x64 //v:minimal)",
|
||||
"Bash(\"./bin/Debug/RedSurfaceNVDECTest.exe\" \"D:/Project/video-av1/sample/test_4px_stripe_av1.webm\")",
|
||||
"Bash(\"./bin/Debug/RedSurfaceNVDECTest.exe\" \"D:/Project/video-av1/sample/test_4px_stripe_720p_av1.webm\")",
|
||||
"Bash(ffprobe:*)"
|
||||
],
|
||||
"deny": [],
|
||||
"ask": []
|
||||
|
||||
@@ -253,6 +253,14 @@ bool D3D12SurfaceHandler::CopyRGBAFrame(CUdeviceptr src_rgba,
|
||||
UINT dst_pitch = layout.Footprint.RowPitch;
|
||||
uint32_t src_pitch = width * 4; // RGBA: 4 bytes per pixel
|
||||
|
||||
LOGF_DEBUG("[D3D12SurfaceHandler] CopyRGBAFrame DEBUG: width=%u, height=%u", width, height);
|
||||
LOGF_DEBUG(" src_pitch=%u, dst_pitch=%u", src_pitch, dst_pitch);
|
||||
LOGF_DEBUG(" D3D12 texture desc: Width=%llu, Height=%u, Format=%d",
|
||||
desc.Width, desc.Height, desc.Format);
|
||||
LOGF_DEBUG(" Footprint: Width=%u, Height=%u, Depth=%u, RowPitch=%u",
|
||||
layout.Footprint.Width, layout.Footprint.Height,
|
||||
layout.Footprint.Depth, layout.Footprint.RowPitch);
|
||||
|
||||
// Direct RGBA copy using cudaMemcpy2D (zero extra conversion needed)
|
||||
cudaError_t err = cudaMemcpy2D(
|
||||
(void*)dst_ptr, dst_pitch, // Destination: D3D12 texture with pitch
|
||||
@@ -268,6 +276,41 @@ bool D3D12SurfaceHandler::CopyRGBAFrame(CUdeviceptr src_rgba,
|
||||
}
|
||||
|
||||
LOGF_DEBUG("[D3D12SurfaceHandler] RGBA frame copied to D3D12 texture (%ux%u)", width, height);
|
||||
|
||||
// DEBUG: Read back D3D12 texture via CUDA to verify orientation
|
||||
static int d3d12_debug_count = 0;
|
||||
if (d3d12_debug_count < 1) {
|
||||
// Sample first 32x32 pixels from D3D12 texture
|
||||
const int sample_size = 32;
|
||||
uint8_t* cpu_sample = new uint8_t[sample_size * sample_size * 4];
|
||||
|
||||
cudaError_t err = cudaMemcpy2D(
|
||||
cpu_sample, sample_size * 4,
|
||||
(void*)dst_ptr, dst_pitch,
|
||||
sample_size * 4, sample_size,
|
||||
cudaMemcpyDeviceToHost
|
||||
);
|
||||
|
||||
if (err == cudaSuccess) {
|
||||
LOGF_DEBUG("[D3D12SurfaceHandler] D3D12 texture sample (32x32) via CUDA:");
|
||||
for (int y = 0; y < 4; y++) {
|
||||
char line[1024];
|
||||
int pos = 0;
|
||||
pos += snprintf(line + pos, sizeof(line) - pos, " y=%d: ", y);
|
||||
for (int x = 0; x < 16; x++) {
|
||||
uint8_t r = cpu_sample[(y * sample_size + x) * 4 + 0];
|
||||
pos += snprintf(line + pos, sizeof(line) - pos, "%3d ", r);
|
||||
}
|
||||
LOGF_DEBUG("%s", line);
|
||||
}
|
||||
} else {
|
||||
LOGF_ERROR("[D3D12SurfaceHandler] Failed to sample D3D12 texture: %s", cudaGetErrorString(err));
|
||||
}
|
||||
|
||||
delete[] cpu_sample;
|
||||
d3d12_debug_count++;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -115,6 +115,7 @@ $L__BB0_2:
|
||||
ret;
|
||||
|
||||
}
|
||||
|
||||
)PTX";
|
||||
|
||||
namespace VavCore {
|
||||
@@ -223,6 +224,43 @@ bool NV12ToRGBAConverter::ConvertNV12ToRGBA(CUdeviceptr nv12_ptr, uint32_t nv12_
|
||||
dim3 gridDim((m_width + blockDim.x - 1) / blockDim.x,
|
||||
(m_height + blockDim.y - 1) / blockDim.y);
|
||||
|
||||
// DEBUG: Sample Y plane data to check orientation
|
||||
static int debug_frame_count = 0;
|
||||
if (debug_frame_count < 3) {
|
||||
// Copy first 32x32 pixels of Y plane to CPU
|
||||
const int sample_size = 32;
|
||||
uint8_t* cpu_buffer = new uint8_t[sample_size * sample_size];
|
||||
|
||||
cudaError_t err = cudaMemcpy2D(
|
||||
cpu_buffer, sample_size,
|
||||
(void*)y_plane, nv12_pitch,
|
||||
sample_size, sample_size,
|
||||
cudaMemcpyDeviceToHost
|
||||
);
|
||||
|
||||
if (err == cudaSuccess) {
|
||||
LOGF_DEBUG("[NV12ToRGBAConverter] Frame %d - Y plane sample (32x32):", debug_frame_count);
|
||||
LOGF_DEBUG(" m_width=%u, m_height=%u, nv12_pitch=%u", m_width, m_height, nv12_pitch);
|
||||
|
||||
// Print first 32 pixels of first 4 rows
|
||||
for (int y = 0; y < 4; y++) {
|
||||
char line[1024];
|
||||
int pos = 0;
|
||||
pos += snprintf(line + pos, sizeof(line) - pos, " y=%d: ", y);
|
||||
for (int x = 0; x < 32; x++) {
|
||||
uint8_t val = cpu_buffer[y * sample_size + x];
|
||||
pos += snprintf(line + pos, sizeof(line) - pos, "%3d ", val);
|
||||
}
|
||||
LOGF_DEBUG("%s", line);
|
||||
}
|
||||
} else {
|
||||
LOGF_ERROR("[NV12ToRGBAConverter] Failed to copy Y plane sample: %s", cudaGetErrorString(err));
|
||||
}
|
||||
|
||||
delete[] cpu_buffer;
|
||||
debug_frame_count++;
|
||||
}
|
||||
|
||||
// Setup kernel parameters
|
||||
void* args[] = {
|
||||
&y_plane,
|
||||
@@ -251,6 +289,71 @@ bool NV12ToRGBAConverter::ConvertNV12ToRGBA(CUdeviceptr nv12_ptr, uint32_t nv12_
|
||||
return false;
|
||||
}
|
||||
|
||||
// DEBUG: Save RGBA buffer directly to BMP to verify CUDA kernel output
|
||||
static int rgba_debug_count = 0;
|
||||
if (rgba_debug_count < 1) {
|
||||
// Copy entire RGBA buffer to CPU
|
||||
size_t full_size = m_width * m_height * 4;
|
||||
uint8_t* cpu_rgba = new uint8_t[full_size];
|
||||
|
||||
cudaError_t err = cudaMemcpy2D(
|
||||
cpu_rgba, m_width * 4,
|
||||
(void*)m_rgbaBuffer, rgba_pitch,
|
||||
m_width * 4, m_height,
|
||||
cudaMemcpyDeviceToHost
|
||||
);
|
||||
|
||||
if (err == cudaSuccess) {
|
||||
// Save directly to BMP
|
||||
FILE* f = nullptr;
|
||||
fopen_s(&f, "cuda_kernel_output.bmp", "wb");
|
||||
if (f) {
|
||||
// BMP header
|
||||
uint32_t file_size = 54 + full_size;
|
||||
uint8_t header[54] = {
|
||||
'B', 'M',
|
||||
(uint8_t)(file_size), (uint8_t)(file_size >> 8), (uint8_t)(file_size >> 16), (uint8_t)(file_size >> 24),
|
||||
0, 0, 0, 0,
|
||||
54, 0, 0, 0,
|
||||
40, 0, 0, 0,
|
||||
(uint8_t)(m_width), (uint8_t)(m_width >> 8), (uint8_t)(m_width >> 16), (uint8_t)(m_width >> 24),
|
||||
(uint8_t)(m_height), (uint8_t)(m_height >> 8), (uint8_t)(m_height >> 16), (uint8_t)(m_height >> 24),
|
||||
1, 0,
|
||||
32, 0,
|
||||
0, 0, 0, 0,
|
||||
(uint8_t)(full_size), (uint8_t)(full_size >> 8), (uint8_t)(full_size >> 16), (uint8_t)(full_size >> 24),
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0
|
||||
};
|
||||
fwrite(header, 1, 54, f);
|
||||
|
||||
// Write pixel data (bottom-up, BMP format is BGRA)
|
||||
for (int y = m_height - 1; y >= 0; y--) {
|
||||
for (uint32_t x = 0; x < m_width; x++) {
|
||||
uint32_t idx = (y * m_width + x) * 4;
|
||||
// CUDA kernel outputs RGBA, BMP expects BGRA
|
||||
uint8_t bgra[4] = {
|
||||
cpu_rgba[idx + 2], // B from RGBA[2]
|
||||
cpu_rgba[idx + 1], // G from RGBA[1]
|
||||
cpu_rgba[idx + 0], // R from RGBA[0]
|
||||
cpu_rgba[idx + 3] // A from RGBA[3]
|
||||
};
|
||||
fwrite(bgra, 1, 4, f);
|
||||
}
|
||||
}
|
||||
fclose(f);
|
||||
LOGF_DEBUG("[NV12ToRGBAConverter] Saved CUDA kernel output to cuda_kernel_output.bmp");
|
||||
}
|
||||
} else {
|
||||
LOGF_ERROR("[NV12ToRGBAConverter] Failed to copy RGBA for BMP: %s", cudaGetErrorString(err));
|
||||
}
|
||||
|
||||
delete[] cpu_rgba;
|
||||
rgba_debug_count++;
|
||||
}
|
||||
|
||||
// Return pointer to RGBA buffer
|
||||
*out_rgba_ptr = m_rgbaBuffer;
|
||||
|
||||
|
||||
@@ -1391,6 +1391,7 @@ bool NVDECAV1Decoder::DecodeToSurface(const uint8_t* packet_data, size_t packet_
|
||||
}
|
||||
|
||||
// Copy RGBA to D3D12 texture
|
||||
LOGF_DEBUG("[DecodeToSurface] Calling CopyRGBAFrame with m_width=%u, m_height=%u", m_width, m_height);
|
||||
copySuccess = m_d3d12Handler->CopyRGBAFrame(
|
||||
rgbaPtr,
|
||||
d3d12Resource,
|
||||
|
||||
Reference in New Issue
Block a user