WIP
This commit is contained in:
@@ -32,36 +32,22 @@ bool D3D12SurfaceHandler::CopyNV12Frame(CUdeviceptr src_frame,
|
||||
}
|
||||
|
||||
// Get D3D12 texture layout for both Y and UV planes
|
||||
// CRITICAL: Must use the ACTUAL texture descriptor as-is (including any padding)
|
||||
// DO NOT override height - GetCopyableFootprints needs the real allocation size
|
||||
D3D12_RESOURCE_DESC desc = dst_texture->GetDesc();
|
||||
D3D12_PLACED_SUBRESOURCE_FOOTPRINT layouts[2];
|
||||
UINT num_rows[2] = {0};
|
||||
UINT64 row_sizes[2] = {0};
|
||||
UINT64 total_bytes = 0;
|
||||
|
||||
// CRITICAL: Use actual video height, not texture allocation height
|
||||
// Texture was allocated with height * 1.5 for NV12 layout, but GetCopyableFootprints
|
||||
// should use actual video height to calculate correct plane offsets
|
||||
D3D12_RESOURCE_DESC actual_video_desc = desc;
|
||||
actual_video_desc.Height = height; // Use actual video height, not allocation height
|
||||
|
||||
// CRITICAL: Query BOTH subresources in a SINGLE call to get correct relative offsets
|
||||
// NV12 has 2 subresources (plane 0: Y, plane 1: UV)
|
||||
m_device->GetCopyableFootprints(&actual_video_desc, 0, 2, 0,
|
||||
// Use descriptor directly without modification (preserves allocated height/pitch)
|
||||
m_device->GetCopyableFootprints(&desc, 0, 2, 0,
|
||||
layouts, num_rows, row_sizes, &total_bytes);
|
||||
|
||||
uint32_t y_dst_pitch = layouts[0].Footprint.RowPitch;
|
||||
uint32_t uv_dst_pitch = layouts[1].Footprint.RowPitch;
|
||||
UINT64 uv_offset = layouts[1].Offset;
|
||||
|
||||
char buf[512];
|
||||
sprintf_s(buf, "[D3D12SurfaceHandler] Y: width=%u, height=%u, srcPitch=%u, dstPitch=%u\n",
|
||||
width, height, src_pitch, y_dst_pitch);
|
||||
OutputDebugStringA(buf);
|
||||
|
||||
sprintf_s(buf, "[D3D12SurfaceHandler] UV: width=%u, height=%u, srcPitch=%u, dstPitch=%u, offset=%llu\n",
|
||||
width, height / 2, src_pitch, uv_dst_pitch, uv_offset);
|
||||
OutputDebugStringA(buf);
|
||||
|
||||
// Copy Y plane
|
||||
if (!CopyYPlane(src_frame, src_pitch,
|
||||
dst_ptr, y_dst_pitch,
|
||||
@@ -70,7 +56,7 @@ bool D3D12SurfaceHandler::CopyNV12Frame(CUdeviceptr src_frame,
|
||||
}
|
||||
|
||||
// Copy UV plane
|
||||
CUdeviceptr src_uv = src_frame + (src_pitch * height);
|
||||
CUdeviceptr src_uv = src_frame + (static_cast<UINT64>(src_pitch) * height);
|
||||
CUdeviceptr dst_uv = dst_ptr + uv_offset;
|
||||
|
||||
if (!CopyUVPlane(src_uv, src_pitch,
|
||||
@@ -79,7 +65,6 @@ bool D3D12SurfaceHandler::CopyNV12Frame(CUdeviceptr src_frame,
|
||||
return false;
|
||||
}
|
||||
|
||||
OutputDebugStringA("[D3D12SurfaceHandler] NV12 frame copied successfully\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -99,12 +84,10 @@ bool D3D12SurfaceHandler::CopyYPlane(CUdeviceptr src, uint32_t src_pitch,
|
||||
CUdeviceptr dst, uint32_t dst_pitch,
|
||||
uint32_t width, uint32_t height)
|
||||
{
|
||||
// Copy Y plane: single 8-bit channel
|
||||
// Width parameter is in bytes (same as pixel width for 8-bit format)
|
||||
cudaError_t err = cudaMemcpy2D(
|
||||
(void*)dst, dst_pitch,
|
||||
(void*)src, src_pitch,
|
||||
width, height,
|
||||
width, height, // Use the logical width, not the pitch, for the copy width.
|
||||
cudaMemcpyDeviceToDevice
|
||||
);
|
||||
|
||||
@@ -115,8 +98,6 @@ bool D3D12SurfaceHandler::CopyYPlane(CUdeviceptr src, uint32_t src_pitch,
|
||||
OutputDebugStringA(buf);
|
||||
return false;
|
||||
}
|
||||
|
||||
OutputDebugStringA("[D3D12SurfaceHandler] Y plane copied\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -124,13 +105,11 @@ bool D3D12SurfaceHandler::CopyUVPlane(CUdeviceptr src, uint32_t src_pitch,
|
||||
CUdeviceptr dst, uint32_t dst_pitch,
|
||||
uint32_t width, uint32_t height)
|
||||
{
|
||||
// Copy UV plane: interleaved U and V (NV12 format)
|
||||
// Width in bytes = width of Y plane (because U and V are interleaved)
|
||||
// Height = half of Y plane height
|
||||
// For interleaved NV12, the byte width of the UV plane is the same as the Y plane.
|
||||
cudaError_t err = cudaMemcpy2D(
|
||||
(void*)dst, dst_pitch,
|
||||
(void*)src, src_pitch,
|
||||
width, height,
|
||||
width, height, // Use the logical width, not the pitch, for the copy width.
|
||||
cudaMemcpyDeviceToDevice
|
||||
);
|
||||
|
||||
@@ -141,8 +120,6 @@ bool D3D12SurfaceHandler::CopyUVPlane(CUdeviceptr src, uint32_t src_pitch,
|
||||
OutputDebugStringA(buf);
|
||||
return false;
|
||||
}
|
||||
|
||||
OutputDebugStringA("[D3D12SurfaceHandler] UV plane copied\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -546,12 +546,7 @@ bool NVDECAV1Decoder::InitializeCUDA() {
|
||||
}
|
||||
|
||||
// If D3D device is set, enable D3D interop on this context
|
||||
if (m_d3d11Device) {
|
||||
OutputDebugStringA("[InitializeCUDA] D3D11 device detected, CUDA-D3D11 interop will be enabled\n");
|
||||
char debug_buf[256];
|
||||
sprintf_s(debug_buf, "[InitializeCUDA] D3D11 device: %p, CUDA context: %p\n", m_d3d11Device, m_cuContext);
|
||||
OutputDebugStringA(debug_buf);
|
||||
} else if (m_d3d12Device) {
|
||||
if (m_d3d12Device) {
|
||||
OutputDebugStringA("[InitializeCUDA] D3D12 device detected, CUDA-D3D12 interop will be enabled\n");
|
||||
char debug_buf[256];
|
||||
sprintf_s(debug_buf, "[InitializeCUDA] D3D12 device: %p, CUDA context: %p\n", m_d3d12Device, m_cuContext);
|
||||
@@ -783,12 +778,11 @@ bool NVDECAV1Decoder::SupportsSurfaceType(VavCoreSurfaceType type) const {
|
||||
switch (type) {
|
||||
case VAVCORE_SURFACE_CUDA_DEVICE:
|
||||
return true; // NVDEC's primary strength - CUDA device pointers
|
||||
case VAVCORE_SURFACE_D3D11_TEXTURE:
|
||||
return true; // Via CUDA-D3D11 interop
|
||||
case VAVCORE_SURFACE_D3D12_RESOURCE:
|
||||
return true; // Via CUDA-D3D12 interop
|
||||
case VAVCORE_SURFACE_CPU:
|
||||
return true; // Always support CPU fallback
|
||||
case VAVCORE_SURFACE_D3D11_TEXTURE:
|
||||
case VAVCORE_SURFACE_AMF_SURFACE:
|
||||
default:
|
||||
return false;
|
||||
@@ -803,25 +797,7 @@ bool NVDECAV1Decoder::SetD3DDevice(void* d3d_device, VavCoreSurfaceType type) {
|
||||
|
||||
try {
|
||||
bool success = false;
|
||||
if (type == VAVCORE_SURFACE_D3D11_TEXTURE) {
|
||||
char debug_buf[256];
|
||||
sprintf_s(debug_buf, "[SetD3DDevice] Setting D3D11 device: %p\n", d3d_device);
|
||||
OutputDebugStringA(debug_buf);
|
||||
|
||||
// Store D3D11 device
|
||||
m_d3d11Device = d3d_device;
|
||||
m_preferredSurfaceType = type;
|
||||
|
||||
// DON'T reinitialize if already initialized - this would invalidate existing decoded frames
|
||||
if (m_initialized) {
|
||||
OutputDebugStringA("[SetD3DDevice] Decoder already initialized, D3D11 device set for DecodeToSurface\n");
|
||||
OutputDebugStringA("[SetD3DDevice] Note: D3D11 zero-copy will be available for DecodeToSurface calls\n");
|
||||
} else {
|
||||
OutputDebugStringA("[SetD3DDevice] D3D11 device set, will be used when Initialize() is called\n");
|
||||
}
|
||||
|
||||
success = true;
|
||||
} else if (type == VAVCORE_SURFACE_D3D12_RESOURCE) {
|
||||
if (type == VAVCORE_SURFACE_D3D12_RESOURCE) {
|
||||
char debug_buf[256];
|
||||
sprintf_s(debug_buf, "[SetD3DDevice] Setting D3D12 device: %p\n", d3d_device);
|
||||
OutputDebugStringA(debug_buf);
|
||||
@@ -1013,153 +989,6 @@ bool NVDECAV1Decoder::DecodeToSurface(const uint8_t* packet_data, size_t packet_
|
||||
// Unmap the frame (this is just for getting device pointer, actual usage will be later)
|
||||
cuvidUnmapVideoFrame(m_decoder, devicePtr);
|
||||
|
||||
} else if (target_type == VAVCORE_SURFACE_D3D11_TEXTURE) {
|
||||
// For D3D11 texture, we need CUDA-D3D interop
|
||||
OutputDebugStringA("[NVDECAV1Decoder::DecodeToSurface] D3D11 texture path\n");
|
||||
|
||||
if (!target_surface || !m_d3d11Device) {
|
||||
OutputDebugStringA("[NVDECAV1Decoder::DecodeToSurface] ERROR: target_surface or m_d3d11Device is null\n");
|
||||
LogError("D3D11 texture or device not available");
|
||||
return false;
|
||||
}
|
||||
|
||||
char debug_buf[256];
|
||||
sprintf_s(debug_buf, "[NVDECAV1Decoder::DecodeToSurface] target_surface=%p, m_d3d11Device=%p\n",
|
||||
target_surface, m_d3d11Device);
|
||||
OutputDebugStringA(debug_buf);
|
||||
|
||||
// Register D3D texture with CUDA
|
||||
OutputDebugStringA("[NVDECAV1Decoder::DecodeToSurface] Calling RegisterD3DResourceWithCUDA...\n");
|
||||
bool registerSuccess = RegisterD3DResourceWithCUDA(target_surface);
|
||||
if (!registerSuccess) {
|
||||
OutputDebugStringA("[NVDECAV1Decoder::DecodeToSurface] ERROR: RegisterD3DResourceWithCUDA failed\n");
|
||||
LogError("Failed to register D3D texture with CUDA");
|
||||
return false;
|
||||
}
|
||||
OutputDebugStringA("[NVDECAV1Decoder::DecodeToSurface] RegisterD3DResourceWithCUDA succeeded\n");
|
||||
|
||||
// Wait for decoded frame to be available (blocks until callback completes)
|
||||
int frameIdx = -1;
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(m_frameQueueMutex);
|
||||
m_frameAvailable.wait(lock, [this] {
|
||||
return !m_decodedFrameIndices.empty();
|
||||
});
|
||||
frameIdx = m_decodedFrameIndices.front();
|
||||
m_decodedFrameIndices.pop();
|
||||
}
|
||||
|
||||
sprintf_s(debug_buf, "[NVDECAV1Decoder::DecodeToSurface] Using decoded frame from queue: frameIdx=%d\n", frameIdx);
|
||||
OutputDebugStringA(debug_buf);
|
||||
|
||||
// Map decoded NVDEC frame
|
||||
CUVIDPROCPARAMS procParams = {};
|
||||
procParams.progressive_frame = 1;
|
||||
procParams.top_field_first = 0;
|
||||
procParams.unpaired_field = 0;
|
||||
|
||||
CUdeviceptr srcDevicePtr = 0;
|
||||
unsigned int srcPitch = 0;
|
||||
|
||||
sprintf_s(debug_buf, "[NVDECAV1Decoder::DecodeToSurface] Calling cuvidMapVideoFrame (frameIdx=%d)\n", frameIdx);
|
||||
OutputDebugStringA(debug_buf);
|
||||
|
||||
result = cuvidMapVideoFrame(m_decoder, frameIdx, &srcDevicePtr, &srcPitch, &procParams);
|
||||
|
||||
if (result == CUDA_SUCCESS) {
|
||||
sprintf_s(debug_buf, "[NVDECAV1Decoder::DecodeToSurface] cuvidMapVideoFrame succeeded: srcDevicePtr=%p, srcPitch=%u\n",
|
||||
(void*)srcDevicePtr, srcPitch);
|
||||
OutputDebugStringA(debug_buf);
|
||||
} else {
|
||||
sprintf_s(debug_buf, "[NVDECAV1Decoder::DecodeToSurface] cuvidMapVideoFrame failed with code %d\n", result);
|
||||
OutputDebugStringA(debug_buf);
|
||||
LogCUDAError(result, "cuvidMapVideoFrame");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Map D3D texture to CUDA array
|
||||
OutputDebugStringA("[NVDECAV1Decoder::DecodeToSurface] Calling MapCUDADevicePtrFromD3D...\n");
|
||||
CUdeviceptr dstDevicePtr;
|
||||
size_t dstPitch;
|
||||
bool mapSuccess = MapCUDADevicePtrFromD3D(dstDevicePtr, dstPitch);
|
||||
if (!mapSuccess) {
|
||||
OutputDebugStringA("[NVDECAV1Decoder::DecodeToSurface] ERROR: MapCUDADevicePtrFromD3D failed\n");
|
||||
LogError("Failed to map CUDA device pointer from D3D");
|
||||
cuvidUnmapVideoFrame(m_decoder, srcDevicePtr);
|
||||
return false;
|
||||
}
|
||||
OutputDebugStringA("[NVDECAV1Decoder::DecodeToSurface] MapCUDADevicePtrFromD3D succeeded\n");
|
||||
|
||||
// NV12 D3D11 texture: Get CUDA array (entire NV12 surface including Y and UV)
|
||||
OutputDebugStringA("[NVDECAV1Decoder::DecodeToSurface] Getting CUDA array for NV12 texture...\n");
|
||||
CUarray cudaArray;
|
||||
result = cuGraphicsSubResourceGetMappedArray(&cudaArray, m_cudaGraphicsResource, 0, 0);
|
||||
if (result != CUDA_SUCCESS) {
|
||||
sprintf_s(debug_buf, "[NVDECAV1Decoder::DecodeToSurface] cuGraphicsSubResourceGetMappedArray failed with code %d\n", result);
|
||||
OutputDebugStringA(debug_buf);
|
||||
LogCUDAError(result, "cuGraphicsSubResourceGetMappedArray");
|
||||
UnmapCUDADevicePtr();
|
||||
cuvidUnmapVideoFrame(m_decoder, srcDevicePtr);
|
||||
return false;
|
||||
}
|
||||
sprintf_s(debug_buf, "[NVDECAV1Decoder::DecodeToSurface] Got CUDA array: %p\n", cudaArray);
|
||||
OutputDebugStringA(debug_buf);
|
||||
|
||||
// Copy Y plane from NVDEC to D3D11 NV12 texture
|
||||
CUDA_MEMCPY2D copyParamsY = {};
|
||||
copyParamsY.srcMemoryType = CU_MEMORYTYPE_DEVICE;
|
||||
copyParamsY.srcDevice = srcDevicePtr;
|
||||
copyParamsY.srcPitch = srcPitch;
|
||||
copyParamsY.dstMemoryType = CU_MEMORYTYPE_ARRAY;
|
||||
copyParamsY.dstArray = cudaArray;
|
||||
copyParamsY.WidthInBytes = m_width;
|
||||
copyParamsY.Height = m_height;
|
||||
|
||||
OutputDebugStringA("[NVDECAV1Decoder::DecodeToSurface] Copying Y plane...\n");
|
||||
result = cuMemcpy2D(©ParamsY);
|
||||
if (result != CUDA_SUCCESS) {
|
||||
sprintf_s(debug_buf, "[NVDECAV1Decoder::DecodeToSurface] cuMemcpy2D (Y plane) failed with code %d\n", result);
|
||||
OutputDebugStringA(debug_buf);
|
||||
LogCUDAError(result, "cuMemcpy2D (Y plane)");
|
||||
UnmapCUDADevicePtr();
|
||||
cuvidUnmapVideoFrame(m_decoder, srcDevicePtr);
|
||||
return false;
|
||||
}
|
||||
OutputDebugStringA("[NVDECAV1Decoder::DecodeToSurface] Y plane copied successfully\n");
|
||||
|
||||
// Copy UV plane from NVDEC to D3D11 NV12 texture (UV starts at row m_height)
|
||||
CUDA_MEMCPY2D copyParamsUV = {};
|
||||
copyParamsUV.srcMemoryType = CU_MEMORYTYPE_DEVICE;
|
||||
copyParamsUV.srcDevice = srcDevicePtr + (srcPitch * m_height);
|
||||
copyParamsUV.srcPitch = srcPitch;
|
||||
copyParamsUV.dstMemoryType = CU_MEMORYTYPE_ARRAY;
|
||||
copyParamsUV.dstArray = cudaArray;
|
||||
copyParamsUV.dstY = m_height; // UV plane starts at row m_height in NV12 layout
|
||||
copyParamsUV.WidthInBytes = m_width; // NV12 UV is interleaved, same width as Y
|
||||
copyParamsUV.Height = m_height / 2;
|
||||
|
||||
OutputDebugStringA("[NVDECAV1Decoder::DecodeToSurface] Copying UV plane...\n");
|
||||
result = cuMemcpy2D(©ParamsUV);
|
||||
if (result != CUDA_SUCCESS) {
|
||||
sprintf_s(debug_buf, "[NVDECAV1Decoder::DecodeToSurface] cuMemcpy2D (UV plane) failed with code %d\n", result);
|
||||
OutputDebugStringA(debug_buf);
|
||||
LogCUDAError(result, "cuMemcpy2D (UV plane)");
|
||||
UnmapCUDADevicePtr();
|
||||
cuvidUnmapVideoFrame(m_decoder, srcDevicePtr);
|
||||
return false;
|
||||
}
|
||||
OutputDebugStringA("[NVDECAV1Decoder::DecodeToSurface] UV plane copied successfully\n");
|
||||
|
||||
// Unmap and cleanup
|
||||
UnmapCUDADevicePtr();
|
||||
cuvidUnmapVideoFrame(m_decoder, srcDevicePtr);
|
||||
|
||||
// Fill output frame metadata
|
||||
output_frame.width = m_width;
|
||||
output_frame.height = m_height;
|
||||
output_frame.color_space = ColorSpace::YUV420P;
|
||||
output_frame.frame_index = m_framesDecoded;
|
||||
output_frame.timestamp_seconds = static_cast<double>(m_framesDecoded) / 30.0;
|
||||
} else if (target_type == VAVCORE_SURFACE_D3D12_RESOURCE) {
|
||||
// D3D12 resource path using D3D12SurfaceHandler
|
||||
OutputDebugStringA("[NVDECAV1Decoder::DecodeToSurface] D3D12 resource path\n");
|
||||
@@ -1275,133 +1104,6 @@ VavCoreSurfaceType NVDECAV1Decoder::GetOptimalSurfaceType() const {
|
||||
|
||||
// CUDA surface helper methods implementation
|
||||
|
||||
bool NVDECAV1Decoder::RegisterD3DResourceWithCUDA(void* d3d_texture) {
|
||||
if (!d3d_texture) {
|
||||
OutputDebugStringA("[RegisterD3DResourceWithCUDA] ERROR: d3d_texture is null\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
char debug_buf[256];
|
||||
sprintf_s(debug_buf, "[RegisterD3DResourceWithCUDA] d3d_texture=%p\n", d3d_texture);
|
||||
OutputDebugStringA(debug_buf);
|
||||
|
||||
try {
|
||||
OutputDebugStringA("[RegisterD3DResourceWithCUDA] Calling cuGraphicsD3D11RegisterResource...\n");
|
||||
CUresult result = cuGraphicsD3D11RegisterResource(&m_cudaGraphicsResource,
|
||||
static_cast<ID3D11Resource*>(d3d_texture),
|
||||
CU_GRAPHICS_REGISTER_FLAGS_NONE);
|
||||
if (result != CUDA_SUCCESS) {
|
||||
sprintf_s(debug_buf, "[RegisterD3DResourceWithCUDA] cuGraphicsD3D11RegisterResource failed with code %d\n", result);
|
||||
OutputDebugStringA(debug_buf);
|
||||
LogCUDAError(result, "cuGraphicsD3D11RegisterResource");
|
||||
return false;
|
||||
}
|
||||
|
||||
sprintf_s(debug_buf, "[RegisterD3DResourceWithCUDA] Success, m_cudaGraphicsResource=%p\n", m_cudaGraphicsResource);
|
||||
OutputDebugStringA(debug_buf);
|
||||
return true;
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
sprintf_s(debug_buf, "[RegisterD3DResourceWithCUDA] Exception: %s\n", e.what());
|
||||
OutputDebugStringA(debug_buf);
|
||||
LogError("Exception registering D3D resource with CUDA");
|
||||
return false;
|
||||
}
|
||||
catch (...) {
|
||||
OutputDebugStringA("[RegisterD3DResourceWithCUDA] Unknown exception\n");
|
||||
LogError("Exception registering D3D resource with CUDA");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool NVDECAV1Decoder::MapCUDADevicePtrFromD3D(CUdeviceptr& device_ptr, size_t& pitch) {
|
||||
if (!m_cudaGraphicsResource) {
|
||||
OutputDebugStringA("[MapCUDADevicePtrFromD3D] ERROR: m_cudaGraphicsResource is null\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
char debug_buf[256];
|
||||
sprintf_s(debug_buf, "[MapCUDADevicePtrFromD3D] m_cudaGraphicsResource=%p, m_stream=%p\n",
|
||||
m_cudaGraphicsResource, m_stream);
|
||||
OutputDebugStringA(debug_buf);
|
||||
|
||||
try {
|
||||
// Map graphics resource
|
||||
OutputDebugStringA("[MapCUDADevicePtrFromD3D] Calling cuGraphicsMapResources...\n");
|
||||
CUresult result = cuGraphicsMapResources(1, &m_cudaGraphicsResource, m_stream);
|
||||
if (result != CUDA_SUCCESS) {
|
||||
sprintf_s(debug_buf, "[MapCUDADevicePtrFromD3D] cuGraphicsMapResources failed with code %d\n", result);
|
||||
OutputDebugStringA(debug_buf);
|
||||
LogCUDAError(result, "cuGraphicsMapResources");
|
||||
return false;
|
||||
}
|
||||
OutputDebugStringA("[MapCUDADevicePtrFromD3D] cuGraphicsMapResources succeeded\n");
|
||||
|
||||
// D3D11 textures are mapped as CUDA arrays, not linear memory
|
||||
OutputDebugStringA("[MapCUDADevicePtrFromD3D] D3D11 textures map to CUDA arrays, using cuGraphicsSubResourceGetMappedArray\n");
|
||||
|
||||
// Return 0 for device_ptr to indicate array-based access
|
||||
device_ptr = 0;
|
||||
pitch = m_width;
|
||||
|
||||
sprintf_s(debug_buf, "[MapCUDADevicePtrFromD3D] Mapped as CUDA array (resource=%p)\n",
|
||||
m_cudaGraphicsResource);
|
||||
OutputDebugStringA(debug_buf);
|
||||
return true;
|
||||
}
|
||||
catch (...) {
|
||||
OutputDebugStringA("[MapCUDADevicePtrFromD3D] Exception caught\n");
|
||||
LogError("Exception mapping CUDA device pointer from D3D");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool NVDECAV1Decoder::UnmapCUDADevicePtr() {
|
||||
if (!m_cudaGraphicsResource) {
|
||||
return false;
|
||||
}
|
||||
|
||||
try {
|
||||
CUresult result = cuGraphicsUnmapResources(1, &m_cudaGraphicsResource, m_stream);
|
||||
if (result != CUDA_SUCCESS) {
|
||||
LogCUDAError(result, "cuGraphicsUnmapResources");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Unregister resource
|
||||
result = cuGraphicsUnregisterResource(m_cudaGraphicsResource);
|
||||
if (result != CUDA_SUCCESS) {
|
||||
LogCUDAError(result, "cuGraphicsUnregisterResource");
|
||||
return false;
|
||||
}
|
||||
|
||||
m_cudaGraphicsResource = nullptr;
|
||||
return true;
|
||||
}
|
||||
catch (...) {
|
||||
LogError("Exception unmapping CUDA device pointer");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool NVDECAV1Decoder::SetupCUDAD3D11Interop(void* d3d_device) {
|
||||
if (!m_cuContext || !d3d_device) {
|
||||
OutputDebugStringA("[SetupCUDAD3D11Interop] ERROR: CUDA context or D3D11 device is null\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
OutputDebugStringA("[SetupCUDAD3D11Interop] Setting up CUDA-D3D11 interop...\n");
|
||||
|
||||
// Store the D3D11 device for later use
|
||||
m_d3d11Device = d3d_device;
|
||||
|
||||
char debug_buf[256];
|
||||
sprintf_s(debug_buf, "[SetupCUDAD3D11Interop] D3D11 device stored: %p\n", m_d3d11Device);
|
||||
OutputDebugStringA(debug_buf);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool NVDECAV1Decoder::SetupCUDAD3D12Interop(void* d3d_device) {
|
||||
if (!m_cuContext || !d3d_device) {
|
||||
OutputDebugStringA("[SetupCUDAD3D12Interop] ERROR: CUDA context or D3D12 device is null\n");
|
||||
|
||||
@@ -11,20 +11,6 @@ namespace VavCore {
|
||||
class D3D12SurfaceHandler;
|
||||
}
|
||||
|
||||
//// Prevent TIMECODE conflicts by defining it before Windows headers
|
||||
//#define WIN32_LEAN_AND_MEAN
|
||||
//#define NOMINMAX
|
||||
//
|
||||
//// Prevent specific Windows header conflicts
|
||||
//#define TIMECODE TIMECODE_WIN32
|
||||
//#include <cuda.h>
|
||||
//#include <nvcuvid.h>
|
||||
//#include <cuviddec.h>
|
||||
//#include <cudaD3D11.h>
|
||||
//#include <nvcuvid.h>
|
||||
//#include <dxgi.h>
|
||||
//#undef TIMECODE
|
||||
|
||||
// Forward declaration for CUDA external memory type
|
||||
typedef struct CUexternalMemory_st* cudaExternalMemory_t;
|
||||
typedef struct CUexternalSemaphore_st* cudaExternalSemaphore_t;
|
||||
@@ -112,9 +98,7 @@ private:
|
||||
CUfunction m_kernel = nullptr;
|
||||
|
||||
// D3D-CUDA interop
|
||||
void* m_d3d11Device = nullptr; // ID3D11Device*
|
||||
void* m_d3d12Device = nullptr; // ID3D12Device*
|
||||
CUgraphicsResource m_cudaGraphicsResource = nullptr;
|
||||
VavCoreSurfaceType m_preferredSurfaceType = VAVCORE_SURFACE_CPU;
|
||||
|
||||
// D3D12-CUDA Asynchronous Synchronization
|
||||
@@ -150,11 +134,7 @@ private:
|
||||
void CleanupCUDA();
|
||||
|
||||
// CUDA surface helper methods
|
||||
bool SetupCUDAD3D11Interop(void* d3d_device);
|
||||
bool SetupCUDAD3D12Interop(void* d3d_device);
|
||||
bool RegisterD3DResourceWithCUDA(void* d3d_texture);
|
||||
bool MapCUDADevicePtrFromD3D(CUdeviceptr& device_ptr, size_t& pitch);
|
||||
bool UnmapCUDADevicePtr();
|
||||
|
||||
// NVDEC callbacks
|
||||
static int CUDAAPI HandleVideoSequence(void* user_data, CUVIDEOFORMAT* format);
|
||||
|
||||
Reference in New Issue
Block a user