From 47a93404a09948894378856b5de3d4ef659123fb Mon Sep 17 00:00:00 2001 From: ened Date: Fri, 3 Oct 2025 16:21:08 +0900 Subject: [PATCH] WIP --- .../src/Decoder/D3D12SurfaceHandler.cpp | 39 +-- .../vavcore/src/Decoder/NVDECAV1Decoder.cpp | 304 +----------------- .../vavcore/src/Decoder/NVDECAV1Decoder.h | 20 -- 3 files changed, 11 insertions(+), 352 deletions(-) diff --git a/vav2/platforms/windows/vavcore/src/Decoder/D3D12SurfaceHandler.cpp b/vav2/platforms/windows/vavcore/src/Decoder/D3D12SurfaceHandler.cpp index a1c3ff9..f5a3cf8 100644 --- a/vav2/platforms/windows/vavcore/src/Decoder/D3D12SurfaceHandler.cpp +++ b/vav2/platforms/windows/vavcore/src/Decoder/D3D12SurfaceHandler.cpp @@ -32,36 +32,22 @@ bool D3D12SurfaceHandler::CopyNV12Frame(CUdeviceptr src_frame, } // Get D3D12 texture layout for both Y and UV planes + // CRITICAL: Must use the ACTUAL texture descriptor as-is (including any padding) + // DO NOT override height - GetCopyableFootprints needs the real allocation size D3D12_RESOURCE_DESC desc = dst_texture->GetDesc(); D3D12_PLACED_SUBRESOURCE_FOOTPRINT layouts[2]; UINT num_rows[2] = {0}; UINT64 row_sizes[2] = {0}; UINT64 total_bytes = 0; - // CRITICAL: Use actual video height, not texture allocation height - // Texture was allocated with height * 1.5 for NV12 layout, but GetCopyableFootprints - // should use actual video height to calculate correct plane offsets - D3D12_RESOURCE_DESC actual_video_desc = desc; - actual_video_desc.Height = height; // Use actual video height, not allocation height - - // CRITICAL: Query BOTH subresources in a SINGLE call to get correct relative offsets - // NV12 has 2 subresources (plane 0: Y, plane 1: UV) - m_device->GetCopyableFootprints(&actual_video_desc, 0, 2, 0, + // Use descriptor directly without modification (preserves allocated height/pitch) + m_device->GetCopyableFootprints(&desc, 0, 2, 0, layouts, num_rows, row_sizes, &total_bytes); uint32_t y_dst_pitch = layouts[0].Footprint.RowPitch; uint32_t uv_dst_pitch = layouts[1].Footprint.RowPitch; UINT64 uv_offset = layouts[1].Offset; - char buf[512]; - sprintf_s(buf, "[D3D12SurfaceHandler] Y: width=%u, height=%u, srcPitch=%u, dstPitch=%u\n", - width, height, src_pitch, y_dst_pitch); - OutputDebugStringA(buf); - - sprintf_s(buf, "[D3D12SurfaceHandler] UV: width=%u, height=%u, srcPitch=%u, dstPitch=%u, offset=%llu\n", - width, height / 2, src_pitch, uv_dst_pitch, uv_offset); - OutputDebugStringA(buf); - // Copy Y plane if (!CopyYPlane(src_frame, src_pitch, dst_ptr, y_dst_pitch, @@ -70,7 +56,7 @@ bool D3D12SurfaceHandler::CopyNV12Frame(CUdeviceptr src_frame, } // Copy UV plane - CUdeviceptr src_uv = src_frame + (src_pitch * height); + CUdeviceptr src_uv = src_frame + (static_cast(src_pitch) * height); CUdeviceptr dst_uv = dst_ptr + uv_offset; if (!CopyUVPlane(src_uv, src_pitch, @@ -79,7 +65,6 @@ bool D3D12SurfaceHandler::CopyNV12Frame(CUdeviceptr src_frame, return false; } - OutputDebugStringA("[D3D12SurfaceHandler] NV12 frame copied successfully\n"); return true; } @@ -99,12 +84,10 @@ bool D3D12SurfaceHandler::CopyYPlane(CUdeviceptr src, uint32_t src_pitch, CUdeviceptr dst, uint32_t dst_pitch, uint32_t width, uint32_t height) { - // Copy Y plane: single 8-bit channel - // Width parameter is in bytes (same as pixel width for 8-bit format) cudaError_t err = cudaMemcpy2D( (void*)dst, dst_pitch, (void*)src, src_pitch, - width, height, + width, height, // Use the logical width, not the pitch, for the copy width. cudaMemcpyDeviceToDevice ); @@ -115,8 +98,6 @@ bool D3D12SurfaceHandler::CopyYPlane(CUdeviceptr src, uint32_t src_pitch, OutputDebugStringA(buf); return false; } - - OutputDebugStringA("[D3D12SurfaceHandler] Y plane copied\n"); return true; } @@ -124,13 +105,11 @@ bool D3D12SurfaceHandler::CopyUVPlane(CUdeviceptr src, uint32_t src_pitch, CUdeviceptr dst, uint32_t dst_pitch, uint32_t width, uint32_t height) { - // Copy UV plane: interleaved U and V (NV12 format) - // Width in bytes = width of Y plane (because U and V are interleaved) - // Height = half of Y plane height + // For interleaved NV12, the byte width of the UV plane is the same as the Y plane. cudaError_t err = cudaMemcpy2D( (void*)dst, dst_pitch, (void*)src, src_pitch, - width, height, + width, height, // Use the logical width, not the pitch, for the copy width. cudaMemcpyDeviceToDevice ); @@ -141,8 +120,6 @@ bool D3D12SurfaceHandler::CopyUVPlane(CUdeviceptr src, uint32_t src_pitch, OutputDebugStringA(buf); return false; } - - OutputDebugStringA("[D3D12SurfaceHandler] UV plane copied\n"); return true; } diff --git a/vav2/platforms/windows/vavcore/src/Decoder/NVDECAV1Decoder.cpp b/vav2/platforms/windows/vavcore/src/Decoder/NVDECAV1Decoder.cpp index 642d849..6c5bde9 100644 --- a/vav2/platforms/windows/vavcore/src/Decoder/NVDECAV1Decoder.cpp +++ b/vav2/platforms/windows/vavcore/src/Decoder/NVDECAV1Decoder.cpp @@ -546,12 +546,7 @@ bool NVDECAV1Decoder::InitializeCUDA() { } // If D3D device is set, enable D3D interop on this context - if (m_d3d11Device) { - OutputDebugStringA("[InitializeCUDA] D3D11 device detected, CUDA-D3D11 interop will be enabled\n"); - char debug_buf[256]; - sprintf_s(debug_buf, "[InitializeCUDA] D3D11 device: %p, CUDA context: %p\n", m_d3d11Device, m_cuContext); - OutputDebugStringA(debug_buf); - } else if (m_d3d12Device) { + if (m_d3d12Device) { OutputDebugStringA("[InitializeCUDA] D3D12 device detected, CUDA-D3D12 interop will be enabled\n"); char debug_buf[256]; sprintf_s(debug_buf, "[InitializeCUDA] D3D12 device: %p, CUDA context: %p\n", m_d3d12Device, m_cuContext); @@ -783,12 +778,11 @@ bool NVDECAV1Decoder::SupportsSurfaceType(VavCoreSurfaceType type) const { switch (type) { case VAVCORE_SURFACE_CUDA_DEVICE: return true; // NVDEC's primary strength - CUDA device pointers - case VAVCORE_SURFACE_D3D11_TEXTURE: - return true; // Via CUDA-D3D11 interop case VAVCORE_SURFACE_D3D12_RESOURCE: return true; // Via CUDA-D3D12 interop case VAVCORE_SURFACE_CPU: return true; // Always support CPU fallback + case VAVCORE_SURFACE_D3D11_TEXTURE: case VAVCORE_SURFACE_AMF_SURFACE: default: return false; @@ -803,25 +797,7 @@ bool NVDECAV1Decoder::SetD3DDevice(void* d3d_device, VavCoreSurfaceType type) { try { bool success = false; - if (type == VAVCORE_SURFACE_D3D11_TEXTURE) { - char debug_buf[256]; - sprintf_s(debug_buf, "[SetD3DDevice] Setting D3D11 device: %p\n", d3d_device); - OutputDebugStringA(debug_buf); - - // Store D3D11 device - m_d3d11Device = d3d_device; - m_preferredSurfaceType = type; - - // DON'T reinitialize if already initialized - this would invalidate existing decoded frames - if (m_initialized) { - OutputDebugStringA("[SetD3DDevice] Decoder already initialized, D3D11 device set for DecodeToSurface\n"); - OutputDebugStringA("[SetD3DDevice] Note: D3D11 zero-copy will be available for DecodeToSurface calls\n"); - } else { - OutputDebugStringA("[SetD3DDevice] D3D11 device set, will be used when Initialize() is called\n"); - } - - success = true; - } else if (type == VAVCORE_SURFACE_D3D12_RESOURCE) { + if (type == VAVCORE_SURFACE_D3D12_RESOURCE) { char debug_buf[256]; sprintf_s(debug_buf, "[SetD3DDevice] Setting D3D12 device: %p\n", d3d_device); OutputDebugStringA(debug_buf); @@ -1013,153 +989,6 @@ bool NVDECAV1Decoder::DecodeToSurface(const uint8_t* packet_data, size_t packet_ // Unmap the frame (this is just for getting device pointer, actual usage will be later) cuvidUnmapVideoFrame(m_decoder, devicePtr); - } else if (target_type == VAVCORE_SURFACE_D3D11_TEXTURE) { - // For D3D11 texture, we need CUDA-D3D interop - OutputDebugStringA("[NVDECAV1Decoder::DecodeToSurface] D3D11 texture path\n"); - - if (!target_surface || !m_d3d11Device) { - OutputDebugStringA("[NVDECAV1Decoder::DecodeToSurface] ERROR: target_surface or m_d3d11Device is null\n"); - LogError("D3D11 texture or device not available"); - return false; - } - - char debug_buf[256]; - sprintf_s(debug_buf, "[NVDECAV1Decoder::DecodeToSurface] target_surface=%p, m_d3d11Device=%p\n", - target_surface, m_d3d11Device); - OutputDebugStringA(debug_buf); - - // Register D3D texture with CUDA - OutputDebugStringA("[NVDECAV1Decoder::DecodeToSurface] Calling RegisterD3DResourceWithCUDA...\n"); - bool registerSuccess = RegisterD3DResourceWithCUDA(target_surface); - if (!registerSuccess) { - OutputDebugStringA("[NVDECAV1Decoder::DecodeToSurface] ERROR: RegisterD3DResourceWithCUDA failed\n"); - LogError("Failed to register D3D texture with CUDA"); - return false; - } - OutputDebugStringA("[NVDECAV1Decoder::DecodeToSurface] RegisterD3DResourceWithCUDA succeeded\n"); - - // Wait for decoded frame to be available (blocks until callback completes) - int frameIdx = -1; - { - std::unique_lock lock(m_frameQueueMutex); - m_frameAvailable.wait(lock, [this] { - return !m_decodedFrameIndices.empty(); - }); - frameIdx = m_decodedFrameIndices.front(); - m_decodedFrameIndices.pop(); - } - - sprintf_s(debug_buf, "[NVDECAV1Decoder::DecodeToSurface] Using decoded frame from queue: frameIdx=%d\n", frameIdx); - OutputDebugStringA(debug_buf); - - // Map decoded NVDEC frame - CUVIDPROCPARAMS procParams = {}; - procParams.progressive_frame = 1; - procParams.top_field_first = 0; - procParams.unpaired_field = 0; - - CUdeviceptr srcDevicePtr = 0; - unsigned int srcPitch = 0; - - sprintf_s(debug_buf, "[NVDECAV1Decoder::DecodeToSurface] Calling cuvidMapVideoFrame (frameIdx=%d)\n", frameIdx); - OutputDebugStringA(debug_buf); - - result = cuvidMapVideoFrame(m_decoder, frameIdx, &srcDevicePtr, &srcPitch, &procParams); - - if (result == CUDA_SUCCESS) { - sprintf_s(debug_buf, "[NVDECAV1Decoder::DecodeToSurface] cuvidMapVideoFrame succeeded: srcDevicePtr=%p, srcPitch=%u\n", - (void*)srcDevicePtr, srcPitch); - OutputDebugStringA(debug_buf); - } else { - sprintf_s(debug_buf, "[NVDECAV1Decoder::DecodeToSurface] cuvidMapVideoFrame failed with code %d\n", result); - OutputDebugStringA(debug_buf); - LogCUDAError(result, "cuvidMapVideoFrame"); - return false; - } - - // Map D3D texture to CUDA array - OutputDebugStringA("[NVDECAV1Decoder::DecodeToSurface] Calling MapCUDADevicePtrFromD3D...\n"); - CUdeviceptr dstDevicePtr; - size_t dstPitch; - bool mapSuccess = MapCUDADevicePtrFromD3D(dstDevicePtr, dstPitch); - if (!mapSuccess) { - OutputDebugStringA("[NVDECAV1Decoder::DecodeToSurface] ERROR: MapCUDADevicePtrFromD3D failed\n"); - LogError("Failed to map CUDA device pointer from D3D"); - cuvidUnmapVideoFrame(m_decoder, srcDevicePtr); - return false; - } - OutputDebugStringA("[NVDECAV1Decoder::DecodeToSurface] MapCUDADevicePtrFromD3D succeeded\n"); - - // NV12 D3D11 texture: Get CUDA array (entire NV12 surface including Y and UV) - OutputDebugStringA("[NVDECAV1Decoder::DecodeToSurface] Getting CUDA array for NV12 texture...\n"); - CUarray cudaArray; - result = cuGraphicsSubResourceGetMappedArray(&cudaArray, m_cudaGraphicsResource, 0, 0); - if (result != CUDA_SUCCESS) { - sprintf_s(debug_buf, "[NVDECAV1Decoder::DecodeToSurface] cuGraphicsSubResourceGetMappedArray failed with code %d\n", result); - OutputDebugStringA(debug_buf); - LogCUDAError(result, "cuGraphicsSubResourceGetMappedArray"); - UnmapCUDADevicePtr(); - cuvidUnmapVideoFrame(m_decoder, srcDevicePtr); - return false; - } - sprintf_s(debug_buf, "[NVDECAV1Decoder::DecodeToSurface] Got CUDA array: %p\n", cudaArray); - OutputDebugStringA(debug_buf); - - // Copy Y plane from NVDEC to D3D11 NV12 texture - CUDA_MEMCPY2D copyParamsY = {}; - copyParamsY.srcMemoryType = CU_MEMORYTYPE_DEVICE; - copyParamsY.srcDevice = srcDevicePtr; - copyParamsY.srcPitch = srcPitch; - copyParamsY.dstMemoryType = CU_MEMORYTYPE_ARRAY; - copyParamsY.dstArray = cudaArray; - copyParamsY.WidthInBytes = m_width; - copyParamsY.Height = m_height; - - OutputDebugStringA("[NVDECAV1Decoder::DecodeToSurface] Copying Y plane...\n"); - result = cuMemcpy2D(©ParamsY); - if (result != CUDA_SUCCESS) { - sprintf_s(debug_buf, "[NVDECAV1Decoder::DecodeToSurface] cuMemcpy2D (Y plane) failed with code %d\n", result); - OutputDebugStringA(debug_buf); - LogCUDAError(result, "cuMemcpy2D (Y plane)"); - UnmapCUDADevicePtr(); - cuvidUnmapVideoFrame(m_decoder, srcDevicePtr); - return false; - } - OutputDebugStringA("[NVDECAV1Decoder::DecodeToSurface] Y plane copied successfully\n"); - - // Copy UV plane from NVDEC to D3D11 NV12 texture (UV starts at row m_height) - CUDA_MEMCPY2D copyParamsUV = {}; - copyParamsUV.srcMemoryType = CU_MEMORYTYPE_DEVICE; - copyParamsUV.srcDevice = srcDevicePtr + (srcPitch * m_height); - copyParamsUV.srcPitch = srcPitch; - copyParamsUV.dstMemoryType = CU_MEMORYTYPE_ARRAY; - copyParamsUV.dstArray = cudaArray; - copyParamsUV.dstY = m_height; // UV plane starts at row m_height in NV12 layout - copyParamsUV.WidthInBytes = m_width; // NV12 UV is interleaved, same width as Y - copyParamsUV.Height = m_height / 2; - - OutputDebugStringA("[NVDECAV1Decoder::DecodeToSurface] Copying UV plane...\n"); - result = cuMemcpy2D(©ParamsUV); - if (result != CUDA_SUCCESS) { - sprintf_s(debug_buf, "[NVDECAV1Decoder::DecodeToSurface] cuMemcpy2D (UV plane) failed with code %d\n", result); - OutputDebugStringA(debug_buf); - LogCUDAError(result, "cuMemcpy2D (UV plane)"); - UnmapCUDADevicePtr(); - cuvidUnmapVideoFrame(m_decoder, srcDevicePtr); - return false; - } - OutputDebugStringA("[NVDECAV1Decoder::DecodeToSurface] UV plane copied successfully\n"); - - // Unmap and cleanup - UnmapCUDADevicePtr(); - cuvidUnmapVideoFrame(m_decoder, srcDevicePtr); - - // Fill output frame metadata - output_frame.width = m_width; - output_frame.height = m_height; - output_frame.color_space = ColorSpace::YUV420P; - output_frame.frame_index = m_framesDecoded; - output_frame.timestamp_seconds = static_cast(m_framesDecoded) / 30.0; } else if (target_type == VAVCORE_SURFACE_D3D12_RESOURCE) { // D3D12 resource path using D3D12SurfaceHandler OutputDebugStringA("[NVDECAV1Decoder::DecodeToSurface] D3D12 resource path\n"); @@ -1275,133 +1104,6 @@ VavCoreSurfaceType NVDECAV1Decoder::GetOptimalSurfaceType() const { // CUDA surface helper methods implementation -bool NVDECAV1Decoder::RegisterD3DResourceWithCUDA(void* d3d_texture) { - if (!d3d_texture) { - OutputDebugStringA("[RegisterD3DResourceWithCUDA] ERROR: d3d_texture is null\n"); - return false; - } - - char debug_buf[256]; - sprintf_s(debug_buf, "[RegisterD3DResourceWithCUDA] d3d_texture=%p\n", d3d_texture); - OutputDebugStringA(debug_buf); - - try { - OutputDebugStringA("[RegisterD3DResourceWithCUDA] Calling cuGraphicsD3D11RegisterResource...\n"); - CUresult result = cuGraphicsD3D11RegisterResource(&m_cudaGraphicsResource, - static_cast(d3d_texture), - CU_GRAPHICS_REGISTER_FLAGS_NONE); - if (result != CUDA_SUCCESS) { - sprintf_s(debug_buf, "[RegisterD3DResourceWithCUDA] cuGraphicsD3D11RegisterResource failed with code %d\n", result); - OutputDebugStringA(debug_buf); - LogCUDAError(result, "cuGraphicsD3D11RegisterResource"); - return false; - } - - sprintf_s(debug_buf, "[RegisterD3DResourceWithCUDA] Success, m_cudaGraphicsResource=%p\n", m_cudaGraphicsResource); - OutputDebugStringA(debug_buf); - return true; - } - catch (const std::exception& e) { - sprintf_s(debug_buf, "[RegisterD3DResourceWithCUDA] Exception: %s\n", e.what()); - OutputDebugStringA(debug_buf); - LogError("Exception registering D3D resource with CUDA"); - return false; - } - catch (...) { - OutputDebugStringA("[RegisterD3DResourceWithCUDA] Unknown exception\n"); - LogError("Exception registering D3D resource with CUDA"); - return false; - } -} - -bool NVDECAV1Decoder::MapCUDADevicePtrFromD3D(CUdeviceptr& device_ptr, size_t& pitch) { - if (!m_cudaGraphicsResource) { - OutputDebugStringA("[MapCUDADevicePtrFromD3D] ERROR: m_cudaGraphicsResource is null\n"); - return false; - } - - char debug_buf[256]; - sprintf_s(debug_buf, "[MapCUDADevicePtrFromD3D] m_cudaGraphicsResource=%p, m_stream=%p\n", - m_cudaGraphicsResource, m_stream); - OutputDebugStringA(debug_buf); - - try { - // Map graphics resource - OutputDebugStringA("[MapCUDADevicePtrFromD3D] Calling cuGraphicsMapResources...\n"); - CUresult result = cuGraphicsMapResources(1, &m_cudaGraphicsResource, m_stream); - if (result != CUDA_SUCCESS) { - sprintf_s(debug_buf, "[MapCUDADevicePtrFromD3D] cuGraphicsMapResources failed with code %d\n", result); - OutputDebugStringA(debug_buf); - LogCUDAError(result, "cuGraphicsMapResources"); - return false; - } - OutputDebugStringA("[MapCUDADevicePtrFromD3D] cuGraphicsMapResources succeeded\n"); - - // D3D11 textures are mapped as CUDA arrays, not linear memory - OutputDebugStringA("[MapCUDADevicePtrFromD3D] D3D11 textures map to CUDA arrays, using cuGraphicsSubResourceGetMappedArray\n"); - - // Return 0 for device_ptr to indicate array-based access - device_ptr = 0; - pitch = m_width; - - sprintf_s(debug_buf, "[MapCUDADevicePtrFromD3D] Mapped as CUDA array (resource=%p)\n", - m_cudaGraphicsResource); - OutputDebugStringA(debug_buf); - return true; - } - catch (...) { - OutputDebugStringA("[MapCUDADevicePtrFromD3D] Exception caught\n"); - LogError("Exception mapping CUDA device pointer from D3D"); - return false; - } -} - -bool NVDECAV1Decoder::UnmapCUDADevicePtr() { - if (!m_cudaGraphicsResource) { - return false; - } - - try { - CUresult result = cuGraphicsUnmapResources(1, &m_cudaGraphicsResource, m_stream); - if (result != CUDA_SUCCESS) { - LogCUDAError(result, "cuGraphicsUnmapResources"); - return false; - } - - // Unregister resource - result = cuGraphicsUnregisterResource(m_cudaGraphicsResource); - if (result != CUDA_SUCCESS) { - LogCUDAError(result, "cuGraphicsUnregisterResource"); - return false; - } - - m_cudaGraphicsResource = nullptr; - return true; - } - catch (...) { - LogError("Exception unmapping CUDA device pointer"); - return false; - } -} - -bool NVDECAV1Decoder::SetupCUDAD3D11Interop(void* d3d_device) { - if (!m_cuContext || !d3d_device) { - OutputDebugStringA("[SetupCUDAD3D11Interop] ERROR: CUDA context or D3D11 device is null\n"); - return false; - } - - OutputDebugStringA("[SetupCUDAD3D11Interop] Setting up CUDA-D3D11 interop...\n"); - - // Store the D3D11 device for later use - m_d3d11Device = d3d_device; - - char debug_buf[256]; - sprintf_s(debug_buf, "[SetupCUDAD3D11Interop] D3D11 device stored: %p\n", m_d3d11Device); - OutputDebugStringA(debug_buf); - - return true; -} - bool NVDECAV1Decoder::SetupCUDAD3D12Interop(void* d3d_device) { if (!m_cuContext || !d3d_device) { OutputDebugStringA("[SetupCUDAD3D12Interop] ERROR: CUDA context or D3D12 device is null\n"); diff --git a/vav2/platforms/windows/vavcore/src/Decoder/NVDECAV1Decoder.h b/vav2/platforms/windows/vavcore/src/Decoder/NVDECAV1Decoder.h index 1cdb8e6..c39fd7d 100644 --- a/vav2/platforms/windows/vavcore/src/Decoder/NVDECAV1Decoder.h +++ b/vav2/platforms/windows/vavcore/src/Decoder/NVDECAV1Decoder.h @@ -11,20 +11,6 @@ namespace VavCore { class D3D12SurfaceHandler; } -//// Prevent TIMECODE conflicts by defining it before Windows headers -//#define WIN32_LEAN_AND_MEAN -//#define NOMINMAX -// -//// Prevent specific Windows header conflicts -//#define TIMECODE TIMECODE_WIN32 -//#include -//#include -//#include -//#include -//#include -//#include -//#undef TIMECODE - // Forward declaration for CUDA external memory type typedef struct CUexternalMemory_st* cudaExternalMemory_t; typedef struct CUexternalSemaphore_st* cudaExternalSemaphore_t; @@ -112,9 +98,7 @@ private: CUfunction m_kernel = nullptr; // D3D-CUDA interop - void* m_d3d11Device = nullptr; // ID3D11Device* void* m_d3d12Device = nullptr; // ID3D12Device* - CUgraphicsResource m_cudaGraphicsResource = nullptr; VavCoreSurfaceType m_preferredSurfaceType = VAVCORE_SURFACE_CPU; // D3D12-CUDA Asynchronous Synchronization @@ -150,11 +134,7 @@ private: void CleanupCUDA(); // CUDA surface helper methods - bool SetupCUDAD3D11Interop(void* d3d_device); bool SetupCUDAD3D12Interop(void* d3d_device); - bool RegisterD3DResourceWithCUDA(void* d3d_texture); - bool MapCUDADevicePtrFromD3D(CUdeviceptr& device_ptr, size_t& pitch); - bool UnmapCUDADevicePtr(); // NVDEC callbacks static int CUDAAPI HandleVideoSequence(void* user_data, CUVIDEOFORMAT* format);