// Listing metadata: 365 lines, 11 KiB, C++
#include "pch.h"
|
|
#include "NVDECAV1Decoder_Headless.h"
|
|
#include <iostream>
|
|
#include <cstring>
|
|
#include <algorithm>
|
|
|
|
namespace Vav2Player {
|
|
|
|
NVDECAV1Decoder_Headless::NVDECAV1Decoder_Headless()
|
|
: m_initialized(false) {
|
|
}
|
|
|
|
// Destructor: delegates to Cleanup(), which destroys the parser, the decoder
// and the CUDA stream/context if they exist.
NVDECAV1Decoder_Headless::~NVDECAV1Decoder_Headless() {
    this->Cleanup();
}
|
|
|
|
bool NVDECAV1Decoder_Headless::Initialize(const VideoMetadata& metadata) {
|
|
if (m_initialized) {
|
|
LogError("Decoder already initialized");
|
|
return false;
|
|
}
|
|
|
|
if (metadata.codec_type != VideoCodecType::AV1) {
|
|
LogError("Invalid codec type for NVDEC AV1 decoder");
|
|
return false;
|
|
}
|
|
|
|
// Check NVDEC availability
|
|
if (!IsNVDECAvailable()) {
|
|
LogError("NVDEC not available on this system");
|
|
return false;
|
|
}
|
|
|
|
// Initialize CUDA context
|
|
if (!InitializeCUDA()) {
|
|
LogError("Failed to initialize CUDA");
|
|
return false;
|
|
}
|
|
|
|
// Store video properties
|
|
m_width = metadata.width;
|
|
m_height = metadata.height;
|
|
m_maxWidth = std::max(m_width, 4096u);
|
|
m_maxHeight = std::max(m_height, 4096u);
|
|
|
|
// Create decoder
|
|
if (!CreateDecoder()) {
|
|
LogError("Failed to create NVDEC decoder");
|
|
Cleanup();
|
|
return false;
|
|
}
|
|
|
|
// Create parser
|
|
if (!CreateParser()) {
|
|
LogError("Failed to create NVDEC parser");
|
|
Cleanup();
|
|
return false;
|
|
}
|
|
|
|
m_initialized = true;
|
|
|
|
std::cout << "[NVDECAV1Decoder_Headless] Initialized successfully" << std::endl;
|
|
std::cout << " Resolution: " << m_width << "x" << m_height << std::endl;
|
|
std::cout << " Max Resolution: " << m_maxWidth << "x" << m_maxHeight << std::endl;
|
|
|
|
return true;
|
|
}
|
|
|
|
void NVDECAV1Decoder_Headless::Cleanup() {
|
|
if (m_parser) {
|
|
cuvidDestroyVideoParser(m_parser);
|
|
m_parser = nullptr;
|
|
}
|
|
|
|
if (m_decoder) {
|
|
cuvidDestroyDecoder(m_decoder);
|
|
m_decoder = nullptr;
|
|
}
|
|
|
|
CleanupCUDA();
|
|
m_initialized = false;
|
|
}
|
|
|
|
bool NVDECAV1Decoder_Headless::IsInitialized() const {
|
|
return m_initialized;
|
|
}
|
|
|
|
bool NVDECAV1Decoder_Headless::DecodeFrame(const VideoPacket& input_packet, VideoFrame& output_frame) {
|
|
if (!input_packet.IsValid()) {
|
|
LogError("Invalid input packet");
|
|
return false;
|
|
}
|
|
|
|
return DecodeFrame(input_packet.data.get(), input_packet.size, output_frame);
|
|
}
|
|
|
|
bool NVDECAV1Decoder_Headless::DecodeFrame(const uint8_t* packet_data, size_t packet_size, VideoFrame& output_frame) {
|
|
if (!m_initialized || !packet_data || packet_size == 0) {
|
|
LogError("Invalid parameters or decoder not initialized");
|
|
return false;
|
|
}
|
|
|
|
auto decode_start = std::chrono::high_resolution_clock::now();
|
|
|
|
// Prepare packet for parser
|
|
CUVIDSOURCEDATAPACKET packet = {};
|
|
packet.payload_size = packet_size;
|
|
packet.payload = packet_data;
|
|
packet.flags = 0; // No special flags for headless mode
|
|
|
|
// Parse packet
|
|
CUresult result = cuvidParseVideoData(m_parser, &packet);
|
|
if (result != CUDA_SUCCESS) {
|
|
LogCUDAError(result, "cuvidParseVideoData");
|
|
m_decodeErrors++;
|
|
return false;
|
|
}
|
|
|
|
// For headless mode, we just mark the frame as successfully decoded
|
|
// without actually copying pixel data
|
|
output_frame.width = m_width;
|
|
output_frame.height = m_height;
|
|
output_frame.format = PixelFormat::YUV420P;
|
|
|
|
// Update statistics
|
|
auto decode_end = std::chrono::high_resolution_clock::now();
|
|
double decode_time = std::chrono::duration<double, std::milli>(decode_end - decode_start).count();
|
|
|
|
m_framesDecoded++;
|
|
m_bytesProcessed += packet_size;
|
|
|
|
// Update average decode time
|
|
m_avgDecodeTime = (m_avgDecodeTime * (m_framesDecoded - 1) + decode_time) / m_framesDecoded;
|
|
|
|
return true;
|
|
}
|
|
|
|
bool NVDECAV1Decoder_Headless::Reset() {
|
|
if (!m_initialized) {
|
|
return false;
|
|
}
|
|
|
|
// Reset statistics
|
|
ResetStats();
|
|
return true;
|
|
}
|
|
|
|
bool NVDECAV1Decoder_Headless::Flush() {
|
|
if (!m_initialized) {
|
|
return false;
|
|
}
|
|
|
|
// Send end-of-stream packet to flush any remaining frames
|
|
CUVIDSOURCEDATAPACKET packet = {};
|
|
packet.flags = CUVID_PKT_ENDOFSTREAM;
|
|
|
|
CUresult result = cuvidParseVideoData(m_parser, &packet);
|
|
return (result == CUDA_SUCCESS);
|
|
}
|
|
|
|
std::string NVDECAV1Decoder_Headless::GetVersion() const {
|
|
int driver_version = 0;
|
|
cuDriverGetVersion(&driver_version);
|
|
|
|
return "NVDEC AV1 Headless (CUDA Driver: " + std::to_string(driver_version) + ")";
|
|
}
|
|
|
|
bool NVDECAV1Decoder_Headless::IsNVDECAvailable() const {
|
|
// Check if CUDA driver is available
|
|
if (cuInit(0) != CUDA_SUCCESS) {
|
|
return false;
|
|
}
|
|
|
|
// Check device count
|
|
int device_count = 0;
|
|
if (cuDeviceGetCount(&device_count) != CUDA_SUCCESS || device_count == 0) {
|
|
return false;
|
|
}
|
|
|
|
// Check decode capabilities for AV1
|
|
CUdevice device;
|
|
if (cuDeviceGet(&device, 0) != CUDA_SUCCESS) {
|
|
return false;
|
|
}
|
|
|
|
CUVIDDECODECAPS decode_caps = {};
|
|
decode_caps.eCodecType = cudaVideoCodec_AV1;
|
|
decode_caps.eChromaFormat = cudaVideoChromaFormat_420;
|
|
decode_caps.nBitDepthMinus8 = 0;
|
|
|
|
if (cuvidGetDecoderCaps(&decode_caps) != CUDA_SUCCESS) {
|
|
return false;
|
|
}
|
|
|
|
return decode_caps.bIsSupported != 0;
|
|
}
|
|
|
|
bool NVDECAV1Decoder_Headless::InitializeCUDA() {
|
|
// Initialize CUDA driver
|
|
CUresult result = cuInit(0);
|
|
if (result != CUDA_SUCCESS) {
|
|
LogCUDAError(result, "cuInit");
|
|
return false;
|
|
}
|
|
|
|
// Get device
|
|
CUdevice device;
|
|
result = cuDeviceGet(&device, 0);
|
|
if (result != CUDA_SUCCESS) {
|
|
LogCUDAError(result, "cuDeviceGet");
|
|
return false;
|
|
}
|
|
|
|
// Create context - use correct API signature for CUDA 13.0
|
|
CUctxCreateParams createParams = {};
|
|
createParams.execAffinityParams = nullptr;
|
|
result = cuCtxCreate_v4(&m_cuContext, &createParams, 0, device);
|
|
if (result != CUDA_SUCCESS) {
|
|
LogCUDAError(result, "cuCtxCreate");
|
|
return false;
|
|
}
|
|
|
|
// Create stream
|
|
result = cuStreamCreate(&m_stream, CU_STREAM_DEFAULT);
|
|
if (result != CUDA_SUCCESS) {
|
|
LogCUDAError(result, "cuStreamCreate");
|
|
return false;
|
|
}
|
|
|
|
return CheckCUDACapability();
|
|
}
|
|
|
|
bool NVDECAV1Decoder_Headless::CheckCUDACapability() {
|
|
// Get device properties
|
|
int major, minor;
|
|
CUresult result = cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, 0);
|
|
if (result != CUDA_SUCCESS) {
|
|
LogCUDAError(result, "cuDeviceGetAttribute");
|
|
return false;
|
|
}
|
|
|
|
result = cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, 0);
|
|
if (result != CUDA_SUCCESS) {
|
|
LogCUDAError(result, "cuDeviceGetAttribute");
|
|
return false;
|
|
}
|
|
|
|
std::cout << "[NVDECAV1Decoder_Headless] CUDA Compute Capability: " << major << "." << minor << std::endl;
|
|
|
|
// NVDEC requires compute capability 3.0 or higher
|
|
return (major >= 3);
|
|
}
|
|
|
|
bool NVDECAV1Decoder_Headless::CreateDecoder() {
|
|
memset(&m_createInfo, 0, sizeof(m_createInfo));
|
|
|
|
m_createInfo.CodecType = cudaVideoCodec_AV1;
|
|
m_createInfo.ChromaFormat = cudaVideoChromaFormat_420;
|
|
m_createInfo.OutputFormat = cudaVideoSurfaceFormat_NV12;
|
|
m_createInfo.bitDepthMinus8 = 0;
|
|
m_createInfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
|
|
m_createInfo.ulNumOutputSurfaces = 8; // Simplified for headless
|
|
m_createInfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
|
|
m_createInfo.ulNumDecodeSurfaces = 8;
|
|
m_createInfo.vidLock = nullptr;
|
|
m_createInfo.ulWidth = m_width;
|
|
m_createInfo.ulHeight = m_height;
|
|
m_createInfo.ulMaxWidth = m_maxWidth;
|
|
m_createInfo.ulMaxHeight = m_maxHeight;
|
|
m_createInfo.ulTargetWidth = m_width;
|
|
m_createInfo.ulTargetHeight = m_height;
|
|
|
|
CUresult result = cuvidCreateDecoder(&m_decoder, &m_createInfo);
|
|
if (result != CUDA_SUCCESS) {
|
|
LogCUDAError(result, "cuvidCreateDecoder");
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool NVDECAV1Decoder_Headless::CreateParser() {
|
|
memset(&m_parserParams, 0, sizeof(m_parserParams));
|
|
|
|
m_parserParams.CodecType = cudaVideoCodec_AV1;
|
|
m_parserParams.ulMaxNumDecodeSurfaces = 8;
|
|
m_parserParams.ulClockRate = 0; // Use default
|
|
m_parserParams.ulErrorThreshold = 100;
|
|
m_parserParams.pUserData = this;
|
|
m_parserParams.pfnSequenceCallback = HandleVideoSequence;
|
|
m_parserParams.pfnDecodePicture = HandlePictureDecode;
|
|
m_parserParams.pfnDisplayPicture = HandlePictureDisplay;
|
|
|
|
CUresult result = cuvidCreateVideoParser(&m_parser, &m_parserParams);
|
|
if (result != CUDA_SUCCESS) {
|
|
LogCUDAError(result, "cuvidCreateVideoParser");
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
void NVDECAV1Decoder_Headless::CleanupCUDA() {
|
|
if (m_stream) {
|
|
cuStreamDestroy(m_stream);
|
|
m_stream = nullptr;
|
|
}
|
|
|
|
if (m_cuContext) {
|
|
cuCtxDestroy(m_cuContext);
|
|
m_cuContext = nullptr;
|
|
}
|
|
}
|
|
|
|
// NVDEC Callbacks
|
|
// Parser sequence callback (registered in CreateParser()).
// Prints the coded size, chroma format and luma bit depth of the sequence.
// Returns 0 if either pointer is null, 1 otherwise.
int CUDAAPI NVDECAV1Decoder_Headless::HandleVideoSequence(void* user_data, CUVIDEOFORMAT* format) {
    if (user_data == nullptr || format == nullptr) {
        return 0;
    }

    // The header stores the luma bit depth minus 8.
    const int luma_bit_depth = format->bit_depth_luma_minus8 + 8;

    std::cout << "[NVDECAV1Decoder_Headless] Sequence: "
              << format->coded_width << "x" << format->coded_height
              << " ChromaFormat:" << format->chroma_format
              << " BitDepth:" << luma_bit_depth
              << std::endl;

    return 1; // Success
}
|
|
|
|
// Parser decode callback (registered in CreateParser()).
// Submits the picture parameters to the decoder owned by the instance passed
// as `user_data`. Returns 1 on success; 0 if a pointer is null or if
// cuvidDecodePicture() fails (the failure is logged).
int CUDAAPI NVDECAV1Decoder_Headless::HandlePictureDecode(void* user_data, CUVIDPICPARAMS* pic_params) {
    if (user_data == nullptr || pic_params == nullptr) {
        return 0;
    }

    auto& self = *static_cast<NVDECAV1Decoder_Headless*>(user_data);

    const CUresult status = cuvidDecodePicture(self.m_decoder, pic_params);
    if (status == CUDA_SUCCESS) {
        return 1; // Success
    }

    self.LogCUDAError(status, "cuvidDecodePicture");
    return 0;
}
|
|
|
|
// Parser display callback (registered in CreateParser()).
// Headless mode does nothing with the picture: it only acknowledges it.
// Returns 0 if either pointer is null, 1 otherwise.
int CUDAAPI NVDECAV1Decoder_Headless::HandlePictureDisplay(void* user_data, CUVIDPARSERDISPINFO* disp_info) {
    const bool has_arguments = (user_data != nullptr) && (disp_info != nullptr);
    return has_arguments ? 1 : 0;
}
|
|
|
|
// Writes `message` to std::cerr behind the decoder's error prefix, followed
// by a newline and flush.
void NVDECAV1Decoder_Headless::LogError(const std::string& message) const {
    std::ostream& err = std::cerr;
    err << "[NVDECAV1Decoder_Headless] ERROR: ";
    err << message;
    err << std::endl;
}
|
|
|
|
// Writes a CUDA failure to std::cerr: the name of the failing operation, the
// driver's text for `result` (or "Unknown error" when the driver supplies
// none) and the numeric result code.
void NVDECAV1Decoder_Headless::LogCUDAError(CUresult result, const std::string& operation) const {
    const char* description = nullptr;
    cuGetErrorString(result, &description);
    if (description == nullptr) {
        description = "Unknown error";
    }

    std::cerr << "[NVDECAV1Decoder_Headless] CUDA ERROR in " << operation << ": "
              << description
              << " (code: " << result << ")" << std::endl;
}
|
|
|
|
} // namespace Vav2Player
|