This commit is contained in:
2025-10-05 03:42:51 +09:00
parent ab8f0cbfcc
commit 3dbcbf2e05
8 changed files with 436 additions and 18 deletions

View File

@@ -63,6 +63,10 @@ struct VideoMetadata {
std::string file_path;
uint64_t file_size = 0;
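// Codec private data (e.g., the AV1 av1C configuration record from WebM CodecPrivate, which may embed the sequence header OBU).
// Non-owning pointer: the bytes are not copied into this struct.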
const uint8_t* codec_private_data = nullptr;
size_t codec_private_size = 0;
bool IsValid() const {
return width > 0 && height > 0 && frame_rate > 0.0;
}

View File

@@ -158,6 +158,51 @@ bool NVDECAV1Decoder::Initialize(const VideoMetadata& metadata) {
return false;
}
// Parse the av1C record carried in the container's codec private data and remember where its
// configOBUs payload starts, so it can be handed to the parser together with the first packet.
// NOTE(review): only the pointer is stored here (no copy), so this presumably relies on the buffer
// behind metadata.codec_private_data staying alive for as long as the decoder uses it — confirm
// against the code that fills VideoMetadata.
if (metadata.codec_private_data && metadata.codec_private_size > 0) {
    // AV1CodecConfigurationRecord layout (AV1 Codec ISO Media File Format Binding):
    // [0]: marker (1 bit) + version (7 bits)
    // [1]: seq_profile (3 bits) + seq_level_idx_0 (5 bits)
    // [2]: seq_tier_0 (1 bit) + high_bitdepth (1 bit) + twelve_bit (1 bit) + monochrome (1 bit) +
    //      chroma_subsampling_x (1 bit) + chroma_subsampling_y (1 bit) + chroma_sample_position (2 bits)
    // [3]: reserved (3 bits) + initial_presentation_delay_present (1 bit) + initial_presentation_delay_minus_one (4 bits)
    // [4...]: configOBUs (OBUs that carry their own size field)
    constexpr size_t kAv1cHeaderSize = 4;
    constexpr size_t kMaxDumpBytes = 8;

    if (metadata.codec_private_size < kAv1cHeaderSize) {
        // Not even a complete fixed header: nothing can be extracted.
        OutputDebugStringA("[Initialize] WARNING: av1C box too small (< 4 bytes)\n");
        printf("[Initialize] WARNING: av1C box too small (< 4 bytes)\n");
    } else if (metadata.codec_private_size == kAv1cHeaderSize) {
        // Header only, no configOBUs follow it.
        OutputDebugStringA("[Initialize] WARNING: No OBUs found in av1C box\n");
        printf("[Initialize] WARNING: No OBUs found in av1C box\n");
    } else {
        // Skip the fixed av1C header; everything after it is the OBU payload.
        const uint8_t* obu_data = metadata.codec_private_data + kAv1cHeaderSize;
        const size_t obu_size = metadata.codec_private_size - kAv1cHeaderSize;

        m_codecPrivateData = obu_data;
        m_codecPrivateSize = obu_size;
        m_firstFrameSent = false;

        char debug_buf[256];
        sprintf_s(debug_buf, "[Initialize] Extracted %zu bytes of OBUs from av1C box (skipped 4-byte header)\n", m_codecPrivateSize);
        OutputDebugStringA(debug_buf);
        printf("%s", debug_buf);

        // Debug: hex-dump the first few bytes. The payload may be shorter than kMaxDumpBytes
        // (as little as one byte), so never read past obu_size.
        const size_t dump_count = (obu_size < kMaxDumpBytes) ? obu_size : kMaxDumpBytes;
        char hex_buf[kMaxDumpBytes * 3] = {};  // "XX" plus one separator/terminator per byte
        for (size_t i = 0; i < dump_count; ++i) {
            sprintf_s(hex_buf + i * 3, sizeof(hex_buf) - i * 3, "%02X", obu_data[i]);
            if (i + 1 < dump_count) {
                hex_buf[i * 3 + 2] = ' ';  // replace the terminator with a separator; next write follows it
            }
        }
        sprintf_s(debug_buf, "[Initialize] OBU data (first %zu bytes): %s\n", dump_count, hex_buf);
        OutputDebugStringA(debug_buf);
        printf("%s", debug_buf);
    }
} else {
    OutputDebugStringA("[Initialize] WARNING: No codec private data available\n");
    printf("[Initialize] WARNING: No codec private data available\n");
}
// Load the PTX module for the deinterleave kernel
CUresult result = cuModuleLoadData(&m_module, g_deinterleave_kernel_ptx);
if (result != CUDA_SUCCESS) {
@@ -650,11 +695,15 @@ bool NVDECAV1Decoder::CreateDecoder() {
return true;
}
bool NVDECAV1Decoder::CreateParser() {
OutputDebugStringA("[CreateParser] Starting parser creation...\n");
memset(&m_parserParams, 0, sizeof(m_parserParams));
m_parserParams.CodecType = cudaVideoCodec_AV1;
m_parserParams.ulMaxNumDecodeSurfaces = 8;
m_parserParams.ulMaxNumDecodeSurfaces = 1;
m_parserParams.ulMaxDisplayDelay = 1; // CRITICAL: Required for pfnDisplayPicture to be called
m_parserParams.ulClockRate = 0; // Use default
m_parserParams.ulErrorThreshold = 100;
m_parserParams.pUserData = this;
@@ -668,6 +717,11 @@ bool NVDECAV1Decoder::CreateParser() {
return false;
}
char debug_buf[256];
sprintf_s(debug_buf, "[CreateParser] Parser created successfully! m_parser=%p, ulMaxDisplayDelay=%d\n",
m_parser, m_parserParams.ulMaxDisplayDelay);
OutputDebugStringA(debug_buf);
return true;
}
@@ -691,10 +745,12 @@ int CUDAAPI NVDECAV1Decoder::HandleVideoSequence(void* user_data, CUVIDEOFORMAT*
}
char debug_buf[512];
sprintf_s(debug_buf, "[NVDECAV1Decoder::HandleVideoSequence] Sequence: %dx%d ChromaFormat:%d BitDepth:%d\n",
sprintf_s(debug_buf, "[HandleVideoSequence] Sequence: %dx%d ChromaFormat:%d BitDepth:%d min_num_decode_surfaces:%d\n",
format->coded_width, format->coded_height,
format->chroma_format, format->bit_depth_luma_minus8 + 8);
format->chroma_format, format->bit_depth_luma_minus8 + 8,
format->min_num_decode_surfaces);
OutputDebugStringA(debug_buf);
printf("%s", debug_buf);
// Check if decoder needs reconfiguration due to format change
bool format_changed = false;
@@ -758,7 +814,14 @@ int CUDAAPI NVDECAV1Decoder::HandleVideoSequence(void* user_data, CUVIDEOFORMAT*
}
}
return 1; // Success
// Return min_num_decode_surfaces to update parser's ulMaxNumDecodeSurfaces
// This is critical for proper DPB (decode picture buffer) allocation
int return_value = (format->min_num_decode_surfaces > 1) ? format->min_num_decode_surfaces : 1;
sprintf_s(debug_buf, "[HandleVideoSequence] Returning %d to update ulMaxNumDecodeSurfaces\n", return_value);
OutputDebugStringA(debug_buf);
printf("%s", debug_buf);
return return_value;
}
int CUDAAPI NVDECAV1Decoder::HandlePictureDecode(void* user_data, CUVIDPICPARAMS* pic_params) {
@@ -822,13 +885,20 @@ int CUDAAPI NVDECAV1Decoder::HandlePictureDisplay(void* user_data, CUVIDPARSERDI
slot_idx % decoder->RING_BUFFER_SIZE, disp_info->picture_index);
OutputDebugStringA(debug_buf);
// Update slot's picture_index (polling thread will query this)
// IMPORTANT: pfnDisplayPicture is called AFTER GPU decoding completes
// So we can directly mark the frame as ready without polling
{
std::lock_guard<std::mutex> lock(slot.slot_mutex);
slot.picture_index = disp_info->picture_index;
slot.is_ready = true; // Frame is already decoded and ready
}
// Note: Polling thread will detect this and signal slot.frame_ready when decode completes
// Signal waiting thread that frame is ready
slot.frame_ready.notify_one();
// Trace that the slot has been flagged ready. The message must end with a real newline escape
// ("\n"); an escaped backslash ("\\n") would print the two characters '\' and 'n' instead.
sprintf_s(debug_buf, "[HandlePictureDisplay] Slot %zu marked ready (picture_index=%d)\n",
    slot_idx % decoder->RING_BUFFER_SIZE, disp_info->picture_index);
OutputDebugStringA(debug_buf);
return 1;
}
@@ -1059,10 +1129,31 @@ bool NVDECAV1Decoder::DecodeToSurface(const uint8_t* packet_data, size_t packet_
}
// ===== Component 2: Packet Submission =====
// 4. Submit packet to NVDEC parser with slot index in timestamp
// 4. Prepare packet (prepend codec private data to first frame)
std::vector<uint8_t> combined_packet;
const uint8_t* final_packet_data = packet_data;
size_t final_packet_size = packet_size;
if (!m_firstFrameSent && m_codecPrivateData && m_codecPrivateSize > 0) {
// First frame: prepend codec private data (AV1 sequence header)
combined_packet.resize(m_codecPrivateSize + packet_size);
memcpy(combined_packet.data(), m_codecPrivateData, m_codecPrivateSize);
memcpy(combined_packet.data() + m_codecPrivateSize, packet_data, packet_size);
final_packet_data = combined_packet.data();
final_packet_size = combined_packet.size();
m_firstFrameSent = true;
sprintf_s(debug_buf, "[DecodeToSurface] First frame: prepended %zu bytes of codec private data (total: %zu bytes)\n",
m_codecPrivateSize, final_packet_size);
OutputDebugStringA(debug_buf);
printf("%s", debug_buf);
}
// 5. Submit packet to NVDEC parser with slot index in timestamp
CUVIDSOURCEDATAPACKET packet = {};
packet.payload = packet_data;
packet.payload_size = static_cast<unsigned long>(packet_size);
packet.payload = final_packet_data;
packet.payload_size = static_cast<unsigned long>(final_packet_size);
packet.flags = CUVID_PKT_ENDOFPICTURE;
packet.timestamp = static_cast<int64_t>(my_slot_idx); // Embed slot index in timestamp

View File

@@ -115,6 +115,11 @@ private:
// Decoder configuration
CUVIDPARSERPARAMS m_parserParams = {};
// Codec private data (AV1 sequence header from WebM)
const uint8_t* m_codecPrivateData = nullptr;
size_t m_codecPrivateSize = 0;
bool m_firstFrameSent = false;
// Statistics
uint64_t m_framesDecoded = 0;
uint64_t m_decodeErrors = 0;

View File

@@ -673,6 +673,31 @@ bool WebMFileReader::ExtractVideoMetadata() {
meta.file_path = m_state->file_path;
// Extract codec private data (AV1 sequence header) from track
const mkvparser::Tracks* tracks = m_state->segment->GetTracks();
if (tracks) {
const mkvparser::Track* track = tracks->GetTrackByNumber(m_state->selected_track_number);
if (track && track->GetType() == mkvparser::Track::kVideo) {
const mkvparser::VideoTrack* video_track = static_cast<const mkvparser::VideoTrack*>(track);
size_t codec_private_size = 0;
const unsigned char* codec_private_data = video_track->GetCodecPrivate(codec_private_size);
if (codec_private_data && codec_private_size > 0) {
meta.codec_private_data = codec_private_data;
meta.codec_private_size = codec_private_size;
char debug_buf[256];
sprintf_s(debug_buf, "[WebMFileReader] Extracted codec private data: %zu bytes\n", codec_private_size);
OutputDebugStringA(debug_buf);
printf("%s", debug_buf);
} else {
OutputDebugStringA("[WebMFileReader] WARNING: No codec private data found in WebM track\n");
printf("[WebMFileReader] WARNING: No codec private data found in WebM track\n");
}
}
}
return meta.IsValid();
}

View File

@@ -328,14 +328,8 @@ VAVCORE_API VavCoreResult vavcore_open_file(VavCorePlayer* player, const char* f
OutputDebugStringA("[VavCore] AV1 track found! Selecting track...\n");
if (player->impl->fileReader->SelectVideoTrack(track.track_number)) {
OutputDebugStringA("[VavCore] Track selected successfully\n");
// Convert track info to VideoMetadata
VideoMetadata metadata;
metadata.width = track.width;
metadata.height = track.height;
metadata.frame_rate = track.frame_rate;
metadata.total_frames = track.frame_count;
metadata.codec_type = track.codec_type;
player->impl->metadata = metadata;
// Get full metadata from WebMFileReader (includes codec_private_data)
player->impl->metadata = player->impl->fileReader->GetVideoMetadata();
foundAV1 = true;
break;
}