WIP
This commit is contained in:
@@ -63,6 +63,10 @@ struct VideoMetadata {
|
||||
std::string file_path;
|
||||
uint64_t file_size = 0;
|
||||
|
||||
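// Codec private data as stored in the container (for AV1 in WebM: the av1C configuration
// record, i.e. a 4-byte header followed by configOBUs such as the sequence header).
// Non-owning: the pointer is stored as handed out by the demuxer, the bytes are not copied.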
|
||||
const uint8_t* codec_private_data = nullptr;
|
||||
size_t codec_private_size = 0;
|
||||
|
||||
bool IsValid() const {
|
||||
return width > 0 && height > 0 && frame_rate > 0.0;
|
||||
}
|
||||
|
||||
@@ -158,6 +158,51 @@ bool NVDECAV1Decoder::Initialize(const VideoMetadata& metadata) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Parse av1C box to extract OBUs
|
||||
if (metadata.codec_private_data && metadata.codec_private_size > 0) {
|
||||
char debug_buf[256];
|
||||
|
||||
// av1C box structure (ISO/IEC 14496-15):
|
||||
// [0]: marker (1 bit) + version (7 bits)
|
||||
// [1]: seq_profile (3 bits) + seq_level_idx_0 (5 bits)
|
||||
// [2]: seq_tier_0 (1 bit) + high_bitdepth (1 bit) + twelve_bit (1 bit) + monochrome (1 bit) +
|
||||
// chroma_subsampling_x (1 bit) + chroma_subsampling_y (1 bit) + chroma_sample_position (2 bits)
|
||||
// [3]: reserved (3 bits) + initial_presentation_delay_present (1 bit) + initial_presentation_delay_minus_one (4 bits)
|
||||
// [4...]: configOBUs (length-prefixed OBUs)
|
||||
|
||||
if (metadata.codec_private_size >= 5) {
|
||||
// Skip av1C header (4 bytes) and check if there are OBUs
|
||||
const uint8_t* obu_data = metadata.codec_private_data + 4;
|
||||
size_t obu_size = metadata.codec_private_size - 4;
|
||||
|
||||
if (obu_size > 0) {
|
||||
m_codecPrivateData = obu_data;
|
||||
m_codecPrivateSize = obu_size;
|
||||
m_firstFrameSent = false;
|
||||
|
||||
sprintf_s(debug_buf, "[Initialize] Extracted %zu bytes of OBUs from av1C box (skipped 4-byte header)\n", m_codecPrivateSize);
|
||||
OutputDebugStringA(debug_buf);
|
||||
printf("%s", debug_buf);
|
||||
|
||||
// Debug: print first few bytes
|
||||
sprintf_s(debug_buf, "[Initialize] OBU data (first 8 bytes): %02X %02X %02X %02X %02X %02X %02X %02X\n",
|
||||
obu_data[0], obu_data[1], obu_data[2], obu_data[3],
|
||||
obu_data[4], obu_data[5], obu_data[6], obu_data[7]);
|
||||
OutputDebugStringA(debug_buf);
|
||||
printf("%s", debug_buf);
|
||||
} else {
|
||||
OutputDebugStringA("[Initialize] WARNING: No OBUs found in av1C box\n");
|
||||
printf("[Initialize] WARNING: No OBUs found in av1C box\n");
|
||||
}
|
||||
} else {
|
||||
OutputDebugStringA("[Initialize] WARNING: av1C box too small (< 5 bytes)\n");
|
||||
printf("[Initialize] WARNING: av1C box too small (< 5 bytes)\n");
|
||||
}
|
||||
} else {
|
||||
OutputDebugStringA("[Initialize] WARNING: No codec private data available\n");
|
||||
printf("[Initialize] WARNING: No codec private data available\n");
|
||||
}
|
||||
|
||||
// Load the PTX module for the deinterleave kernel
|
||||
CUresult result = cuModuleLoadData(&m_module, g_deinterleave_kernel_ptx);
|
||||
if (result != CUDA_SUCCESS) {
|
||||
@@ -650,11 +695,15 @@ bool NVDECAV1Decoder::CreateDecoder() {
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool NVDECAV1Decoder::CreateParser() {
|
||||
OutputDebugStringA("[CreateParser] Starting parser creation...\n");
|
||||
|
||||
memset(&m_parserParams, 0, sizeof(m_parserParams));
|
||||
|
||||
m_parserParams.CodecType = cudaVideoCodec_AV1;
|
||||
m_parserParams.ulMaxNumDecodeSurfaces = 8;
|
||||
m_parserParams.ulMaxNumDecodeSurfaces = 1;
|
||||
m_parserParams.ulMaxDisplayDelay = 1; // CRITICAL: Required for pfnDisplayPicture to be called
|
||||
m_parserParams.ulClockRate = 0; // Use default
|
||||
m_parserParams.ulErrorThreshold = 100;
|
||||
m_parserParams.pUserData = this;
|
||||
@@ -668,6 +717,11 @@ bool NVDECAV1Decoder::CreateParser() {
|
||||
return false;
|
||||
}
|
||||
|
||||
char debug_buf[256];
|
||||
sprintf_s(debug_buf, "[CreateParser] Parser created successfully! m_parser=%p, ulMaxDisplayDelay=%d\n",
|
||||
m_parser, m_parserParams.ulMaxDisplayDelay);
|
||||
OutputDebugStringA(debug_buf);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -691,10 +745,12 @@ int CUDAAPI NVDECAV1Decoder::HandleVideoSequence(void* user_data, CUVIDEOFORMAT*
|
||||
}
|
||||
|
||||
char debug_buf[512];
|
||||
sprintf_s(debug_buf, "[NVDECAV1Decoder::HandleVideoSequence] Sequence: %dx%d ChromaFormat:%d BitDepth:%d\n",
|
||||
sprintf_s(debug_buf, "[HandleVideoSequence] Sequence: %dx%d ChromaFormat:%d BitDepth:%d min_num_decode_surfaces:%d\n",
|
||||
format->coded_width, format->coded_height,
|
||||
format->chroma_format, format->bit_depth_luma_minus8 + 8);
|
||||
format->chroma_format, format->bit_depth_luma_minus8 + 8,
|
||||
format->min_num_decode_surfaces);
|
||||
OutputDebugStringA(debug_buf);
|
||||
printf("%s", debug_buf);
|
||||
|
||||
// Check if decoder needs reconfiguration due to format change
|
||||
bool format_changed = false;
|
||||
@@ -758,7 +814,14 @@ int CUDAAPI NVDECAV1Decoder::HandleVideoSequence(void* user_data, CUVIDEOFORMAT*
|
||||
}
|
||||
}
|
||||
|
||||
return 1; // Success
|
||||
// Return min_num_decode_surfaces to update parser's ulMaxNumDecodeSurfaces
|
||||
// This is critical for proper DPB (decode picture buffer) allocation
|
||||
int return_value = (format->min_num_decode_surfaces > 1) ? format->min_num_decode_surfaces : 1;
|
||||
sprintf_s(debug_buf, "[HandleVideoSequence] Returning %d to update ulMaxNumDecodeSurfaces\n", return_value);
|
||||
OutputDebugStringA(debug_buf);
|
||||
printf("%s", debug_buf);
|
||||
|
||||
return return_value;
|
||||
}
|
||||
|
||||
int CUDAAPI NVDECAV1Decoder::HandlePictureDecode(void* user_data, CUVIDPICPARAMS* pic_params) {
|
||||
@@ -822,13 +885,20 @@ int CUDAAPI NVDECAV1Decoder::HandlePictureDisplay(void* user_data, CUVIDPARSERDI
|
||||
slot_idx % decoder->RING_BUFFER_SIZE, disp_info->picture_index);
|
||||
OutputDebugStringA(debug_buf);
|
||||
|
||||
// Update slot's picture_index (polling thread will query this)
|
||||
// IMPORTANT: pfnDisplayPicture is called AFTER GPU decoding completes
|
||||
// So we can directly mark the frame as ready without polling
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(slot.slot_mutex);
|
||||
slot.picture_index = disp_info->picture_index;
|
||||
slot.is_ready = true; // Frame is already decoded and ready
|
||||
}
|
||||
|
||||
// Note: Polling thread will detect this and signal slot.frame_ready when decode completes
|
||||
// Signal waiting thread that frame is ready
|
||||
slot.frame_ready.notify_one();
|
||||
|
||||
// Log that the slot has been marked ready. The format string must end in "\n"
// (a real newline); the previous "\\n" printed a literal backslash followed by 'n',
// leaving this debug line unterminated.
sprintf_s(debug_buf, "[HandlePictureDisplay] Slot %zu marked ready (picture_index=%d)\n",
    slot_idx % decoder->RING_BUFFER_SIZE, disp_info->picture_index);
|
||||
OutputDebugStringA(debug_buf);
|
||||
|
||||
return 1;
|
||||
}
|
||||
@@ -1059,10 +1129,31 @@ bool NVDECAV1Decoder::DecodeToSurface(const uint8_t* packet_data, size_t packet_
|
||||
}
|
||||
|
||||
// ===== Component 2: Packet Submission =====
|
||||
// 4. Submit packet to NVDEC parser with slot index in timestamp
|
||||
// 4. Prepare packet (prepend codec private data to first frame)
|
||||
std::vector<uint8_t> combined_packet;
|
||||
const uint8_t* final_packet_data = packet_data;
|
||||
size_t final_packet_size = packet_size;
|
||||
|
||||
if (!m_firstFrameSent && m_codecPrivateData && m_codecPrivateSize > 0) {
|
||||
// First frame: prepend codec private data (AV1 sequence header)
|
||||
combined_packet.resize(m_codecPrivateSize + packet_size);
|
||||
memcpy(combined_packet.data(), m_codecPrivateData, m_codecPrivateSize);
|
||||
memcpy(combined_packet.data() + m_codecPrivateSize, packet_data, packet_size);
|
||||
|
||||
final_packet_data = combined_packet.data();
|
||||
final_packet_size = combined_packet.size();
|
||||
m_firstFrameSent = true;
|
||||
|
||||
sprintf_s(debug_buf, "[DecodeToSurface] First frame: prepended %zu bytes of codec private data (total: %zu bytes)\n",
|
||||
m_codecPrivateSize, final_packet_size);
|
||||
OutputDebugStringA(debug_buf);
|
||||
printf("%s", debug_buf);
|
||||
}
|
||||
|
||||
// 5. Submit packet to NVDEC parser with slot index in timestamp
|
||||
CUVIDSOURCEDATAPACKET packet = {};
|
||||
packet.payload = packet_data;
|
||||
packet.payload_size = static_cast<unsigned long>(packet_size);
|
||||
packet.payload = final_packet_data;
|
||||
packet.payload_size = static_cast<unsigned long>(final_packet_size);
|
||||
packet.flags = CUVID_PKT_ENDOFPICTURE;
|
||||
packet.timestamp = static_cast<int64_t>(my_slot_idx); // Embed slot index in timestamp
|
||||
|
||||
|
||||
@@ -115,6 +115,11 @@ private:
|
||||
// Decoder configuration
|
||||
CUVIDPARSERPARAMS m_parserParams = {};
|
||||
|
||||
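// AV1 configOBUs taken from the av1C record in the WebM CodecPrivate (4-byte header skipped).
// Non-owning view into VideoMetadata::codec_private_data; prepended to the first packet only.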
|
||||
const uint8_t* m_codecPrivateData = nullptr;
|
||||
size_t m_codecPrivateSize = 0;
|
||||
bool m_firstFrameSent = false;
|
||||
|
||||
// Statistics
|
||||
uint64_t m_framesDecoded = 0;
|
||||
uint64_t m_decodeErrors = 0;
|
||||
|
||||
@@ -673,6 +673,31 @@ bool WebMFileReader::ExtractVideoMetadata() {
|
||||
|
||||
meta.file_path = m_state->file_path;
|
||||
|
||||
// Extract codec private data (AV1 sequence header) from track
|
||||
const mkvparser::Tracks* tracks = m_state->segment->GetTracks();
|
||||
if (tracks) {
|
||||
const mkvparser::Track* track = tracks->GetTrackByNumber(m_state->selected_track_number);
|
||||
if (track && track->GetType() == mkvparser::Track::kVideo) {
|
||||
const mkvparser::VideoTrack* video_track = static_cast<const mkvparser::VideoTrack*>(track);
|
||||
|
||||
size_t codec_private_size = 0;
|
||||
const unsigned char* codec_private_data = video_track->GetCodecPrivate(codec_private_size);
|
||||
|
||||
if (codec_private_data && codec_private_size > 0) {
|
||||
meta.codec_private_data = codec_private_data;
|
||||
meta.codec_private_size = codec_private_size;
|
||||
|
||||
char debug_buf[256];
|
||||
sprintf_s(debug_buf, "[WebMFileReader] Extracted codec private data: %zu bytes\n", codec_private_size);
|
||||
OutputDebugStringA(debug_buf);
|
||||
printf("%s", debug_buf);
|
||||
} else {
|
||||
OutputDebugStringA("[WebMFileReader] WARNING: No codec private data found in WebM track\n");
|
||||
printf("[WebMFileReader] WARNING: No codec private data found in WebM track\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return meta.IsValid();
|
||||
}
|
||||
|
||||
|
||||
@@ -328,14 +328,8 @@ VAVCORE_API VavCoreResult vavcore_open_file(VavCorePlayer* player, const char* f
|
||||
OutputDebugStringA("[VavCore] AV1 track found! Selecting track...\n");
|
||||
if (player->impl->fileReader->SelectVideoTrack(track.track_number)) {
|
||||
OutputDebugStringA("[VavCore] Track selected successfully\n");
|
||||
// Convert track info to VideoMetadata
|
||||
VideoMetadata metadata;
|
||||
metadata.width = track.width;
|
||||
metadata.height = track.height;
|
||||
metadata.frame_rate = track.frame_rate;
|
||||
metadata.total_frames = track.frame_count;
|
||||
metadata.codec_type = track.codec_type;
|
||||
player->impl->metadata = metadata;
|
||||
// Get full metadata from WebMFileReader (includes codec_private_data)
|
||||
player->impl->metadata = player->impl->fileReader->GetVideoMetadata();
|
||||
foundAV1 = true;
|
||||
break;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user