WIP

2025-10-05 03:42:51 +09:00
parent ab8f0cbfcc
commit 3dbcbf2e05
8 changed files with 436 additions and 18 deletions
--- a/vav2/platforms/windows/vavcore/src/Common/VideoTypes.h
+++ b/vav2/platforms/windows/vavcore/src/Common/VideoTypes.h
@@ -63,6 +63,10 @@ struct VideoMetadata {
    std::string file_path;
    uint64_t file_size = 0;

+    // Codec private data (e.g., WebM CodecPrivate; the NVDEC AV1 path parses it as an av1C record). Non-owning: WebMFileReader stores the track's pointer without copying.
+    const uint8_t* codec_private_data = nullptr;
+    size_t codec_private_size = 0;
+
    bool IsValid() const {
        return width > 0 && height > 0 && frame_rate > 0.0;
    }
--- a/vav2/platforms/windows/vavcore/src/Decoder/NVDECAV1Decoder.cpp
+++ b/vav2/platforms/windows/vavcore/src/Decoder/NVDECAV1Decoder.cpp
@@ -158,6 +158,51 @@ bool NVDECAV1Decoder::Initialize(const VideoMetadata& metadata) {
        return false;
    }

+    // Parse av1C box to extract OBUs
+    if (metadata.codec_private_data && metadata.codec_private_size > 0) {
+        char debug_buf[256];
+
+        // av1C (AV1CodecConfigurationRecord) layout, per the AV1 Codec ISO Media File Format Binding:
+        // [0]: marker (1 bit) + version (7 bits)
+        // [1]: seq_profile (3 bits) + seq_level_idx_0 (5 bits)
+        // [2]: seq_tier_0 (1 bit) + high_bitdepth (1 bit) + twelve_bit (1 bit) + monochrome (1 bit) +
+        //      chroma_subsampling_x (1 bit) + chroma_subsampling_y (1 bit) + chroma_sample_position (2 bits)
+        // [3]: reserved (3 bits) + initial_presentation_delay_present (1 bit) + initial_presentation_delay_minus_one (4 bits)
+        // [4...]: configOBUs (length-prefixed OBUs)
+
+        if (metadata.codec_private_size >= 5) {
+            // Skip av1C header (4 bytes) and check if there are OBUs
+            const uint8_t* obu_data = metadata.codec_private_data + 4;
+            size_t obu_size = metadata.codec_private_size - 4;
+
+            if (obu_size > 0) {
+                m_codecPrivateData = obu_data;
+                m_codecPrivateSize = obu_size;
+                m_firstFrameSent = false;
+
+                sprintf_s(debug_buf, "[Initialize] Extracted %zu bytes of OBUs from av1C box (skipped 4-byte header)\n", m_codecPrivateSize);
+                OutputDebugStringA(debug_buf);
+                printf("%s", debug_buf);
+
+                // Debug: print first few bytes (obu_size may be < 8 here; bytes past the end print as 00 and are never read)
+                const auto obu_byte = [&](size_t i) -> unsigned { return i < obu_size ? obu_data[i] : 0u; };
+                sprintf_s(debug_buf, "[Initialize] OBU data (first 8 bytes): %02X %02X %02X %02X %02X %02X %02X %02X\n",
+                          obu_byte(0), obu_byte(1), obu_byte(2), obu_byte(3), obu_byte(4), obu_byte(5), obu_byte(6), obu_byte(7));
+                OutputDebugStringA(debug_buf);
+                printf("%s", debug_buf);
+            } else {
+                OutputDebugStringA("[Initialize] WARNING: No OBUs found in av1C box\n");
+                printf("[Initialize] WARNING: No OBUs found in av1C box\n");
+            }
+        } else {
+            OutputDebugStringA("[Initialize] WARNING: av1C box too small (< 5 bytes)\n");
+            printf("[Initialize] WARNING: av1C box too small (< 5 bytes)\n");
+        }
+    } else {
+        OutputDebugStringA("[Initialize] WARNING: No codec private data available\n");
+        printf("[Initialize] WARNING: No codec private data available\n");
+    }
+
    // Load the PTX module for the deinterleave kernel
    CUresult result = cuModuleLoadData(&m_module, g_deinterleave_kernel_ptx);
    if (result != CUDA_SUCCESS) {
@@ -650,11 +695,15 @@ bool NVDECAV1Decoder::CreateDecoder() {
    return true;
 }

+
 bool NVDECAV1Decoder::CreateParser() {
+    OutputDebugStringA("[CreateParser] Starting parser creation...\n");
+
    memset(&m_parserParams, 0, sizeof(m_parserParams));

    m_parserParams.CodecType = cudaVideoCodec_AV1;
-    m_parserParams.ulMaxNumDecodeSurfaces = 8;
+    m_parserParams.ulMaxNumDecodeSurfaces = 1;
+    m_parserParams.ulMaxDisplayDelay = 1;  // CRITICAL: Required for pfnDisplayPicture to be called
    m_parserParams.ulClockRate = 0; // Use default
    m_parserParams.ulErrorThreshold = 100;
    m_parserParams.pUserData = this;
@@ -668,6 +717,11 @@ bool NVDECAV1Decoder::CreateParser() {
        return false;
    }

+    char debug_buf[256];
+    sprintf_s(debug_buf, "[CreateParser] Parser created successfully! m_parser=%p, ulMaxDisplayDelay=%u\n",
+              m_parser, m_parserParams.ulMaxDisplayDelay);  // ulMaxDisplayDelay is unsigned int, hence %u
+    OutputDebugStringA(debug_buf);
+
    return true;
 }

@@ -691,10 +745,12 @@ int CUDAAPI NVDECAV1Decoder::HandleVideoSequence(void* user_data, CUVIDEOFORMAT*
    }

    char debug_buf[512];
-    sprintf_s(debug_buf, "[NVDECAV1Decoder::HandleVideoSequence] Sequence: %dx%d ChromaFormat:%d BitDepth:%d\n",
+    sprintf_s(debug_buf, "[HandleVideoSequence] Sequence: %dx%d ChromaFormat:%d BitDepth:%d min_num_decode_surfaces:%d\n",
              format->coded_width, format->coded_height,
-              format->chroma_format, format->bit_depth_luma_minus8 + 8);
+              format->chroma_format, format->bit_depth_luma_minus8 + 8,
+              format->min_num_decode_surfaces);
    OutputDebugStringA(debug_buf);
+    printf("%s", debug_buf);

    // Check if decoder needs reconfiguration due to format change
    bool format_changed = false;
@@ -758,7 +814,14 @@ int CUDAAPI NVDECAV1Decoder::HandleVideoSequence(void* user_data, CUVIDEOFORMAT*
        }
    }

-    return 1; // Success
+    // Return min_num_decode_surfaces to update parser's ulMaxNumDecodeSurfaces
+    // This is critical for proper DPB (decode picture buffer) allocation
+    int return_value = (format->min_num_decode_surfaces > 1) ? format->min_num_decode_surfaces : 1;
+    sprintf_s(debug_buf, "[HandleVideoSequence] Returning %d to update ulMaxNumDecodeSurfaces\n", return_value);
+    OutputDebugStringA(debug_buf);
+    printf("%s", debug_buf);
+
+    return return_value;
 }

 int CUDAAPI NVDECAV1Decoder::HandlePictureDecode(void* user_data, CUVIDPICPARAMS* pic_params) {
@@ -822,13 +885,20 @@ int CUDAAPI NVDECAV1Decoder::HandlePictureDisplay(void* user_data, CUVIDPARSERDI
              slot_idx % decoder->RING_BUFFER_SIZE, disp_info->picture_index);
    OutputDebugStringA(debug_buf);

-    // Update slot's picture_index (polling thread will query this)
+    // NOTE(review): pfnDisplayPicture fires when the parser releases a picture in display order; confirm against the
+    // NVDEC docs that GPU decode is complete at this point (cuvidMapVideoFrame is presumably what blocks on completion).
    {
        std::lock_guard<std::mutex> lock(slot.slot_mutex);
        slot.picture_index = disp_info->picture_index;
+        slot.is_ready = true;  // Publish the slot to the waiter directly instead of relying on the polling thread
    }

-    // Note: Polling thread will detect this and signal slot.frame_ready when decode completes
+    // Signal waiting thread that frame is ready
+    slot.frame_ready.notify_one();
+
+    sprintf_s(debug_buf, "[HandlePictureDisplay] Slot %zu marked ready (picture_index=%d)\n",
+              slot_idx % decoder->RING_BUFFER_SIZE, disp_info->picture_index);  // real newline, not a literal "\n"
+    OutputDebugStringA(debug_buf);

    return 1;
 }
@@ -1059,10 +1129,31 @@ bool NVDECAV1Decoder::DecodeToSurface(const uint8_t* packet_data, size_t packet_
        }

        // ===== Component 2: Packet Submission =====
-        // 4. Submit packet to NVDEC parser with slot index in timestamp
+        // 4. Prepare packet (prepend codec private data to first frame)
+        std::vector<uint8_t> combined_packet;
+        const uint8_t* final_packet_data = packet_data;
+        size_t final_packet_size = packet_size;
+
+        if (!m_firstFrameSent && m_codecPrivateData && m_codecPrivateSize > 0) {
+            // First frame: prepend codec private data (AV1 sequence header)
+            combined_packet.resize(m_codecPrivateSize + packet_size);
+            memcpy(combined_packet.data(), m_codecPrivateData, m_codecPrivateSize);
+            memcpy(combined_packet.data() + m_codecPrivateSize, packet_data, packet_size);
+
+            final_packet_data = combined_packet.data();
+            final_packet_size = combined_packet.size();
+            m_firstFrameSent = true;
+
+            sprintf_s(debug_buf, "[DecodeToSurface] First frame: prepended %zu bytes of codec private data (total: %zu bytes)\n",
+                      m_codecPrivateSize, final_packet_size);
+            OutputDebugStringA(debug_buf);
+            printf("%s", debug_buf);
+        }
+
+        // 5. Submit packet to NVDEC parser with slot index in timestamp
        CUVIDSOURCEDATAPACKET packet = {};
-        packet.payload = packet_data;
-        packet.payload_size = static_cast<unsigned long>(packet_size);
+        packet.payload = final_packet_data;
+        packet.payload_size = static_cast<unsigned long>(final_packet_size);
        packet.flags = CUVID_PKT_ENDOFPICTURE;
        packet.timestamp = static_cast<int64_t>(my_slot_idx);  // Embed slot index in timestamp

--- a/vav2/platforms/windows/vavcore/src/Decoder/NVDECAV1Decoder.h
+++ b/vav2/platforms/windows/vavcore/src/Decoder/NVDECAV1Decoder.h
@@ -115,6 +115,11 @@ private:
    // Decoder configuration
    CUVIDPARSERPARAMS m_parserParams = {};

+    // AV1 configOBUs taken from the av1C CodecPrivate (4-byte header skipped); non-owning view into VideoMetadata::codec_private_data
+    const uint8_t* m_codecPrivateData = nullptr;
+    size_t m_codecPrivateSize = 0;
+    bool m_firstFrameSent = false;
+
    // Statistics
    uint64_t m_framesDecoded = 0;
    uint64_t m_decodeErrors = 0;
--- a/vav2/platforms/windows/vavcore/src/FileIO/WebMFileReader.cpp
+++ b/vav2/platforms/windows/vavcore/src/FileIO/WebMFileReader.cpp
@@ -673,6 +673,31 @@ bool WebMFileReader::ExtractVideoMetadata() {

    meta.file_path = m_state->file_path;

+    // Extract codec private data (AV1 sequence header) from track
+    const mkvparser::Tracks* tracks = m_state->segment->GetTracks();
+    if (tracks) {
+        const mkvparser::Track* track = tracks->GetTrackByNumber(m_state->selected_track_number);
+        if (track && track->GetType() == mkvparser::Track::kVideo) {
+            const mkvparser::VideoTrack* video_track = static_cast<const mkvparser::VideoTrack*>(track);
+
+            size_t codec_private_size = 0;
+            const unsigned char* codec_private_data = video_track->GetCodecPrivate(codec_private_size);
+
+            if (codec_private_data && codec_private_size > 0) {
+                meta.codec_private_data = codec_private_data;
+                meta.codec_private_size = codec_private_size;
+
+                char debug_buf[256];
+                sprintf_s(debug_buf, "[WebMFileReader] Extracted codec private data: %zu bytes\n", codec_private_size);
+                OutputDebugStringA(debug_buf);
+                printf("%s", debug_buf);
+            } else {
+                OutputDebugStringA("[WebMFileReader] WARNING: No codec private data found in WebM track\n");
+                printf("[WebMFileReader] WARNING: No codec private data found in WebM track\n");
+            }
+        }
+    }
+
    return meta.IsValid();
 }

--- a/vav2/platforms/windows/vavcore/src/VavCore.cpp
+++ b/vav2/platforms/windows/vavcore/src/VavCore.cpp
@@ -328,14 +328,8 @@ VAVCORE_API VavCoreResult vavcore_open_file(VavCorePlayer* player, const char* f
                OutputDebugStringA("[VavCore] AV1 track found! Selecting track...\n");
                if (player->impl->fileReader->SelectVideoTrack(track.track_number)) {
                    OutputDebugStringA("[VavCore] Track selected successfully\n");
-                    // Convert track info to VideoMetadata
-                    VideoMetadata metadata;
-                    metadata.width = track.width;
-                    metadata.height = track.height;
-                    metadata.frame_rate = track.frame_rate;
-                    metadata.total_frames = track.frame_count;
-                    metadata.codec_type = track.codec_type;
-                    player->impl->metadata = metadata;
+                    // Get full metadata from WebMFileReader (includes codec_private_data)
+                    player->impl->metadata = player->impl->fileReader->GetVideoMetadata();
                    foundAV1 = true;
                    break;
                }