Merge pull request #27878 from 0lekW:ffmpeg-negative-dts-seeking-fix

Fix frame seeking with negative DTS values in FFMPEG backend #27878 **Merge with extra**: https://github.com/opencv/opencv_extra/pull/1289 Fixes https://github.com/opencv/opencv/issues/27819 Fixes https://github.com/opencv/opencv/issues/23472 Accompanied by PR on https://github.com/opencv/opencv_extra/pull/1289 The FFmpeg backend fails to correctly seek in H.264 videos that contain negative DTS values in their initial frames. This is a valid encoding practice used by modern video encoders (such as DaVinci Resolve's current export) where B-frame reordering causes the first few frames to have negative DTS values. When picture_pts is unavailable (AV_NOPTS_VALUE), the code falls back to using pkt_dts: `picture_pts = packet_raw.pts != AV_NOPTS_VALUE_ ? packet_raw.pts : packet_raw.dts;` If this DTS value is negative (which is legal per the H.264 spec), it propagates through the frame number calculation: `frame_number = dts_to_frame_number(picture_pts) - first_frame_number;` This results in negative frame numbers, which breaks seeking operations. The solution implemented in this branch is a timestamp normalization similar to FFmpeg's `-avoid_negative_ts make_zero` option: - Calculate a global offset once on the first decoded frame by taking the minimum of: - Container start_time - Stream start_time - First observed timestamp (PTS, then DTS). - Apply the offset consistently to all timestamps, shifting negative values to begin at 0 while keeping relative timing. - Simplify timestamp converters to remove `start_time` subtractions since timestamps are pre-normalized. This also includes a new test, `videoio_ffmpeg.seek_with_negative_dts`. This test verifies that seeking behaves as expected on a file that has negative DTS values in the first frames.
A PR on opencv_extra accompanies this one with that testing file: https://github.com/opencv/opencv_extra/pull/1279 ``` opencv_extra=ffmpeg-videoio-negative-dts-test-data ``` <cut/> ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake
2026-01-18 17:21:42 +01:00 · 2025-12-16 21:38:51 +13:00
parent 33ebceddb2
commit 7ca9d9ce03
2 changed files with 160 additions and 12 deletions
--- a/modules/videoio/src/cap_ffmpeg_impl.hpp
+++ b/modules/videoio/src/cap_ffmpeg_impl.hpp
@@ -524,6 +524,17 @@ inline static std::string _opencv_ffmpeg_get_error_string(int error_code)
        return std::string("Unknown error");
 }

+static inline int64_t to_avtb(int64_t ts, AVRational tb) // rescale ts from time base tb into AV_TIME_BASE_Q units
+{
+    return av_rescale_q(ts, tb, AV_TIME_BASE_Q); // ts is passed through as-is; no AV_NOPTS_VALUE check is done here
+}
+
+static inline int64_t from_avtb(int64_t ts_avtb, AVRational tb) // rescale ts_avtb from AV_TIME_BASE_Q units into time base tb
+{
+    return av_rescale_q(ts_avtb, AV_TIME_BASE_Q, tb); // ts_avtb is passed through as-is; no AV_NOPTS_VALUE check is done here
+}
+
+
 struct CvCapture_FFMPEG
 {
    bool open(const char* filename, int index, const Ptr<IStreamReader>& stream, const VideoCaptureParameters& params);
@@ -563,6 +574,10 @@ struct CvCapture_FFMPEG
    int64_t           pts_in_fps_time_base;
    int64_t           dts_delay_in_fps_time_base;

+    /// Timestamp offset in AV_TIME_BASE units for normalization
+    int64_t ts_offset_avtb = 0;
+    bool    ts_offset_decided = false;
+
    AVIOContext     * avio_context;

    AVPacket          packet;
@@ -623,6 +638,8 @@ void CvCapture_FFMPEG::init()
    picture_pts = AV_NOPTS_VALUE_;
    pts_in_fps_time_base = 0;
    dts_delay_in_fps_time_base = 0;
+    ts_offset_avtb = 0;
+    ts_offset_decided = false;
    first_frame_number = -1;
    memset( &rgb_picture, 0, sizeof(rgb_picture) );
    memset( &frame, 0, sizeof(frame) );
@@ -1705,15 +1722,74 @@ bool CvCapture_FFMPEG::grabFrame()
        if (picture_pts == AV_NOPTS_VALUE_) {
            int64_t dts = 0;
            if (!rawMode) {
-                picture_pts = picture->CV_FFMPEG_PTS_FIELD != AV_NOPTS_VALUE_ && picture->CV_FFMPEG_PTS_FIELD != 0 ? picture->CV_FFMPEG_PTS_FIELD : picture->pkt_dts;
-                if(frame_number == 0) dts = picture->pkt_dts;
-            }
-            else {
-                const AVPacket& packet_raw = packet.data != 0 ? packet : packet_filtered;
-                picture_pts = packet_raw.pts != AV_NOPTS_VALUE_ && packet_raw.pts != 0 ? packet_raw.pts : packet_raw.dts;
+                picture_pts = (picture->CV_FFMPEG_PTS_FIELD != AV_NOPTS_VALUE_)
+                                ? picture->CV_FFMPEG_PTS_FIELD
+                                : picture->pkt_dts;
+                if (frame_number == 0) dts = picture->pkt_dts;
+            } else {
+                const AVPacket& packet_raw = (packet.data != 0) ? packet : packet_filtered;
+                picture_pts = (packet_raw.pts != AV_NOPTS_VALUE_)
+                                ? packet_raw.pts
+                                : packet_raw.dts;
                if (frame_number == 0) dts = packet_raw.dts;
-                if (picture_pts < 0) picture_pts = 0;
            }
+
+            // Decide timestamp offset once on first frame to normalize all timestamps to start at zero.
+            // This handles videos with negative DTS values (e.g., from B-frame reordering) or non-zero
+            // start_time. Similar to FFmpeg's -avoid_negative_ts make_zero option.
+            if (!ts_offset_decided)
+            {
+                int64_t min_start_avtb = INT64_MAX;
+
+                // Check container start_time (already in AV_TIME_BASE units)
+                if (ic && ic->start_time != AV_NOPTS_VALUE_)
+                {
+                    min_start_avtb = ic->start_time;
+                }
+
+                // Check stream start_time
+                AVStream* st = ic->streams[video_stream];
+                if (st->start_time != AV_NOPTS_VALUE_)
+                {
+                    int64_t s = to_avtb(st->start_time, st->time_base);
+                    if (s < min_start_avtb) min_start_avtb = s;
+                }
+
+                // Check first observed timestamp (PTS preferred, else DTS from frame 0)
+                int64_t first_ts_stream = picture_pts;
+                if (first_ts_stream == AV_NOPTS_VALUE_ && dts != AV_NOPTS_VALUE_)
+                {
+                    first_ts_stream = dts;
+                }
+                if (first_ts_stream != AV_NOPTS_VALUE_)
+                {
+                    int64_t t = to_avtb(first_ts_stream, st->time_base);
+                    if (t < min_start_avtb) min_start_avtb = t;
+                }
+
+                // Compute offset to shift negative timestamps to zero
+                ts_offset_avtb = (min_start_avtb != INT64_MAX && min_start_avtb < 0) ? -min_start_avtb : 0;
+                ts_offset_decided = true;
+            }
+
+            // Apply normalization to picture_pts
+            if (picture_pts != AV_NOPTS_VALUE_)
+            {
+                int64_t t = to_avtb(picture_pts, video_st->time_base);
+                t += ts_offset_avtb;
+                picture_pts = from_avtb(t, video_st->time_base);
+            }
+
+            // Also normalize dts
+            if (dts != AV_NOPTS_VALUE_)
+            {
+                int64_t t = to_avtb(dts, video_st->time_base);
+                t += ts_offset_avtb;
+                dts = from_avtb(t, video_st->time_base);
+            }
+
+
+
 #if LIBAVCODEC_BUILD >= CALC_FFMPEG_VERSION(54, 1, 0) || LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(52, 111, 0)
            AVRational frame_rate = video_st->avg_frame_rate;
 #else
@@ -2120,8 +2196,13 @@ int64_t CvCapture_FFMPEG::dts_to_frame_number(int64_t dts)

 double CvCapture_FFMPEG::dts_to_sec(int64_t dts) const
 {
-    return (double)(dts - ic->streams[video_stream]->start_time) *
-        r2d(ic->streams[video_stream]->time_base);
+    const AVStream* st = ic->streams[video_stream];
+    int64_t ts = dts;
+    // Only when no normalization offset is in effect (ts_offset_avtb == 0) is a valid stream start_time subtracted.
+    if (ts_offset_avtb == 0 && st->start_time != AV_NOPTS_VALUE_)
+        ts -= st->start_time;
+    // Scale the tick count by the stream time_base (presumably yielding seconds, per the function name).
+    return ts * r2d(st->time_base);
 }

 void CvCapture_FFMPEG::get_rotation_angle()
@@ -2174,9 +2255,19 @@ void CvCapture_FFMPEG::seek(int64_t _frame_number)
    {
        int64_t _frame_number_temp = std::max(_frame_number-delta, (int64_t)0);
        double sec = (double)_frame_number_temp / get_fps();
-        int64_t time_stamp = ic->streams[video_stream]->start_time;
-        double  time_base  = r2d(ic->streams[video_stream]->time_base);
-        time_stamp += (int64_t)(sec / time_base + 0.5);
+
+        AVStream* st = ic->streams[video_stream];
+        int64_t time_stamp = st->start_time;
+        double  time_base  = r2d(st->time_base);
+        int64_t ts_norm = (int64_t)(sec / time_base + 0.5);
+
+        if (ts_offset_avtb != 0) {
+            // map normalized target back to original demux timeline
+            time_stamp += ts_norm - from_avtb(ts_offset_avtb, st->time_base);
+        } else {
+            time_stamp += ts_norm;
+        }
+
        if (get_total_frames() > 1) av_seek_frame(ic, video_stream, time_stamp, AVSEEK_FLAG_BACKWARD);
        if(!rawMode)
            avcodec_flush_buffers(context);
--- a/modules/videoio/test/test_ffmpeg.cpp
+++ b/modules/videoio/test/test_ffmpeg.cpp
@@ -1055,6 +1055,63 @@ TEST(ffmpeg_cap_properties, set_pos_get_msec)
    EXPECT_EQ(cap.get(CAP_PROP_POS_MSEC), 0.0);
 }

+// Regression test: in a video with negative DTS values, seeking twice to the
+// same frame must not produce a negative frame position or a negative timestamp.
+// Related issue: https://github.com/opencv/opencv/issues/27819
+TEST(videoio_ffmpeg, seek_with_negative_dts)
+{
+    if (!videoio_registry::hasBackend(CAP_FFMPEG))
+        throw SkipTestException("FFmpeg backend was not found");
+
+    const std::string filename = findDataFile("video/negdts_h264.mp4");
+    VideoCapture cap(filename, CAP_FFMPEG);
+
+    if (!cap.isOpened())
+        throw SkipTestException("Video stream is not supported");
+
+    // After open, a single grab() must not report a negative POS_MSEC.
+    ASSERT_TRUE(cap.grab());
+    EXPECT_GE(cap.get(CAP_PROP_POS_MSEC), 0.0) << "Negative ts immediately after open+grab()";
+
+    ASSERT_TRUE(cap.set(CAP_PROP_POS_FRAMES, 0));
+    (void)cap.get(CAP_PROP_POS_FRAMES); // result deliberately discarded
+
+    const int framesToProbe[] = {2, 3, 4, 5}; // target frame indices for the double-seek check
+
+    for (int f : framesToProbe)
+    {
+        // Reset to frame 0 so every iteration starts from the same position
+        ASSERT_TRUE(cap.set(CAP_PROP_POS_FRAMES, 0));
+        cap.get(CAP_PROP_POS_FRAMES); // result unused
+
+        // Seek to target frame
+        ASSERT_TRUE(cap.set(CAP_PROP_POS_FRAMES, f));
+        const double posAfterFirstSeek = cap.get(CAP_PROP_POS_FRAMES);
+
+        // Seek to the same frame again
+        ASSERT_TRUE(cap.set(CAP_PROP_POS_FRAMES, f));
+        const double posAfterSecondSeek = cap.get(CAP_PROP_POS_FRAMES);
+        const double tsAfterSecondSeek = cap.get(CAP_PROP_POS_MSEC);
+
+        EXPECT_GE(posAfterSecondSeek, 0)
+            << "Frame index became negative after second seek to frame " << f
+            << " (first seek gave " << posAfterFirstSeek << ")";
+        EXPECT_GE(tsAfterSecondSeek, 0.0)
+            << "Timestamp became negative after second seek to frame " << f;
+
+        // Per-iteration decode check: grab() must succeed and POS_MSEC must stay non-negative
+        ASSERT_TRUE(cap.grab());
+        EXPECT_GE(cap.get(CAP_PROP_POS_MSEC), 0.0) << "Negative timestamp after grab() at frame " << f;
+
+        // Verify that reading a frame works and the reported position is not behind the target frame
+        Mat frame;
+        ASSERT_TRUE(cap.read(frame));
+        ASSERT_FALSE(frame.empty());
+        EXPECT_GE(cap.get(CAP_PROP_POS_FRAMES), f);
+    }
+}
+
 #endif // WIN32

+
 }} // namespace