0
0
mirror of https://github.com/opencv/opencv.git synced 2026-01-18 17:21:42 +01:00

Merge pull request #27878 from 0lekW:ffmpeg-negative-dts-seeking-fix

Fix frame seeking with negative DTS values in FFMPEG backend #27878

**Merge with extra**: https://github.com/opencv/opencv_extra/pull/1289

Fixes https://github.com/opencv/opencv/issues/27819
Fixes https://github.com/opencv/opencv/issues/23472
Accompanied by PR on https://github.com/opencv/opencv_extra/pull/1289

The FFmpeg backend fails to correctly seek in H.264 videos that contain negative DTS values in their initial frames. This is a valid encoding practice used by modern video encoders (such as DaVinci Resolve's current export) where B-frame reordering causes the first few frames to have negative DTS values.

When picture_pts is unavailable (AV_NOPTS_VALUE), the code falls back to using pkt_dts:
`picture_pts = packet_raw.pts != AV_NOPTS_VALUE_ ? packet_raw.pts : packet_raw.dts;`

If this DTS value is negative (which is legal per the H.264 spec), it propagates through the frame number calculation:
`frame_number = dts_to_frame_number(picture_pts) - first_frame_number;`

This results in negative frame numbers, which breaks seeking operations.

The solution implemented in this branch is a timestamp normalization similar to FFmpeg's `-avoid_negative_ts make_zero` option:
- Calculate a global offset once, on the first decoded frame, by taking the minimum of the following timestamps:
    - Container start_time
    - Stream start_time
    - First observed timestamp (PTS, then DTS).
- Apply the offset consistently to all timestamps, shifting negative values to begin at 0 while keeping relative timing.
- Simplify timestamp converters to remove `start_time` subtractions since timestamps are pre-normalized.

This also includes a new test, `videoio_ffmpeg.seek_with_negative_dts`.
This test verifies that seeking behaves as expected on a file which has negative DTS values in its first frames.
A PR on opencv_extra accompanies this one with that testing file: https://github.com/opencv/opencv_extra/pull/1279

```
opencv_extra=ffmpeg-videoio-negative-dts-test-data
```

<cut/>

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
Alex
2025-12-16 21:38:51 +13:00
committed by GitHub
parent 33ebceddb2
commit 7ca9d9ce03
2 changed files with 160 additions and 12 deletions

View File

@@ -524,6 +524,17 @@ inline static std::string _opencv_ffmpeg_get_error_string(int error_code)
return std::string("Unknown error");
}
static inline int64_t to_avtb(int64_t ts, AVRational tb)
{
return av_rescale_q(ts, tb, AV_TIME_BASE_Q);
}
static inline int64_t from_avtb(int64_t ts_avtb, AVRational tb)
{
return av_rescale_q(ts_avtb, AV_TIME_BASE_Q, tb);
}
struct CvCapture_FFMPEG
{
bool open(const char* filename, int index, const Ptr<IStreamReader>& stream, const VideoCaptureParameters& params);
@@ -563,6 +574,10 @@ struct CvCapture_FFMPEG
int64_t pts_in_fps_time_base;
int64_t dts_delay_in_fps_time_base;
/// Timestamp offset in AV_TIME_BASE units for normalization
int64_t ts_offset_avtb = 0;
bool ts_offset_decided = false;
AVIOContext * avio_context;
AVPacket packet;
@@ -623,6 +638,8 @@ void CvCapture_FFMPEG::init()
picture_pts = AV_NOPTS_VALUE_;
pts_in_fps_time_base = 0;
dts_delay_in_fps_time_base = 0;
ts_offset_avtb = 0;
ts_offset_decided = false;
first_frame_number = -1;
memset( &rgb_picture, 0, sizeof(rgb_picture) );
memset( &frame, 0, sizeof(frame) );
@@ -1705,15 +1722,74 @@ bool CvCapture_FFMPEG::grabFrame()
if (picture_pts == AV_NOPTS_VALUE_) {
int64_t dts = 0;
if (!rawMode) {
picture_pts = picture->CV_FFMPEG_PTS_FIELD != AV_NOPTS_VALUE_ && picture->CV_FFMPEG_PTS_FIELD != 0 ? picture->CV_FFMPEG_PTS_FIELD : picture->pkt_dts;
if(frame_number == 0) dts = picture->pkt_dts;
}
else {
const AVPacket& packet_raw = packet.data != 0 ? packet : packet_filtered;
picture_pts = packet_raw.pts != AV_NOPTS_VALUE_ && packet_raw.pts != 0 ? packet_raw.pts : packet_raw.dts;
picture_pts = (picture->CV_FFMPEG_PTS_FIELD != AV_NOPTS_VALUE_)
? picture->CV_FFMPEG_PTS_FIELD
: picture->pkt_dts;
if (frame_number == 0) dts = picture->pkt_dts;
} else {
const AVPacket& packet_raw = (packet.data != 0) ? packet : packet_filtered;
picture_pts = (packet_raw.pts != AV_NOPTS_VALUE_)
? packet_raw.pts
: packet_raw.dts;
if (frame_number == 0) dts = packet_raw.dts;
if (picture_pts < 0) picture_pts = 0;
}
// Decide timestamp offset once on first frame to normalize all timestamps to start at zero.
// This handles videos with negative DTS values (e.g., from B-frame reordering) or non-zero
// start_time. Similar to FFmpeg's -avoid_negative_ts make_zero option.
if (!ts_offset_decided)
{
int64_t min_start_avtb = INT64_MAX;
// Check container start_time (already in AV_TIME_BASE units)
if (ic && ic->start_time != AV_NOPTS_VALUE_)
{
min_start_avtb = ic->start_time;
}
// Check stream start_time
AVStream* st = ic->streams[video_stream];
if (st->start_time != AV_NOPTS_VALUE_)
{
int64_t s = to_avtb(st->start_time, st->time_base);
if (s < min_start_avtb) min_start_avtb = s;
}
// Check first observed timestamp (PTS preferred, else DTS from frame 0)
int64_t first_ts_stream = picture_pts;
if (first_ts_stream == AV_NOPTS_VALUE_ && dts != AV_NOPTS_VALUE_)
{
first_ts_stream = dts;
}
if (first_ts_stream != AV_NOPTS_VALUE_)
{
int64_t t = to_avtb(first_ts_stream, st->time_base);
if (t < min_start_avtb) min_start_avtb = t;
}
// Compute offset to shift negative timestamps to zero
ts_offset_avtb = (min_start_avtb != INT64_MAX && min_start_avtb < 0) ? -min_start_avtb : 0;
ts_offset_decided = true;
}
// Apply normalization to picture_pts
if (picture_pts != AV_NOPTS_VALUE_)
{
int64_t t = to_avtb(picture_pts, video_st->time_base);
t += ts_offset_avtb;
picture_pts = from_avtb(t, video_st->time_base);
}
// Also normalize dts
if (dts != AV_NOPTS_VALUE_)
{
int64_t t = to_avtb(dts, video_st->time_base);
t += ts_offset_avtb;
dts = from_avtb(t, video_st->time_base);
}
#if LIBAVCODEC_BUILD >= CALC_FFMPEG_VERSION(54, 1, 0) || LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(52, 111, 0)
AVRational frame_rate = video_st->avg_frame_rate;
#else
@@ -2120,8 +2196,13 @@ int64_t CvCapture_FFMPEG::dts_to_frame_number(int64_t dts)
double CvCapture_FFMPEG::dts_to_sec(int64_t dts) const
{
return (double)(dts - ic->streams[video_stream]->start_time) *
r2d(ic->streams[video_stream]->time_base);
const AVStream* st = ic->streams[video_stream];
int64_t ts = dts;
if (ts_offset_avtb == 0 && st->start_time != AV_NOPTS_VALUE_)
ts -= st->start_time;
return ts * r2d(st->time_base);
}
void CvCapture_FFMPEG::get_rotation_angle()
@@ -2174,9 +2255,19 @@ void CvCapture_FFMPEG::seek(int64_t _frame_number)
{
int64_t _frame_number_temp = std::max(_frame_number-delta, (int64_t)0);
double sec = (double)_frame_number_temp / get_fps();
int64_t time_stamp = ic->streams[video_stream]->start_time;
double time_base = r2d(ic->streams[video_stream]->time_base);
time_stamp += (int64_t)(sec / time_base + 0.5);
AVStream* st = ic->streams[video_stream];
int64_t time_stamp = st->start_time;
double time_base = r2d(st->time_base);
int64_t ts_norm = (int64_t)(sec / time_base + 0.5);
if (ts_offset_avtb != 0) {
// map normalized target back to original demux timeline
time_stamp += ts_norm - from_avtb(ts_offset_avtb, st->time_base);
} else {
time_stamp += ts_norm;
}
if (get_total_frames() > 1) av_seek_frame(ic, video_stream, time_stamp, AVSEEK_FLAG_BACKWARD);
if(!rawMode)
avcodec_flush_buffers(context);

View File

@@ -1055,6 +1055,63 @@ TEST(ffmpeg_cap_properties, set_pos_get_msec)
EXPECT_EQ(cap.get(CAP_PROP_POS_MSEC), 0.0);
}
// Regression test for https://github.com/opencv/opencv/issues/27819.
// On a video with negative DTS values, seeking to the same frame twice must
// not leave the capture reporting a negative frame position or timestamp.
TEST(videoio_ffmpeg, seek_with_negative_dts)
{
    if (!videoio_registry::hasBackend(CAP_FFMPEG))
        throw SkipTestException("FFmpeg backend was not found");

    const std::string videoPath = findDataFile("video/negdts_h264.mp4");
    VideoCapture capture(videoPath, CAP_FFMPEG);
    if (!capture.isOpened())
        throw SkipTestException("Video stream is not supported");

    // The first grab() right after opening must report a non-negative POS_MSEC.
    ASSERT_TRUE(capture.grab());
    EXPECT_GE(capture.get(CAP_PROP_POS_MSEC), 0.0) << "Negative ts immediately after open+grab()";

    ASSERT_TRUE(capture.set(CAP_PROP_POS_FRAMES, 0));
    (void)capture.get(CAP_PROP_POS_FRAMES);

    // Probe frames 2, 3, 4 and 5 in turn.
    for (int target = 2; target <= 5; ++target)
    {
        // Start every iteration from frame 0.
        ASSERT_TRUE(capture.set(CAP_PROP_POS_FRAMES, 0));
        capture.get(CAP_PROP_POS_FRAMES);

        // First seek to the target frame.
        ASSERT_TRUE(capture.set(CAP_PROP_POS_FRAMES, target));
        const double firstSeekPos = capture.get(CAP_PROP_POS_FRAMES);

        // Second seek to the very same frame.
        ASSERT_TRUE(capture.set(CAP_PROP_POS_FRAMES, target));
        const double secondSeekPos = capture.get(CAP_PROP_POS_FRAMES);
        const double secondSeekMsec = capture.get(CAP_PROP_POS_MSEC);

        EXPECT_GE(secondSeekPos, 0)
            << "Frame index became negative after second seek to frame " << target
            << " (first seek gave " << firstSeekPos << ")";
        EXPECT_GE(secondSeekMsec, 0.0)
            << "Timestamp became negative after second seek to frame " << target;

        // Grabbing after the seek must also report a non-negative timestamp.
        ASSERT_TRUE(capture.grab());
        EXPECT_GE(capture.get(CAP_PROP_POS_MSEC), 0.0) << "Negative timestamp after grab() at frame " << target;

        // A full read must succeed, and the reported position must be at least the target.
        Mat decoded;
        ASSERT_TRUE(capture.read(decoded));
        ASSERT_FALSE(decoded.empty());
        EXPECT_GE(capture.get(CAP_PROP_POS_FRAMES), target);
    }
}
#endif // WIN32
}} // namespace