mirror of
https://github.com/opencv/opencv.git
synced 2026-01-18 17:21:42 +01:00
Merge pull request #27878 from 0lekW:ffmpeg-negative-dts-seeking-fix
Fix frame seeking with negative DTS values in FFMPEG backend #27878 **Merge with extra**: https://github.com/opencv/opencv_extra/pull/1289 Fixes https://github.com/opencv/opencv/issues/27819 Fixes https://github.com/opencv/opencv/issues/23472 Accompanied by PR on https://github.com/opencv/opencv_extra/pull/1289 The FFmpeg backend fails to correctly seek in H.264 videos that contain negative DTS values in their initial frames. This is a valid encoding practice used by modern video encoders (such as DaVinci Resolve's current export) where B-frame reordering causes the first few frames to have negative DTS values. When the PTS is unavailable (AV_NOPTS_VALUE), the code falls back to using the DTS: `picture_pts = packet_raw.pts != AV_NOPTS_VALUE_ ? packet_raw.pts : packet_raw.dts;` If this DTS value is negative (which is legal per the H.264 spec), it propagates through the frame number calculation: `frame_number = dts_to_frame_number(picture_pts) - first_frame_number;` This results in negative frame numbers, which breaks seeking operations. The solution implemented in this branch is a timestamp normalization similar to FFmpeg's `-avoid_negative_ts make_zero` option: - Calculate a global offset once on the first decoded frame by taking the minimum of: - Container start_time - Stream start_time - First observed timestamp (PTS, then DTS). - Apply the offset consistently to all timestamps, shifting negative values to begin at 0 while keeping relative timing. - Simplify timestamp converters to remove `start_time` subtractions since timestamps are pre-normalized. This also includes a new test, `videoio_ffmpeg.seek_with_negative_dts`. This test verifies that seeking behaves as expected on a file which has negative DTS values in the first frames.
A PR on opencv_extra accompanies this one with that testing file: https://github.com/opencv/opencv_extra/pull/1279 ``` opencv_extra=ffmpeg-videoio-negative-dts-test-data ``` <cut/> ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
@@ -524,6 +524,17 @@ inline static std::string _opencv_ffmpeg_get_error_string(int error_code)
|
||||
return std::string("Unknown error");
|
||||
}
|
||||
|
||||
static inline int64_t to_avtb(int64_t ts, AVRational tb)
|
||||
{
|
||||
return av_rescale_q(ts, tb, AV_TIME_BASE_Q);
|
||||
}
|
||||
|
||||
static inline int64_t from_avtb(int64_t ts_avtb, AVRational tb)
|
||||
{
|
||||
return av_rescale_q(ts_avtb, AV_TIME_BASE_Q, tb);
|
||||
}
|
||||
|
||||
|
||||
struct CvCapture_FFMPEG
|
||||
{
|
||||
bool open(const char* filename, int index, const Ptr<IStreamReader>& stream, const VideoCaptureParameters& params);
|
||||
@@ -563,6 +574,10 @@ struct CvCapture_FFMPEG
|
||||
int64_t pts_in_fps_time_base;
|
||||
int64_t dts_delay_in_fps_time_base;
|
||||
|
||||
/// Timestamp offset in AV_TIME_BASE units for normalization
|
||||
int64_t ts_offset_avtb = 0;
|
||||
bool ts_offset_decided = false;
|
||||
|
||||
AVIOContext * avio_context;
|
||||
|
||||
AVPacket packet;
|
||||
@@ -623,6 +638,8 @@ void CvCapture_FFMPEG::init()
|
||||
picture_pts = AV_NOPTS_VALUE_;
|
||||
pts_in_fps_time_base = 0;
|
||||
dts_delay_in_fps_time_base = 0;
|
||||
ts_offset_avtb = 0;
|
||||
ts_offset_decided = false;
|
||||
first_frame_number = -1;
|
||||
memset( &rgb_picture, 0, sizeof(rgb_picture) );
|
||||
memset( &frame, 0, sizeof(frame) );
|
||||
@@ -1705,15 +1722,74 @@ bool CvCapture_FFMPEG::grabFrame()
|
||||
if (picture_pts == AV_NOPTS_VALUE_) {
|
||||
int64_t dts = 0;
|
||||
if (!rawMode) {
|
||||
picture_pts = picture->CV_FFMPEG_PTS_FIELD != AV_NOPTS_VALUE_ && picture->CV_FFMPEG_PTS_FIELD != 0 ? picture->CV_FFMPEG_PTS_FIELD : picture->pkt_dts;
|
||||
if(frame_number == 0) dts = picture->pkt_dts;
|
||||
}
|
||||
else {
|
||||
const AVPacket& packet_raw = packet.data != 0 ? packet : packet_filtered;
|
||||
picture_pts = packet_raw.pts != AV_NOPTS_VALUE_ && packet_raw.pts != 0 ? packet_raw.pts : packet_raw.dts;
|
||||
picture_pts = (picture->CV_FFMPEG_PTS_FIELD != AV_NOPTS_VALUE_)
|
||||
? picture->CV_FFMPEG_PTS_FIELD
|
||||
: picture->pkt_dts;
|
||||
if (frame_number == 0) dts = picture->pkt_dts;
|
||||
} else {
|
||||
const AVPacket& packet_raw = (packet.data != 0) ? packet : packet_filtered;
|
||||
picture_pts = (packet_raw.pts != AV_NOPTS_VALUE_)
|
||||
? packet_raw.pts
|
||||
: packet_raw.dts;
|
||||
if (frame_number == 0) dts = packet_raw.dts;
|
||||
if (picture_pts < 0) picture_pts = 0;
|
||||
}
|
||||
|
||||
// Decide timestamp offset once on first frame to normalize all timestamps to start at zero.
|
||||
// This handles videos with negative DTS values (e.g., from B-frame reordering) or non-zero
|
||||
// start_time. Similar to FFmpeg's -avoid_negative_ts make_zero option.
|
||||
if (!ts_offset_decided)
|
||||
{
|
||||
int64_t min_start_avtb = INT64_MAX;
|
||||
|
||||
// Check container start_time (already in AV_TIME_BASE units)
|
||||
if (ic && ic->start_time != AV_NOPTS_VALUE_)
|
||||
{
|
||||
min_start_avtb = ic->start_time;
|
||||
}
|
||||
|
||||
// Check stream start_time
|
||||
AVStream* st = ic->streams[video_stream];
|
||||
if (st->start_time != AV_NOPTS_VALUE_)
|
||||
{
|
||||
int64_t s = to_avtb(st->start_time, st->time_base);
|
||||
if (s < min_start_avtb) min_start_avtb = s;
|
||||
}
|
||||
|
||||
// Check first observed timestamp (PTS preferred, else DTS from frame 0)
|
||||
int64_t first_ts_stream = picture_pts;
|
||||
if (first_ts_stream == AV_NOPTS_VALUE_ && dts != AV_NOPTS_VALUE_)
|
||||
{
|
||||
first_ts_stream = dts;
|
||||
}
|
||||
if (first_ts_stream != AV_NOPTS_VALUE_)
|
||||
{
|
||||
int64_t t = to_avtb(first_ts_stream, st->time_base);
|
||||
if (t < min_start_avtb) min_start_avtb = t;
|
||||
}
|
||||
|
||||
// Compute offset to shift negative timestamps to zero
|
||||
ts_offset_avtb = (min_start_avtb != INT64_MAX && min_start_avtb < 0) ? -min_start_avtb : 0;
|
||||
ts_offset_decided = true;
|
||||
}
|
||||
|
||||
// Apply normalization to picture_pts
|
||||
if (picture_pts != AV_NOPTS_VALUE_)
|
||||
{
|
||||
int64_t t = to_avtb(picture_pts, video_st->time_base);
|
||||
t += ts_offset_avtb;
|
||||
picture_pts = from_avtb(t, video_st->time_base);
|
||||
}
|
||||
|
||||
// Also normalize dts
|
||||
if (dts != AV_NOPTS_VALUE_)
|
||||
{
|
||||
int64_t t = to_avtb(dts, video_st->time_base);
|
||||
t += ts_offset_avtb;
|
||||
dts = from_avtb(t, video_st->time_base);
|
||||
}
|
||||
|
||||
|
||||
|
||||
#if LIBAVCODEC_BUILD >= CALC_FFMPEG_VERSION(54, 1, 0) || LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(52, 111, 0)
|
||||
AVRational frame_rate = video_st->avg_frame_rate;
|
||||
#else
|
||||
@@ -2120,8 +2196,13 @@ int64_t CvCapture_FFMPEG::dts_to_frame_number(int64_t dts)
|
||||
|
||||
double CvCapture_FFMPEG::dts_to_sec(int64_t dts) const
|
||||
{
|
||||
return (double)(dts - ic->streams[video_stream]->start_time) *
|
||||
r2d(ic->streams[video_stream]->time_base);
|
||||
const AVStream* st = ic->streams[video_stream];
|
||||
int64_t ts = dts;
|
||||
|
||||
if (ts_offset_avtb == 0 && st->start_time != AV_NOPTS_VALUE_)
|
||||
ts -= st->start_time;
|
||||
|
||||
return ts * r2d(st->time_base);
|
||||
}
|
||||
|
||||
void CvCapture_FFMPEG::get_rotation_angle()
|
||||
@@ -2174,9 +2255,19 @@ void CvCapture_FFMPEG::seek(int64_t _frame_number)
|
||||
{
|
||||
int64_t _frame_number_temp = std::max(_frame_number-delta, (int64_t)0);
|
||||
double sec = (double)_frame_number_temp / get_fps();
|
||||
int64_t time_stamp = ic->streams[video_stream]->start_time;
|
||||
double time_base = r2d(ic->streams[video_stream]->time_base);
|
||||
time_stamp += (int64_t)(sec / time_base + 0.5);
|
||||
|
||||
AVStream* st = ic->streams[video_stream];
|
||||
int64_t time_stamp = st->start_time;
|
||||
double time_base = r2d(st->time_base);
|
||||
int64_t ts_norm = (int64_t)(sec / time_base + 0.5);
|
||||
|
||||
if (ts_offset_avtb != 0) {
|
||||
// map normalized target back to original demux timeline
|
||||
time_stamp += ts_norm - from_avtb(ts_offset_avtb, st->time_base);
|
||||
} else {
|
||||
time_stamp += ts_norm;
|
||||
}
|
||||
|
||||
if (get_total_frames() > 1) av_seek_frame(ic, video_stream, time_stamp, AVSEEK_FLAG_BACKWARD);
|
||||
if(!rawMode)
|
||||
avcodec_flush_buffers(context);
|
||||
|
||||
@@ -1055,6 +1055,63 @@ TEST(ffmpeg_cap_properties, set_pos_get_msec)
|
||||
EXPECT_EQ(cap.get(CAP_PROP_POS_MSEC), 0.0);
|
||||
}
|
||||
|
||||
// Test that seeking twice to the same frame in videos with negative DTS
|
||||
// does not result in negative position or timestamp values
|
||||
// related issue: https://github.com/opencv/opencv/issues/27819
|
||||
TEST(videoio_ffmpeg, seek_with_negative_dts)
{
    // Nothing to verify when the FFmpeg backend is not part of this build.
    if (!videoio_registry::hasBackend(CAP_FFMPEG))
        throw SkipTestException("FFmpeg backend was not found");

    const std::string videoPath = findDataFile("video/negdts_h264.mp4");
    VideoCapture capture(videoPath, CAP_FFMPEG);
    if (!capture.isOpened())
        throw SkipTestException("Video stream is not supported");

    // The very first grab() after opening must already report a non-negative time.
    ASSERT_TRUE(capture.grab());
    EXPECT_GE(capture.get(CAP_PROP_POS_MSEC), 0.0) << "Negative ts immediately after open+grab()";

    ASSERT_TRUE(capture.set(CAP_PROP_POS_FRAMES, 0));
    (void)capture.get(CAP_PROP_POS_FRAMES);

    const int probeFrames[] = {2, 3, 4, 5};
    const size_t probeCount = sizeof(probeFrames) / sizeof(probeFrames[0]);

    for (size_t i = 0; i < probeCount; ++i)
    {
        const int target = probeFrames[i];

        // Start every probe from the beginning of the stream.
        ASSERT_TRUE(capture.set(CAP_PROP_POS_FRAMES, 0));
        capture.get(CAP_PROP_POS_FRAMES);

        // First seek to the probed frame.
        ASSERT_TRUE(capture.set(CAP_PROP_POS_FRAMES, target));
        const double firstSeekPos = capture.get(CAP_PROP_POS_FRAMES);

        // Repeat the seek; position and timestamp must both stay non-negative.
        ASSERT_TRUE(capture.set(CAP_PROP_POS_FRAMES, target));
        const double secondSeekPos = capture.get(CAP_PROP_POS_FRAMES);
        const double secondSeekMsec = capture.get(CAP_PROP_POS_MSEC);

        EXPECT_GE(secondSeekPos, 0)
            << "Frame index became negative after second seek to frame " << target
            << " (first seek gave " << firstSeekPos << ")";
        EXPECT_GE(secondSeekMsec, 0.0)
            << "Timestamp became negative after second seek to frame " << target;

        // Grabbing from the sought position must keep the timestamp non-negative.
        ASSERT_TRUE(capture.grab());
        EXPECT_GE(capture.get(CAP_PROP_POS_MSEC), 0.0) << "Negative timestamp after grab() at frame " << target;

        // A full read must deliver an image, and the position must not fall behind the target.
        Mat image;
        ASSERT_TRUE(capture.read(image));
        ASSERT_FALSE(image.empty());
        EXPECT_GE(capture.get(CAP_PROP_POS_FRAMES), target);
    }
}
|
||||
|
||||
#endif // WIN32
|
||||
|
||||
|
||||
}} // namespace
|
||||
|
||||
Reference in New Issue
Block a user