In C++ code, I can correctly save a serial of images(opencv's cv::Mat) to a mp4 file by using ffmpeg library, see the question and answer here: avformat_write_header() function call crashed when I try to save several RGB data to a output.mp4 file
Now here comes another question:
Rotem' answer in that question can have the output.mp4 saved correctly. When play the mp4 file, I see the frames(OpenCV's cv::Mat image) shows in a const rate.
What I can do if I have got the frames not in a const frequency, for example, the I got the first frame at the 0ms, and the second frame at the 50ms the third frame at 75ms, so that the each frame as associated time stamp, for example, those time stamp array are something like below:
int timestamp[100] = {0, 50, 75, ...};
What is the method to modify the Rotem's answer to reflect this? It looks like I have to change the pts field of each frame. Because I just test the code, if I change this:
yuvFrame->pts = av_rescale_q(frame_count*frame_count, outCodecCtx->time_base, outStream->time_base); //Set PTS timestamp
// note I change from frame_count to frame_count*frame_count
Then the output.mp4 plays slower and slower, because the later frame has large pts values.
Thanks.
EDIT
This is the code I'm currently used:
#include <iostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <sstream>
#include <stdexcept>
#include <opencv2/opencv.hpp>
extern "C" {
#include <libavutil/imgutils.h>
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/opt.h>
}
#include<cstdlib> // to generate time stamps
using namespace std;
using namespace cv;
int main()
{
// Set up input frames as BGR byte arrays
vector<Mat> frames;
int width = 640;
int height = 480;
int num_frames = 100;
Scalar black(0, 0, 0);
Scalar white(255, 255, 255);
int font = FONT_HERSHEY_SIMPLEX;
double font_scale = 1.0;
int thickness = 2;
for (int i = 0; i < num_frames; i++) {
Mat frame = Mat::zeros(height, width, CV_8UC3);
putText(frame, std::to_string(i), Point(width / 2 - 50, height / 2), font, font_scale, white, thickness);
frames.push_back(frame);
}
// generate a serial of time stamps which is used to set the PTS value
// suppose they are in ms unit, the time interval is between 30ms to 59ms
vector<int> timestamps;
for (int i = 0; i < num_frames; i++) {
int timestamp;
if (i == 0)
timestamp = 0;
else
{
int random = 30 + (rand() % 30);
timestamp = timestamps[i-0] + random;
}
timestamps.push_back(timestamp);
}
// Populate frames with BGR byte arrays
// Initialize FFmpeg
//av_register_all();
// Set up output file
AVFormatContext* outFormatCtx = nullptr;
//AVCodec* outCodec = nullptr;
AVCodecContext* outCodecCtx = nullptr;
//AVStream* outStream = nullptr;
//AVPacket outPacket;
const char* outFile = "output.mp4";
int outWidth = frames[0].cols;
int outHeight = frames[0].rows;
int fps = 25;
// Open the output file context
avformat_alloc_output_context2(&outFormatCtx, nullptr, nullptr, outFile);
if (!outFormatCtx) {
cerr << "Error: Could not allocate output format context" << endl;
return -1;
}
// Open the output file
if (avio_open(&outFormatCtx->pb, outFile, AVIO_FLAG_WRITE) < 0) {
cerr << "Error opening output file" << std::endl;
return -1;
}
// Set up output codec
const AVCodec* outCodec = avcodec_find_encoder(AV_CODEC_ID_H264);
if (!outCodec) {
cerr << "Error: Could not find H.264 codec" << endl;
return -1;
}
outCodecCtx = avcodec_alloc_context3(outCodec);
if (!outCodecCtx) {
cerr << "Error: Could not allocate output codec context" << endl;
return -1;
}
outCodecCtx->codec_id = AV_CODEC_ID_H264;
outCodecCtx->codec_type = AVMEDIA_TYPE_VIDEO;
outCodecCtx->pix_fmt = AV_PIX_FMT_YUV420P;
outCodecCtx->width = outWidth;
outCodecCtx->height = outHeight;
outCodecCtx->time_base = { 1, fps*1000 }; // 25000
outCodecCtx->framerate = {fps, 1}; // 25
outCodecCtx->bit_rate = 4000000;
//https://github.com/leandromoreira/ffmpeg-libav-tutorial
//We set the flag AV_CODEC_FLAG_GLOBAL_HEADER which tells the encoder that it can use the global headers.
if (outFormatCtx->oformat->flags & AVFMT_GLOBALHEADER)
{
outCodecCtx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; //
}
// Open output codec
if (avcodec_open2(outCodecCtx, outCodec, nullptr) < 0) {
cerr << "Error: Could not open output codec" << endl;
return -1;
}
// Create output stream
AVStream* outStream = avformat_new_stream(outFormatCtx, outCodec);
if (!outStream) {
cerr << "Error: Could not allocate output stream" << endl;
return -1;
}
// Configure output stream parameters (e.g., time base, codec parameters, etc.)
// ...
// Connect output stream to format context
outStream->codecpar->codec_id = outCodecCtx->codec_id;
outStream->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
outStream->codecpar->width = outCodecCtx->width;
outStream->codecpar->height = outCodecCtx->height;
outStream->codecpar->format = outCodecCtx->pix_fmt;
outStream->time_base = outCodecCtx->time_base;
int ret = avcodec_parameters_from_context(outStream->codecpar, outCodecCtx);
if (ret < 0) {
cerr << "Error: Could not copy codec parameters to output stream" << endl;
return -1;
}
outStream->avg_frame_rate = outCodecCtx->framerate;
//outStream->id = outFormatCtx->nb_streams++; <--- We shouldn't modify outStream->id
ret = avformat_write_header(outFormatCtx, nullptr);
if (ret < 0) {
cerr << "Error: Could not write output header" << endl;
return -1;
}
// Convert frames to YUV format and write to output file
int frame_count = -1;
for (const auto& frame : frames) {
frame_count++;
AVFrame* yuvFrame = av_frame_alloc();
if (!yuvFrame) {
cerr << "Error: Could not allocate YUV frame" << endl;
return -1;
}
av_image_alloc(yuvFrame->data, yuvFrame->linesize, outWidth, outHeight, AV_PIX_FMT_YUV420P, 32);
yuvFrame->width = outWidth;
yuvFrame->height = outHeight;
yuvFrame->format = AV_PIX_FMT_YUV420P;
// Convert BGR frame to YUV format
Mat yuvMat;
cvtColor(frame, yuvMat, COLOR_BGR2YUV_I420);
memcpy(yuvFrame->data[0], yuvMat.data, outWidth * outHeight);
memcpy(yuvFrame->data[1], yuvMat.data + outWidth * outHeight, outWidth * outHeight / 4);
memcpy(yuvFrame->data[2], yuvMat.data + outWidth * outHeight * 5 / 4, outWidth * outHeight / 4);
// Set up output packet
//av_init_packet(&outPacket); //error C4996: 'av_init_packet': was declared deprecated
AVPacket* outPacket = av_packet_alloc();
memset(outPacket, 0, sizeof(outPacket)); //Use memset instead of av_init_packet (probably unnecessary).
//outPacket->data = nullptr;
//outPacket->size = 0;
yuvFrame->pts = av_rescale_q(timestamps[frame_count], outCodecCtx->time_base, outStream->time_base); //Set PTS timestamp
// Encode frame and write to output file
int ret = avcodec_send_frame(outCodecCtx, yuvFrame);
if (ret < 0) {
cerr << "Error: Could not send frame to output codec" << endl;
return -1;
}
while (ret >= 0) {
ret = avcodec_receive_packet(outCodecCtx, outPacket);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
break;
} else if (ret < 0) {
cerr << "Error: Could not receive packet from output codec" << endl;
return -1;
}
//av_packet_rescale_ts(&outPacket, outCodecCtx->time_base, outStream->time_base);
outPacket->stream_index = outStream->index;
outPacket->duration = av_rescale_q(1, outCodecCtx->time_base, outStream->time_base); // Set packet duration
ret = av_interleaved_write_frame(outFormatCtx, outPacket);
av_packet_unref(outPacket);
if (ret < 0) {
cerr << "Error: Could not write packet to output file" << endl;
return -1;
}
}
av_frame_free(&yuvFrame);
}
// Flush the encoder
ret = avcodec_send_frame(outCodecCtx, nullptr);
if (ret < 0) {
std::cerr << "Error flushing encoder: " << std::endl;
return -1;
}
while (ret >= 0) {
AVPacket* pkt = av_packet_alloc();
if (!pkt) {
std::cerr << "Error allocating packet" << std::endl;
return -1;
}
ret = avcodec_receive_packet(outCodecCtx, pkt);
// Write the packet to the output file
if (ret == 0)
{
pkt->stream_index = outStream->index;
pkt->duration = av_rescale_q(1, outCodecCtx->time_base, outStream->time_base); // <---- Set packet duration
ret = av_interleaved_write_frame(outFormatCtx, pkt);
av_packet_unref(pkt);
if (ret < 0) {
std::cerr << "Error writing packet to output file: " << std::endl;
return -1;
}
}
}
// Write output trailer
av_write_trailer(outFormatCtx);
// Clean up
avcodec_close(outCodecCtx);
avcodec_free_context(&outCodecCtx);
avformat_free_context(outFormatCtx);
return 0;
}
Especially, I have those changes to the original Rotem's answer:
Fist, I have some code to generate a time stamp array:
// generate a serial of time stamps which is used to set the PTS value
// suppose they are in ms unit, the time interval is between 30ms to 59ms
vector<int> timestamps;
for (int i = 0; i < num_frames; i++) {
int timestamp;
if (i == 0)
timestamp = 0;
else
{
int random = 30 + (rand() % 30);
timestamp = timestamps[i-0] + random;
}
timestamps.push_back(timestamp);
}
Second, I just set the PTS by those values:
yuvFrame->pts = av_rescale_q(timestamps[frame_count], outCodecCtx->time_base, outStream->time_base); //Set PTS timestamp
Note that I have set the fps like below:
outCodecCtx->time_base = { 1, fps*1000 }; // 25000
outCodecCtx->framerate = {fps, 1}; // 25
Now, when I run the program, I got a lot of warnings in the console:
[libx264 @ 0000022e7fa621c0] using cpu capabilities: MMX2 SSE2Fast SSSE3 SSE4.2 AVX FMA3 BMI2 AVX2
[libx264 @ 0000022e7fa621c0] profile High, level 3.0, 4:2:0, 8-bit
[libx264 @ 0000022e7fa621c0] 264 - core 164 r3094M bfc87b7 - H.264/MPEG-4 AVC codec - Copyleft 2003-2022 - http://www.videolan.org/x264.html - options: cabac=1 ref=3 deblock=1:0:0 analyse=0x3:0x113 me=hex subme=7 psy=1 psy_rd=1.00:0.00 mixed_ref=1 me_range=16 chroma_me=1 trellis=1 8x8dct=1 cqm=0 deadzone=21,11 fast_pskip=1 chroma_qp_offset=-2 threads=6 lookahead_threads=1 sliced_threads=0 nr=0 decimate=1 interlaced=0 bluray_compat=0 constrained_intra=0 bframes=3 b_pyramid=2 b_adapt=1 b_bias=0 direct=1 weightb=1 open_gop=0 weightp=2 keyint=250 keyint_min=25 scenecut=40 intra_refresh=0 rc_lookahead=40 rc=abr mbtree=1 bitrate=4000 ratetol=1.0 qcomp=0.60 qpmin=0 qpmax=69 qpstep=4 ip_ratio=1.40 aq=1:1.00
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] invalid DTS: PTS is less than DTS
[mp4 @ 0000022e090b2300] pts (592) < dts (1129348497) in stream 0
Error: Could not write packet to output file
Any ideas? Thanks.