学习 FFmpeg 循序渐进的资料很少,前一段还在交代背景知识,下一段就推你去看源代码去了。
An ffmpeg and SDL Tutorial 算是我找到的比较好的入门教程了,当然我并不关心 SDL 是啥,我只想知道 FFmpeg 是怎么使用的。
尝试编译了一下 tutorial 1 的代码,编译器提示基本每一个用到的函数和字段都被标记为 deprecated(已弃用)。
当然编译是可以通过的,也能够如期地运行起来。不过,既然 FFmpeg 已经发布到了 3.x.x 版本,也不妨升级使用最新的接口。
与 tutorial 1 代码逻辑一致,读取视频 5 帧保存成 PPM 格式图片。
保存成 PPM 主要是因为这种格式简单,查看却是大大不方便的,可以使用 ImageMagick 命令转换成方便查看的格式:
convert in.ppm out.jpg
与 tutorial 1 不同的是,增加了 seek 函数,跳到视频的某个时间点的关键帧附近。编译需要 C++11 支持,还依赖了 boost::format,如果想去掉 boost::format 的依赖还是很容易的。
#include <cstdint>
#include <fstream>
#include <functional>
#include <iostream>
#include <string>
#include <utility>
#include <boost/format.hpp>
#ifdef __cplusplus
extern "C" {
#endif
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libswscale/swscale.h>
#include <libavutil/imgutils.h>
#ifdef __cplusplus
}
#endif
/**
 * Scope guard: runs a callback when the guard object is destroyed.
 *
 * Typical use is to release a C-API resource on every exit path of a
 * function. The guard is neither copyable nor movable, so the callback is
 * invoked at most once.
 *
 * Note: the callback runs inside the destructor, so it must not throw.
 */
class ScopeExit final {
public:
    /// @param func  Callback to run on destruction; an empty function is allowed
    ///              and results in a guard that does nothing.
    explicit ScopeExit(std::function<void()> func)
        : _func(std::move(func)) {
    }

    // Copying would make the same cleanup run twice (double free / double
    // close), so copying is disabled. Declaring these also suppresses the
    // implicit move operations.
    ScopeExit(const ScopeExit&) = delete;
    ScopeExit& operator=(const ScopeExit&) = delete;

    /// Disarms the guard: the callback will not be run on destruction.
    void clear() {
        _func = nullptr;
    }

    ~ScopeExit() {
        if (_func) {
            _func();
        }
    }

private:
    std::function<void()> _func = nullptr;
};
/**
 * Writes one packed RGB24 frame to "frame<iFrame>.ppm" as a binary PPM (P6).
 *
 * @param pFrame  Frame whose data[0]/linesize[0] hold packed RGB24 pixels
 *                (3 bytes per pixel); rows are read using linesize[0] as stride.
 * @param width   Image width in pixels.
 * @param height  Image height in pixels.
 * @param iFrame  Number used to build the output file name.
 *
 * Nothing is written when the frame is null/empty or the dimensions are not
 * positive. Failures are reported on stderr; the function has no return value.
 */
void SaveFrame(AVFrame *pFrame, int width, int height, int iFrame) {
    if (pFrame == nullptr || pFrame->data[0] == nullptr || width <= 0 || height <= 0) {
        return;
    }

    const std::string filename = (boost::format("frame%d.ppm") % iFrame).str();
    // P6 carries raw bytes after the header, so the stream must be binary:
    // a text-mode stream may rewrite 0x0A bytes inside the pixel data.
    std::ofstream ofs(filename, std::ios::out | std::ios::binary);
    if (!ofs) {
        std::cerr << "Could not open " << filename << " for writing" << std::endl;
        return;
    }

    // PPM header: magic number, dimensions, maximum channel value.
    ofs << boost::format("P6\n%d %d\n255\n") % width % height;

    // Write pixel data row by row; linesize[0] may be larger than width * 3
    // because of padding, so only the visible part of each row is copied.
    const std::streamsize row_bytes = static_cast<std::streamsize>(width) * 3;
    for (int y = 0; y < height; ++y) {
        const uint8_t* row = pFrame->data[0] + y * pFrame->linesize[0];
        ofs.write(reinterpret_cast<const char*>(row), row_bytes);
    }

    if (!ofs) {
        std::cerr << "Error while writing " << filename << std::endl;
    }
}
int main(int argc, char *argv[]) {
if (argc < 2) {
std::cerr << "Please provide a movie file" << std::endl;
return -1;
}
// Register all formats and codecs
av_register_all();
// Open video file
AVFormatContext* pFormatCtx = NULL;
if (avformat_open_input(&pFormatCtx, argv[1], NULL, NULL) != 0) {
return -1; // Couldn't open file
}
ScopeExit format_ctx_closer([&pFormatCtx] {
avformat_close_input(&pFormatCtx);
});
// Retrieve stream information
if (avformat_find_stream_info(pFormatCtx, NULL) < 0) {
return -1; // Couldn't find stream information
}
// Dump information about file onto standard error
av_dump_format(pFormatCtx, 0, argv[1], 0);
// Find the first video stream
int videoStream = -1;
for (size_t i = 0; i < pFormatCtx->nb_streams; i++) {
if (pFormatCtx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
videoStream = i;
break;
}
}
if (videoStream == -1) {
return -1; // Didn't find a video stream
}
auto time_base = pFormatCtx->streams[videoStream]->time_base;
std::cout << boost::format("time_base num:%1% den::%2%") % time_base.num % time_base.den
<< std::endl;
// Get a pointer to the codec context for the video stream
const auto* codecpar = pFormatCtx->streams[videoStream]->codecpar;
// Find the decoder for the video stream
AVCodec* pCodec = avcodec_find_decoder(codecpar->codec_id);
if (pCodec == NULL) {
std::cerr << "Unsupported codec!" << std::endl;
return -1; // Codec not found
}
// Copy context
AVCodecContext* pCodecCtx = avcodec_alloc_context3(pCodec);
ScopeExit avcodec_closer([&pCodecCtx] {
if (pCodecCtx) {avcodec_close(pCodecCtx); }
});
if (0 != avcodec_parameters_to_context(pCodecCtx, codecpar)) {
std::cerr << "avcodec_parameters_to_context error" << std::endl;
return -1;
}
// Open codec
if (avcodec_open2(pCodecCtx, pCodec, NULL) < 0) {
return -1; // Could not open codec
}
// Allocate video frame
AVFrame* pFrame = av_frame_alloc();
if (pFrame == NULL) {
return -1;
}
ScopeExit frame_deleter([&pFrame] {
if (pFrame) {av_frame_free(&pFrame); }
});
// Allocate an AVFrame structure
AVFrame* pFrameRGB = av_frame_alloc();
if (pFrameRGB == NULL) {
return -1;
}
ScopeExit frame_rgb_deleter([&pFrameRGB] {
if (pFrameRGB) {av_frame_free(&pFrameRGB); }
});
pFrameRGB->width = pCodecCtx->width;
pFrameRGB->height = pCodecCtx->height;
pFrameRGB->format = AV_PIX_FMT_RGB24;
auto numBytes = av_image_get_buffer_size(AV_PIX_FMT_RGB24,
pFrameRGB->width,
pFrameRGB->height,
1);
uint8_t* buffer = (uint8_t *)av_malloc(numBytes * sizeof(uint8_t));
ScopeExit buffer_deleter([&buffer] {
if (buffer) {av_freep(&buffer); }
});
av_image_fill_arrays(pFrameRGB->data,
pFrameRGB->linesize,
buffer,
AV_PIX_FMT_RGB24,
pFrameRGB->width,
pFrameRGB->height,
1);
// initialize SWS context for software scaling
struct SwsContext* sws_ctx = sws_getContext(pCodecCtx->width,
pCodecCtx->height,
pCodecCtx->pix_fmt,
pCodecCtx->width,
pCodecCtx->height,
AV_PIX_FMT_RGB24,
SWS_BILINEAR,
NULL,
NULL,
NULL);
// seek to 21 second, actually before 21 second
av_seek_frame(pFormatCtx, videoStream , 21 * time_base.den, AVSEEK_FLAG_BACKWARD | AVSEEK_FLAG_ANY);
// Read frames and save first five frames to disk
int i = 0;
AVPacket packet;
while (av_read_frame(pFormatCtx, &packet) >= 0 && i <= 5) {
ScopeExit unrefer([&packet] {
av_packet_unref(&packet);
});
if (packet.stream_index != videoStream) {
continue;
}
int ret_packet = avcodec_send_packet(pCodecCtx, &packet);
if (ret_packet < 0) {
break;
}
int ret_frame = avcodec_receive_frame(pCodecCtx, pFrame);
if (ret_frame >= 0) {
// just save key frame
// if (!pFrame->key_frame) {
// continue;
// }
auto pts = av_frame_get_best_effort_timestamp(pFrame);
if (pts == AV_NOPTS_VALUE) {
pts = 0;
}
std::cout << boost::format("pts:%1% time:%2%") % pts % (pts * av_q2d(time_base))
<< std::endl;
// Convert the image from its native format to RGB
sws_scale(sws_ctx,
(uint8_t const * const *)pFrame->data,
pFrame->linesize,
0,
pCodecCtx->height,
pFrameRGB->data,
pFrameRGB->linesize);
// Save the frame to disk
if (++i <= 5) {
SaveFrame(pFrameRGB, pCodecCtx->width, pCodecCtx->height, i);
}
} else if (ret_frame != AVERROR(EAGAIN) && ret_frame != AVERROR_EOF) {
break;
}
}
return 0;
}