学习 FFmpeg 之视频读取

学习 FFmpeg 循序渐进的资料很少，前一段还在交代背景知识，下一段就推你去看源代码去了。
An ffmpeg and SDL Tutorial 算是我找到的比较好的入门教程了，当然我并不关心 SDL 是啥，我只想知道 FFmpeg 是怎么使用的。

尝试编译了一下 tutorial 1 的代码，编译器提示几乎每一个用到的函数和字段都被标记为 deprecated（已弃用）。当然编译是可以通过的，也能够如期地运行起来。不过，既然 FFmpeg 已经发布到了 3.x.x 版本，也不妨升级使用最新的接口。

与 tutorial 1 代码逻辑一致，读取视频 5 帧保存成 PPM 格式图片。
保存成 PPM 主要是因为这种格式简单，查看却是很不方便的，可以使用 ImageMagick 命令转换成方便查看的格式：

convert in.ppm out.jpg

与 tutorial 1 不同的是，增加了 seek 操作（av_seek_frame），跳到视频某个时间点附近的关键帧。编译需要 C++11 支持，还依赖了 boost::format；如果想去掉 boost::format 的依赖也是很容易的。

#include <fstream>
#include <functional>
#include <iostream>
#include <string>
#include <utility>

#include <boost/format.hpp>

#ifdef __cplusplus
extern "C" {
#endif
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libswscale/swscale.h>
#include <libavutil/imgutils.h>
#ifdef __cplusplus
}
#endif

/// Scope guard: runs a callback when the object is destroyed, unless the
/// callback has been cancelled with clear().
///
/// The guard is neither copyable nor movable, so the callback can run at most
/// once per guard.
class ScopeExit final {
public:
    /// @param func Callback invoked from the destructor. An empty
    ///             std::function is accepted and results in no call.
    explicit ScopeExit(std::function<void()> func)
        : _func(std::move(func)) {
    }

    // A copy (or a moved-from original) would still hold a callable and run
    // the same cleanup a second time, so both operations are disabled.
    ScopeExit(const ScopeExit&) = delete;
    ScopeExit& operator=(const ScopeExit&) = delete;
    ScopeExit(ScopeExit&&) = delete;
    ScopeExit& operator=(ScopeExit&&) = delete;

    /// Cancels the pending callback; the destructor then does nothing.
    void clear() {
        _func = nullptr;
    }

    /// Invokes the callback if one is still set.
    ~ScopeExit() {
        if (_func) {
            _func();
        }
    }

private:
    std::function<void()> _func;
};

/// Writes one frame to "frame<iFrame>.ppm" as a binary PPM (P6) image.
///
/// For each of the `height` rows, `width * 3` bytes are copied from
/// `pFrame->data[0]`, stepping by `pFrame->linesize[0]` between rows, so the
/// frame is expected to hold 3-bytes-per-pixel data in plane 0.
///
/// @param pFrame  Frame to write; nothing is written when it or its first
///                plane is null.
/// @param width   Image width in pixels; must be positive.
/// @param height  Image height in pixels; must be positive.
/// @param iFrame  Number embedded in the output file name.
///
/// Failure to open the output file is silently ignored.
void SaveFrame(AVFrame *pFrame, int width, int height, int iFrame) {
    if (pFrame == nullptr || pFrame->data[0] == nullptr || width <= 0 || height <= 0) {
        return;
    }

    const std::string filename = (boost::format("frame%d.ppm") % iFrame).str();
    // P6 stores raw bytes after the header. Binary mode prevents newline
    // translation from corrupting the data on platforms that distinguish
    // text and binary streams.
    std::ofstream ofs(filename, std::ios::out | std::ios::binary);
    if (!ofs) {
        return;
    }

    // PPM header: magic number, dimensions, maximum channel value.
    ofs << boost::format("P6\n%d %d\n255\n") % width % height;

    // Write pixel data row by row; linesize may be larger than width * 3,
    // so only the visible part of every row is copied.
    const std::streamsize row_bytes = static_cast<std::streamsize>(width) * 3;
    for (int y = 0; y < height; y++) {
        const uint8_t* row = pFrame->data[0] + y * pFrame->linesize[0];
        ofs.write(reinterpret_cast<const char*>(row), row_bytes);
    }
}

int main(int argc, char *argv[]) {
    if (argc < 2) {
        std::cerr << "Please provide a movie file" << std::endl;
        return -1;
    }
    // Register all formats and codecs
    av_register_all();

    // Open video file
    AVFormatContext* pFormatCtx = NULL;
    if (avformat_open_input(&pFormatCtx, argv[1], NULL, NULL) != 0) {
        return -1; // Couldn't open file
    }
    ScopeExit format_ctx_closer([&pFormatCtx] {
        avformat_close_input(&pFormatCtx);
    });

    // Retrieve stream information
    if (avformat_find_stream_info(pFormatCtx, NULL) < 0) {
        return -1; // Couldn't find stream information
    }

    // Dump information about file onto standard error
    av_dump_format(pFormatCtx, 0, argv[1], 0);

    // Find the first video stream
    int videoStream = -1;
    for (size_t i = 0; i < pFormatCtx->nb_streams; i++) {
        if (pFormatCtx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
            videoStream = i;
            break;
        }
    }
    if (videoStream == -1) {
        return -1; // Didn't find a video stream
    }
    auto time_base = pFormatCtx->streams[videoStream]->time_base;
    std::cout << boost::format("time_base num:%1% den::%2%") % time_base.num % time_base.den
              << std::endl;

    // Get a pointer to the codec context for the video stream
    const auto* codecpar = pFormatCtx->streams[videoStream]->codecpar;

    // Find the decoder for the video stream
    AVCodec* pCodec = avcodec_find_decoder(codecpar->codec_id);
    if (pCodec == NULL) {
        std::cerr << "Unsupported codec!" << std::endl;
        return -1; // Codec not found
    }

    // Copy context
    AVCodecContext* pCodecCtx = avcodec_alloc_context3(pCodec);
    ScopeExit avcodec_closer([&pCodecCtx] {
        if (pCodecCtx) {avcodec_close(pCodecCtx); }
    });
    if (0 != avcodec_parameters_to_context(pCodecCtx, codecpar)) {
        std::cerr << "avcodec_parameters_to_context error" << std::endl;
        return -1;
    }

    // Open codec
    if (avcodec_open2(pCodecCtx, pCodec, NULL) < 0) {
        return -1; // Could not open codec
    }

    // Allocate video frame
    AVFrame* pFrame = av_frame_alloc();
    if (pFrame == NULL) {
        return -1;
    }
    ScopeExit frame_deleter([&pFrame] {
        if (pFrame) {av_frame_free(&pFrame); }
    });

    // Allocate an AVFrame structure
    AVFrame* pFrameRGB = av_frame_alloc();
    if (pFrameRGB == NULL) {
        return -1;
    }
    ScopeExit frame_rgb_deleter([&pFrameRGB] {
        if (pFrameRGB) {av_frame_free(&pFrameRGB); }
    });
    pFrameRGB->width = pCodecCtx->width;
    pFrameRGB->height = pCodecCtx->height;
    pFrameRGB->format = AV_PIX_FMT_RGB24;

    auto numBytes = av_image_get_buffer_size(AV_PIX_FMT_RGB24,
                                             pFrameRGB->width,
                                             pFrameRGB->height,
                                             1);
    uint8_t* buffer = (uint8_t *)av_malloc(numBytes * sizeof(uint8_t));
    ScopeExit buffer_deleter([&buffer] {
        if (buffer) {av_freep(&buffer); }
    });
    av_image_fill_arrays(pFrameRGB->data,
                         pFrameRGB->linesize,
                         buffer,
                         AV_PIX_FMT_RGB24,
                         pFrameRGB->width,
                         pFrameRGB->height,
                         1);

    // initialize SWS context for software scaling
    struct SwsContext* sws_ctx = sws_getContext(pCodecCtx->width,
                                                pCodecCtx->height,
                                                pCodecCtx->pix_fmt,
                                                pCodecCtx->width,
                                                pCodecCtx->height,
                                                AV_PIX_FMT_RGB24,
                                                SWS_BILINEAR,
                                                NULL,
                                                NULL,
                                                NULL);

    // seek to 21 second, actually before 21 second
    av_seek_frame(pFormatCtx, videoStream , 21 * time_base.den, AVSEEK_FLAG_BACKWARD | AVSEEK_FLAG_ANY);

    // Read frames and save first five frames to disk
    int i = 0;
    AVPacket packet;
    while (av_read_frame(pFormatCtx, &packet) >= 0 && i <= 5) {
        ScopeExit unrefer([&packet] {
            av_packet_unref(&packet);
        });
        if (packet.stream_index != videoStream) {
            continue;
        }
        int ret_packet = avcodec_send_packet(pCodecCtx, &packet);
        if (ret_packet < 0) {
            break;
        }
        int ret_frame = avcodec_receive_frame(pCodecCtx, pFrame);
        if (ret_frame >= 0) {
            // just save key frame
            // if (!pFrame->key_frame) {
            //     continue;
            // }
            auto pts = av_frame_get_best_effort_timestamp(pFrame);
            if (pts == AV_NOPTS_VALUE) {
                pts = 0;
            }
            std::cout << boost::format("pts:%1% time:%2%") % pts % (pts * av_q2d(time_base))
                      << std::endl;
            // Convert the image from its native format to RGB
            sws_scale(sws_ctx,
                      (uint8_t const * const *)pFrame->data,
                      pFrame->linesize,
                      0,
                      pCodecCtx->height,
                      pFrameRGB->data,
                      pFrameRGB->linesize);

            // Save the frame to disk
            if (++i <= 5) {
                SaveFrame(pFrameRGB, pCodecCtx->width, pCodecCtx->height, i);
            }
        } else if (ret_frame != AVERROR(EAGAIN) && ret_frame != AVERROR_EOF) {
            break;
        }
    }

    return 0;
}

学习 FFmpeg 之视频读取

学习 FFmpeg 之视频读取

相关阅读更多精彩内容

友情链接更多精彩内容