音频重采样

什么是音频重采样？

由于从网络流、本地媒体文件等各种渠道解码的AVFrame帧，其采样位数、声道数、采样率都是不确定的，但是在很多的播放器框架中，需要播放指定的采样位数、声道数、采样率的音频数据，因此需要首先进行格式转换，这个格式转换的过程就称为音频重采样。

接口定义：

结构定义：

音频重采样上下文，这是一个不公开的结构，所有涉及的操作均以指针的方式进行：

typedef struct SwrContext SwrContext;

函数定义：

分配重采样上下文内存：

struct SwrContext *swr_alloc(void);

设置样本格式、通道布局、采样率：

int av_opt_set_sample_fmt(void *obj, const char *name, enum AVSampleFormat fmt, int search_flags);
int av_opt_set_channel_layout(void *obj, const char *name, int64_t ch_layout, int search_flags);
int av_opt_set_int(void *obj, const char *name, int64_t val, int search_flags);

上面这几个函数用于重采样上下文的创建和参数设置，用法如下：

// Allocate an empty resampling context, then describe both sides of the
// conversion one option at a time. The context is not usable until
// swr_init() has been called on it.
// NOTE: this sample does not check the return value of any call.
SwrContext *swr = swr_alloc();
// Input side: sample format, channel layout, sample rate.
av_opt_set_sample_fmt(swr, "in_sample_fmt",  AV_SAMPLE_FMT_FLTP, 0);
av_opt_set_channel_layout(swr, "in_channel_layout",  AV_CH_LAYOUT_5POINT1, 0);
av_opt_set_int(swr, "in_sample_rate",     48000,                0);
// Output side: sample format, channel layout, sample rate.
av_opt_set_sample_fmt(swr, "out_sample_fmt", AV_SAMPLE_FMT_S16,  0);
av_opt_set_channel_layout(swr, "out_channel_layout", AV_CH_LAYOUT_STEREO,  0);
av_opt_set_int(swr, "out_sample_rate",    44100,                0);

如果嫌预初始化的过程繁琐，可以使用统一包装函数：

struct SwrContext *swr_alloc_set_opts(struct SwrContext *s,
                                      int64_t out_ch_layout, enum AVSampleFormat out_sample_fmt, int out_sample_rate,
                                      int64_t  in_ch_layout, enum AVSampleFormat  in_sample_fmt, int  in_sample_rate,
                                      int log_offset, void *log_ctx);

用法：

// Allocate a context and set all six conversion parameters in a single call.
// The result is NULL when allocation fails (not checked in this sample), and
// the context still has to be passed to swr_init() before it can convert.
SwrContext *swr = swr_alloc_set_opts(NULL,  // we're allocating a new context
                      AV_CH_LAYOUT_STEREO,  // out_ch_layout
                      AV_SAMPLE_FMT_S16,    // out_sample_fmt
                      44100,                // out_sample_rate
                      AV_CH_LAYOUT_5POINT1, // in_ch_layout
                      AV_SAMPLE_FMT_FLTP,   // in_sample_fmt
                      48000,                // in_sample_rate
                      0,                    // log_offset
                      NULL);                // log_ctx

正式初始化重采样上下文：

int swr_init(struct SwrContext *s);

计算重采样上下文缓冲样本的延迟时间，base参数用于指定基准时间，通常设置为输入采样率：

int64_t swr_get_delay(struct SwrContext *s, int64_t base);

执行重采样转换，out和out_count参数指定输出缓冲区及其可容纳的样本数（每通道），in和in_count指定输入缓冲区及输入的样本数（每通道）。
返回实际输出的样本数（每通道），出错时返回负的错误码；由于上下文带有缓冲功能，所以实际输出的样本数可能小于输入的样本数。
可以将in参数设置为NULL、in_count设置为0，用于将缓冲区的数据全部输出。

int swr_convert(struct SwrContext *s, uint8_t **out, int out_count, const uint8_t **in , int in_count);

重采样过程完成后，释放上下文：

void swr_free(struct SwrContext **s);

这个函数用于按rnd指定的舍入方式计算 a * b / c 的结果（避免直接相乘时的溢出），在音频重采样中用于计算相同时长的目标样本数：

int64_t av_rescale_rnd(int64_t a, int64_t b, int64_t c, enum AVRounding rnd) av_const;

如：

int64 dst_nb_samples = av_rescale_rnd(src_nb_samples, dst_rate, src_rate, AV_ROUND_UP);

一些辅助函数：

根据通道布局获取通道数：

int av_get_channel_layout_nb_channels(uint64_t channel_layout);

根据音频参数分配相关内存空间：

int av_samples_alloc_array_and_samples(uint8_t ***audio_data, int *linesize, int nb_channels, 
        int nb_samples, enum AVSampleFormat sample_fmt, int align);

上面是获取一组内存空间，有时只需要单通道内存或者样本以交错排列存储，使用这个函数：

int av_samples_alloc(uint8_t **audio_data, int *linesize, int nb_channels, 
        int nb_samples, enum AVSampleFormat sample_fmt, int align);

根据音频参数获取音频数据大小：

int av_samples_get_buffer_size(int *linesize, int nb_channels, int nb_samples, 
        enum AVSampleFormat sample_fmt, int align);

代码举例：

按常理来说，我应该写一个关于音频重采样的例子，但是我突然改变主意了，计划在下篇中写一个完整地读取本地媒体文件并交给ALSA播放的演示程序，这里就暂时放一个重采样的简单封装吧。

// Error bookkeeping: remembers the most recent error code together with a
// human-readable description of it.
class CAVError
{
    int m_nErrCode;          // last recorded code; 0 when the text was supplied by the caller
    std::string m_strError;  // description that belongs to m_nErrCode

public:
    // Starts out with code 0 and an empty description.
    CAVError()
        : m_nErrCode(0)
        , m_strError()
    {
    }
    virtual ~CAVError()
    {
    }

    // Record an error.
    //   nCode    - error code; when non-zero the description is obtained from
    //              av_strerror() and strError is ignored.
    //   strError - description to store when nCode is 0.
    void setError(int nCode, const std::string& strError)
    {
        m_nErrCode = nCode;

        // Code 0 carries no library description, so keep the caller's text.
        if (nCode == 0)
        {
            m_strError = strError;
            return;
        }

        char szText[128] = {0};
        av_strerror(nCode, szText, sizeof(szText));
        m_strError.assign(szText);
    }

    // Fetch the recorded error.
    //   nErrCode - receives the stored error code.
    // Returns the text "<code> - <description>".
    std::string getError(int& nErrCode) const
    {
        nErrCode = m_nErrCode;

        char szCode[32] = {0};
        snprintf(szCode, sizeof(szCode), "%d", m_nErrCode);

        std::string strResult(szCode);
        strResult += " - ";
        strResult += m_strError;
        return strResult;
    }
};

// Audio resampling wrapper: owns a single SwrContext and reports failures
// through the inherited CAVError state.
class CSWRWrapper
        : public CAVError
{
    struct SwrContext* m_pSwrCTX;  // owned context; NULL while not initialized

    // This object owns m_pSwrCTX, so a member-wise copy would make two
    // wrappers release the same context. Copying is therefore forbidden:
    // declared private and intentionally left undefined.
    CSWRWrapper(const CSWRWrapper&);
    CSWRWrapper& operator=(const CSWRWrapper&);

public:
    CSWRWrapper()
        : m_pSwrCTX(NULL)
    {
    }
    virtual ~CSWRWrapper()
    {
        free();
    }

    // Raw context pointer (NULL when not initialized). Ownership stays with
    // this wrapper.
    struct SwrContext* handle()
    {
        return m_pSwrCTX;
    }

    // Set up the conversion parameters and initialize the context.
    //   out_ch_layout / out_sample_fmt / out_sample_rate - desired output format
    //   in_ch_layout  / in_sample_fmt  / in_sample_rate  - format of the input
    // Returns true on success. On failure returns false, leaves the wrapper
    // uninitialized and records the reason (see getError()).
    // May be called again to reconfigure; the previous context is released first.
    bool init(int64_t out_ch_layout, enum AVSampleFormat out_sample_fmt, int out_sample_rate,
              int64_t in_ch_layout, enum AVSampleFormat in_sample_fmt, int in_sample_rate)
    {
        // Drop a context left over from an earlier init(); overwriting the
        // pointer without releasing it would leak that context.
        free();

        // Allocate the context and set its parameters
        m_pSwrCTX = swr_alloc_set_opts(NULL, out_ch_layout, out_sample_fmt, out_sample_rate, in_ch_layout, in_sample_fmt, in_sample_rate, 0, NULL);
        if (m_pSwrCTX == NULL)
        {
            setError(0, "swr_alloc_set_opts() failed!");
            return false;
        }

        // Initialize the context with the parameters set above
        int rc = swr_init(m_pSwrCTX);
        if (rc < 0)
        {
            // swr_free() also resets m_pSwrCTX to NULL
            swr_free(&m_pSwrCTX);

            setError(rc, "");
            return false;
        }

        return true;
    }

    // Release the context, if any. Safe to call repeatedly.
    void free()
    {
        if (m_pSwrCTX)
        {
            swr_free(&m_pSwrCTX);
        }
    }

    // Convert samples through swr_convert().
    //   out / out_count - output buffers and the amount of space in them
    //   in  / in_count  - input buffers and the number of input samples
    // Returns the value reported by swr_convert() on success, or -1 when the
    // wrapper is not initialized or the conversion fails (reason recorded,
    // see getError()).
    int convert(uint8_t **out, int out_count, const uint8_t **in , int in_count)
    {
        if (m_pSwrCTX == NULL)
        {
            setError(0, "not init!");
            return -1;
        }

        int rc = swr_convert(m_pSwrCTX, out, out_count, in, in_count);
        if (rc < 0)
        {
            setError(rc, "");
            return -1;
        }

        return rc;
    }
};

音频重采样