《Android FFmpeg 播放器开发梳理》第四章 音频重采样与变速变调处理

前面一章,我们讲解了音频输出的处理,这一章将会讲解音频重采样以及变速变调处理。
AudioResampler是负责音频重采样处理的对象。重采样器从音频解码器AudioDecoder中取得一帧解码后的音频帧,然后根据同步类型判断是否需要对其进行重采样以及变速变调处理。其实现代码如下:

/**
 * Audio parameters: describes the format of one PCM stream
 * (used both for the decoded source and for the output target).
 */
typedef struct AudioParams {
    int freq;                               // sample rate
    int channels;                           // number of channels
    int64_t channel_layout;                 // FFmpeg channel layout mask
    enum AVSampleFormat fmt;                // sample format
    int frame_size;                         // bytes for one sample across all channels
    int bytes_per_sec;                      // bytes for `freq` samples across all channels
} AudioParams;

/**
 * Audio resampling state: clock bookkeeping, working buffers and the
 * source/target formats used by AudioResampler.
 */
typedef struct AudioState {
    double audioClock;                      // audio clock: pts of the last processed frame plus its duration (NAN if unknown)
    double audio_diff_cum;                  // exponentially weighted sum of clock differences
    double audio_diff_avg_coef;             // weighting coefficient for audio_diff_cum
    double audio_diff_threshold;            // average difference above which the sample count is corrected
    int audio_diff_avg_count;               // number of differences accumulated so far
    int audio_hw_buf_size;                  // size reported by the audio device spec (spec->size)
    uint8_t *outputBuffer;                  // PCM currently being copied to the device; aliases one of the buffers below or the frame data
    uint8_t *resampleBuffer;                // output buffer of the resampler
    short *soundTouchBuffer;                // SoundTouch working buffer
    unsigned int bufferSize;                // number of valid bytes behind outputBuffer
    unsigned int resampleSize;              // allocated size of resampleBuffer
    unsigned int soundTouchBufferSize;      // allocated size of soundTouchBuffer
    int bufferIndex;                        // read offset into outputBuffer
    int writeBufferSize;                    // bytes of outputBuffer not yet consumed after the last callback
    SwrContext *swr_ctx;                    // resampling context
    int64_t audio_callback_time;            // time of the last device callback (av_gettime_relative)
    AudioParams audioParamsSrc;             // format of the decoded source audio
    AudioParams audioParamsTarget;          // format delivered to the output device
} AudioState;

/**
 * Audio resampler.
 *
 * Pulls decoded frames from an AudioDecoder, converts them to the output
 * format, optionally applies tempo/pitch processing, and fills the PCM
 * buffer handed over by the audio output device.
 */
class AudioResampler {
public:
    // Stores the three collaborators (not released by this class) and
    // allocates the internal state, the SoundTouch wrapper and the frame.
    AudioResampler(PlayerState *playerState, AudioDecoder *audioDecoder, MediaSync *mediaSync);

    virtual ~AudioResampler();

    // Configures the target format from the device spec.
    // Returns 0 on success, -1 if the buffer sizes cannot be computed.
    int setResampleParams(AudioDeviceSpec *spec, int64_t wanted_channel_layout);

    // Fills `stream` with exactly `len` bytes of PCM (silence when nothing
    // is available or when muted) and updates the audio clock.
    void pcmQueueCallback(uint8_t *stream, int len);

private:
    // Returns the sample count to request from the resampler for a frame
    // of `nbSamples` samples, adjusted when audio is not the master clock.
    int audioSynchronize(int nbSamples);

    // Fetches and converts one frame; returns the number of PCM bytes
    // produced, or a negative value on failure.
    int audioFrameResample();

private:
    PlayerState *playerState;
    MediaSync *mediaSync;

    AVFrame *frame;
    AudioDecoder *audioDecoder;             // audio decoder
    AudioState *audioState;                 // resampling state
    SoundTouchWrapper *soundTouchWrapper;   // tempo / pitch processing
};


/**
 * Creates a resampler bound to the given player state, decoder and sync object.
 *
 * The three pointers are stored as-is; this class does not release them.
 * The internal state is allocated zero-initialised.
 */
AudioResampler::AudioResampler(PlayerState *playerState, AudioDecoder *audioDecoder, MediaSync *mediaSync) {
    this->playerState = playerState;
    this->audioDecoder = audioDecoder;
    this->mediaSync = mediaSync;
    // av_mallocz() already returns zeroed memory; the extra memset that used
    // to follow was redundant and dereferenced NULL when the allocation failed.
    audioState = (AudioState *) av_mallocz(sizeof(AudioState));
    if (!audioState) {
        av_log(NULL, AV_LOG_ERROR, "Failed to allocate AudioState\n");
    }
    soundTouchWrapper = new SoundTouchWrapper();
    frame = av_frame_alloc();
}

/**
 * Releases everything the resampler allocated: the SoundTouch wrapper, the
 * resampling context, both working buffers, the state block and the frame.
 * The player state, decoder and sync object are only forgotten, not freed.
 */
AudioResampler::~AudioResampler() {
    playerState = NULL;
    audioDecoder = NULL;
    mediaSync = NULL;

    // delete on a null pointer is a no-op, so no guard is needed.
    delete soundTouchWrapper;
    soundTouchWrapper = NULL;

    if (audioState) {
        // outputBuffer only aliases other buffers; it is never freed itself.
        audioState->outputBuffer = NULL;
        swr_free(&audioState->swr_ctx);
        av_freep(&audioState->resampleBuffer);
        // Allocated with av_fast_malloc() in audioFrameResample(); it was
        // previously leaked.
        av_freep(&audioState->soundTouchBuffer);
        av_freep(&audioState);
    }

    // av_frame_free() unreferences the frame, tolerates NULL and resets the pointer.
    av_frame_free(&frame);
}

/**
 * Configures the output (target) audio format from the opened device spec.
 *
 * @param spec                  device spec; its size, freq and channels are read
 * @param wanted_channel_layout channel layout to use for the target format
 * @return 0 on success, -1 if the per-sample or per-second buffer size
 *         cannot be computed for the target format
 */
int AudioResampler::setResampleParams(AudioDeviceSpec *spec, int64_t wanted_channel_layout) {

    // NOTE(review): this snapshot is taken before the target is filled in
    // below, so on the first call the source parameters are all zero and the
    // first decoded frame will not match them, which forces creation of the
    // resampling context. ffplay copies after the target is known; confirm the
    // intent (and the passthrough path of audioFrameResample) before moving it.
    audioState->audioParamsSrc = audioState->audioParamsTarget;
    audioState->audio_hw_buf_size = spec->size;
    audioState->bufferSize = 0;
    audioState->bufferIndex = 0;
    audioState->audio_diff_avg_coef = exp(log(0.01) / AUDIO_DIFF_AVG_NB);
    audioState->audio_diff_avg_count = 0;

    AudioParams &target = audioState->audioParamsTarget;
    target.fmt = AV_SAMPLE_FMT_S16;
    target.freq = spec->freq;
    target.channel_layout = wanted_channel_layout;
    target.channels = spec->channels;
    // Bytes for a single sample across all channels.
    target.frame_size = av_samples_get_buffer_size(NULL, target.channels, 1, target.fmt, 1);
    // Bytes for one second of audio.
    target.bytes_per_sec = av_samples_get_buffer_size(NULL, target.channels, target.freq, target.fmt, 1);

    if (target.bytes_per_sec <= 0 || target.frame_size <= 0) {
        av_log(NULL, AV_LOG_ERROR, "av_samples_get_buffer_size failed\n");
        return -1;
    }

    // The threshold must be derived from the freshly computed byte rate.
    // It used to be calculated before bytes_per_sec was set, which divided by
    // zero on the first call and produced an infinite threshold, so the
    // sample-count correction in audioSynchronize() could never trigger.
    audioState->audio_diff_threshold = (double) (audioState->audio_hw_buf_size) / target.bytes_per_sec;
    return 0;
}

void AudioResampler::pcmQueueCallback(uint8_t *stream, int len) {
    int bufferSize, length;

    // 没有音频解码器时,直接返回
    if (!audioDecoder) {
        memset(stream, 0, len);
        return;
    }

    audioState->audio_callback_time = av_gettime_relative();
    while (len > 0) {
        if (audioState->bufferIndex >= audioState->bufferSize) {
            bufferSize = audioFrameResample();
            if (bufferSize < 0) {
                audioState->outputBuffer = NULL;
                audioState->bufferSize = (unsigned int) (AUDIO_MIN_BUFFER_SIZE / audioState->audioParamsTarget.frame_size
                                                         * audioState->audioParamsTarget.frame_size);
            } else {
                audioState->bufferSize = bufferSize;
            }
            audioState->bufferIndex = 0;
        }

        length = audioState->bufferSize - audioState->bufferIndex;
        if (length > len) {
            length = len;
        }
        // 复制经过转码输出的PCM数据到缓冲区中
        if (audioState->outputBuffer != NULL && !playerState->mute) {
            memcpy(stream, audioState->outputBuffer + audioState->bufferIndex, length);
        } else {
            memset(stream, 0, length);
        }
        len -= length;
        stream += length;
        audioState->bufferIndex += length;
    }
    audioState->writeBufferSize = audioState->bufferSize - audioState->bufferIndex;

    if (!isnan(audioState->audioClock) && mediaSync) {
        mediaSync->updateAudioClock(audioState->audioClock -
                                    (double) (2 * audioState->audio_hw_buf_size + audioState->writeBufferSize)
                                    / audioState->audioParamsTarget.bytes_per_sec,
                                    audioState->audio_callback_time / 1000000.0);
    }
}

/**
 * Computes how many samples the resampler should be asked to produce for a
 * frame of `nbSamples` samples.
 *
 * When audio is the master clock the count is returned unchanged. Otherwise
 * the clock difference reported by the sync object is averaged; once enough
 * measurements exist and the average reaches the threshold, the count is
 * shifted by the difference and clamped to +/- SAMPLE_CORRECTION_PERCENT_MAX
 * percent of `nbSamples`.
 */
int AudioResampler::audioSynchronize(int nbSamples) {
    // Audio is the master clock: nothing to correct.
    if (playerState->syncType == AV_SYNC_AUDIO) {
        return nbSamples;
    }

    double clockDiff;
    clockDiff = mediaSync ? mediaSync->getAudioDiffClock() : 0;

    // Unusable or too large a difference: restart the averaging.
    if (isnan(clockDiff) || !(fabs(clockDiff) < AV_NOSYNC_THRESHOLD)) {
        audioState->audio_diff_avg_count = 0;
        audioState->audio_diff_cum = 0;
        return nbSamples;
    }

    audioState->audio_diff_cum = clockDiff + audioState->audio_diff_avg_coef * audioState->audio_diff_cum;

    // Not enough measurements yet for a meaningful average.
    if (audioState->audio_diff_avg_count < AUDIO_DIFF_AVG_NB) {
        audioState->audio_diff_avg_count++;
        return nbSamples;
    }

    const double averageDiff = audioState->audio_diff_cum * (1.0 - audioState->audio_diff_avg_coef);
    if (!(fabs(averageDiff) >= audioState->audio_diff_threshold)) {
        return nbSamples;
    }

    const int shifted = nbSamples + (int)(clockDiff * audioState->audioParamsSrc.freq);
    const int lowerBound = ((nbSamples * (100 - SAMPLE_CORRECTION_PERCENT_MAX) / 100));
    const int upperBound = ((nbSamples * (100 + SAMPLE_CORRECTION_PERCENT_MAX) / 100));
    return av_clip(shifted, lowerBound, upperBound);
}

int AudioResampler::audioFrameResample() {
    int data_size, resampled_data_size;
    int64_t dec_channel_layout;
    int wanted_nb_samples;
    int translate_time = 1;
    int ret = -1;

    // 处于暂停状态
    if (!audioDecoder || playerState->abortRequest || playerState->pauseRequest) {
        return -1;
    }

    for (;;) {

        // 如果数据包解码失败,直接返回
        if ((ret = audioDecoder->getAudioFrame(frame)) < 0) {
            return -1;
        }
        if (ret == 0) {
            continue;
        }

        data_size = av_samples_get_buffer_size(NULL, av_frame_get_channels(frame),
                                               frame->nb_samples,
                                               (AVSampleFormat)frame->format, 1);

        dec_channel_layout =
                (frame->channel_layout && av_frame_get_channels(frame) == av_get_channel_layout_nb_channels(frame->channel_layout))
                ? frame->channel_layout : av_get_default_channel_layout(av_frame_get_channels(frame));
        wanted_nb_samples = audioSynchronize(frame->nb_samples);

        // 帧格式跟源格式不对????
        if (frame->format != audioState->audioParamsSrc.fmt
            || dec_channel_layout != audioState->audioParamsSrc.channel_layout
            || frame->sample_rate != audioState->audioParamsSrc.freq
            || (wanted_nb_samples != frame->nb_samples && !audioState->swr_ctx)) {

            swr_free(&audioState->swr_ctx);
            audioState->swr_ctx = swr_alloc_set_opts(NULL, audioState->audioParamsTarget.channel_layout,
                                                     audioState->audioParamsTarget.fmt, audioState->audioParamsTarget.freq,
                                                     dec_channel_layout, (AVSampleFormat)frame->format,
                                                     frame->sample_rate, 0, NULL);

            if (!audioState->swr_ctx || swr_init(audioState->swr_ctx) < 0) {
                av_log(NULL, AV_LOG_ERROR, "Cannot create sample rate converter for conversion of %d Hz %s %d channels to %d Hz %s %d channels!\n",
                       frame->sample_rate,
                       av_get_sample_fmt_name((AVSampleFormat)frame->format),
                       av_frame_get_channels(frame),
                       audioState->audioParamsTarget.freq,
                       av_get_sample_fmt_name(audioState->audioParamsTarget.fmt),
                       audioState->audioParamsTarget.channels);
                swr_free(&audioState->swr_ctx);
                return -1;
            }
            audioState->audioParamsSrc.channel_layout = dec_channel_layout;
            audioState->audioParamsSrc.channels = av_frame_get_channels(frame);
            audioState->audioParamsSrc.freq = frame->sample_rate;
            audioState->audioParamsSrc.fmt = (AVSampleFormat)frame->format;
        }

        // 音频重采样处理
        if (audioState->swr_ctx) {
            const uint8_t **in = (const uint8_t **)frame->extended_data;
            uint8_t **out = &audioState->resampleBuffer;
            int out_count = (int64_t)wanted_nb_samples * audioState->audioParamsTarget.freq / frame->sample_rate + 256;
            int out_size  = av_samples_get_buffer_size(NULL, audioState->audioParamsTarget.channels, out_count, audioState->audioParamsTarget.fmt, 0);
            int len2;
            if (out_size < 0) {
                av_log(NULL, AV_LOG_ERROR, "av_samples_get_buffer_size() failed\n");
                return -1;
            }
            if (wanted_nb_samples != frame->nb_samples) {
                if (swr_set_compensation(audioState->swr_ctx, (wanted_nb_samples - frame->nb_samples) * audioState->audioParamsTarget.freq / frame->sample_rate,
                                         wanted_nb_samples * audioState->audioParamsTarget.freq / frame->sample_rate) < 0) {
                    av_log(NULL, AV_LOG_ERROR, "swr_set_compensation() failed\n");
                    return -1;
                }
            }
            av_fast_malloc(&audioState->resampleBuffer, &audioState->resampleSize, out_size);
            if (!audioState->resampleBuffer) {
                return AVERROR(ENOMEM);
            }
            len2 = swr_convert(audioState->swr_ctx, out, out_count, in, frame->nb_samples);
            if (len2 < 0) {
                av_log(NULL, AV_LOG_ERROR, "swr_convert() failed\n");
                return -1;
            }
            if (len2 == out_count) {
                av_log(NULL, AV_LOG_WARNING, "audio buffer is probably too small\n");
                if (swr_init(audioState->swr_ctx) < 0) {
                    swr_free(&audioState->swr_ctx);
                }
            }
            audioState->outputBuffer = audioState->resampleBuffer;
            resampled_data_size = len2 * audioState->audioParamsTarget.channels * av_get_bytes_per_sample(audioState->audioParamsTarget.fmt);

            // 变速变调处理
            if ((playerState->playbackRate != 1.0f || playerState->playbackPitch != 1.0f) && !playerState->abortRequest) {
                int bytes_per_sample = av_get_bytes_per_sample(audioState->audioParamsTarget.fmt);
                av_fast_malloc(&audioState->soundTouchBuffer, &audioState->soundTouchBufferSize, out_size * translate_time);
                for (int i = 0; i < (resampled_data_size / 2); i++) {
                    audioState->soundTouchBuffer[i] = (audioState->resampleBuffer[i * 2] | (audioState->resampleBuffer[i * 2 + 1] << 8));
                }
                if (!soundTouchWrapper) {
                    soundTouchWrapper = new SoundTouchWrapper();
                }
                int ret_len = soundTouchWrapper->translate(audioState->soundTouchBuffer, (float)(playerState->playbackRate),
                                                           (float)(playerState->playbackPitch != 1.0f ? playerState->playbackPitch : 1.0f / playerState->playbackRate),
                                                           resampled_data_size / 2, bytes_per_sample,
                                                           audioState->audioParamsTarget.channels, frame->sample_rate);
                if (ret_len > 0) {
                    audioState->outputBuffer = (uint8_t*)audioState->soundTouchBuffer;
                    resampled_data_size = ret_len;
                } else {
                    translate_time++;
                    av_frame_unref(frame);
                    continue;
                }
            }
        } else {
            audioState->outputBuffer = frame->data[0];
            resampled_data_size = data_size;
        }

        // 处理完直接退出循环
        break;
    }

    // 利用pts更新音频时钟
    if (frame->pts != AV_NOPTS_VALUE) {
        audioState->audioClock = frame->pts * av_q2d((AVRational){1, frame->sample_rate})
                                 + (double) frame->nb_samples / frame->sample_rate;
    } else {
        audioState->audioClock = NAN;
    }

    // 使用完成释放引用,防止内存泄漏
    av_frame_unref(frame);

    return resampled_data_size;
}

以上就是音频重采样以及变速变调处理的代码。这段代码没有太多需要展开的地方:在不是同步到音频时钟的情况下,我们需要根据音频时钟与主时钟之间的差值,对当前帧的采样数进行修正,得到期望的采样数量(wanted_nb_samples);然后经过音频重采样处理,得到重采样后的缓冲数据,再做变速变调处理;接着用该帧的时间戳(pts)加上该帧的时长,得到处理后的音频时钟。音频输出设备回调时,我们不断地把回调的PCM缓冲区填满;填满后,需要根据尚未播放的缓冲数据量计算出当前实际播放到的音频时间戳,通知MediaSync更新音频时钟以及同步更新外部时钟。音频重采样以及变速变调处理的流程大体就是这样。

当音频输出设备回调填充PCM数据方法时,我们的播放器将会通过void pcmQueueCallback(uint8_t *stream, int len); 方法调用音频重采样器进行处理,代码如下:

/**
 * Entry point called by the audio output device to obtain PCM data.
 *
 * Forwards to the audio resampler; when no resampler exists the whole
 * buffer is filled with silence.
 *
 * @param stream device buffer to fill
 * @param len    number of bytes to write into stream
 */
void MediaPlayer::pcmQueueCallback(uint8_t *stream, int len) {
    if (!audioResampler) {
        // Clear the entire buffer. sizeof(len) would only clear
        // sizeof(int) bytes and leave the rest uninitialised.
        memset(stream, 0, len);
        return;
    }
    audioResampler->pcmQueueCallback(stream, len);
}

至此,音频重采样以及变速变调处理就讲解完了。
完整代码请参考本人的播放器项目:CainPlayer

最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
  • 序言:七十年代末,一起剥皮案震惊了整个滨河市,随后出现的几起案子,更是在滨河造成了极大的恐慌,老刑警刘岩,带你破解...
    沈念sama阅读 214,588评论 6 496
  • 序言:滨河连续发生了三起死亡事件,死亡现场离奇诡异,居然都是意外死亡,警方通过查阅死者的电脑和手机,发现死者居然都...
    沈念sama阅读 91,456评论 3 389
  • 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事。” “怎么了?”我有些...
    开封第一讲书人阅读 160,146评论 0 350
  • 文/不坏的土叔 我叫张陵,是天一观的道长。 经常有香客问我,道长,这世上最难降的妖魔是什么? 我笑而不...
    开封第一讲书人阅读 57,387评论 1 288
  • 正文 为了忘掉前任,我火速办了婚礼,结果婚礼上,老公的妹妹穿的比我还像新娘。我一直安慰自己,他们只是感情好,可当我...
    茶点故事阅读 66,481评论 6 386
  • 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
    开封第一讲书人阅读 50,510评论 1 293
  • 那天,我揣着相机与录音,去河边找鬼。 笑死,一个胖子当着我的面吹牛,可吹牛的内容都是我干的。 我是一名探鬼主播,决...
    沈念sama阅读 39,522评论 3 414
  • 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...
    开封第一讲书人阅读 38,296评论 0 270
  • 序言:老挝万荣一对情侣失踪,失踪者是张志新(化名)和其女友刘颖,没想到半个月后,有当地人在树林里发现了一具尸体,经...
    沈念sama阅读 44,745评论 1 307
  • 正文 独居荒郊野岭守林人离奇死亡,尸身上长有42处带血的脓包…… 初始之章·张勋 以下内容为张勋视角 年9月15日...
    茶点故事阅读 37,039评论 2 330
  • 正文 我和宋清朗相恋三年,在试婚纱的时候发现自己被绿了。 大学时的朋友给我发了我未婚夫和他白月光在一起吃饭的照片。...
    茶点故事阅读 39,202评论 1 343
  • 序言:一个原本活蹦乱跳的男人离奇死亡,死状恐怖,灵堂内的尸体忽然破棺而出,到底是诈尸还是另有隐情,我是刑警宁泽,带...
    沈念sama阅读 34,901评论 5 338
  • 正文 年R本政府宣布,位于F岛的核电站,受9级特大地震影响,放射性物质发生泄漏。R本人自食恶果不足惜,却给世界环境...
    茶点故事阅读 40,538评论 3 322
  • 文/蒙蒙 一、第九天 我趴在偏房一处隐蔽的房顶上张望。 院中可真热闹,春花似锦、人声如沸。这庄子的主人今日做“春日...
    开封第一讲书人阅读 31,165评论 0 21
  • 文/苍兰香墨 我抬头看了看天上的太阳。三九已至,却和暖如春,着一层夹袄步出监牢的瞬间,已是汗流浃背。 一阵脚步声响...
    开封第一讲书人阅读 32,415评论 1 268
  • 我被黑心中介骗来泰国打工, 没想到刚下飞机就差点儿被人妖公主榨干…… 1. 我叫王不留,地道东北人。 一个月前我还...
    沈念sama阅读 47,081评论 2 365
  • 正文 我出身青楼,却偏偏与公主长得像,于是被迫代替她去往敌国和亲。 传闻我的和亲对象是个残疾皇子,可洞房花烛夜当晚...
    茶点故事阅读 44,085评论 2 352

推荐阅读更多精彩内容