ffplay.c 源码分析- 时间同步

之前我们对单独的音频和视频的播放进行了分析。
但是实际上播放一段影片，还需要音视频同步播放。

主要思路是

在解码获得数据时，对frame的pts进行计算。
在视频送显的时候，或者是音频赋值的时候，进行时间的纠正。
如果以音频时间为主的话，就需要修正视频的送显时间。
如果是以视频的时间为主，同样需要修正音频的播放时间。（通过增加或减少音频帧输出的采样数来实现。）

1. 计算PTS

case AVMEDIA_TYPE_VIDEO:
     // Decode one video packet into `frame`; got_frame is filled in by the decoder and checked below.
     ret = avcodec_decode_video2(d->avctx, frame, &got_frame, &d->pkt_temp);
    if (got_frame) {
        // decoder_reorder_pts selects where the frame pts comes from
        // (the original note says it is -1 by default).
        if (decoder_reorder_pts == -1) {
            // -1: take the pts from av_frame_get_best_effort_timestamp().
            frame->pts = av_frame_get_best_effort_timestamp(frame);
         } else if (!decoder_reorder_pts) {
            // 0: use the packet dts as the frame pts.
            frame->pts = frame->pkt_dts;
           }
       }
      break;
case AVMEDIA_TYPE_AUDIO:
      // Decode one audio packet into `frame`.
       ret = avcodec_decode_audio4(d->avctx, frame, &got_frame, &d->pkt_temp);
        if (got_frame) {
            // Audio timestamps are kept in a time base of 1/sample_rate.
             AVRational tb = (AVRational){1, frame->sample_rate};
             if (frame->pts != AV_NOPTS_VALUE)
                  // The frame carries a pts: rescale it from the codec's packet
                  // time base into tb. av_rescale_q(a, b, c) converts timestamp a
                  // from time base b to time base c (roughly a * b / c).
                 frame->pts = av_rescale_q(frame->pts, av_codec_get_pkt_timebase(d->avctx), tb);
            else if (d->next_pts != AV_NOPTS_VALUE)
                  // No pts on the frame: fall back to the value predicted from the previous frame.
                  frame->pts = av_rescale_q(d->next_pts, d->next_pts_tb, tb);

            // Remember the predicted pts of the following frame: this pts plus
            // this frame's sample count, together with the time base it is in.
             if (frame->pts != AV_NOPTS_VALUE) {
                  d->next_pts = frame->pts + frame->nb_samples;
                   d->next_pts_tb = tb;
              }
       }

这样就计算好了，视频帧和音频帧的pts。

2. 同步

以音频时钟为主时钟

这个时候需要同步的就是视频。
同样在显示的时候，进行处理
video_refresh方法

static void video_refresh(void *opaque, double *remaining_time)
{

          //....omitted
          double last_duration, duration, delay;
          Frame *vp, *lastvp;

             // Work out how long the previously shown picture should stay on screen.
            /* dequeue the picture */
            lastvp = frame_queue_peek_last(&is->pictq);
            vp = frame_queue_peek(&is->pictq);
            
            // Duration between the last shown frame and the next one.
            last_duration = vp_duration(is, lastvp, vp);
            // Correct that duration against the master clock to get the delay to apply.
            delay = compute_target_delay(last_duration, is);
            // Current time: av_gettime_relative() scaled by 1e-6.
            time= av_gettime_relative()/1000000.0;
            // Not yet time for the next frame: keep showing the current one and
            // lower *remaining_time to the wait that is still needed.
            if (time < is->frame_timer + delay) {
                *remaining_time = FFMIN(is->frame_timer + delay - time, *remaining_time);
                goto display;
            }
            // Advance frame_timer by the delay that has just elapsed.
            is->frame_timer += delay;
            // frame_timer is more than AV_SYNC_THRESHOLD_MAX behind the current
            // time: drop the backlog and restart it from now.
            if (delay > 0 && time - is->frame_timer > AV_SYNC_THRESHOLD_MAX)
                is->frame_timer = time;

            SDL_LockMutex(is->pictq.mutex);
            if (!isnan(vp->pts))
                // Publish this frame's pts to the video clock.
                update_video_pts(is, vp->pts, vp->pos, vp->serial);
            SDL_UnlockMutex(is->pictq.mutex);

进入compute_target_delay方法，看看如何计算延迟

/*
 * Adjust the nominal inter-frame delay so that video follows the master clock.
 *
 * delay: nominal wait before the next frame is shown.
 * is:    player state; supplies the clocks and max_frame_duration.
 *
 * Returns the corrected delay. The input delay is returned unchanged when
 * video is the master, or when the clock difference is NaN or not below
 * max_frame_duration.
 */
static double compute_target_delay(double delay, VideoState *is)
{
    double av_diff = 0;
    double threshold;

    /* update delay to follow master synchronisation source */
    if (get_master_sync_type(is) != AV_SYNC_VIDEO_MASTER) {
        /* positive: video clock is ahead of the master; negative: behind */
        av_diff = get_clock(&is->vidclk) - get_master_clock(is);

        /* skip or repeat frame. The tolerance is the delay itself, clamped
           to [AV_SYNC_THRESHOLD_MIN, AV_SYNC_THRESHOLD_MAX] */
        threshold = FFMAX(AV_SYNC_THRESHOLD_MIN, FFMIN(AV_SYNC_THRESHOLD_MAX, delay));

        if (!isnan(av_diff) && fabs(av_diff) < is->max_frame_duration) {
            if (av_diff <= -threshold) {
                /* video is late: shorten the wait, but never below zero */
                delay = FFMAX(0, delay + av_diff);
            } else if (av_diff >= threshold) {
                /* video is early */
                if (delay > AV_SYNC_FRAMEDUP_THRESHOLD)
                    delay += av_diff;   /* long frame: wait the extra difference */
                else
                    delay *= 2;         /* short frame: wait one more frame duration */
            }
        }
    }

    av_log(NULL, AV_LOG_TRACE, "video: delay=%0.3f A-V=%f\n",
            delay, -av_diff);

    return delay;
}

最后同步到时间钟上。
虽然我们设定了睡眠的时间，但是同步时，我们还是用正常的PTS。

/* Set the video clock to `pts` and let the external clock follow it.
 * `pos` is accepted but not used in this body. */
static void update_video_pts(VideoState *is, double pts, int64_t pos, int serial) {
    /* update current video pts */
    set_clock(&is->vidclk, pts, serial);
    /* sync_clock_to_slave re-bases extclk on vidclk only when extclk is NaN
       or differs by more than AV_NOSYNC_THRESHOLD */
    sync_clock_to_slave(&is->extclk, &is->vidclk);
}

虽然改变了睡眠的时间。照样还是使用pts来同步。这点和音频的不一样，音频同步的是pts+ duration 。

/* Set clock `c` to `pts`, stamping the update with the current time. */
static void set_clock(Clock *c, double pts, int serial)
{
    /* av_gettime_relative() scaled by 1e-6 (presumably microseconds to seconds) */
    const double now = av_gettime_relative() / 1000000.0;

    set_clock_at(c, pts, serial, now);
}

最后几个变量

/* no AV sync correction is done if below the minimum AV sync threshold */
#define AV_SYNC_THRESHOLD_MIN 0.04
/* AV sync correction is done if above the maximum AV sync threshold */
#define AV_SYNC_THRESHOLD_MAX 0.1
/* If a frame duration is longer than this, it will not be duplicated to compensate AV sync */
#define AV_SYNC_FRAMEDUP_THRESHOLD 0.1
/* no AV correction is done if too big error */
#define AV_NOSYNC_THRESHOLD 10.0
    /* struct field excerpt; compute_target_delay reads it as is->max_frame_duration */
    double max_frame_duration;          // maximum duration of a frame - above this, we consider the jump a timestamp discontinuity

image.png

如果是用视频为主时间的话

此时音频不是主时钟，需要在音频解码之后对音频的时间进行调整：

static int audio_decode_frame(VideoState *is)
{
    
    //...decoding code omitted.
    // Audio is synchronised by adjusting how many samples are output:
    // synchronize_audio() returns the wanted sample count for this frame.
    wanted_nb_samples = synchronize_audio(is, af->frame->nb_samples);

    //...code omitted
    audio_clock0 = is->audio_clock;
    // The audio clock becomes the frame's pts plus the frame's own duration
    // (nb_samples / sample_rate), i.e. the time at the end of this frame.
    /* update the audio clock with the pts */
    if (!isnan(af->pts))
        is->audio_clock = af->pts + (double) af->frame->nb_samples / af->frame->sample_rate;
    else
        is->audio_clock = NAN;
    is->audio_clock_serial = af->serial;
#ifdef DEBUG
    {
        static double last_clock;
        printf("audio: delay=%0.3f clock=%0.3f clock0=%0.3f\n",
               is->audio_clock - last_clock,
               is->audio_clock, audio_clock0);
        last_clock = is->audio_clock;
    }
#endif
    return resampled_data_size;
}

主要来看一下synchronize_audio方法

/*
 * Return the number of samples that should be output for a frame of
 * nb_samples samples so that audio drifts back toward the master clock.
 *
 * When audio is the master clock, nb_samples is returned unchanged.
 * Otherwise the audio/master difference is folded into a weighted running
 * sum (audio_diff_cum); once AUDIO_DIFF_AVG_NB measurements have been taken
 * and the averaged difference reaches audio_diff_threshold, the sample count
 * is corrected, limited to +/- SAMPLE_CORRECTION_PERCENT_MAX percent.
 */
static int synchronize_audio(VideoState *is, int nb_samples)
{
    double clock_gap, smoothed_gap;
    int target = nb_samples;

    /* if not master, then we try to remove or add samples to correct the clock */
    if (get_master_sync_type(is) == AV_SYNC_AUDIO_MASTER)
        return nb_samples;

    clock_gap = get_clock(&is->audclk) - get_master_clock(is);

    if (isnan(clock_gap) || fabs(clock_gap) >= AV_NOSYNC_THRESHOLD) {
        /* too big difference : may be initial PTS errors, so
           reset A-V filter */
        is->audio_diff_avg_count = 0;
        is->audio_diff_cum       = 0;
        return nb_samples;
    }

    /* geometric running sum: older differences are weighted down by
       audio_diff_avg_coef on every new measurement */
    is->audio_diff_cum = clock_gap + is->audio_diff_avg_coef * is->audio_diff_cum;

    if (is->audio_diff_avg_count < AUDIO_DIFF_AVG_NB) {
        /* not enough measures to have a correct estimate */
        is->audio_diff_avg_count++;
        return nb_samples;
    }

    /* estimate the A-V difference: scaling the running sum by (1 - coef)
       turns it into a weighted average */
    smoothed_gap = is->audio_diff_cum * (1.0 - is->audio_diff_avg_coef);

    if (fabs(smoothed_gap) >= is->audio_diff_threshold) {
        /* allowed range around nb_samples, in whole samples */
        int lower = ((nb_samples * (100 - SAMPLE_CORRECTION_PERCENT_MAX) / 100));
        int upper = ((nb_samples * (100 + SAMPLE_CORRECTION_PERCENT_MAX) / 100));

        /* difference * sample rate = samples to add or drop, then clamp */
        target = av_clip(nb_samples + (int)(clock_gap * is->audio_src.freq), lower, upper);
    }
    av_log(NULL, AV_LOG_TRACE, "diff=%f adiff=%f sample_diff=%d apts=%0.3f %f\n",
            clock_gap, smoothed_gap, target - nb_samples,
            is->audio_clock, is->audio_diff_threshold);

    return target;
}

时间差值乘以采样率可以得到用于补偿的样本数，加之原样本数，即应输出样本数。另外考虑到上一节提到的音频音调变化问题，这里限制了调节范围在正负10%以内。

所以如果音视频不同步的差值较大，并不会立即完全同步，最多只调节当前帧样本数的10%，剩余会在下次调节时继续校正。

最后，与视频同步到音频时类似，这里也有一个准同步的区间，在这个区间内不做同步校正，其大小是audio_diff_threshold：

/* Threshold = hardware buffer size divided by the target byte rate; presumably
   the playback time held by one hardware audio buffer (confirm units). */
is->audio_diff_threshold = (double)(is->audio_hw_buf_size) / is->audio_tgt.bytes_per_sec;

即音频输出设备内缓冲的音频时长。

时间同步

一是时间钟的同步
先来看一下Clock，这个结构体的定义

// Clock
typedef struct Clock {
    double pts;                 /* clock base */
    double pts_drift;           /* clock base minus time at which we updated the clock */
    double last_updated;        /* time at which the clock was last set (written by set_clock_at) */
    double speed;               /* rate factor get_clock applies to the time elapsed since last_updated */
    int serial;                     /* clock is based on a packet with this serial */
    int paused;                 /* when non-zero, get_clock returns pts without extrapolating */
    int *queue_serial;          /* pointer to the current packet queue serial, used for obsolete clock detection */
} Clock;

// Clock synchronisation source
enum {
    AV_SYNC_AUDIO_MASTER,       /* audio clock is the master; default choice */
    AV_SYNC_VIDEO_MASTER,       /* video clock is the master */
    AV_SYNC_EXTERNAL_CLOCK, /* synchronize to an external clock */
};

/**
 * Update the video clock and let the external clock follow it.
 * @param is     player state holding vidclk and extclk
 * @param pts    value to set the video clock to
 * @param pos    not used in this body
 * @param serial serial stored in the video clock
 */
static void update_video_pts(VideoState *is, double pts, int64_t pos, int serial) {
    /* update current video pts */
    set_clock(&is->vidclk, pts, serial);
    /* re-base extclk on vidclk if it is NaN or too far from it */
    sync_clock_to_slave(&is->extclk, &is->vidclk);
}

/**
 * Set a clock, stamping the update with the current time.
 * @param c      clock to update
 * @param pts    new clock value
 * @param serial serial to store in the clock
 */
static void set_clock(Clock *c, double pts, int serial)
{
    /* av_gettime_relative() scaled by 1e-6 (presumably microseconds to seconds) */
    const double now = av_gettime_relative() / 1000000.0;

    set_clock_at(c, pts, serial, now);
}


/**
 * Re-base clock c on clock slave when c is invalid or has drifted too far.
 * @param c     clock to correct
 * @param slave clock whose current value and serial are copied into c
 */
static void sync_clock_to_slave(Clock *c, Clock *slave)
{
    const double current = get_clock(c);
    const double reference = get_clock(slave);

    /* nothing to copy from when the reference clock is invalid */
    if (isnan(reference))
        return;

    /* resync only if c is invalid or differs by more than AV_NOSYNC_THRESHOLD */
    if (isnan(current) || fabs(current - reference) > AV_NOSYNC_THRESHOLD)
        set_clock(c, reference, slave->serial);
}

/**
 * Read the current value of a clock.
 * @param  c clock to read
 * @return   NAN when the clock's serial differs from *queue_serial,
 *           the stored pts when paused, otherwise the extrapolated value
 */
static double get_clock(Clock *c)
{
    double now;

    /* the clock was set from a packet of another serial: treat it as obsolete */
    if (*c->queue_serial != c->serial)
        return NAN;

    /* a paused clock does not advance */
    if (c->paused)
        return c->pts;

    now = av_gettime_relative() / 1000000.0;
    /* With pts_drift = pts - last_updated, as written by set_clock_at, this
       is algebraically pts + (now - last_updated) * speed. */
    return c->pts_drift + now - (now - c->last_updated) * (1.0 - c->speed);
}

/**
 * Update the video clock and let the external clock follow it.
 * @param is     player state holding vidclk and extclk
 * @param pts    value to set the video clock to
 * @param pos    not used in this body
 * @param serial serial stored in the video clock
 */
static void update_video_pts(VideoState *is, double pts, int64_t pos, int serial) {
    /* update current video pts */
    set_clock(&is->vidclk, pts, serial);
   // Synchronise the external clock (extclk) to the video clock.
    sync_clock_to_slave(&is->extclk, &is->vidclk);
}

/* Set clock `c` to `pts`, stamping the update with the current time. */
static void set_clock(Clock *c, double pts, int serial)
{
    /* av_gettime_relative() scaled by 1e-6 (presumably microseconds to seconds) */
    const double now = av_gettime_relative() / 1000000.0;

    set_clock_at(c, pts, serial, now);
}

/* Set clock c to pts as observed at the given time, and record that time. */
static void set_clock_at(Clock *c, double pts, int serial, double time)
{
    c->serial = serial;
    c->last_updated = time;
    c->pts = pts;
    /* offset between the clock value and the time it was set at;
       get_clock adds the current time back onto it */
    c->pts_drift = pts - time;
}

pts_drift 表示更新时钟时 pts 与当时系统时间的差值（pts - time）。之后在任意时刻，用 pts_drift 加上当前的系统时间，就可以推算出时钟此刻的值。

最后的同步

/* Re-base clock c on clock slave when c is invalid or has drifted too far. */
static void sync_clock_to_slave(Clock *c, Clock *slave)
{
    const double current = get_clock(c);
    const double reference = get_clock(slave);

    /* nothing to copy from when the reference clock is invalid */
    if (isnan(reference))
        return;

    /* resync only if c is invalid or differs by more than AV_NOSYNC_THRESHOLD */
    if (isnan(current) || fabs(current - reference) > AV_NOSYNC_THRESHOLD)
        set_clock(c, reference, slave->serial);
}

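pts_drift 存在的意义，是为了去掉编码的时间吗？从代码看并不是：set_clock_at 中 pts_drift = pts - time，保存的是时钟值与系统时间之间的偏移；get_clock 用 pts_drift 加上当前系统时间，就能推算出时钟此刻的值。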

image.png

外部时间钟

如果是以外部时间作为同步的话，上面两个都需要进行调整。

参考

ffplay音视频同步分析——视频同步音频