"Audio/video format" is an ambiguous term. The formats we all know, such as FLV, MP4, or MOV, are container formats: think of them as boxes. Inside the box sits the encoded audio and video data, and that data has its own codec format, such as AAC, H264, or H265.
This article walks through taking the encoded audio data obtained from a live stream, decoding it, and playing it back with the HTML5 Web Audio API.
The formats covered are:
- speex
- aac
- mp3
All of these formats have open-source decoding libraries, but they are C libraries; to run in HTML5 they have to be compiled to JavaScript with emscripten (a sample invocation is sketched below).
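A rough illustration of that build step; the file name, the -DUSE_AAC define, and the exported function names here are hypothetical, not the project's actual layout:

emcc audio_decoder.cpp -O2 -DUSE_AAC \
    -s EXPORTED_FUNCTIONS='["_initDecoder","_decodeAudio","_destroyDecoder"]' \
    -o decoder.js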
Include the headers
#ifdef USE_SPEEX
#include <speex/speex.h>
#endif
#ifdef USE_AAC
#include "aacDecoder/include/neaacdec.h"
// #include "libfdk-aac/libAACdec/include/aacdecoder_lib.h"
#endif
#ifdef USE_MP3
#include "libmad/mad.h"
//#include "libid3tag/tag.h"
#endif
Define the variables
int bufferLength;
int bufferFilled;
u8 *outputBuffer;
#ifdef USE_AAC
faacDecHandle faacHandle;
#endif
#ifdef USE_SPEEX
i16 *audioOutput;
void *speexState;
SpeexBits speexBits;
#endif
#ifdef USE_MP3
MP3Decoder mp3Decoder;
#endif
bufferLength specifies the length of the buffer, bufferFilled tracks how much data in the buffer has not been consumed yet, and outputBuffer holds the decoded data.
MP3Decoder is a class I wrote myself; it needs the following members (a fuller sketch follows right after them):
mad_stream inputStream;
mad_frame frame;
mad_synth synth;
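A minimal sketch of what such a class could look like, assuming the libmad/mad.h include from above; the init/release method names are illustrative, not necessarily what the project uses:

class MP3Decoder
{
public:
    mad_stream inputStream; // raw MP3 byte stream handed to libmad
    mad_frame frame;        // one decoded MPEG frame
    mad_synth synth;        // PCM synthesis state for the decoded frame

    void init()
    {
        mad_stream_init(&inputStream);
        mad_frame_init(&frame);
        mad_synth_init(&synth);
    }

    void release()
    {
        mad_synth_finish(&synth);
        mad_frame_finish(&frame);
        mad_stream_finish(&inputStream);
    }
};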
Initialization
outputBuffer = (u8 *)malloc(bufferLength);
#ifdef USE_SPEEX
audioOutput = (i16 *)malloc(640); // 320 samples * 2 bytes = one 20 ms wideband speex frame
auto mode = speex_lib_get_mode(SPEEX_MODEID_WB);
speexState = speex_decoder_init(mode);
speex_bits_init(&speexBits);
#endif
#ifdef USE_AAC
faacHandle = faacDecOpen();
#endif
And the MP3 initialization:
mad_stream_init(&inputStream);
mad_frame_init(&frame);
mad_synth_init(&synth);
Decoding
The input object holds the raw audio data after protocol-level unpacking (as carried in the RTMP protocol or the FLV format). The buffer size is your own choice, but it must follow these rules:
- aac: a multiple of 1024 samples (one AAC frame plays for 1024 * 1000 / 44100 ≈ 23.2 ms at 44.1 kHz)
- speex: a multiple of 320 samples (320 * 1000 / 16000 = 20 ms at 16 kHz)
- mp3: a multiple of 576 samples per channel (stereo: 1152 * 1000 / 44100 ≈ 26.12 ms at 44.1 kHz)
From these numbers you can estimate the audio latency introduced by the buffer size, which then has to be synchronized against the video delay; a quick sketch of the arithmetic follows.
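A minimal sketch of that latency estimate, assuming bufferLength counts samples per channel (the variable roles here are illustrative):

// One AAC frame is 1024 samples; at 44100 Hz that is 1024 * 1000 / 44100 ≈ 23.2 ms,
// so buffering N frames adds roughly N * 23.2 ms of audio latency.
int samplesPerFrame = 1024;   // AAC; use 320 for speex, 576 per channel for MP3
int sampleRate = 44100;       // use 16000 for speex
int framesBuffered = bufferLength / samplesPerFrame;
double latencyMs = (double)framesBuffered * samplesPerFrame * 1000.0 / sampleRate;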
#ifdef USE_SPEEX
// 640 bytes = 320 samples * 2 bytes: one 20 ms wideband (16 kHz) speex frame
if (input.length() <= 11)
{
    // Too short to carry audio; output silence
    memset(output, 0, 640);
}
else
{
    // This stream carries 52-byte encoded speex frames
    speex_bits_read_from(&speexBits, (const char *)input.point(), 52);
    speex_decode_int(speexState, &speexBits, audioOutput);
    memcpy(output, audioOutput, 640);
}
return 640;
#endif
#ifdef USE_AAC
// First byte of the packet: 0 = AAC sequence header, 1 = AAC raw
if (input.readB<1, u8>())
{
    faacDecFrameInfo frame_info;
    auto pcm_data = faacDecDecode(faacHandle, &frame_info, (unsigned char *)input.point(), input.length());
    if (frame_info.error > 0)
    {
        emscripten_log(1, "!!%s\n", NeAACDecGetErrorMessage(frame_info.error));
    }
    else
    {
        // Samples are 16-bit, so the byte count is samples * 2
        int samplesBytes = frame_info.samples << 1;
        memcpy(output, pcm_data, samplesBytes);
        return samplesBytes;
    }
}
else
{
    // Sequence header: use it to initialize the decoder
    unsigned long samplerate;
    unsigned char channels;
    auto config = faacDecGetCurrentConfiguration(faacHandle);
    config->defObjectType = LTP;
    faacDecSetConfiguration(faacHandle, config);
    faacDecInit2(faacHandle, (unsigned char *)input.point(), 4, &samplerate, &channels);
    emscripten_log(0, "aac samplerate:%d channels:%d", samplerate, channels);
}
#endif
MP3 is more involved, so I will not paste the full code here. The main problem is that you cannot simply feed the mad library's API as-is, because MP3 data in a live stream is framed differently from an MP3 file. If this article proves popular I will cover it in detail; a rough sketch of the decode loop follows.
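A minimal sketch of a libmad decode loop, assuming the stream payloads have already been re-assembled into one contiguous buffer; buf and len are placeholders, and the real code needs extra buffering so that frames split across packets are stitched back together:

// Hand libmad the raw MP3 bytes, then pull frames out until it wants more data
mad_stream_buffer(&inputStream, buf, len);
for (;;)
{
    if (mad_frame_decode(&frame, &inputStream) != 0)
    {
        if (MAD_RECOVERABLE(inputStream.error))
            continue; // bad frame, skip it and keep going
        break;        // e.g. MAD_ERROR_BUFLEN: feed more data before calling again
    }
    // PCM lands in synth.pcm as mad_fixed_t samples; scale them to 16-bit here
    mad_synth_frame(&synth, &frame);
}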
Releasing resources
#ifdef USE_AAC
faacDecClose(faacHandle);
#endif
#ifdef USE_SPEEX
speex_decoder_destroy(speexState);
speex_bits_destroy(&speexBits);
free(audioOutput);
#endif
free(outputBuffer);
And for MP3:
mad_synth_finish(&synth);
mad_frame_finish(&frame);
mad_stream_finish(&inputStream);
Playback
Create the AudioContext object:
window.AudioContext = window.AudioContext || window.webkitAudioContext;
var context = new window.AudioContext();
Create the audioBuffer:
var audioBuffers = []
var audioBuffer = context.createBuffer(channels, frameCount, samplerate);
Play the audio (with buffering):
var playNextBuffer = function() {
    isPlaying = false;
    // Play the next queued chunk, if any
    if (audioBuffers.length) {
        playAudio(audioBuffers.shift());
    }
    // If the queue is still backed up, drop a chunk to bound the latency
    if (audioBuffers.length > 1) audioBuffers.shift();
    //console.log(audioBuffers.length)
};
var copyAudioOutputArray = resampled ? function(target) {
    // Below 22050 Hz: duplicate every sample to double the effective sample rate
    for (var i = 0; i < allFrameCount; i++) {
        var j = i << 1;
        target[j] = target[j + 1] = audioOutputArray[i] / 32768;
    }
} : function(target) {
    // Convert 16-bit integer PCM to floats in [-1, 1)
    for (var i = 0; i < allFrameCount; i++) {
        target[i] = audioOutputArray[i] / 32768;
    }
};
var copyToCtxBuffer = channels > 1 ? function(fromBuffer) {
    // De-interleave: frame i of channel c sits at index i * channels + c
    for (var channel = 0; channel < channels; channel++) {
        var nowBuffering = audioBuffer.getChannelData(channel);
        if (fromBuffer) {
            for (var i = 0; i < frameCount; i++) {
                nowBuffering[i] = fromBuffer[i * channels + channel];
            }
        } else {
            for (var i = 0; i < frameCount; i++) {
                nowBuffering[i] = audioOutputArray[i * channels + channel] / 32768;
            }
        }
    }
} : function(fromBuffer) {
    var nowBuffering = audioBuffer.getChannelData(0);
    if (fromBuffer) nowBuffering.set(fromBuffer);
    else copyAudioOutputArray(nowBuffering);
};
var playAudio = function(fromBuffer) {
    if (isPlaying) {
        // Already playing: convert the chunk to floats and queue it for later
        var buffer = new Float32Array(resampled ? allFrameCount * 2 : allFrameCount);
        copyAudioOutputArray(buffer);
        audioBuffers.push(buffer);
        return;
    }
    isPlaying = true;
    copyToCtxBuffer(fromBuffer);
    var source = context.createBufferSource();
    source.buffer = audioBuffer;
    source.connect(context.destination);
    source.onended = playNextBuffer;
    //setTimeout(playNextBuffer, audioBufferTime-audioBuffers.length*200);
    source.start();
};
The playNextBuffer function pulls the next chunk of data out of the queue.
The copyAudioOutputArray function converts the audio samples to floating point.
The copyToCtxBuffer function copies the audio data into the buffer that gets played.
These functions handle both mono and stereo streams.
var resampled = samplerate < 22050;
For data below 22050 Hz (the speex stream here is 16 kHz) we duplicate every sample, simulating twice the rate, because the browsers targeted here do not accept AudioBuffer sample rates below 22.05 kHz.