本文介绍iOS实时语音双向对讲(语音通话)功能:
(一)实时采集PCM并编码AAC
(二)RTSP+RTP协议实时传输
(三)FFmpeg实时解码AAC并播放PCM
第一篇介绍使用
<AVFoundation/AVFoundation.h>
中的AVCaptureSession
进行音频的实时采集,输出PCM数据;再使用<AudioToolbox/AudioToolbox.h>
中的AudioConverterRef
将采集到的PCM进行编码转换,输出AAC。
具体过程如下:
1.采集
初始化AVCaptureSession并设置相关配置
/// Designated initializer: builds the AVCaptureSession and records the preset.
/// @param preset The capture preset to apply.
/// @return An initialized instance, or nil if the superclass initializer fails.
- (instancetype)initCaptureWithPreset:(CapturePreset)preset {
    // Bug fix: the original called [super init] without assigning the result
    // back to self, so the method could return an object that was never the
    // one [super init] produced. Standard Cocoa init pattern below.
    self = [super init];
    if (self) {
        [self initAVcaptureSession];
        _definePreset = preset;
    }
    return self;
}
/// Creates and configures the capture session: default microphone as input,
/// AVCaptureAudioDataOutput as output, with this object as the sample-buffer
/// delegate on a private serial queue.
- (void)initAVcaptureSession {
    // Create the session and wrap all configuration in a begin/commit pair
    // so the changes are applied atomically.
    _session = [[AVCaptureSession alloc] init];
    [_session beginConfiguration];

    // Grab the default microphone and wrap it in a capture input.
    NSError *error = nil;
    self.audioDevice = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeAudio];
    self.audioInput = [[AVCaptureDeviceInput alloc] initWithDevice:self.audioDevice error:&error];
    // Check the returned object, not the error pointer: Cocoa only guarantees
    // *error is meaningful when the call actually fails.
    if (!self.audioInput) {
        NSLog(@"录音设备出错: %@", error);
    }
    // Attach the audio input to the session.
    if ([self.session canAddInput:self.audioInput]) {
        [self.session addInput:self.audioInput];
    }

    // Attach the PCM data output.
    self.audioOutput = [[AVCaptureAudioDataOutput alloc] init];
    if ([self.session canAddOutput:self.audioOutput]) {
        [self.session addOutput:self.audioOutput];
    }

    // Bug fix: the original used a global *concurrent* queue. The docs for
    // -setSampleBufferDelegate:queue: require a serial queue so sample
    // buffers are delivered one at a time and in order; a concurrent queue
    // can invoke the callback concurrently and out of order.
    dispatch_queue_t audioQueue = dispatch_queue_create("com.talk.audioCaptureQueue", DISPATCH_QUEUE_SERIAL);
    [self.audioOutput setSampleBufferDelegate:self queue:audioQueue];

    // Apply the whole configuration.
    [self.session commitConfiguration];
}
开始与结束采集
/// Starts the capture session; PCM sample buffers begin arriving on the
/// delegate queue. NOTE(review): -startRunning blocks the calling thread
/// until the session starts — presumably invoked off the main thread; confirm.
- (void)start {
[self.session startRunning];
}
/// Stops the capture session; no further sample buffers are delivered after
/// this returns. NOTE(review): -stopRunning is also synchronous — confirm it
/// is not called on the main thread in the shipping app.
- (void)stop {
[self.session stopRunning];
}
设置代理回调将PCM数据输出
/// Delegate protocol through which captured PCM audio is handed out.
@protocol PCMCaptureDelegate <NSObject>
/// Called once per captured audio sample buffer (raw PCM, not yet encoded).
- (void)audioWithSampleBuffer:(CMSampleBufferRef)sampleBuffer;
@end
//AVCaptureAudioDataOutputSampleBufferDelegate
// AVCaptureAudioDataOutputSampleBufferDelegate — forwards each raw PCM
// sample buffer from the audio output to our own delegate.
- (void)captureOutput:(AVCaptureOutput *)captureOutput didOutputSampleBuffer:(CMSampleBufferRef)sampleBuffer fromConnection:(AVCaptureConnection *)connection {
    // Ignore callbacks from anything other than the audio data output.
    if (captureOutput != self.audioOutput) {
        return;
    }
    // -respondsToSelector: on a nil delegate returns NO, so this single
    // check covers both "no delegate" and "delegate lacks the method".
    if ([self.delegate respondsToSelector:@selector(audioWithSampleBuffer:)]) {
        [self.delegate audioWithSampleBuffer:sampleBuffer];
    }
}
2.编码
创建转换器并设置相关属性
/// Lazily creates the PCM → AAC-LC AudioConverter from the format description
/// of the first captured sample buffer. Output: mono, 16 kHz, AAC-LC, 32 kbps,
/// produced by Apple's *software* encoder. On success the converter is stored
/// in self->convertContext; on failure convertContext is left NULL.
- (void)setUpConverter:(CMSampleBufferRef)sampleBuffer {
    // Input format: taken verbatim from the sample buffer's description.
    CMAudioFormatDescriptionRef audioFormatDes = (CMAudioFormatDescriptionRef)CMSampleBufferGetFormatDescription(sampleBuffer);
    AudioStreamBasicDescription inAudioStreamBasicDescription = *(CMAudioFormatDescriptionGetStreamBasicDescription(audioFormatDes));

    // Output format: AAC-LC, mono, 16 kHz. Bits/bytes per frame must be 0
    // for compressed formats.
    AudioStreamBasicDescription outAudioStreamBasicDescription = {0};
    outAudioStreamBasicDescription.mBitsPerChannel = 0;
    outAudioStreamBasicDescription.mBytesPerFrame = 0;
    outAudioStreamBasicDescription.mChannelsPerFrame = 1;
    outAudioStreamBasicDescription.mSampleRate = 16000;
    outAudioStreamBasicDescription.mFormatID = kAudioFormatMPEG4AAC;
    outAudioStreamBasicDescription.mFormatFlags = kMPEG4Object_AAC_LC;
    // Let CoreAudio fill in the remaining fields consistently.
    UInt32 size = sizeof(outAudioStreamBasicDescription);
    AudioFormatGetProperty(kAudioFormatProperty_FormatInfo, 0, NULL, &size, &outAudioStreamBasicDescription);

    // Enumerate installed AAC encoders and pick Apple's software codec.
    AudioClassDescription audioClassDes;
    memset(&audioClassDes, 0, sizeof(audioClassDes));
    UInt32 countSize = 0;
    AudioFormatGetPropertyInfo(kAudioFormatProperty_Encoders, sizeof(outAudioStreamBasicDescription.mFormatID), &outAudioStreamBasicDescription.mFormatID, &countSize);
    int encoderCount = countSize / sizeof(audioClassDes);
    AudioClassDescription descriptions[encoderCount];
    AudioFormatGetProperty(kAudioFormatProperty_Encoders, sizeof(outAudioStreamBasicDescription.mFormatID), &outAudioStreamBasicDescription.mFormatID, &countSize, descriptions);
    // Bug fix: the original loop was `for (i = 0; i < cout; cout++)` — it
    // incremented the *bound* instead of the index, so it either spun forever
    // or only ever examined descriptions[0]. Increment the index.
    for (int i = 0; i < encoderCount; i++) {
        AudioClassDescription temp = descriptions[i];
        if (temp.mManufacturer == kAppleSoftwareAudioCodecManufacturer // software encoder
            && temp.mSubType == outAudioStreamBasicDescription.mFormatID) {
            audioClassDes = temp;
            break;
        }
    }

    // Create the converter; only publish the context on success.
    ConverterContext *context = malloc(sizeof(ConverterContext));
    OSStatus result = AudioConverterNewSpecific(&inAudioStreamBasicDescription, &outAudioStreamBasicDescription, 1, &audioClassDes, &(context->converter));
    if (result == noErr) {
        self->convertContext = context;
        AudioConverterRef converter = context->converter;
        // Prefer quality over speed.
        UInt32 quality = kAudioConverterQuality_High;
        AudioConverterSetProperty(converter, kAudioConverterCodecQuality, sizeof(quality), &quality);
        // 32 kbps is ample for 16 kHz mono speech.
        UInt32 bitRate = 32000;
        result = AudioConverterSetProperty(converter, kAudioConverterEncodeBitRate, sizeof(bitRate), &bitRate);
        if (result != noErr) {
            NSLog(@"设置比特率失败");
        }
    } else {
        // Bug fix: the original stored `context` in self->convertContext
        // *before* this check and then freed it here, leaving a dangling
        // pointer that encodeSmapleBuffer: would dereference. Keep
        // convertContext NULL on failure instead.
        free(context);
        context = NULL;
        self->convertContext = NULL;
        NSLog(@"创建编解码器失败");
    }
}
编码samplebuffer数据
//编码samplebuffer数据
- (void)encodeSmapleBuffer:(CMSampleBufferRef)sampleBuffer {
if (!self->convertContext) {
[self setUpConverter:sampleBuffer];
}
ConverterContext *cxt = self->convertContext;
if (cxt && cxt->converter) {
//从samplebuffer中提取数据
CFRetain(sampleBuffer);
dispatch_async(encodeQueue, ^{
//从samplebuffer中获取blockbuffer
CMBlockBufferRef blockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer);
size_t pcmLength = 0;
char *pcmData = NULL;
//获取blockbuffer中的pcm数据的指针和长度
OSStatus status = CMBlockBufferGetDataPointer(blockBuffer, 0, NULL, &pcmLength, &pcmData);
if (status != noErr) {
NSLog(@"从block中获取pcm数据失败");
CFRelease(sampleBuffer);
return;
} else {
//在堆区分配内存用来保存编码后的aac数据
char *outputBuffer = malloc(pcmLength);
memset(outputBuffer, 0, pcmLength);
UInt32 packetSize = 1;
AudioStreamPacketDescription *outputPacketDes = (AudioStreamPacketDescription *)malloc(sizeof(AudioStreamPacketDescription) *packetSize);
//使用fillcomplexinputparm来保存pcm数据
FillComplexInputParm userParam;
userParam.source = pcmData;
userParam.sourceSize = (UInt32)pcmLength;
userParam.channelCount = 1;
userParam.packetDescription = NULL;
//在堆区创建audiobufferlist
AudioBufferList outputBufferList;
outputBufferList.mNumberBuffers = 1;
outputBufferList.mBuffers[0].mData = outputBuffer;
outputBufferList.mBuffers[0].mDataByteSize = (unsigned int)pcmLength;
outputBufferList.mBuffers[0].mNumberChannels = 1;
//编码
status = AudioConverterFillComplexBuffer(self->convertContext->converter, audioConverterComplexInputDataProc, &userParam, &packetSize, &outputBufferList, outputPacketDes);
free(outputPacketDes);
outputPacketDes = NULL;
if (status == noErr) {
// NSLog(@"编码成功");
//获取原始的aac数据
NSData *rawAAC = [NSData dataWithBytes:outputBufferList.mBuffers[0].mData length:outputBufferList.mBuffers[0].mDataByteSize];
free(outputBuffer);
outputBuffer = NULL;
//设置adts头
int headerLength = 0;
char *packetHeader = newAdtsDataForPacketLength((int)rawAAC.length, &headerLength);
NSData *adtsHeader = [NSData dataWithBytes:packetHeader length:headerLength];
free(packetHeader);
packetHeader = NULL;
NSMutableData *fullData = [NSMutableData dataWithData:adtsHeader];
[fullData appendData:rawAAC];
//设置私有头
char *privateHeader = newPrivate((int)fullData.length);
NSData *privateHeaderData = [NSData dataWithBytes:privateHeader length:24];
free(privateHeader);
privateHeader = NULL;
NSMutableData *pFullData = [NSMutableData dataWithData:privateHeaderData];
[pFullData appendData:fullData];
//设置rtp头
char *rtpHeader = newRTPForAAC();
NSData *rtpHeaderData = [NSData dataWithBytes:rtpHeader length:12];
free(rtpHeader);
rtpHeader = NULL;
NSMutableData *fullData1 = [NSMutableData dataWithData:rtpHeaderData];
[fullData1 appendData:pFullData];
//设置rtsp interleaved frame头
char *rtspFrameHeader = newRTSPInterleavedFrame((int)fullData1.length);
NSData *rtspFrameHeaderData = [NSData dataWithBytes:rtspFrameHeader length:4];
free(rtspFrameHeader);
rtspFrameHeader = NULL;
NSMutableData *fullData2 = [NSMutableData dataWithData:rtspFrameHeaderData];
[fullData2 appendData:fullData1];
//发送数据
[self.delegate sendData:fullData2];
fullData2 = nil;
fullData1 = nil;
fullData = nil;
rawAAC = nil;
}
free(outputBuffer);
CFRelease(sampleBuffer);
}
});
}
}
其中AudioConverterFillComplexBuffer
即是用于转换的函数,转换出来的AAC是raw data,需要添加固定字节(56bits)的ADTS头信息,用于描述音频的信息,便于解码器读取,关于ADTS的描述,可参考https://blog.csdn.net/jay100500/article/details/52955232,下面是添加ADTS的具体代码:
//给aac加上adts头, packetLength 为rawaac的长度
// Builds the fixed 7-byte ADTS header for a raw AAC frame of `packetLength`
// bytes. Hard-coded to AAC-LC (profile 2), sample-rate index 8 (16 kHz),
// mono (channel config 1). Caller frees the returned buffer.
// @param packetLength Length of the raw AAC payload in bytes.
// @param ioHeaderLen  Out: receives the header length (always 7).
// @return Heap-allocated 7-byte ADTS header.
char *newAdtsDataForPacketLength(int packetLength, int *ioHeaderLen) {
    static const int kAdtsHeaderLength = 7;
    const int aacProfile = 2;        // AAC LC
    const int sampleRateIndex = 8;   // index 8 == 16000 Hz
    const int channelConfig = 1;     // mono
    // ADTS frame length field counts the header itself plus the payload.
    NSUInteger frameLength = kAdtsHeaderLength + packetLength;

    char *header = malloc(sizeof(char) * kAdtsHeaderLength);
    header[0] = (char)0xFF;  // syncword 0xFFF (high 8 bits)
    header[1] = (char)0xF1;  // syncword low nibble, MPEG-4, layer 0, no CRC
    header[2] = (char)(((aacProfile - 1) << 6) + (sampleRateIndex << 2) + (channelConfig >> 2));
    header[3] = (char)(((channelConfig & 3) << 6) + (frameLength >> 11));
    header[4] = (char)((frameLength & 0x7FF) >> 3);
    header[5] = (char)(((frameLength & 7) << 5) + 0x1F);  // + buffer fullness bits
    header[6] = (char)0xFC;  // buffer fullness low bits, 1 AAC frame
    *ioHeaderLen = kAdtsHeaderLength;
    return header;
}
PS:这里作者除了添加ADTS头之外,还增加了私有头,所以不是标准的封装格式,可忽略
本文中使用RTSP+RTP协议将编码后的ADTS-AAC进行传输,所以还需要添加RTP头(96bits)以及RTSP Interleaved frame头(32bits)进行封装,均为固定字节,关于RTP的描述,可参考https://blog.csdn.net/qingkongyeyue/article/details/60590613,下面是RTP封装的具体代码:
//添加RTP头
// Returns a heap-allocated, fixed 12-byte RTP header. Caller frees it.
// NOTE: sequence number, timestamp, and SSRC are hard-coded placeholders —
// they are NOT maintained per packet as RFC 3550 requires (acknowledged by
// the surrounding article as non-standard framing).
char *newRTPForAAC() {
    static const unsigned char kRTPHeaderTemplate[12] = {
        0x80,                    // V=2, P=0, X=0, CC=0
        0x88,                    // M + payload type
        0x00, 0xDA,              // sequence number (fixed)
        0x00, 0x01, 0x98, 0xC0,  // timestamp (fixed)
        0x00, 0x00, 0x00, 0x00,  // SSRC (fixed)
    };
    char *packet = malloc(sizeof(kRTPHeaderTemplate));
    memcpy(packet, kRTPHeaderTemplate, sizeof(kRTPHeaderTemplate));
    return packet;
}
PS:由于作者不是标准的封装格式,所以RTP头中某些值并没有进行校验,所以也不够标准,这里需要大家按照RTP规则进行封装
RTSP Interleaved frame头只有4字节,首字符为"$",下面是具体代码:
// Builds the 4-byte RTSP interleaved-frame header (RFC 2326 §10.12):
// '$' magic, channel id, then the payload length as a 16-bit big-endian int.
// Caller frees the returned buffer.
// @param packetLength Payload length in bytes; must fit in 16 bits (0–65535).
char *newRTSPInterleavedFrame(int packetLength) {
    // Header is a fixed 4 bytes.
    int frameLen = 4;
    char *packet = malloc(sizeof(char) * frameLen);
    packet[0] = 0x24;  // '$' magic byte
    packet[1] = 0x00;  // channel 0
    // Bug fix: the original round-tripped the length through an NSString hex
    // representation ("%04llx" + substrings + strtoul), which silently
    // corrupted any length > 0xFFFF (5+ hex digits shift the substring split)
    // and allocated several temporary objects per packet. Plain bit shifts
    // produce the big-endian 16-bit length directly and identically for all
    // in-range values.
    packet[2] = (char)((packetLength >> 8) & 0xFF);
    packet[3] = (char)(packetLength & 0xFF);
    return packet;
}
最后,fullData2即是最后封装后的数据,也就是说,数据前面被增加了一层又一层的各种头信息,此时就可以拿来进行RTSP传输了。
Demo地址:https://github.com/XuningZhai/TalkDemo
支持G711的Demo地址:https://github.com/XuningZhai/TalkDemo_G711_AAC