This post records my notes and sample code from learning Apple's native speech recognition framework, the Speech Framework. After reading through the official documentation and getting to know the framework's relevant classes and methods, I summarized them in an XMind diagram.
Prerequisites: Xcode 8 or later and an iOS device running iOS 10 or later.
Classes and Methods in the Speech Framework
Note: Because speech recognition touches user privacy, two keys must be added to the Info.plist file: Privacy - Microphone Usage Description (microphone access) and Privacy - Speech Recognition Usage Description (speech recognition access).
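For reference, the raw key names behind those two editor entries are NSMicrophoneUsageDescription and NSSpeechRecognitionUsageDescription. Viewed as Info.plist source, the entries look roughly like this (the description strings below are placeholders; write your own user-facing explanations):

<key>NSMicrophoneUsageDescription</key>
<string>This app uses the microphone to capture your speech.</string>
<key>NSSpeechRecognitionUsageDescription</key>
<string>This app sends captured speech to Apple for recognition.</string>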
Swift code
import UIKit
import Speech

class ViewController: UIViewController {

    @IBOutlet weak var textView: UITextView!
    @IBOutlet weak var microphoneButton: UIButton!

    /// The speech recognizer that performs the actual recognition
    private let speechRecognizer = SFSpeechRecognizer()
    /// The recognition request that feeds audio input to the recognizer
    private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
    /// The recognition task that reports results; keeping a reference
    /// to it is handy because it lets you cancel or stop the task
    private var recognitionTask: SFSpeechRecognitionTask?
    /// The audio engine, responsible for providing the audio input
    private let audioEngine = AVAudioEngine()

    override func viewDidLoad() {
        super.viewDidLoad()
        // Do any additional setup after loading the view, typically from a nib.
        microphoneButton.isEnabled = false
        speechRecognizer?.delegate = self
        // Request the user's authorization for speech recognition
        SFSpeechRecognizer.requestAuthorization { (authStatus) in
            var isButtonEnabled = false
            switch authStatus {
            case .authorized: // The user granted speech recognition
                isButtonEnabled = true
            case .denied: // The user denied speech recognition
                isButtonEnabled = false
                print("User denied access to speech recognition")
            case .restricted: // Speech recognition is restricted on this device
                isButtonEnabled = false
                print("Speech recognition restricted on this device")
            case .notDetermined: // The user has not made a choice yet
                isButtonEnabled = false
                print("Speech recognition not yet authorized")
            }
            OperationQueue.main.addOperation {
                self.microphoneButton.isEnabled = isButtonEnabled
            }
        }
    }

    @IBAction func microphoneButtonClick(_ sender: UIButton) {
        if audioEngine.isRunning {
            audioEngine.stop()
            recognitionRequest?.endAudio()
            microphoneButton.isEnabled = false
            microphoneButton.setTitle("Start Recording", for: .normal)
        } else {
            startRecording()
            microphoneButton.setTitle("Stop Recording", for: .normal)
        }
    }

    func startRecording() {
        // If a recognition task is already running, cancel it before starting a new one
        if recognitionTask != nil {
            recognitionTask?.cancel()
            recognitionTask = nil
        }
        // Prepare the audio session for recording
        let audioSession = AVAudioSession.sharedInstance()
        do {
            try audioSession.setCategory(AVAudioSessionCategoryRecord)
            try audioSession.setMode(AVAudioSessionModeMeasurement)
            try audioSession.setActive(true, with: .notifyOthersOnDeactivation)
        } catch {
            print("audioSession properties weren't set because of an error.")
        }
        // Instantiate recognitionRequest; it streams the audio data to Apple's servers
        recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
        // Check that audioEngine (your device) has an input node for recording
        guard let inputNode = audioEngine.inputNode else {
            fatalError("Audio engine has no input node")
        }
        // Check that recognitionRequest was actually instantiated and is not nil
        guard let recognitionRequest = recognitionRequest else {
            fatalError("Unable to create an SFSpeechAudioBufferRecognitionRequest object")
        }
        // Have recognitionRequest report partial results while the user is still speaking
        recognitionRequest.shouldReportPartialResults = true
        // Start recognition. The handler is called whenever the recognizer receives
        // input, refines its current recognition, or is canceled or stopped, and it
        // eventually delivers a final transcription
        recognitionTask = speechRecognizer?.recognitionTask(with: recognitionRequest, resultHandler: { (result, error) in
            var isFinal = false // Tracks whether recognition has finished
            // If result is not nil, set textView.text to the best transcription;
            // if this is the final result, set isFinal to true
            if result != nil {
                self.textView.text = result?.bestTranscription.formattedString
                isFinal = (result?.isFinal)!
            }
            // If there was an error, or the result is final, stop the audioEngine
            // (audio input) and tear down recognitionRequest and recognitionTask
            if error != nil || isFinal {
                self.audioEngine.stop()
                inputNode.removeTap(onBus: 0)
                self.recognitionRequest = nil
                self.recognitionTask = nil
                self.microphoneButton.isEnabled = true
            }
        })
        // Add an audio input tap for recognitionRequest. It is fine to add audio input
        // after the recognitionTask has started; the Speech framework begins recognizing
        // as soon as audio is appended
        let recordingFormat = inputNode.outputFormat(forBus: 0)
        inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer, when) in
            self.recognitionRequest?.append(buffer)
        }
        // Prepare and start the audioEngine
        audioEngine.prepare()
        do {
            try audioEngine.start()
        } catch {
            print("audioEngine couldn't start because of an error")
        }
        textView.text = "Say something, I'm listening!"
    }
}

extension ViewController: SFSpeechRecognizerDelegate {
    /// Called when the recognizer's availability changes
    func speechRecognizer(_ speechRecognizer: SFSpeechRecognizer, availabilityDidChange available: Bool) {
        if available {
            microphoneButton.isEnabled = true
        } else {
            microphoneButton.isEnabled = false
        }
    }
}
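One caveat about the code above: SFSpeechRecognizer() with no arguments uses the device's current locale, so recognition may fail for users speaking another language. The framework's SFSpeechRecognizer(locale:) initializer takes an explicit locale instead; a minimal sketch (the zh-CN identifier is just an example, so check supportedLocales() for what your SDK actually offers):

import Speech

// Pin the recognizer to Mandarin Chinese instead of the device locale;
// the initializer returns nil if the locale is not supported
let mandarinRecognizer = SFSpeechRecognizer(locale: Locale(identifier: "zh-CN"))

// supportedLocales() lists every locale the framework can recognize
print(SFSpeechRecognizer.supportedLocales())

// Check availability (e.g. network reachability) before starting a task
print(mandarinRecognizer?.isAvailable ?? false)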
Objective-C code
#import "ViewController.h"
#import <Speech/Speech.h>

@interface ViewController () <SFSpeechRecognizerDelegate>

@property (nonatomic, strong) SFSpeechRecognizer *speechRecognizer;
@property (nonatomic, strong) SFSpeechRecognitionTask *recognitionTask;
@property (nonatomic, strong) SFSpeechAudioBufferRecognitionRequest *recognitionRequest;
/// The audio engine
@property (nonatomic, strong) AVAudioEngine *audioEngine;
@property (weak, nonatomic) IBOutlet UITextView *textView;
@property (weak, nonatomic) IBOutlet UIButton *microphoneBtn;

@end

@implementation ViewController

- (void)dealloc {
    [self.recognitionTask cancel];
    self.recognitionTask = nil;
}

- (void)viewDidLoad {
    [super viewDidLoad];
    // Do any additional setup after loading the view, typically from a nib.
    self.view.backgroundColor = [UIColor whiteColor];
    NSLog(@"supportedLocales: %@", [SFSpeechRecognizer supportedLocales]);
    self.microphoneBtn.enabled = NO;
    // Create the speech recognizer and set its delegate
    self.speechRecognizer = [[SFSpeechRecognizer alloc] init];
    self.speechRecognizer.delegate = self;
    // Request the user's authorization
    [SFSpeechRecognizer requestAuthorization:^(SFSpeechRecognizerAuthorizationStatus status) {
        BOOL isButtonEnabled = NO;
        switch (status) {
            case SFSpeechRecognizerAuthorizationStatusNotDetermined:
                isButtonEnabled = NO;
                NSLog(@"SFSpeechRecognizerAuthorizationStatusNotDetermined");
                break;
            case SFSpeechRecognizerAuthorizationStatusDenied:
                isButtonEnabled = NO;
                NSLog(@"SFSpeechRecognizerAuthorizationStatusDenied");
                break;
            case SFSpeechRecognizerAuthorizationStatusRestricted:
                isButtonEnabled = NO;
                NSLog(@"SFSpeechRecognizerAuthorizationStatusRestricted");
                break;
            case SFSpeechRecognizerAuthorizationStatusAuthorized:
                NSLog(@"SFSpeechRecognizerAuthorizationStatusAuthorized");
                isButtonEnabled = YES;
                break;
            default:
                break;
        }
        dispatch_async(dispatch_get_main_queue(), ^{
            self.microphoneBtn.enabled = isButtonEnabled;
        });
    }];
    // Create the audio engine
    self.audioEngine = [[AVAudioEngine alloc] init];
}

- (IBAction)microphoneBtnClick:(UIButton *)sender {
    if (self.audioEngine.isRunning) {
        [self.audioEngine stop];
        [self.recognitionRequest endAudio];
        self.microphoneBtn.enabled = NO;
        [self.microphoneBtn setTitle:@"Start Recording" forState:UIControlStateNormal];
    } else {
        [self startRecording];
        [self.microphoneBtn setTitle:@"Stop Recording" forState:UIControlStateNormal];
    }
}

#pragma mark - private method
- (void)startRecording {
    if (self.recognitionTask != nil) {
        [self.recognitionTask cancel]; // Cancel any in-flight recognition task
        self.recognitionTask = nil;
    }
    AVAudioSession *audioSession = [AVAudioSession sharedInstance];
    NSError *categoryError = nil;
    if (![audioSession setCategory:AVAudioSessionCategoryRecord error:&categoryError]) {
        NSLog(@"categoryError: %@", categoryError.localizedDescription);
    }
    NSError *modeError = nil;
    if (![audioSession setMode:AVAudioSessionModeMeasurement error:&modeError]) {
        NSLog(@"modeError: %@", modeError.localizedDescription);
    }
    NSError *activeError = nil;
    if (![audioSession setActive:YES withOptions:AVAudioSessionSetActiveOptionNotifyOthersOnDeactivation error:&activeError]) {
        NSLog(@"activeError: %@", activeError.localizedDescription);
    }
    // Instantiate the request that recognizes live audio from the device's microphone
    self.recognitionRequest = [[SFSpeechAudioBufferRecognitionRequest alloc] init];
    if (!self.audioEngine.inputNode) { // The system's audio input node
        NSLog(@"Audio engine has no input node");
        return;
    }
    if (!self.recognitionRequest) {
        NSLog(@"Unable to create an SFSpeechAudioBufferRecognitionRequest object");
        return;
    }
    // Report partial (non-final) results for each utterance
    self.recognitionRequest.shouldReportPartialResults = YES;
    // Start the recognition task, with a handler invoked as results arrive
    self.recognitionTask = [self.speechRecognizer recognitionTaskWithRequest:self.recognitionRequest resultHandler:^(SFSpeechRecognitionResult * _Nullable result, NSError * _Nullable error) {
        BOOL isFinal = NO;
        if (result) {
            self.textView.text = result.bestTranscription.formattedString;
            isFinal = result.isFinal;
        }
        if (error || isFinal) {
            [self.audioEngine stop];
            [self.audioEngine.inputNode removeTapOnBus:0];
            self.recognitionRequest = nil;
            self.recognitionTask = nil;
            self.microphoneBtn.enabled = YES;
        }
    }];
    AVAudioFormat *recordingFormat = [self.audioEngine.inputNode outputFormatForBus:0];
    [self.audioEngine.inputNode installTapOnBus:0 bufferSize:1024 format:recordingFormat block:^(AVAudioPCMBuffer * _Nonnull buffer, AVAudioTime * _Nonnull when) {
        // Append the PCM audio buffer to the end of the recognition request
        [self.recognitionRequest appendAudioPCMBuffer:buffer];
    }];
    [self.audioEngine prepare];
    NSError *startError = nil;
    if (![self.audioEngine startAndReturnError:&startError]) {
        NSLog(@"startError: %@", startError.localizedDescription);
    }
    self.textView.text = @"Say something, I'm listening";
}

#pragma mark - SFSpeechRecognizerDelegate
- (void)speechRecognizer:(SFSpeechRecognizer *)speechRecognizer availabilityDidChange:(BOOL)available {
    if (available) {
        self.microphoneBtn.enabled = YES;
    } else {
        self.microphoneBtn.enabled = NO;
    }
}

@end
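Both versions above recognize live microphone input through SFSpeechAudioBufferRecognitionRequest. The framework also provides SFSpeechURLRecognitionRequest for transcribing a pre-recorded audio file; a minimal Swift sketch under the same authorization setup (the file URL is whatever recording you pass in):

import Speech

/// Transcribe a pre-recorded audio file and print the final transcription
func transcribeFile(at url: URL) {
    guard let recognizer = SFSpeechRecognizer(), recognizer.isAvailable else {
        print("Recognizer is unavailable")
        return
    }
    // A URL-based request reads audio from disk instead of a live buffer
    let request = SFSpeechURLRecognitionRequest(url: url)
    _ = recognizer.recognitionTask(with: request) { (result, error) in
        if let result = result, result.isFinal {
            print(result.bestTranscription.formattedString)
        }
    }
}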
References:
Building a Speech-to-Text App Using Speech Framework in iOS 10
SpeakToMe: Using Speech Recognition with AVAudioEngine