一、在.plist文件中添加麦克风、语音识别权限说明
二、代码实现
//
// LKSpeechRecognizer.swift
// Comic
//
// Created by 李棒棒 on 2024/1/5.
//
import UIKit
import Foundation
import Speech
import AVFoundation
/// Compile-time flag: `true` only when built for the iOS Simulator.
var IS_Simulator: Bool {
    #if targetEnvironment(simulator)
    true
    #else
    false
    #endif
}
/// Lifecycle states reported through `LKSpeechRecognizerResult`.
enum LKSpeechRecognizerStatus:Int {
///Not started
case none
///Microphone / speech-recognition permission not granted
case noAuthorize
///Recognition in progress (partial results arriving)
case recognizing
///Recognition finished normally (final result delivered)
case recognizeFinished
///Recognition closed externally via `closeRecordSpeech()`
case recognizeClose
///Recognition stopped after exceeding the silence interval (default: 3s)
case recognizeMuteTimeout
///Recognition failed with an error
case recognizeError
}
/// Wraps `SFSpeechRecognizer` + `AVAudioEngine` into a callback-driven
/// speech-to-text session with automatic silence timeout.
class LKSpeechRecognizer: NSObject {
    /// Shared singleton instance.
    static let share = LKSpeechRecognizer()

    /// Result callback. `status`: current state; `baseText`: raw recognized
    /// text; `speechText`: text after post-processing; `error`: failure, if any.
    typealias LKSpeechRecognizerResult = (_ status: LKSpeechRecognizerStatus,
                                          _ baseText: String?,
                                          _ speechText: String?,
                                          _ error: Error?) -> Void

    private var recognizerResult: LKSpeechRecognizerResult? = nil
    private var bestText: String? = ""
    private var speakText: String? = ""
    /// Silence interval before the session auto-finishes (seconds). Default: 3.
    var muteTime: TimeInterval = 3.0
    var recognizerStatus: LKSpeechRecognizerStatus = .none
    private var timer: Timer? = nil
    private var isHaveInput: Bool = false
    private var speechTask: SFSpeechRecognitionTask?
    // Streaming recognition request fed from the engine's input tap.
    private var speechRequest: SFSpeechAudioBufferRecognitionRequest?
    // Recognizer pinned to Simplified Chinese; force-unwrap mirrors the
    // original (zh_CN is always a supported locale on iOS).
    private var speechRecognizer: SFSpeechRecognizer = SFSpeechRecognizer(locale: Locale(identifier: "zh_CN"))!
    private var audioEngine: AVAudioEngine = AVAudioEngine()

    override init() {
        super.init()
        speechRecognizer.delegate = self
        // Authorization request on device only; the simulator is unsupported.
        if IS_Simulator {
            print("模拟器不支持")
        } else {
            //checkAuthorized()
        }
    }
}
//MARK: -
//MARK: - Recording / recognition control
extension LKSpeechRecognizer {
    /// Entry point: requests authorization, then starts recording + recognition.
    func startRecordSpeech() {
        bestText = nil
        speakText = nil
        // Ask for permission before touching the audio engine.
        requestSpeechAuthorization { [weak self] authorizeStatus in
            guard let self = self else { return }
            if authorizeStatus == false { // user denied access
                self.recognizerStatus = .noAuthorize
                self.recognizerResult?(.noAuthorize, nil, nil, nil)
                return
            }
            self.requestRecordSpeech()
        }
    }

    /// Configures the AVAudioSession, installs a tap on the engine's input
    /// node, and starts an `SFSpeechRecognitionTask` streaming partial results.
    func requestRecordSpeech() {
        // Tear down any task left over from a previous session.
        if speechTask != nil {
            speechTask?.cancel()
            speechTask = nil
        }
        bestText = nil
        speakText = nil
        isHaveInput = false // fresh session: nothing heard yet

        // AVAudioSession manages audio configuration / hardware interaction.
        let audioSession = AVAudioSession.sharedInstance()
        do {
            try audioSession.setCategory(AVAudioSession.Category.record)
            try audioSession.setMode(AVAudioSession.Mode.measurement)
            try audioSession.setActive(true, options: AVAudioSession.SetActiveOptions.notifyOthersOnDeactivation)
        } catch let error {
            print("audioSession properties weren't set because of an error:\(error.localizedDescription)")
            recognizerStatus = .recognizeError
            recognizerResult?(.recognizeError, bestText, speakText, error)
            return
        }

        speechRequest = SFSpeechAudioBufferRecognitionRequest()
        speechRequest?.contextualStrings = ["data","bank","databank"]
        speechRequest?.taskHint = .search
        // Stream partial results so the UI can update while the user speaks.
        // (The original assigned false and then true; true was the effective value.)
        speechRequest?.shouldReportPartialResults = true

        speechTask = speechRecognizer.recognitionTask(with: speechRequest!, resultHandler: { [weak self] (result, error) in
            guard let self = self else { return }
            let isFinished = result?.isFinal ?? false
            if let result = result { // audio produced a (partial) transcription
                self.isHaveInput = true
                let bestString = result.bestTranscription.formattedString
                print("bestString:\(bestString)")
                self.bestText = bestString
                self.speakText = bestString
                self.recognizerStatus = .recognizing
                self.recognizerResult?(.recognizing, self.bestText, self.speakText, nil)
                // Restart the silence timer: if no further speech arrives
                // within `muteTime` seconds the session finishes automatically.
                self.startDetectionSpeech()
            }
            if error != nil || isFinished {
                self.audioEngine.stop()
                self.speechRequest?.endAudio()
                self.speechTask?.cancel()
                if self.audioEngine.inputNode.numberOfInputs > 0 {
                    self.audioEngine.inputNode.removeTap(onBus: 0)
                }
                if isFinished { // recognizer delivered its final result
                    self.recognizerStatus = .recognizeFinished
                    self.recognizerResult?(.recognizeFinished, self.bestText, self.speakText, nil)
                    print("转换结束了")
                }
                if let error = error { // recognition failed
                    // A mute-timeout already reported its own terminal status;
                    // don't override it with the cancellation error it causes.
                    if self.recognizerStatus != .recognizeMuteTimeout {
                        self.recognizerStatus = .recognizeError
                        self.recognizerResult?(.recognizeError, self.bestText, self.speakText, error)
                    }
                }
            }
        })

        // Feed microphone buffers into the recognition request.
        // (`audioEngine.inputNode` is non-optional; the original's optional
        // wrapping was redundant.)
        let inputNode = audioEngine.inputNode
        let format = inputNode.outputFormat(forBus: 0)
        inputNode.installTap(onBus: 0, bufferSize: 400, format: format, block: { [weak self] buffer, when in
            guard let self = self else { return }
            if let speechRequest = self.speechRequest {
                speechRequest.append(buffer)
                // Raw audio keeps arriving even in silence; only a recognition
                // result flips this back to true, letting the silence timer fire.
                self.isHaveInput = false
            }
        })

        audioEngine.prepare()
        do {
            try audioEngine.start()
        } catch let error {
            print("audioEngine couldn't start because of an error:\(error.localizedDescription)")
            // Report the failure to the caller instead of failing silently
            // (the original only printed here).
            recognizerStatus = .recognizeError
            recognizerResult?(.recognizeError, bestText, speakText, error)
        }
    }

    //MARK: - Close recording/recognition (external stop)
    func closeRecordSpeech() {
        stopDetectionSpeech()
        if audioEngine.inputNode.numberOfInputs > 0 {
            audioEngine.inputNode.removeTap(onBus: 0)
        }
        audioEngine.stop()
        audioEngine.reset()
        speechRequest?.endAudio()
        speechTask?.cancel()
        //speechTask?.finish()
        recognizerStatus = .recognizeClose
        recognizerResult?(.recognizeClose, bestText, speakText, nil)
        print("录音关闭")
    }

    //MARK: - Register the status/result callback
    func recognizerResult(_ completion: LKSpeechRecognizerResult?) {
        recognizerResult = completion
    }
}
//MARK: - 静音监测
//MARK: - Silence detection
extension LKSpeechRecognizer {
    /// (Re)arms the one-shot silence timer; fires after `muteTime` seconds.
    private func startDetectionSpeech() {
        // Invalidate any timer already running before scheduling a new one.
        if let timer = timer, timer.isValid {
            timer.invalidate()
        }
        NSLog("开始计时检测")
        let silenceTimer = Timer.scheduledTimer(timeInterval: muteTime,
                                                target: self,
                                                selector: #selector(self.didFinishSpeech),
                                                userInfo: nil,
                                                repeats: false)
        // Common mode keeps the timer firing during scroll/tracking run loops.
        RunLoop.main.add(silenceTimer, forMode: .common)
        timer = silenceTimer
    }

    /// Cancels the silence timer if it is active.
    private func stopDetectionSpeech() {
        guard timer != nil else { return }
        timer?.invalidate()
        timer = nil
        NSLog("结束计时检测")
    }

    /// Timer callback: if no recognition result arrived during the interval,
    /// tear down the audio pipeline and report `.recognizeMuteTimeout`.
    @objc private func didFinishSpeech() {
        guard isHaveInput == false else { return }
        print("检测到\(muteTime)s内没有说话")
        stopDetectionSpeech()
        if audioEngine.inputNode.numberOfInputs > 0 {
            audioEngine.inputNode.removeTap(onBus: 0)
        }
        audioEngine.stop()
        audioEngine.reset()
        speechRequest?.endAudio()
        speechTask?.cancel()
        recognizerStatus = .recognizeMuteTimeout
        recognizerResult?(.recognizeMuteTimeout, bestText, speakText, nil)
    }
}
//MARK: - SFSpeechRecognizerDelegate
//MARK: - SFSpeechRecognizerDelegate
extension LKSpeechRecognizer: SFSpeechRecognizerDelegate {
// Called when the recognizer's availability changes (e.g. network loss);
// currently only logged — no state transition is triggered here.
func speechRecognizer(_ speechRecognizer: SFSpeechRecognizer, availabilityDidChange available: Bool) {
print("音频转化发生变化:\(speechRecognizer) - \(available)")
}
}
//申请权限
//MARK: - Authorization
extension LKSpeechRecognizer {
    //MARK: Check permission (fire-and-forget)
    func checkAuthorized() {
        requestSpeechAuthorization { _ in }
    }

    //MARK: - Request speech-recognition permission
    /// Requests speech-recognition authorization; `authorize` is always invoked
    /// on the main queue with `true` only when access was granted.
    func requestSpeechAuthorization(authorize: @escaping (Bool) -> Void) {
        if IS_Simulator == true {
            authorize(false)
            print("模拟器不支持")
            return
        }
        // `requestAuthorization` is already asynchronous, so the original
        // DispatchQueue.global().async wrapper was unnecessary. Also note:
        // no [weak self] here — the original guarded on weak self although
        // `self` was never used, which could silently swallow the `authorize`
        // callback if the recognizer deallocated mid-request.
        SFSpeechRecognizer.requestAuthorization { status in
            DispatchQueue.main.async {
                authorize(status == SFSpeechRecognizerAuthorizationStatus.authorized)
            }
            switch status {
            case .notDetermined:
                NSLog("Speech Recognizer Authorization Status-Not Determined")
            case .denied: // user declined — offer a shortcut to Settings
                NSLog("Speech Recognizer Authorization Status-Denied")
                DispatchQueue.main.async {
                    // Alert message had unbalanced escaped quotes in the original.
                    let alertController = UIAlertController(title: "无法访问语音权限",
                                                            message: "请在iPhone的\"设置-隐私-语音识别\"中允许访问麦克风、语音识别权限",
                                                            preferredStyle: .alert)
                    alertController.addAction(UIAlertAction(title: "取消", style: .cancel, handler: { alertAction in
                    }))
                    alertController.addAction(UIAlertAction(title: "设置", style: .default, handler: { alertAction in
                        // Avoid force-unwrap; openSettingsURLString should always parse.
                        if let settingsURL = URL(string: UIApplication.openSettingsURLString) {
                            UIApplication.shared.open(settingsURL)
                        }
                    }))
                    AppDelegate.currentViewController?.present(alertController, animated: true)
                }
            case .restricted:
                NSLog("Speech Recognizer Authorization Status-Restricted")
            case .authorized:
                NSLog("Authorized")
            @unknown default:
                NSLog("unknown")
            }
        }
    }
}
三、调用方法
开始转换
LKSpeechRecognizer.share.startRecordSpeech()
主动关闭
LKSpeechRecognizer.share.closeRecordSpeech()
转换结果回调
LKSpeechRecognizer.share.recognizerResult({ [weak self] (status,baseText,speechText,error) in
guard let self = self else { return }
print("结果:\(speechText ?? "")")
if status == .noAuthorize {
self.statusLab.text = "状态:用户麦克风未授权"
}else if status == .recognizeFinished || status == .recognizeMuteTimeout{
self.statusLab.text = "状态:您长时间未讲话,已停止识别"
}else if status == .recognizeClose {
self.statusLab.text = "状态:已手动停止识别"
}else if status == .recognizing {
self.statusLab.text = "请讲话……"
}else if status == .recognizeError {
self.statusLab.text = "状态error:\(error?.localizedDescription ?? "")"
}
self.textView.text = speechText
})