RT:
一直以来,对Siri的语音识别功能很感兴趣,但一直没有时间去研究。今天心血来潮,便找了点资料,自己动手试着做了一个简单的Demo,效果如下——
最终效果:
制作思路:
- 在Info.plist文件添加麦克风和语音识别权限描述
- 引入Speech语音识别框架
- 真机测试(必须真机)
Microphone Usage Description和Speech Recognition Usage Description使用意图描述,内容随便写!
将requiresOnDeviceRecognition属性设置为true后,识别仅在设备上进行,不需要访问服务器;通过服务器识别貌似有使用次数限制,而仅限于设备上的识别则没有!
if #available(iOS 13, *) {
// Set this property to true to keep SFSpeechRecognitionRequest from sending audio over the network.
// On-device requests will be less accurate.
recognitionRequest.requiresOnDeviceRecognition = true
}
并且可以识别本地音频,不过最好是将音频的时长限制在1分钟以内!
本地音频文件识别部分,我注释掉了,只留实时录制音频识别部分的代码,感兴趣可以取消注释测试看看效果!
/////////// 识别音频文件
/*============================================================================*/
// @objc private func recognizeBtnDidClick(_ sender: UIButton) {
// var info = ""
// sender.isSelected = !sender.isSelected
//
// if sender.isSelected {
// info = "正在识别···"
// print(info)
// sender.setTitle(info, for: .normal)
// sender.backgroundColor = .orange
// let path = Bundle.main.path(forResource: "Track 1_004", ofType: "wav")
// let url: NSURL = NSURL.init(fileURLWithPath: path!)
// recognizeFile(url: url)
//
// } else {
// info = "停止识别!"
// print(info)
// sender.setTitle(info, for: .normal)
// sender.backgroundColor = .blue
// }
// }
// // 音频文件识别
// func recognizeFile(url:NSURL) {
//
// guard let myRecognizer = SFSpeechRecognizer.init(locale: Locale.init(identifier: "zh-CN")) else { return }
//
// if !myRecognizer.isAvailable { return }
//
// let request = SFSpeechURLRecognitionRequest(url: url as URL)
// myRecognizer.recognitionTask(with: request) { (result, error) in
// guard let result = result else { return }
//
// self.textView.text = result.bestTranscription.formattedString
// if result.isFinal {
// print("Speech in the file is \(result.bestTranscription.formattedString)")
// self.textView.text = result.bestTranscription.formattedString
// }
// }
// }
/*============================================================================*/
官方也有相关的案例,感兴趣可以下载过来学习研究……(-)
完整代码:
//
// SpeechVC.swift
// UIKit-basic
//
// Created by Qire_er on 2022/1/16.
//
import UIKit
import Speech
/// A view controller that records microphone audio and shows the live
/// speech-to-text transcription (zh-CN) in a text view.
///
/// The single button toggles between starting and stopping a recording. It is
/// disabled until the user has authorized speech recognition.
class SpeechVC: UIViewController {
    var textView: UITextView!    // Displays the recognized text
    var recognizeBtn: UIButton!  // Record / stop button

    // MARK: Speech recognition objects

    // Speech recognizer bound to the zh-CN locale (failable initializer, force-unwrapped).
    private let speechRecognizer = SFSpeechRecognizer(locale: Locale(identifier: "zh-CN"))!
    // The request that receives the captured audio buffers.
    private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
    // The running recognition task; kept so that it can be cancelled.
    private var recognitionTask: SFSpeechRecognitionTask?
    // Audio engine that provides the microphone input.
    private let audioEngine = AVAudioEngine()

    // MARK: Button colors

    private let recodingBG: UIColor = .red          // Background while recording
    private let enableBG: UIColor = .blue           // Background when the button is usable
    private let disableBG: UIColor = .systemGray3   // Background when the button is disabled

    // MARK: View life cycle

    /// Builds the UI: a vertical stack with the text view on top and the
    /// record button (fixed height of 80) below it.
    override func viewDidLoad() {
        super.viewDidLoad()

        let vStack = UIStackView()
        vStack.translatesAutoresizingMaskIntoConstraints = false
        vStack.axis = .vertical

        textView = UITextView()
        textView.font = .boldSystemFont(ofSize: 46)
        textView.backgroundColor = .systemGray5

        recognizeBtn = UIButton()
        recognizeBtn.setTitle("开始录制", for: .normal)
        recognizeBtn.setTitleColor(UIColor.gray, for: .disabled)
        recognizeBtn.addTarget(self, action: #selector(recordButtonTapped), for: .touchUpInside)
        recognizeBtn.isEnabled = false // Disabled until authorization is granted

        vStack.addArrangedSubview(textView)
        vStack.addArrangedSubview(recognizeBtn)
        view.addSubview(vStack)
        view.backgroundColor = .white

        NSLayoutConstraint.activate([
            vStack.leftAnchor.constraint(equalTo: view.leftAnchor, constant: 15),
            vStack.rightAnchor.constraint(equalTo: view.rightAnchor, constant: -15),
            vStack.topAnchor.constraint(equalTo: view.safeAreaLayoutGuide.topAnchor, constant: 15),
            vStack.bottomAnchor.constraint(equalTo: view.safeAreaLayoutGuide.bottomAnchor, constant: -15),
            recognizeBtn.heightAnchor.constraint(equalToConstant: 80)
        ])
    }

    /// Registers as the recognizer's delegate and requests speech-recognition
    /// authorization, updating the button according to the resulting status.
    override func viewDidAppear(_ animated: Bool) {
        super.viewDidAppear(animated)
        speechRecognizer.delegate = self

        // MARK: Request speech recognition authorization
        // `self` is captured weakly so a pending authorization prompt does not
        // keep the view controller alive.
        SFSpeechRecognizer.requestAuthorization { [weak self] status in
            print("status = \(status.rawValue)")
            // UI updates are dispatched to the main queue.
            OperationQueue.main.addOperation {
                guard let self = self else { return }
                switch status {
                case .authorized: // The user granted access
                    self.recognizeBtn.isEnabled = true
                    self.recognizeBtn.backgroundColor = self.enableBG
                case .notDetermined: // The user has not decided yet
                    self.disableRecognizeButton(title: "语音识别未经授权!")
                case .denied: // The user denied access
                    self.disableRecognizeButton(title: "用户拒绝访问语音识别!")
                case .restricted: // Speech recognition is restricted on this device
                    self.disableRecognizeButton(title: "语音识别不支持此设备!")
                @unknown default: // Statuses added by future SDKs
                    self.disableRecognizeButton(title: nil)
                }
            }
        }
    }

    /// Disables the record button, gives it the disabled background color and,
    /// when `title` is non-nil, sets that title for the disabled state.
    private func disableRecognizeButton(title: String?) {
        recognizeBtn.isEnabled = false
        if let title = title {
            recognizeBtn.setTitle(title, for: .disabled)
        }
        recognizeBtn.backgroundColor = disableBG
    }

    // MARK: Recording

    /// Starts capturing microphone audio and streaming it into a new speech
    /// recognition task.
    ///
    /// - Throws: Any error raised while configuring or activating the audio
    ///   session, or while starting the audio engine. When the engine fails to
    ///   start, the tap, request and task created here are torn down again so
    ///   that a later attempt starts from a clean state.
    private func startRecording() throws {
        // Cancel the previous recognition task, if any.
        recognitionTask?.cancel()
        recognitionTask = nil

        // Configure and activate the app's audio session.
        let audioSession = AVAudioSession.sharedInstance()
        try audioSession.setCategory(.record, mode: .measurement, options: .duckOthers)
        try audioSession.setActive(true, options: .notifyOthersOnDeactivation)

        let inputNode = audioEngine.inputNode

        // Create and configure the speech recognition request.
        let request = SFSpeechAudioBufferRecognitionRequest()
        recognitionRequest = request

        // Deliver partial results while audio is still being recorded.
        request.shouldReportPartialResults = true

        // Keep the audio on the device, but only when the recognizer actually
        // supports on-device recognition; otherwise fall back to the default.
        if #available(iOS 13, *), speechRecognizer.supportsOnDeviceRecognition {
            request.requiresOnDeviceRecognition = true
        }

        // Create the recognition task and keep a reference so it can be cancelled.
        recognitionTask = speechRecognizer.recognitionTask(with: request) { [weak self] result, error in
            // Ignore callbacks that belong to a request which is no longer the
            // current one (e.g. a task that was cancelled and replaced).
            guard let self = self, self.recognitionRequest === request else { return }

            var isFinal = false
            if let result = result {
                // Show the best transcription so far.
                self.textView.text = result.bestTranscription.formattedString
                isFinal = result.isFinal
                print("【识别内容】\(result.bestTranscription.formattedString)")
            }

            if error != nil || isFinal {
                // Recognition ended (finished or failed): stop capturing audio
                // and return the button to its idle state.
                self.audioEngine.stop()
                inputNode.removeTap(onBus: 0)
                self.recognitionRequest = nil
                self.recognitionTask = nil
                self.recognizeBtn.isEnabled = true
                self.recognizeBtn.setTitle("开始录制", for: [])
                self.recognizeBtn.backgroundColor = self.enableBG
            }
        }

        // Configure the microphone input. Any tap left over from an earlier,
        // incompletely cleaned-up session is removed first, because installing
        // a second tap on the same bus is an error.
        inputNode.removeTap(onBus: 0)
        let recordingFormat = inputNode.outputFormat(forBus: 0)
        // The tap block captures the request itself (weakly) instead of going
        // through `self`, so it neither retains the view controller nor reads
        // its mutable property from the tap callback.
        inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { [weak request] buffer, _ in
            request?.append(buffer)
        }

        audioEngine.prepare()
        do {
            try audioEngine.start()
        } catch {
            // Undo everything set up above before reporting the failure.
            // The request is cleared first so the cancelled task's handler is ignored.
            inputNode.removeTap(onBus: 0)
            recognitionRequest = nil
            recognitionTask?.cancel()
            recognitionTask = nil
            throw error
        }

        // Prompt shown in the text view once recording has started.
        textView.text = "点击【开始录制】···"
    }

    /// Button handler: stops the current recording when the audio engine is
    /// running, otherwise starts a new one.
    @objc private func recordButtonTapped() {
        if audioEngine.isRunning {
            audioEngine.stop()
            recognitionRequest?.endAudio()
            // Stay disabled until the recognition handler reports the final result.
            recognizeBtn.isEnabled = false
            recognizeBtn.setTitle("停止录制", for: .disabled)
        } else {
            do {
                try startRecording()
                recognizeBtn.setTitle("停止录制", for: [])
                recognizeBtn.backgroundColor = recodingBG
            } catch {
                // Log the cause instead of silently discarding it.
                print("【录制失败】\(error)")
                recognizeBtn.setTitle("录音不可用!", for: [])
                recognizeBtn.backgroundColor = self.disableBG
            }
        }
    }

    /////////// Recognizing an audio file (disabled sample code)
    /*============================================================================*/
    //    @objc private func recognizeBtnDidClick(_ sender: UIButton) {
    //        var info = ""
    //        sender.isSelected = !sender.isSelected
    //
    //        if sender.isSelected {
    //            info = "正在识别···"
    //            print(info)
    //            sender.setTitle(info, for: .normal)
    //            sender.backgroundColor = .orange
    //            let path = Bundle.main.path(forResource: "Track 1_004", ofType: "wav")
    //            let url: NSURL = NSURL.init(fileURLWithPath: path!)
    //            recognizeFile(url: url)
    //
    //        } else {
    //            info = "停止识别!"
    //            print(info)
    //            sender.setTitle(info, for: .normal)
    //            sender.backgroundColor = .blue
    //        }
    //    }
    //    // Audio file recognition
    //    func recognizeFile(url:NSURL) {
    //
    //        guard let myRecognizer = SFSpeechRecognizer.init(locale: Locale.init(identifier: "zh-CN")) else { return }
    //
    //        if !myRecognizer.isAvailable { return }
    //
    //        let request = SFSpeechURLRecognitionRequest(url: url as URL)
    //        myRecognizer.recognitionTask(with: request) { (result, error) in
    //            guard let result = result else { return }
    //
    //            self.textView.text = result.bestTranscription.formattedString
    //            if result.isFinal {
    //                print("Speech in the file is \(result.bestTranscription.formattedString)")
    //                self.textView.text = result.bestTranscription.formattedString
    //            }
    //        }
    //    }
    /*============================================================================*/
}
// MARK: SFSpeechRecognizerDelegate
extension SpeechVC: SFSpeechRecognizerDelegate {
    /// Called when the availability of the speech recognizer changes.
    ///
    /// The record button's enabled state follows `available`, and the button
    /// title is updated to match.
    ///
    /// - Parameters:
    ///   - speechRecognizer: The recognizer whose availability changed.
    ///   - available: `true` when the recognizer can be used.
    public func speechRecognizer(_ speechRecognizer: SFSpeechRecognizer, availabilityDidChange available: Bool) {
        recognizeBtn.isEnabled = available

        guard available else {
            // Unavailable: the title is set for the disabled state.
            recognizeBtn.setTitle("语言识别不可用!", for: .disabled)
            return
        }

        // Available again: the title is set for the normal state.
        recognizeBtn.setTitle("开始录制···", for: [])
    }
}
控制台也有相关信息的输出!感觉还是挺强大,值得好好研究……
(==完==)
ps: 以上仅代表个人浅见,如果你有什么高见,也欢迎讨论交流!-