我正在尝试使用适用于 iOS 的 Microsoft 认知服务语音转文本 SDK。到目前为止,我已经让 recognizeOnce 函数顺利运行,但 Swift 的示例代码很少,我始终无法用 Swift 实现连续识别。请注意,我平时主要使用 React Native/JS,对 Swift 只有非常基础的了解。尽管如此,任何帮助我都将不胜感激。这是我目前的代码:
/// Starts continuous speech recognition on the default microphone.
///
/// Intermediate results, final results and the session-stopped event are
/// printed to the console. If the speech configuration, the audio
/// configuration or the recognizer cannot be created, the error is printed and
/// the function returns without starting recognition (instead of crashing on a
/// force unwrap).
func recognizeFromMic() {
    let speechConfig: SPXSpeechConfiguration
    do {
        speechConfig = try SPXSpeechConfiguration(subscription: key, region: region)
    } catch {
        print("error \(error) happened")
        return
    }
    speechConfig.speechRecognitionLanguage = "en-US"

    // `nil` selects the default microphone.
    guard let audioConfig = SPXAudioConfiguration(microphone: nil) else {
        print("error: could not create the microphone audio configuration")
        return
    }

    let reco: SPXSpeechRecognizer
    do {
        reco = try SPXSpeechRecognizer(speechConfiguration: speechConfig, audioConfiguration: audioConfig)
    } catch {
        print("error \(error) happened")
        return
    }
    // NOTE(review): `reco` is a local, so it is released when this function
    // returns; continuous recognition presumably needs the recognizer to be
    // kept alive (e.g. in a stored property) — confirm against the SDK docs.

    reco.addRecognizingEventHandler { _, evt in
        print("Received intermediate result event. SessionId: \(evt.sessionId), Recognition Result: \(evt.result.text ?? "(no result)"), Status: \(evt.result.reason), Offset: \(evt.result.offset) Duration: \(evt.result.duration), ResultID: \(evt.result.resultId)")
    }
    reco.addRecognizedEventHandler { _, evt in
        print("Received final result event. SessionId: \(evt.sessionId), Recognition Result: \(evt.result.text ?? "(no result)"), Status: \(evt.result.reason), Offset: \(evt.result.offset) Duration: \(evt.result.duration), ResultID: \(evt.result.resultId)")
    }
    reco.addSessionStoppedEventHandler { _, evt in
        print("Received session stopped event. SessionId: \(evt.sessionId)")
    }

    print("Listening...")
    do {
        try reco.startContinuousRecognition()
    } catch {
        print("error \(error) happened")
    }
}
运行上述函数时,出现以下错误:
Error Domain=SPXErrorDomain Code=21 "Error: Exception with an error code: 0x15 (SPXERR_MIC_ERROR)
经过一番努力,我终于写出了一个可以正常工作的函数。微软确实应该改进他们的示例。对于来这里寻找同样答案的人,下面是我最终的做法,希望能对大家有所帮助 :)
/// Audio engine whose input node supplies the microphone audio.
var audioEngine = AVAudioEngine()
/// Storage for raw audio bytes.
var audioData = Data()
/// Input node of `audioEngine`; assigned by `recognizeFromMic()`.
var inputNode: AVAudioInputNode?
/// Speech recognizer; assigned by `recognizeFromMic()` so it stays alive after that call returns.
var reco: SPXSpeechRecognizer?;
/// Pull-style audio input stream.
/// NOTE(review): not obviously used by the code visible here — confirm before relying on or removing it.
var stream = SPXPullAudioInputStream()
/// Starts continuous speech recognition on live microphone audio.
///
/// Microphone buffers are tapped from `audioEngine`, converted to 16 kHz mono
/// 16-bit PCM and written to a push stream that feeds the recognizer stored in
/// `reco`. Recognition events are printed to the console. Every set-up failure
/// is printed and ends the function early instead of crashing.
func recognizeFromMic() {
    /// Copies the valid 16-bit samples of channel 0 of `PCMBuffer` into a `Data`.
    func toData(PCMBuffer: AVAudioPCMBuffer) -> Data {
        guard let channels = PCMBuffer.int16ChannelData else {
            return Data()
        }
        let bytesPerFrame = Int(PCMBuffer.format.streamDescription.pointee.mBytesPerFrame)
        // Use frameLength (frames actually filled), not frameCapacity (frames
        // allocated); otherwise stale bytes past the valid audio are sent too.
        return Data(bytes: channels[0], count: Int(PCMBuffer.frameLength) * bytesPerFrame)
    }

    guard let outputFormat = AVAudioFormat(commonFormat: .pcmFormatInt16, sampleRate: 16000, channels: AVAudioChannelCount(1), interleaved: false) else {
        print("error: could not create the 16 kHz mono Int16 audio format")
        return
    }

    // Create the push stream and the recognizer BEFORE audio starts flowing,
    // so that no tapped buffer is dropped because the stream does not exist yet.
    guard let audioFormat = SPXAudioStreamFormat(usingPCMWithSampleRate: 16000, bitsPerSample: 16, channels: 1) else {
        print("error: could not create the push stream audio format")
        return
    }
    let stream: SPXPushAudioInputStream? = SPXPushAudioInputStream(audioFormat: audioFormat)
    guard let pushStream = stream, let audioConfig = SPXAudioConfiguration(streamInput: pushStream) else {
        print("error: could not create the push stream audio configuration")
        return
    }

    let speechConfig: SPXSpeechConfiguration
    do {
        speechConfig = try SPXSpeechConfiguration(subscription: key, region: region)
    } catch {
        print("error \(error) happened")
        return
    }
    speechConfig.enableDictation()
    speechConfig.speechRecognitionLanguage = "en-US"

    let recognizer: SPXSpeechRecognizer
    do {
        recognizer = try SPXSpeechRecognizer(speechConfiguration: speechConfig, audioConfiguration: audioConfig)
    } catch {
        print("error \(error) happened")
        return
    }
    // Keep the recognizer in a property so it outlives this call.
    reco = recognizer

    recognizer.addRecognizingEventHandler { _, evt in
        print("Received intermediate result event. SessionId: \(evt.sessionId), Recognition Result: \(evt.result.text ?? "(no result)"), Status: \(evt.result.reason), Offset: \(evt.result.offset) Duration: \(evt.result.duration), ResultID: \(evt.result.resultId)")
    }
    recognizer.addRecognizedEventHandler { _, evt in
        print("Received final result event. SessionId: \(evt.sessionId), Recognition Result: \(evt.result.text ?? "(no result)"), Status: \(evt.result.reason), Offset: \(evt.result.offset) Duration: \(evt.result.duration), ResultID: \(evt.result.resultId)")
    }
    recognizer.addSessionStoppedEventHandler { _, evt in
        print("Received session stopped event. SessionId: \(evt.sessionId)")
    }

    let audioSession = AVAudioSession.sharedInstance()
    do {
        try audioSession.setCategory(.record, mode: .measurement, options: .duckOthers)
        try audioSession.setActive(true, options: .notifyOthersOnDeactivation)
    } catch {
        print("error \(error) happened")
        return
    }

    let micNode = audioEngine.inputNode
    inputNode = micNode
    let inputFormat = micNode.outputFormat(forBus: 0)
    guard inputFormat.sampleRate > 0, let converter = AVAudioConverter(from: inputFormat, to: outputFormat) else {
        print("error: could not create a converter from the microphone format")
        return
    }
    let sampleRateRatio = outputFormat.sampleRate / inputFormat.sampleRate

    // Installing a second tap on the same bus is an error, so drop any tap
    // left over from a previous call first.
    micNode.removeTap(onBus: 0)
    micNode.installTap(onBus: AVAudioNodeBus(0), bufferSize: AVAudioFrameCount(32000), format: inputFormat) { pcmBuffer, _ in
        // Size the output for the resampled frame count (plus one frame of slack).
        let capacity = AVAudioFrameCount((Double(pcmBuffer.frameLength) * sampleRateRatio).rounded(.up)) + 1
        guard let outputBuffer = AVAudioPCMBuffer(pcmFormat: outputFormat, frameCapacity: capacity) else {
            return
        }

        // Hand the tapped buffer to the converter exactly once; answering
        // `.haveData` on every request would feed the same audio repeatedly.
        var hasSuppliedInput = false
        let inputBlock: AVAudioConverterInputBlock = { _, outStatus in
            if hasSuppliedInput {
                outStatus.pointee = .noDataNow
                return nil
            }
            hasSuppliedInput = true
            outStatus.pointee = .haveData
            return pcmBuffer
        }

        var conversionError: NSError?
        let status = converter.convert(to: outputBuffer, error: &conversionError, withInputFrom: inputBlock)
        guard status != .error, outputBuffer.frameLength > 0 else {
            if let conversionError = conversionError {
                print("error \(conversionError) happened")
            }
            return
        }

        // Write only this buffer's audio. Accumulating every buffer and
        // rewriting the whole accumulation would resend all earlier audio on
        // each callback and grow memory without bound.
        pushStream.write(toData(PCMBuffer: outputBuffer))
    }

    audioEngine.prepare()
    do {
        try audioEngine.start()
    } catch {
        print("\(error)")
        micNode.removeTap(onBus: 0)
        return
    }

    print("Listening...")
    do {
        try recognizer.startContinuousRecognition()
    } catch {
        print("error \(error) happened")
        // Recognition never started, so stop capturing audio again.
        audioEngine.stop()
        micNode.removeTap(onBus: 0)
    }
}
要识别语音,首先需要设置语音识别器。这一步可以在类初始化时、视图出现时或按钮操作中完成。
在开始语音识别之前,还需要先启动音频会话:
/// Azure speech recognizer; assigned by `setupVoiceAssistant(completion:)` and used by `recognizeVoice(completion:)`.
private var speechRecognizer: SPXSpeechRecognizer?
/// Shared audio session; assigned by `startAudioSession()`.
private var audioSession: AVAudioSession?
/// Audio engine started by `startAudioSession()` and stopped by `stopAudioSession()`.
private var audioEngine = AVAudioEngine()
/// Input node of `audioEngine`; assigned by `startAudioSession()`.
private var inputNode: AVAudioInputNode?
/// Activates the shared audio session for recording and starts the audio engine.
///
/// - Returns: `true` when the session was configured and activated and the
///   engine started; `false` as soon as any of those steps throws.
private func startAudioSession() -> Bool {
    let session = AVAudioSession.sharedInstance()
    audioSession = session
    do {
        try session.setCategory(.record)
        try session.setActive(true)
        // The input node is only touched once the session is active.
        inputNode = audioEngine.inputNode
        audioEngine.prepare()
        try audioEngine.start()
        return true
    } catch {
        return false
    }
}
/// Stops the audio engine and then deactivates the audio session.
///
/// The engine is stopped first: deactivating a session while audio I/O is
/// still running is reported as an error by `setActive(false)`.
private func stopAudioSession() {
    audioEngine.stop()
    do {
        try audioSession?.setActive(false)
    } catch {
        // Deactivation is best-effort; report the failure instead of hiding it.
        print("Failed to deactivate the audio session: \(error)")
    }
}
/// Builds the Azure speech configuration and recognizer and stores the
/// recognizer in `speechRecognizer`.
///
/// - Parameter completion: Called exactly once, with `true` when the recognizer
///   was created and stored, or `false` when the configuration or the
///   recognizer could not be created.
private func setupVoiceAssistant(completion: (_ status: Bool) -> Void) {
    let speechKey = "Your speech key"
    let speechRegion = "Your speech region"

    guard let configuration = try? SPXSpeechConfiguration(subscription: speechKey, region: speechRegion) else {
        /// handle error
        completion(false)
        return
    }
    configuration.speechRecognitionLanguage = languageCode

    let audioConfig = SPXAudioConfiguration()
    do {
        speechRecognizer = try SPXSpeechRecognizer(speechConfiguration: configuration, audioConfiguration: audioConfig)
    } catch {
        /// Handle Recognizer error
        completion(false)
        return
    }
    completion(true)
}
/// Recognizes a single utterance and reports it as text.
///
/// The audio session is started on entry and is stopped again on every exit
/// path once it has been started.
///
/// - Parameter completion: Called with `isIntermediateResult == true` for the
///   localized "Listening..." placeholder and for each intermediate text, and
///   with `false` once for the final text when that text is non-empty.
func recognizeVoice(completion: @escaping (_ isIntermediateResult: Bool, _ voiceText: String) -> Void) {
    /// Starting the session here to avoid issues if the session is used somewhere else
    guard startAudioSession() else {
        /// Handle session error
        return
    }
    /// The session is running from here on; stop it however this function exits,
    /// so a failed recognition does not leave the session and engine active.
    defer { stopAudioSession() }

    guard let speechRecognizer = speechRecognizer else {
        /// Handle Recognizer error
        return
    }

    // NOTE(review): every call registers another handler on the same
    // recognizer and nothing visible here removes the old ones — confirm
    // whether earlier `completion` closures keep being invoked.
    speechRecognizer.addRecognizingEventHandler { [weak self] _, result in
        /// Drop intermediate results once the owner has gone away
        guard self != nil else {
            return
        }
        guard let voiceText = result.result.text else {
            /// Handle voice text nil error
            return
        }
        /// Intermediate response
        completion(true, voiceText)
    }

    /// This will call on starting the recognition
    completion(true, "Listening...".localized(.module))

    // NOTE(review): the result is used directly below, so this call
    // presumably blocks the calling thread until recognition finishes —
    // confirm it is not invoked on the main thread.
    guard let result = try? speechRecognizer.recognizeOnce() else {
        /// Handle speechRecognizer result nil error
        return
    }

    /// Inspect the outcome before the empty-text early returns below; a failed
    /// recognition normally carries no text and would otherwise go unreported.
    if result.reason != SPXResultReason.recognizedSpeech {
        if let cancellationDetails = try? SPXCancellationDetails(fromCanceledRecognitionResult: result) {
            /// handle cancelled reason here
            print("Speech was not recognized: \(result.reason), details: \(String(describing: cancellationDetails.errorDetails))")
        }
    }

    guard let finalVoiceText = result.text, !finalVoiceText.isEmpty else {
        /// Final text is nil or empty
        return
    }
    /// Final text
    completion(false, finalVoiceText)
}