我正在尝试把语音 SDK 通过 SPXPushAudioOutputStream 输出的音频以流式方式播放。我可以毫无问题地获取全部数据,并能将其写入 wav 或 mp3 文件,然后用下面的代码播放。
/// Demo screen that synthesizes `inputText` with the Azure Speech SDK into
/// `pushStream.mp3` in the app's documents directory and plays the finished
/// file back with `AVAudioPlayer`.
struct ContentView: View {
    @State private var inputText = """
    Die Gesundheitspolitik bleibt ein hartes Pflaster für Reformen. Bundesrätin Elisabeth Baume-Schneider forderte alle Akteure am Sonntag «nachdrücklich» auf, ihren Teil der Verantwortung zu übernehmen und «konkrete, mehrheitsfähige Sparvorschläge» vorzulegen. Mit Blick auf die vergangenen Jahrzehnte kann man darüber nur schmunzeln.
    Solange besagte Akteure ihren Besitzstand eisern verteidigen und solange die politischen Kräfte aus allen Lagern ihrem Lobbydruck nachgeben, wird sich nichts ändern. Auch in den Kantonen überwiegen die Hemmungen, Spitäler zu schliessen und über die Grenzen hinweg die Zusammenarbeit zu verstärken. Ausnahmen bestätigen die Regel.
    Das sagen die Ökonomen
    Deshalb stellt sich die Frage, ob man nicht das zunehmend absurde Kopfprämiensystem abschaffen und auf ein durch Steuergelder finanziertes Gesundheitswesen umstellen sollte, wie in anderen Ländern. watson hat diese Frage den Gesundheitsökonomen Heinz Locher und Willy Oggier gestellt – und interessante Antworten erhalten.
    """
    @State private var resultText = ""
    @State private var isPlaying = false
    @State private var audioPlayer: AVAudioPlayer?
    @State private var synthesisCompleted = false

    let speechKey = "censored"
    let serviceRegion = "switzerlandnorth"

    var body: some View {
        VStack {
            TextField("Enter text to synthesize", text: $inputText)
                .textFieldStyle(RoundedBorderTextFieldStyle())
                .padding()
            Button(action: synthesisToPushAudioOutputStream) {
                Text("Synthesize Speech")
            }
            .padding()
            Button(action: playAudio) {
                Text(isPlaying ? "Stop" : "Play")
            }
            .padding()
            .disabled(!synthesisCompleted)
            Text(resultText)
                .padding()
        }
        .onChange(of: resultText) { newValue in
            debug("Result text changed to: \(newValue)", function: "body.onChange")
            // The Play button is enabled purely from the status text.
            synthesisCompleted = newValue.contains("Speech synthesis completed")
            debug("Synthesis completed: \(synthesisCompleted)", function: "body.onChange")
        }
    }

    /// Synthesizes `inputText` to MP3 and writes every chunk pushed by the SDK
    /// into `pushStream.mp3`. `speakText` is synchronous, so this call blocks
    /// the calling thread until synthesis has finished.
    private func synthesisToPushAudioOutputStream() {
        let startTime = Date()
        debug("Starting speech synthesis...", function: #function)

        let filePath = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0].appendingPathComponent("pushStream.mp3")
        debug("File path: \(filePath.path)", function: #function)

        if !FileManager.default.fileExists(atPath: filePath.path) {
            debug("File doesn't exist. Creating new file...", function: #function)
            FileManager.default.createFile(atPath: filePath.path, contents: nil, attributes: nil)
        } else {
            debug("File already exists. Will overwrite.", function: #function)
        }

        let fileHandle: FileHandle
        do {
            fileHandle = try FileHandle(forWritingTo: filePath)
            // Opening for writing does not discard old contents. Without this,
            // a shorter synthesis would leave the tail of a previous, longer
            // recording behind the new audio.
            try fileHandle.truncate(atOffset: 0)
        } catch {
            debug("Failed to open file handle", function: #function)
            updateResultText("Failed to open file at \(filePath.path)")
            return
        }
        debug("File handle opened successfully", function: #function)

        var totalBytesWritten: UInt = 0
        guard let stream = SPXPushAudioOutputStream(writeHandler: { data -> UInt in
            fileHandle.write(data)
            totalBytesWritten += UInt(data.count)
            debug("Wrote \(data.count) bytes. Total: \(totalBytesWritten) bytes", function: "SPXPushAudioOutputStream.writeHandler")
            return UInt(data.count)
        }, closeHandler: {
            fileHandle.closeFile()
            debug("File closed. Total bytes written: \(totalBytesWritten)", function: "SPXPushAudioOutputStream.closeHandler")
        }) else {
            fileHandle.closeFile()
            debug("Failed to create push audio output stream", function: #function)
            updateResultText("Speech Config Error")
            return
        }

        debug("Configuring audio and speech...", function: #function)
        let audioConfig = try? SPXAudioConfiguration(streamOutput: stream)
        let speechConfig = try? SPXSpeechConfiguration(subscription: speechKey, region: serviceRegion)
        guard let config = speechConfig, let audio = audioConfig else {
            debug("Failed to create speech or audio configuration", function: #function)
            updateResultText("Speech Config Error")
            return
        }
        config.setSpeechSynthesisOutputFormat(.audio24Khz160KBitRateMonoMp3)
        debug("Set output format to MP3", function: #function)
        updateResultText("Synthesizing...")

        debug("Creating speech synthesizer...", function: #function)
        guard let synth = try? SPXSpeechSynthesizer(speechConfiguration: config, audioConfiguration: audio) else {
            debug("Failed to create speech synthesizer", function: #function)
            updateResultText("Speech Synthesis Error")
            return
        }

        debug("Starting text-to-speech...", function: #function)
        guard let result = try? synth.speakText(inputText) else {
            debug("Speech synthesis failed (no result)", function: #function)
            updateResultText("Speech synthesis error.")
            return
        }

        if result.reason == SPXResultReason.canceled {
            // Reading the details can fail as well; fall back to a generic
            // message instead of crashing.
            let details = try? SPXSpeechSynthesisCancellationDetails(fromCanceledSynthesisResult: result)
            let reason = details?.errorDetails ?? "Unknown error"
            debug("Speech synthesis canceled: \(reason)", function: #function)
            updateResultText("Canceled: \(reason)")
        } else if result.reason == SPXResultReason.synthesizingAudioCompleted {
            let synthesisTime = Date().timeIntervalSince(startTime)
            let formattedTime = String(format: "%.2f", synthesisTime)
            debug("Speech synthesis completed successfully in \(formattedTime) seconds", function: #function)
            updateResultText("Speech synthesis completed in \(formattedTime) seconds.")
            // Add a small delay to ensure file writing is complete
            DispatchQueue.main.asyncAfter(deadline: .now() + 0.5) {
                // Get file size
                do {
                    let attributes = try FileManager.default.attributesOfItem(atPath: filePath.path)
                    let fileSize = attributes[.size] as? Int64 ?? 0
                    debug("File size: \(fileSize) bytes", function: "DispatchQueue.asyncAfter")
                } catch {
                    debug("Error getting file size: \(error)", function: "DispatchQueue.asyncAfter")
                }
                // Get audio duration
                let asset = AVAsset(url: filePath)
                let durationSeconds = CMTimeGetSeconds(asset.duration)
                debug("Audio duration: \(durationSeconds) seconds", function: "DispatchQueue.asyncAfter")
                self.updateResultText("Speech synthesis completed in \(formattedTime) seconds. Audio Duration: \(String(format: "%.2f", durationSeconds)) seconds, Size: \(FileManager.default.sizeFormatted(ofPath: filePath.path) ?? "Unknown")")
            }
        } else {
            debug("Speech synthesis failed with reason: \(result.reason)", function: #function)
            updateResultText("Speech synthesis error.")
        }
    }

    /// Publishes `text` as the status line on the main queue and derives
    /// `synthesisCompleted` from it.
    private func updateResultText(_ text: String) {
        DispatchQueue.main.async {
            self.resultText = text
            debug("Updated result text: \(text)", function: #function)
            self.synthesisCompleted = text.contains("Speech synthesis completed")
            debug("Synthesis completed: \(self.synthesisCompleted)", function: #function)
        }
    }

    /// Toggles playback of the synthesized `pushStream.mp3`.
    ///
    /// NOTE(review): `isPlaying` is only changed here, so the button keeps
    /// showing "Stop" after the file has finished playing on its own.
    private func playAudio() {
        if isPlaying {
            audioPlayer?.stop()
            isPlaying = false
            debug("Audio playback stopped", function: #function)
        } else {
            let filePath = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0].appendingPathComponent("pushStream.mp3")
            debug("Attempting to play audio from: \(filePath.path)", function: #function)
            do {
                audioPlayer = try AVAudioPlayer(contentsOf: filePath)
                audioPlayer?.play()
                isPlaying = true
                debug("Audio playback started", function: #function)
                if let duration = audioPlayer?.duration {
                    debug("Audio duration: \(duration) seconds", function: #function)
                }
            } catch {
                updateResultText("Error playing audio: \(error.localizedDescription)")
                debug("Detailed error playing audio: \(error)", function: #function)
            }
        }
    }

    /// Prints `message` prefixed with the current time and the given function label.
    private func debug(_ message: String, function: String) {
        let timestamp = DateFormatter.localizedString(from: Date(), dateStyle: .none, timeStyle: .medium)
        print("[\(timestamp)] [\(function)] \(message)")
    }
}
// Helper for rendering the on-disk size of a file as a human-readable string.
extension FileManager {
    /// Returns the size of the item at `path` formatted with
    /// `ByteCountFormatter` using the `.file` count style, or `nil` when the
    /// item's attributes cannot be read.
    func sizeFormatted(ofPath path: String) -> String? {
        do {
            let info = try attributesOfItem(atPath: path)
            // A missing or non-Int64 size attribute is reported as zero bytes.
            let byteCount = (info[.size] as? Int64) ?? 0
            return ByteCountFormatter.string(fromByteCount: byteCount, countStyle: .file)
        } catch {
            return nil
        }
    }
}
但我无论如何都弄不明白该如何实现流式播放。我对 AVPlayer 了解不多,这显然也是原因之一;我已经试过了在网上能找到的所有方法……如果有人能指出可行的解决方向,我将不胜感激!
要在合成的同时播放语音 SDK 通过 SPXPushAudioOutputStream 推送的音频,您可以修改现有代码,让音频在流式传输的过程中直接播放。
下面的代码将 SPXPushAudioOutputStream 配置为把数据流式传输到 AVAudioEngine,以进行实时播放。
/// Synthesizes `inputText` and plays the audio through `audioEngine` while the
/// Speech SDK is still streaming it.
///
/// The service is asked for raw 16 kHz / 16-bit / mono PCM. Every chunk the SDK
/// pushes is converted to a Float32 buffer and queued on `audioPlayerNode`; the
/// mixer resamples to the hardware rate. `speakText` is synchronous, so this
/// call blocks the calling thread until synthesis has finished.
private func synthesisToPushAudioOutputStream() {
    let startTime = Date()
    debug("Starting speech synthesis...", function: #function)
    guard let audioEngine = audioEngine else {
        debug("Audio engine is not initialized", function: #function)
        updateResultText("Audio Engine Error")
        return
    }

    // Describe the audio as it is requested from the service below. Using the
    // mixer's own output format here would play the samples at the wrong rate
    // and channel count.
    guard let format = AVAudioFormat(commonFormat: .pcmFormatFloat32, sampleRate: 16_000, channels: 1, interleaved: false) else {
        debug("Failed to create playback format", function: #function)
        updateResultText("Audio Engine Error")
        return
    }

    // Prepare audio engine and player node
    if audioPlayerNode.engine == nil {
        audioEngine.attach(audioPlayerNode)
    }
    audioEngine.connect(audioPlayerNode, to: audioEngine.mainMixerNode, format: format)

    // A 16-bit sample may be split across two pushes; keep the odd byte for the
    // next chunk so the sample boundaries stay aligned.
    var pendingByte = Data()
    guard let stream = SPXPushAudioOutputStream(writeHandler: { data -> UInt in
        var chunk = pendingByte + data
        if chunk.count % 2 == 0 {
            pendingByte = Data()
        } else {
            pendingByte = Data(chunk.suffix(1))
            chunk.removeLast()
        }
        if let pcmBuffer = self.convertDataToPCMBuffer(data: chunk, format: format) {
            self.audioPlayerNode.scheduleBuffer(pcmBuffer, completionHandler: nil)
        }
        return UInt(data.count)
    }, closeHandler: {
        // Synthesis normally finishes well before playback does, so stopping
        // the engine right here would cut off everything still queued. Queue a
        // short silent buffer behind the audio and stop once it has played.
        let tailFrames: AVAudioFrameCount = 1_600 // 0.1 s at 16 kHz
        guard let tail = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: tailFrames),
              let samples = tail.floatChannelData else {
            return
        }
        tail.frameLength = tailFrames
        samples[0].initialize(repeating: 0, count: Int(tailFrames))
        self.audioPlayerNode.scheduleBuffer(tail, completionCallbackType: .dataPlayedBack) { _ in
            // Stop from the main queue rather than from the node's callback.
            DispatchQueue.main.async {
                audioEngine.stop()
                self.debug("Audio engine stopped", function: "SPXPushAudioOutputStream.closeHandler")
            }
        }
    }) else {
        debug("Failed to create push audio output stream", function: #function)
        updateResultText("Speech Config Error")
        return
    }

    let audioConfig = try? SPXAudioConfiguration(streamOutput: stream)
    let speechConfig = try? SPXSpeechConfiguration(subscription: speechKey, region: serviceRegion)
    guard let config = speechConfig, let audio = audioConfig else {
        debug("Failed to create speech or audio configuration", function: #function)
        updateResultText("Speech Config Error")
        return
    }
    // Headerless PCM, so every pushed byte is sample data.
    config.setSpeechSynthesisOutputFormat(.raw16Khz16BitMonoPcm)
    debug("Set output format to PCM", function: #function)
    updateResultText("Synthesizing...")

    guard let synth = try? SPXSpeechSynthesizer(speechConfiguration: config, audioConfiguration: audio) else {
        debug("Failed to create speech synthesizer", function: #function)
        updateResultText("Speech Synthesis Error")
        return
    }

    // Start rendering only now, so a configuration failure above does not leave
    // the engine running. Buffers scheduled while the node is playing are heard
    // as soon as they arrive.
    do {
        try audioEngine.start()
    } catch {
        debug("Failed to start audio engine: \(error)", function: #function)
        updateResultText("Audio Engine Error")
        return
    }
    audioPlayerNode.play()

    debug("Starting text-to-speech...", function: #function)
    guard let result = try? synth.speakText(inputText) else {
        debug("Speech synthesis failed (no result)", function: #function)
        updateResultText("Speech synthesis error.")
        return
    }

    if result.reason == SPXResultReason.canceled {
        let details = try? SPXSpeechSynthesisCancellationDetails(fromCanceledSynthesisResult: result)
        let reason = details?.errorDetails ?? "Unknown error"
        debug("Speech synthesis canceled: \(reason)", function: #function)
        updateResultText("Canceled: \(reason)")
    } else if result.reason == SPXResultReason.synthesizingAudioCompleted {
        let synthesisTime = Date().timeIntervalSince(startTime)
        debug("Speech synthesis completed successfully in \(String(format: "%.2f", synthesisTime)) seconds", function: #function)
        updateResultText("Speech synthesis completed in \(String(format: "%.2f", synthesisTime)) seconds.")
        synthesisCompleted = true
    } else {
        debug("Speech synthesis failed with reason: \(result.reason)", function: #function)
        updateResultText("Speech synthesis error.")
    }
}

/// Converts little-endian 16-bit mono PCM bytes into a Float32 buffer in `format`.
///
/// - Parameters:
///   - data: Raw 16-bit signed samples; a trailing odd byte is ignored.
///   - format: Target format. Must be non-interleaved Float32. With more than
///     one channel the mono signal is copied into every channel.
/// - Returns: A buffer with one frame per input sample, or `nil` when `data`
///   holds no complete sample, `format` is unsupported, or allocation fails.
private func convertDataToPCMBuffer(data: Data, format: AVAudioFormat) -> AVAudioPCMBuffer? {
    let sampleCount = data.count / MemoryLayout<Int16>.size
    guard sampleCount > 0,
          format.commonFormat == .pcmFormatFloat32,
          !format.isInterleaved,
          let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: AVAudioFrameCount(sampleCount)),
          let channels = buffer.floatChannelData else {
        return nil
    }
    buffer.frameLength = AVAudioFrameCount(sampleCount)
    let channelCount = Int(format.channelCount)
    data.withUnsafeBytes { (raw: UnsafeRawBufferPointer) in
        for index in 0..<sampleCount {
            // Assemble the sample byte by byte so the read never depends on
            // the alignment of the underlying storage.
            let low = UInt16(raw[index * 2])
            let high = UInt16(raw[index * 2 + 1])
            let sample = Int16(bitPattern: low | (high << 8))
            // Scale to the [-1, 1) range expected for Float32 audio.
            let value = Float(sample) / 32_768
            for channel in 0..<channelCount {
                channels[channel][index] = value
            }
        }
    }
    return buffer
}
import UIKit
import MicrosoftCognitiveServicesSpeech
// Name of the bundle resource that is looked up in `viewDidLoad` and passed to
// `SPXEmbeddedSpeechConfiguration(fromPath:)`.
let EmbeddedSpeechSynthesisVoicesFolderName = "TTS"
// Voice name and key handed to `setSpeechSynthesisVoice(_:key:)`.
// The values below are placeholders that must be replaced.
let EmbeddedSpeechSynthesisVoiceName = "YourEmbeddedSpeechSynthesisVoiceName"
let EmbeddedSpeechSynthesisVoiceKey = "YourEmbeddedSpeechSynthesisVoiceKey"
/// Minimal screen with a text field and a button that synthesizes the typed
/// text with an embedded voice and saves the audio as `output.wav` in the
/// documents directory.
class ViewController: UIViewController, UITextFieldDelegate {
    var textField: UITextField!
    var synthButton: UIButton!
    var inputText: String!
    /// `nil` when the voice folder is missing from the bundle or loading it failed.
    var embeddedSpeechConfig: SPXEmbeddedSpeechConfiguration?

    override func viewDidLoad() {
        super.viewDidLoad()
        let bundle = Bundle(for: type(of: self))
        if let absoluteModelPath = bundle.path(forResource: EmbeddedSpeechSynthesisVoicesFolderName, ofType: nil) {
            do {
                embeddedSpeechConfig = try SPXEmbeddedSpeechConfiguration(fromPath: absoluteModelPath)
                embeddedSpeechConfig?.setSpeechSynthesisVoice(EmbeddedSpeechSynthesisVoiceName, key: EmbeddedSpeechSynthesisVoiceKey)
            } catch {
                print("Error: \(error) in initializing embedded speech configuration.")
                embeddedSpeechConfig = nil
            }
        } else {
            print("Error: Unable to locate the specified embedded speech synthesis voice.")
        }
        setupUI()
    }

    /// Builds the text field and the synthesize button and adds them to the view.
    func setupUI() {
        textField = UITextField(frame: CGRect(x: 100, y: 250, width: 200, height: 50))
        textField.textColor = UIColor.black
        textField.borderStyle = UITextField.BorderStyle.roundedRect
        textField.placeholder = "Type something to synthesize."
        textField.delegate = self
        inputText = ""

        synthButton = UIButton(frame: CGRect(x: 100, y: 400, width: 200, height: 50))
        synthButton.setTitle("Synthesize", for: .normal)
        synthButton.addTarget(self, action: #selector(synthesisButtonClicked), for: .touchUpInside)
        synthButton.setTitleColor(UIColor.black, for: .normal)

        self.view.addSubview(textField)
        self.view.addSubview(synthButton)
    }

    /// Mirrors every edit of the text field into `inputText`.
    func textField(_ textField: UITextField, shouldChangeCharactersIn range: NSRange, replacementString string: String) -> Bool {
        // The NSRange has to be converted against the field's current text.
        if let demotext = textField.text, let textRange = Range(range, in: demotext) {
            self.inputText = demotext.replacingCharacters(in: textRange, with: string)
        }
        return true
    }

    /// Runs the synthesis on a background queue.
    @objc func synthesisButtonClicked() {
        DispatchQueue.global(qos: .userInitiated).async {
            self.synthesisToWAV()
        }
    }

    /// Synthesizes `inputText` with the embedded voice and writes the resulting
    /// audio data to `output.wav` in the documents directory.
    ///
    /// Does nothing when the input is empty. Logs and returns when the embedded
    /// configuration is unavailable or any step fails.
    func synthesisToWAV() {
        guard let text = inputText, !text.isEmpty else {
            return
        }
        // `viewDidLoad` leaves the configuration nil when the voice could not
        // be loaded; report that instead of crashing on a force unwrap.
        guard let embeddedSpeechConfig = embeddedSpeechConfig else {
            print("Error: Embedded speech configuration is not available.")
            return
        }
        do {
            let synthesizer = try SPXSpeechSynthesizer(embeddedSpeechConfiguration: embeddedSpeechConfig)
            // NOTE(review): confirm that `fromDefaultSpeakerOutput()` and
            // `synthesizeSpeech(_:audioConfig:)` exist in the SDK version in
            // use; they are kept exactly as in the original snippet.
            let audioConfig = try SPXAudioConfiguration.fromDefaultSpeakerOutput()
            let result = try synthesizer.synthesizeSpeech(text, audioConfig: audioConfig)
            guard let audioData = result.audioData else {
                print("Error: Audio data is nil.")
                return
            }
            let documentsPath = NSSearchPathForDirectoriesInDomains(.documentDirectory, .userDomainMask, true)[0] as NSString
            let filePath = documentsPath.appendingPathComponent("output.wav")
            let fileURL = URL(fileURLWithPath: filePath)
            do {
                try audioData.write(to: fileURL)
                print("Speech synthesized and saved to: \(fileURL)")
            } catch {
                print("Error writing file: \(error)")
            }
        } catch {
            print("Error synthesizing speech: \(error)")
        }
    }
}
synthesisToWAV() 方法使用 SPXSpeechSynthesizer 将文本合成为语音,并将合成的音频保存为 .wav 文件。
samples_swift_ios.wav: