I'm writing logic to encode an audio stream as Opus (sent over a web socket), and I keep getting this error:

Invalid frame size: 4800. Must be one of [120, 240, 480, 960, 1920, 2880]
//
// AudioManager.swift
//
//

import Foundation
import AVFoundation
import Opus

class AudioManager: ObservableObject {
    private var audioEngine: AVAudioEngine?
    private var audioInputNode: AVAudioInputNode?
    private weak var appManager: AppManager?
    private var canSendData: Bool = false // Flag to control data sending
    private var audioBufferQueue: AVAudioPCMBuffer?
    private var opusEncoder: Opus.Encoder?

    init(appManager: AppManager) {
        self.appManager = appManager
        setupOpusEncoder()
        setupAudioSession()
        setupAudioEngine()
    }

    private func setupAudioSession() {
        let session = AVAudioSession.sharedInstance()
        do {
            try session.setPreferredSampleRate(48000)
            try session.setPreferredInputNumberOfChannels(1)
            try session.setCategory(.playAndRecord, mode: .default, options: [.defaultToSpeaker, .allowBluetooth])
            try session.setActive(true)
            print("Audio session setup complete with sample rate 48000 Hz and mono channel.")
        } catch {
            print("Failed to set up audio session: \(error)")
        }
    }

    func checkMicrophonePermission() -> Bool {
        return AVAudioSession.sharedInstance().recordPermission == .granted
    }

    func requestMicrophoneAccess(completion: @escaping (Bool) -> Void) {
        let audioSession = AVAudioSession.sharedInstance()
        switch audioSession.recordPermission {
        case .granted:
            completion(true)
        case .denied:
            completion(false)
        case .undetermined:
            audioSession.requestRecordPermission { granted in
                DispatchQueue.main.async {
                    completion(granted)
                }
            }
        @unknown default:
            completion(false)
        }
    }

    private func setupOpusEncoder() {
        // Define the parameters for the audio format
        let sampleRate = 48000.0 // Sample rate in Hz
        let channels = 1 // Number of audio channels
        // Attempt to create an AVAudioFormat instance
        guard let opusFormat = AVAudioFormat(opusPCMFormat: .float32, sampleRate: sampleRate, channels: AVAudioChannelCount(channels)) else {
            print("Invalid audio format parameters")
            return
        }
        do {
            // Create the Opus encoder with the valid audio format
            opusEncoder = try Opus.Encoder(format: opusFormat)
            print("Opus encoder successfully created")
        } catch {
            // Handle any errors that might occur during the encoder initialization
            print("Failed to create Opus encoder: \(error)")
        }
    }

    func setupAudioEngine() {
        audioEngine = AVAudioEngine()
        guard let audioEngine = audioEngine else {
            print("Audio engine could not be initialized")
            return
        }
        let inputNode = audioEngine.inputNode
        let mixerNode = AVAudioMixerNode()
        audioEngine.attach(mixerNode)
        // Choose an Opus-compatible buffer size
        let opusCompatibleBufferSize: AVAudioFrameCount = 960 // Choose based on your latency and quality requirements
        let desiredFormat = AVAudioFormat(standardFormatWithSampleRate: 48000, channels: 1)
        audioEngine.connect(inputNode, to: mixerNode, format: inputNode.inputFormat(forBus: 0))
        audioEngine.connect(mixerNode, to: audioEngine.mainMixerNode, format: desiredFormat)
        mixerNode.installTap(onBus: 0, bufferSize: opusCompatibleBufferSize, format: desiredFormat) { [weak self] (buffer, when) in
            self?.bufferAudioData(buffer)
        }
        do {
            try audioEngine.start()
            print("Audio engine started with desired format.")
        } catch {
            print("Failed to start audio engine: \(error)")
        }
    }

    private func bufferAudioData(_ buffer: AVAudioPCMBuffer) {
        guard let encoder = opusEncoder else {
            print("Opus encoder not initialized")
            return
        }
        // Validate buffer format again before attempting to encode
        if buffer.format.sampleRate != 48000 || buffer.format.channelCount != 1 {
            print("Buffer format mismatch: Expected 48000 Hz, 1 channel, but got \(buffer.format.sampleRate) Hz, \(buffer.format.channelCount) channels")
            return
        }
        // Ensure the buffer frame size is a valid Opus frame size
        let validFrameSizes = [120, 240, 480, 960, 1920, 2880] // Frame sizes for 48000 Hz
        guard validFrameSizes.contains(Int(buffer.frameLength)) else {
            print("Invalid frame size: \(buffer.frameLength). Must be one of \(validFrameSizes)")
            return
        }
        var opusData = Data() // Initialize an empty Data object to hold the encoded data.
        do {
            // Attempt to encode and capture the number of bytes encoded
            let bytesEncoded = try encoder.encode(buffer, to: &opusData)
            print("Encoded \(bytesEncoded) bytes of data.")
            if !opusData.isEmpty && canSendData {
                appManager?.webSocketManager.send(data: opusData) {
                    print("Opus encoded audio data sent.")
                }
            }
        } catch let error as Opus.Error {
            // Print the Opus error with its raw value and a possible interpretation
            print("Failed to encode audio: Opus Error \(error.rawValue) - \(interpretOpusError(error))")
        } catch {
            // This catches non-Opus errors
            print("Failed to encode audio: \(error)")
        }
    }

    /// Interprets Opus error codes into human-readable descriptions
    private func interpretOpusError(_ error: Opus.Error) -> String {
        switch error {
        case .ok:
            return "No error."
        case .badArgument:
            return "One or more invalid/out of range arguments."
        case .bufferTooSmall:
            return "Not enough bytes allocated in the buffer."
        case .internalError:
            return "An internal error was detected."
        case .invalidPacket:
            return "The compressed data passed is corrupted."
        case .unimplemented:
            return "Invalid/unsupported request number."
        case .invalidState:
            return "An encoder or decoder structure is invalid or already freed."
        case .allocationFailure:
            return "Memory allocation has failed."
        default:
            return "Unknown error."
        }
    }

    func startRecording() {
        print("Starting recording...")
        canSendData = true
        appManager?.webSocketManager.send(string: "{\"command\": \"aq_start\"}") {
            print("Sent start recording command.")
        }
    }

    func stopRecording() {
        print("Stopping recording...")
        canSendData = false
        appManager?.webSocketManager.send(string: "{\"command\": \"aq_stop\"}") {
            print("Sent stop recording command.")
        }
        audioEngine?.stop()
        print("Recording stopped.")
    }
}
Your AVAudioNode tap's bufferSize is being ignored and you're getting 100 ms chunks of audio (4800 frames at 48 kHz), while the Opus encoder needs smaller chunks. You need to break the oversized audio buffers into Opus-sized chunks yourself.
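
Something along these lines would do it. This is an untested sketch rather than drop-in code: it assumes the mono Float32 buffers your tap already delivers, reuses the Opus.Encoder.encode(_:to:) call from your bufferAudioData, and the name encodeInOpusChunks is made up for illustration.

    private func encodeInOpusChunks(_ buffer: AVAudioPCMBuffer, frameSize: AVAudioFrameCount = 960) {
        guard let encoder = opusEncoder,
              let channelData = buffer.floatChannelData else { return }

        let channelCount = Int(buffer.format.channelCount)
        var start: AVAudioFrameCount = 0

        // Walk the big buffer in frameSize-sample steps and encode each slice.
        while start + frameSize <= buffer.frameLength {
            guard let chunk = AVAudioPCMBuffer(pcmFormat: buffer.format, frameCapacity: frameSize) else { break }
            chunk.frameLength = frameSize

            // Copy frameSize frames of each (non-interleaved) channel into the chunk.
            for channel in 0..<channelCount {
                memcpy(chunk.floatChannelData![channel],
                       channelData[channel] + Int(start),
                       Int(frameSize) * MemoryLayout<Float>.size)
            }

            var opusData = Data()
            do {
                _ = try encoder.encode(chunk, to: &opusData)
                if !opusData.isEmpty && canSendData {
                    appManager?.webSocketManager.send(data: opusData) { }
                }
            } catch {
                print("Failed to encode chunk: \(error)")
            }

            start += frameSize
        }
        // Anything left over (frameLength not a multiple of frameSize) is dropped here.
    }

Called from the tap callback in place of bufferAudioData, a 4800-frame buffer comes out as five 960-frame packets.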
I've never seen an AVAudioNode tap respect its bufferSize argument, but the header comment says the supported range is [100, 400] ms, so it may also just be that I keep choosing buffer sizes that are too small. The online documentation, however, says "The implementation may choose another size", which perhaps explains why you hadn't noticed, and also my growing disappointment with AVAudioEngine's bufferSize over the past 10 years.

Supposedly the tap supports a bufferSize range of [4800, 19200] samples at 48 kHz. If you're feeling lazy or just want a quick result, 19200 samples (a multiple of Opus's 1920 frame size) means you won't get annoying remainder buffers, but as the docs say, the API is free to ignore bufferSize, so for code that other people might see you really should do this properly.
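
Doing it properly means not assuming anything about the size the tap actually hands you: hang on to the leftover samples between callbacks and only emit exact Opus-sized frames. A rough sketch of that, again assuming mono Float32, with pendingSamples and drainOpusFrames being names I just made up for an AudioManager property and method:

    private var pendingSamples: [Float] = []

    private func drainOpusFrames(from buffer: AVAudioPCMBuffer, frameSize: Int = 960) -> [AVAudioPCMBuffer] {
        guard let channelData = buffer.floatChannelData else { return [] }

        // Append the new mono samples to whatever was left over from last time.
        pendingSamples.append(contentsOf: UnsafeBufferPointer(start: channelData[0],
                                                              count: Int(buffer.frameLength)))

        var frames: [AVAudioPCMBuffer] = []
        while pendingSamples.count >= frameSize {
            guard let chunk = AVAudioPCMBuffer(pcmFormat: buffer.format,
                                               frameCapacity: AVAudioFrameCount(frameSize)) else { break }
            chunk.frameLength = AVAudioFrameCount(frameSize)
            pendingSamples.withUnsafeBufferPointer { src in
                memcpy(chunk.floatChannelData![0], src.baseAddress!, frameSize * MemoryLayout<Float>.size)
            }
            pendingSamples.removeFirst(frameSize)
            frames.append(chunk)
        }
        // Whatever is left stays in pendingSamples until the next callback.
        return frames
    }

In the tap callback you would then encode and send each buffer drainOpusFrames(from:) returns, exactly as bufferAudioData does now, and it stops mattering whether the engine gives you 4800 frames, 19200, or something else entirely.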