// Recorder settings: mono Opus at a 16 kHz sample rate.
let opusRecordingSettings: [String: Any] = [
    AVFormatIDKey: kAudioFormatOpus,
    AVSampleRateKey: 16000.0,
    AVNumberOfChannelsKey: 1
]

do {
    // The session has to be configured and active before the recorder is created.
    try audioRecordingSession.setCategory(.playAndRecord, mode: .default)
    try audioRecordingSession.setActive(true)

    audioRecorder = try AVAudioRecorder(url: fileUrl(), settings: opusRecordingSettings)
    audioRecorder.delegate = self
} catch {
    // Report the failure instead of discarding it; otherwise `audioRecorder`
    // stays unset with no indication of which step failed.
    print("failed to set up the Opus recorder: \(error)")
}

// ... ... ...


| header | encoded opus data | header | encoded opus data | ... | ... |



  • 录制完成后,AVAudioRecorder会把opus数据包连续地并排写入.opus文件(因为默认情况下opus数据包不是自定界的)

  • 默认帧持续时间为20毫秒,因此frameSize = 20 * 16 = 320(因为我使用的是16 kHz采样率)

  • 每个数据包仅包含一帧,因此packetSize == frameSize

我了解,如果我能以某种方式遍历Opus数据包,就可以计算每个数据包的大小,并将其作为标头附加在数据之前(dataChunk = header + encodedOpusData)。


  • 如何创建由标头(header)分隔的opus音频文件,其中标头指示其后opus数据以字节为单位的大小?
  • 如何以及何时添加标头:录制时,还是录制完成后?
  • 使用AVAudioRecorder录制线性PCM,并保存录制的音频文件(即temp.wav)

  • 使用AVAssetReader读取录制的音频文件并提取PCM(不要使用Data(contentsOf: ...)加载音频文件,因为音频文件中包含元数据/文件头,直接读取会产生一些噪音)

  • 将PCM数据分割为每块x字节的数据块,并循环处理这些数据块(在我的情况下x = 640)

  • 使用OpusKit将每个x字节的PCM数据块编码为opus
  • 计算编码后opus数据的大小,并将其作为标头附加在该数据之前

代码(Swift 5,iOS 13,Xcode 11.3)

import UIKit
import MapKit
import MessageKit
import AVFoundation
import OpusKit
import os

/// Chat screen that adds a tap-to-toggle voice recording control on top of `ChatViewController`.
///
/// The recorder itself is driven by the `AVAudioRecorderDelegate` extension of this class.
class BasicChatViewController: ChatViewController {

    // MARK: - Recording state

    /// `true` while a recording is considered in progress; flipped by `toggleRecording()`.
    var isRecording = false
    var avAudioPlayer: AVAudioPlayer!
    /// Created lazily by the recording extension; `nil` until the first recording is set up.
    var audioRecorder: AVAudioRecorder!

    // MARK: - Lifecycle

    override func viewDidLoad() {
        // UIKit requires overrides of viewDidLoad to forward to the superclass.
        super.viewDidLoad()

        Logger.logIt("Initilizing opus lib kit")
        OpusKit.shared.initialize(sampleRate: Opus.SAMPLE_RATE_DEFAULT,
                                  numberOfChannels: Opus.CHANNEL_COUNT_DEFAULT,
                                  packetSize: Opus.OPUS_ENCODER_BUFFER_SIZE,
                                  encodeBlockSize: Opus.FRAME_SIZE_DEFAULT)

        // configure record button here
    }

    // MARK: - Recording

    /// Action for the record button.
    func onTapRecordButton(sender: UIButton) {
        // NOTE(review): the handler body was empty in the reviewed source; forwarding to
        // toggleRecording() is inferred from the surrounding code — confirm.
        toggleRecording()
    }

    /// Flips `isRecording` and starts or stops the recorder accordingly.
    private func toggleRecording() {
        Logger.logIt("isRecording: \(isRecording)")

        if isRecording {
            isRecording = false
            // NOTE(review): inferred call; the branch only updated the flag in the reviewed source.
            stopRecording()
        } else {
            isRecording = true
            // NOTE(review): inferred call; the branch only updated the flag in the reviewed source.
            checkPermissionAndStartRecording()
        }
    }
    // END - recording
}
// Audio recording related extensions
extension BasicChatViewController: AVAudioRecorderDelegate {

    /// Checks the microphone permission through `AudioUtil` and starts recording once it is granted.
    private func checkPermissionAndStartRecording() {
        AudioUtil.checkRecordingPermission() { [weak self] isPermissionGranted in

            Logger.logIt("isPermissionGranted: \(isPermissionGranted)")

            if isPermissionGranted {
                // NOTE(review): this branch was empty in the reviewed source; starting the
                // recorder here is inferred from the method name — confirm.
                self?.startRecording()
            } else {
                Logger.logIt("don't have permission to record")
            }
        }
    }

    /// Creates `audioRecorder` for the temporary WAV file using the default linear PCM settings.
    ///
    /// On failure the error is logged and `audioRecorder` keeps its previous value.
    private func setupRecorder() {
        let tempAudioFileUrl = AudioUtil.TEMP_WAV_FILE
        Logger.logIt("tempAudioFileUrl: \(tempAudioFileUrl)")

        let linearPcmRecordingSettings = LinearPCMRecording.LINEAR_PCM_RECODING_SETTINGS_DEFAULT
        Logger.logIt("RecordingSettings: \(linearPcmRecordingSettings)")

        do {
            audioRecorder = try AVAudioRecorder(url: tempAudioFileUrl, settings: linearPcmRecordingSettings)
            audioRecorder.delegate = self
            //audioRecorder.isMeteringEnabled = true
        } catch {
            Logger.logIt("failed to create audio recorder: \(error)")
        }
    }

    /// Starts recording, creating the recorder first when none exists yet.
    private func startRecording() {
        if audioRecorder == nil {
            setupRecorder()
        }

        // setupRecorder() leaves the recorder nil when creation throws.
        guard let recorder = audioRecorder else {
            Logger.logIt("audio recorder is unavailable, cannot start recording")
            return
        }

        // NOTE(review): activating the session and calling record() are inferred; the
        // reviewed source showed only the nil check — confirm.
        startRecordingSession()
        if !recorder.record() {
            Logger.logIt("audio recorder did not start recording")
        }
    }

    /// Stops the current recording, if a recorder exists.
    private func stopRecording() {
        guard let recorder = audioRecorder else {
            return
        }

        // NOTE(review): inferred; the reviewed source showed only the guard — confirm.
        recorder.stop()
        stopRecordingSession()
    }

    /// Deletes the temporary file written by the recorder, stopping the recorder first if needed.
    private func deleteTempAudioFile() {
        guard let recorder = audioRecorder else {
            return
        }

        // deleteRecording() must only be called on a stopped recorder.
        if recorder.isRecording {
            recorder.stop()
        }

        // delete temporary audio file
        let recordingDeleted = recorder.deleteRecording()
        if recordingDeleted {
            Logger.logIt("temp (recorded) audio file deleted")
        } else {
            Logger.logIt("failed to delete temp (recorded) audio file")
        }
    }

    /// Configures the shared audio session for spoken-audio recording and activates it.
    private func startRecordingSession() {
        do {
            try AVAudioSession.sharedInstance().setCategory(.record, mode: .spokenAudio)
            try AVAudioSession.sharedInstance().setActive(true)
        } catch {
            // The previous message said "deactivate", copied from stopRecordingSession().
            Logger.logIt("Failed to activate recording session")
        }
    }

    /// Deactivates the shared audio session.
    private func stopRecordingSession() {
        do {
            try AVAudioSession.sharedInstance().setActive(false)
        } catch {
            Logger.logIt("Failed to deactivate recording session")
        }
    }

    private func recordUsingAVAudioRecorder() {
        // NOTE(review): the body of this method was not present in the reviewed source;
        // it is left empty rather than guessed.
    }

    /// Extracts PCM from the temporary WAV file, encodes it to self-delimited Opus and saves the result.
    private func encodeRecordedAudio() {
        let pcmData = AudioUtil.extractPcmOnly(from: AudioUtil.TEMP_WAV_FILE)

        guard pcmData.count > 1 else {
            Logger.logIt("no data to encode")
            return
        }

        Logger.logIt("encoding pcm to self-delimited opus")

        let encodedOpusData = AudioUtil.encodeToSelfDelimitedOpus(pcmData: pcmData, splitSize: PCM.SPLIT_CHUNK_SIZE_DEFAULT)
        Logger.logIt("encoded opus: \(encodedOpusData)")

        Logger.logIt("save encoded opus")
        AudioUtil.saveAudio(to: AudioUtil.ENCODED_OPUS_FILE, audioData: encodedOpusData)
    }

    // MARK: - AVAudioRecorderDelegate

    func audioRecorderDidFinishRecording(_ recorder: AVAudioRecorder, successfully flag: Bool) {
        if flag {
            Logger.logIt("finished recording successfully")
            // NOTE(review): inferred; nothing else visible calls encodeRecordedAudio() — confirm.
            encodeRecordedAudio()
        } else {
            Logger.logIt("recording failed - audio encoding error")
        }
    }
}

import Foundation
import AVFoundation
import OpusKit

/// Opus stream parameters.
///
/// `packetSize` is derived from `sampleRate` (frame duration in ms × samples per ms) and is
/// recomputed whenever `sampleRate` changes after initialization.
public class OpusAudioInfo {

    public static let `default` = OpusAudioInfo()

    var channels: opus_int32
    var headerSize: Int // bytes
    var packetSize: opus_int32
    var sampleRate: opus_int32 {
        didSet {
            packetSize = OpusAudioInfo.packetSize(forSampleRate: sampleRate)
        }
    }

    /// - Parameters:
    ///   - sampleRate: samples per second.
    ///   - channels: number of channels.
    ///   - headerSize: size in bytes of the length header that precedes each encoded packet.
    public init(sampleRate: opus_int32 = Opus.SAMPLE_RATE_16_KHZ,
                channels: opus_int32 = Opus.CHANNEL_COUNT_DEFAULT,
                headerSize: Int = 1) {
        self.sampleRate = sampleRate
        // Property observers do not run during init, so compute the derived value explicitly.
        self.packetSize = OpusAudioInfo.packetSize(forSampleRate: sampleRate)
        self.channels = channels
        self.headerSize = headerSize
    }

    /// Single definition of the packet-size formula shared by `init` and the `sampleRate` observer.
    private static func packetSize(forSampleRate sampleRate: opus_int32) -> opus_int32 {
        return Int32(Opus.FRAME_DURATION_DEFAULT) * (sampleRate / 1000)
    }
}

/// Raw PCM stream parameters: sample rate, channel count and bit depth.
public class PCMInfo {

    public static let `default` = PCMInfo()

    var sampleRate: Int32
    var channels: Int16
    var bitDepth: Int16

    /// - Parameters:
    ///   - sampleRate: samples per second.
    ///   - channels: number of channels.
    ///   - bitDepth: bits per sample.
    public init(sampleRate: Int32 = PCM.SAMPLE_RATE_16_KHZ,
                channels: Int16 = Int16(PCM.MONO),
                bitDepth: Int16 = Int16(PCM.BIT_DEPTH_DEFAULT)) {
        self.sampleRate = sampleRate
        self.channels = channels
        self.bitDepth = bitDepth
    }
}

// Utility class for audio related operations
// Every member visible in this excerpt is `static`; the private initializer below
// prevents instances from being created outside the class.
public class AudioUtil {

    // Private: the class is not meant to be instantiated.
    private init(){}

    // Default audio files url in document directory
    // Each URL is built by FileUtil.createFileUrl(for:in:) from a fixed file name.
    public static let RAW_PCM_FILE = FileUtil.createFileUrl(for: "pcm.raw", in: FileUtil.DOCUMENTS_DIR)
    public static let TEMP_WAV_FILE = FileUtil.createFileUrl(for: "wav.wav", in: FileUtil.DOCUMENTS_DIR)
    public static let ENCODED_OPUS_FILE = FileUtil.createFileUrl(for: "encoded_opus_ios.opus", in: FileUtil.DOCUMENTS_DIR)
    public static let DECODED_WAV_WITH_HEADER_FILE = FileUtil.createFileUrl(for: "decoded_wav_with_header.wav", in: FileUtil.DOCUMENTS_DIR)

 /**
  Creates a 44-byte RIFF/WAVE header for linear PCM data.

  AVAudioPlayer by default can not play raw linear PCM, therefore a WAV header has to be
  placed in front of the samples.

  - parameter sampleRate: samples per second
  - parameter channelCount: number of channels
  - parameter bitDepth: bits per sample
  - parameter pcmDataSizeInBytes: PCM data size in bytes

  - returns : Data - wav header data
  */
public static func createWavHeader(sampleRate: Int32, channelCount: Int16, bitDepth: Int16, pcmDataSizeInBytes dataSize: Int32) -> Data {

    /*
     WAV header details: http://www.topherlee.com/software/pcm-tut-wavformat.html

     Bytes    Value          Description
     1 - 4    "RIFF"         Marks the file as a RIFF file.
     5 - 8    file size - 8  Size of everything after this field (32-bit integer).
     9 - 12   "WAVE"         File type header.
     13 - 16  "fmt "         Format chunk marker (note the trailing space).
     17 - 20  16             Length of the format data that follows.
     21 - 22  1              Format code (1 is PCM) - 16-bit integer.
     23 - 24  channels       Number of channels - 16-bit integer.
     25 - 28  sample rate    Samples per second - 32-bit integer.
     29 - 32  byte rate      (sample rate * bits per sample * channels) / 8.
     33 - 34  block align    (bits per sample * channels) / 8.
     35 - 36  bit depth      Bits per sample.
     37 - 40  "data"         Marks the beginning of the data section.
     41 - 44  data size      Size of the data section in bytes.

     All integers are stored little-endian.
     */

    let wavHeaderSize: Int32 = 44
    let formatCodePcm: Int16 = 1
    let formatChunkSize: Int32 = 16

    // The RIFF size field excludes the 8 bytes taken by "RIFF" and the field itself,
    // i.e. it is (header + data) - 8, not header + data.
    let riffChunkSize: Int32 = dataSize + wavHeaderSize - 8

    let blockAlign: Int16 = (bitDepth * channelCount) / 8
    let byteRate: Int32 = sampleRate * Int32(blockAlign)

    var header = Data()
    header.reserveCapacity(Int(wavHeaderSize))

    // RIFF descriptor
    header.append(contentsOf: Array("RIFF".utf8))
    header.append(contentsOf: byteArray(from: riffChunkSize))
    header.append(contentsOf: Array("WAVE".utf8))

    // format chunk
    header.append(contentsOf: Array("fmt ".utf8))
    header.append(contentsOf: byteArray(from: formatChunkSize))
    header.append(contentsOf: byteArray(from: formatCodePcm))
    header.append(contentsOf: byteArray(from: channelCount))
    header.append(contentsOf: byteArray(from: sampleRate))
    header.append(contentsOf: byteArray(from: byteRate))
    header.append(contentsOf: byteArray(from: blockAlign))
    header.append(contentsOf: byteArray(from: bitDepth))

    // data chunk header
    header.append(contentsOf: Array("data".utf8))
    header.append(contentsOf: byteArray(from: dataSize))

    return header
}

 /**
  Creates a WAV header from the default PCM constants (sample rate, channel count, bit depth).

  - parameter dataSize: size of PCM data in bytes

  - returns : Data - wav header data
  */
public static func createDefaultWavHeader(dataSize: Int32) -> Data {
    return createWavHeader(sampleRate: PCM.SAMPLE_RATE_DEFAULT,
                           channelCount: Int16(PCM.CHANNEL_COUNT_DEFAULT),
                           bitDepth: Int16(PCM.BIT_DEPTH_DEFAULT),
                           pcmDataSizeInBytes: dataSize)
}

     /**
      Converts the given integer to its little-endian byte representation.

      - parameter value: any `FixedWidthInteger`

      - returns: array of bytes, least significant byte first; its length equals the byte width of `T`
      */
    public static func byteArray<T>(from value: T) -> [UInt8] where T: FixedWidthInteger {
        // https://stackoverflow.com/a/56964191/4802664
        // .littleEndian makes the byte order independent of the host CPU.
        return withUnsafeBytes(of: value.littleEndian) { Array($0) }
    }

     /**
      Generates a WAV audio data buffer by placing the given header in front of the raw PCM.

      - parameter wavHeader: RIFF WAV header that precedes the PCM data
      - parameter pcmData: Linear PCM data

      - returns: `wavHeader` immediately followed by `pcmData`
      */
    public static func generateWav(header wavHeader: Data, pcmData: Data) -> Data {
        var wavData = Data()
        wavData.reserveCapacity(wavHeader.count + pcmData.count)

        // Previously nothing was appended, so the function always returned empty data.
        wavData.append(wavHeader)
        wavData.append(pcmData)

        return wavData
    }

     /**
      Checks the record permission and invokes the callback with the result.

      When the permission is undetermined the system prompt is shown, and the callback is
      invoked only after the user has answered.
      NOTE(review): in that case the callback runs on whatever queue AVAudioSession uses for
      its response block; hop to the main queue before touching UI.

      - parameter callback: closure invoked exactly once with `true` when recording is allowed
      */
    public static func checkRecordingPermission(onPermissionChecked callback: @escaping(_ isPermissionGranted: Bool) -> Void) {
        let session = AVAudioSession.sharedInstance()

        switch session.recordPermission {

        case .granted:
            callback(true)

        case .denied:
            callback(false)

        case .undetermined:
            // The answer arrives asynchronously, so it must be forwarded from inside the
            // response block; assigning it to a local would be lost once this function returns.
            session.requestRecordPermission { allowed in
                callback(allowed)
            }

        @unknown default:
            // Treat permission states added by future SDKs as "not granted".
            callback(false)
        }
    }

     /**
      Saves the given audio data to the specified url.

      A write failure is logged; it is not propagated to the caller.

      - parameter fileUri: file url where audio data will be saved
      - parameter audioData: bytes to write
      */
    public static func saveAudio(to fileUri: URL, audioData: Data) {

        Logger.logIt("save to: \(fileUri)")

        do {
            try audioData.write(to: fileUri)
        } catch {
            // Previously the error was swallowed, which made a failed save look like a success.
            Logger.logIt("failed to save audio to \(fileUri): \(error)")
        }
    }

     /**
      Encodes the given PCM data into self delimited opus (`|header|data|header|data|...|`) using libopus.

      The PCM is cut into consecutive chunks of `splitSize` bytes (the last chunk may be shorter).
      Each chunk is encoded on its own and written as a one-byte header holding the size of the
      encoded packet, followed by the packet itself. Chunks that fail to encode, or whose encoded
      size does not fit the one-byte header, are reported and skipped.

      - parameter pcmData: Linear PCM data buffer (loaded from file or coming from AudioEngine tapping)
      - parameter splitSize: size in bytes of the chunks the given pcmData is split into
      - returns : encoded data (encoded as: `|header|data|header|data|...|`); empty when `pcmData` is empty or `splitSize` is not positive
      */
    public static func encodeToSelfDelimitedOpus(pcmData: Data, splitSize: Int) -> Data {

        var encodedData = Data()

        // A non-positive split size would never advance; empty input has nothing to encode.
        guard splitSize > 0, !pcmData.isEmpty else {
            return encodedData
        }

        Logger.logIt("split count: \(pcmData.count / splitSize)")

        // Walk by Data indices so the function also works when pcmData is a slice.
        var chunkStart = pcmData.startIndex

        while chunkStart < pcmData.endIndex {

            let remaining = pcmData.endIndex - chunkStart
            let chunkEnd = chunkStart + min(splitSize, remaining)
            let pcmChunk = pcmData[chunkStart..<chunkEnd]
            let offset = chunkStart - pcmData.startIndex

            if let encodedChunk = OpusKit.shared.encodeData(pcmChunk) {

                // header is exactly one byte
                // header indicates size of the encoded opus data
                if let header = UInt8(exactly: encodedChunk.count) {
                    encodedData.append(header)
                    encodedData.append(encodedChunk)
                } else {
                    // Writing a truncated size would corrupt every following packet boundary.
                    print("encoded chunk of \(encodedChunk.count) bytes does not fit a one-byte header at index: \(offset)")
                }

            } else {
                print("failed to encode at index: \(offset)")
            }

            chunkStart = chunkEnd
        }

        return encodedData
    }

     /**
      Decodes the given self delimited opus data to PCM.

      Custom opus is encoded as `|header|data|header|data|...|`.
      Loops over the data, reads the packet size from each header, slices that many bytes and
      decodes the slice using libopus. Decoding stops at the first header or packet that runs
      past the end of the input; packets that fail to decode are reported and skipped.

      - parameter opusData: Encoded opus data buffer
      - parameter headerSizeInBytes: size of header in bytes (default is 1); only the first header byte is read as the packet size
      - returns : decoded pcm data
      */
    public static  func decodeSelfDelimitedOpusToPcm(opusData: Data, headerSizeInBytes headerSize: Int = 1) -> Data {

        var decodedData = Data()

        // Without at least one header byte there is no packet size to read.
        guard headerSize > 0 else {
            return decodedData
        }

        // Walk by Data indices so the function also works when opusData is a slice.
        var readIndex = opusData.startIndex

        while readIndex < opusData.endIndex {

            let packetStart = readIndex + headerSize
            guard packetStart <= opusData.endIndex else {
                print("truncated header at index: \(readIndex - opusData.startIndex)")
                break
            }

            let packetSize = Int(opusData[readIndex])
            let packetEnd = packetStart + packetSize
            guard packetEnd <= opusData.endIndex else {
                print("truncated packet at index: \(packetStart - opusData.startIndex)")
                break
            }

            let extractedOpusChunk = opusData[packetStart..<packetEnd]

            if let decodedDataChunk = OpusKit.shared.decodeData(extractedOpusChunk) {
                // Previously the decoded chunk was dropped and the result was always empty.
                decodedData.append(decodedDataChunk)
            } else {
                print("failed to decode at index: \(packetStart - opusData.startIndex)")
            }

            readIndex = packetEnd
        }

        return decodedData
    }

     /**
      Extracts only the PCM samples from an audio file using AVAssetReader.

      Audio files saved with a container extension carry metadata in addition to the samples,
      so the file is read through AVAssetReader rather than loaded as raw bytes.

      - parameter fileUrl : audio file url

      - returns: PCM Data; empty when the file has no audio track or cannot be read
      */
    public static func extractPcmOnly(from fileUrl: URL) -> Data {

        var pcmOnly = Data()

        let asset = AVAsset(url: fileUrl)

        guard let track = asset.tracks(withMediaType: AVMediaType.audio).first else {
            Logger.logIt("no audio track found in: \(fileUrl)")
            return pcmOnly
        }

        do {

            let assetReader = try AVAssetReader(asset: asset)
            let outputSettings = LinearPCMRecording.LINEAR_PCM_RECODING_SETTINGS_DEFAULT
            let trackOutput = AVAssetReaderTrackOutput(track: track, outputSettings: outputSettings)

            // The output has to be attached and reading started; otherwise the reader never
            // enters the .reading state and the loop below is skipped.
            guard assetReader.canAdd(trackOutput) else {
                Logger.logIt("cannot add track output to asset reader")
                return pcmOnly
            }
            assetReader.add(trackOutput)

            guard assetReader.startReading() else {
                Logger.logIt("failed to start reading: \(String(describing: assetReader.error))")
                return pcmOnly
            }

            Logger.logIt("reading data with AVAssetReader")
            while assetReader.status == AVAssetReader.Status.reading {

                guard let sampleBuffer = trackOutput.copyNextSampleBuffer() else {
                    // nil marks the end of the track or a failure; either way the reader has
                    // left the .reading state, so the loop condition ends the loop.
                    if assetReader.status == AVAssetReader.Status.failed {
                        Logger.logIt("failed to copy next")
                    }
                    continue
                }

                guard let blockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer) else {
                    continue
                }

                let bufferLength = CMBlockBufferGetDataLength(blockBuffer)
                var chunk = Data(count: bufferLength)

                let copyStatus = chunk.withUnsafeMutableBytes { rawBuffer -> OSStatus in
                    // An empty buffer has no base address and nothing to copy.
                    guard let destination = rawBuffer.baseAddress else {
                        return kCMBlockBufferNoErr
                    }
                    return CMBlockBufferCopyDataBytes(blockBuffer, atOffset: 0, dataLength: bufferLength, destination: destination)
                }

                if copyStatus == kCMBlockBufferNoErr {
                    pcmOnly.append(chunk)
                } else {
                    Logger.logIt("failed to copy sample bytes, status: \(copyStatus)")
                }
            }
        } catch {
            Logger.logIt("failed to create asset reader: \(error)")
        }

        return pcmOnly
    }
