I have a ViewController that implements the AVCaptureVideoDataOutputSampleBufferDelegate and AVCaptureDepthDataOutputDelegate protocols. I want to collect both video and depth data: the video frames are used to perform Vision ML requests, and the depth data is used to compute the distance from the camera to a particular point in the scene.
extension MainRecognizerViewController: AVCaptureVideoDataOutputSampleBufferDelegate {
    func captureOutput(_ output: AVCaptureOutput,
                       didOutput sampleBuffer: CMSampleBuffer,
                       from connection: AVCaptureConnection) {
        DispatchQueue.main.async {
            self.captureSessionManager.manageFlashlight(for: sampleBuffer, force: nil)
        }

        guard let cvPixelBuffer = sampleBuffer.convertToPixelBuffer() else {
            return
        }

        let exifOrientation = exifOrientationFromDeviceOrientation()
        let handler = VNImageRequestHandler(cvPixelBuffer: cvPixelBuffer,
                                            orientation: exifOrientation)
        let objectsRecognitionRequest = prepareVisionRequestForObjectsRecognition(
            pixelBuffer: cvPixelBuffer
        )

        DispatchQueue.global().async {
            try? handler.perform([objectsRecognitionRequest])
            try? handler.perform(self.roadLightsRecognizerRequests)
            try? handler.perform(self.pedestrianCrossingRecognizerRequests)
        }
    }
}
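For context, convertToPixelBuffer() and exifOrientationFromDeviceOrientation() are small helpers that are not shown above. A minimal sketch of what they could look like (the orientation mapping follows the convention used in Apple's live-capture Vision samples for the back camera, so treat it as an assumption rather than my exact code):

import AVFoundation
import ImageIO
import UIKit

extension CMSampleBuffer {
    // Pull the pixel buffer out of the sample buffer, if it carries one.
    func convertToPixelBuffer() -> CVPixelBuffer? {
        CMSampleBufferGetImageBuffer(self)
    }
}

// Map the current device orientation to the EXIF orientation Vision expects.
func exifOrientationFromDeviceOrientation() -> CGImagePropertyOrientation {
    switch UIDevice.current.orientation {
    case .portraitUpsideDown:
        return .left
    case .landscapeLeft:
        return .upMirrored
    case .landscapeRight:
        return .down
    default:
        return .up
    }
}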
and here is the depth data delegate:
extension MainRecognizerViewController: AVCaptureDepthDataOutputDelegate {
    func depthDataOutput(_ output: AVCaptureDepthDataOutput,
                         didOutput depthData: AVDepthData,
                         timestamp: CMTime,
                         connection: AVCaptureConnection) {
        if depthMeasurementsLeftInLoop == 0 {
            depthMeasurementsCumul = 0.0
            depthMeasurementMin = 9999.9
            depthMeasurementMax = 0.0
            depthMeasurementsLeftInLoop = depthMeasurementRepeats
        }

        if depthMeasurementsLeftInLoop > 0 {
            let convertedDepthData: AVDepthData = depthData.converting(
                toDepthDataType: kCVPixelFormatType_DepthFloat16
            )
            let depthFrame = convertedDepthData.depthDataMap
            let depthPoint = CGPoint(x: CGFloat(CVPixelBufferGetWidth(depthFrame)) / 2,
                                     y: CGFloat(CVPixelBufferGetHeight(depthFrame)) / 2)
            let depthVal = getDepthValueFromFrame(fromFrame: depthFrame,
                                                  atPoint: depthPoint)
            print(depthVal)

            let measurement = depthVal * 100

            depthMeasurementsCumul += measurement
            if measurement > depthMeasurementMax {
                depthMeasurementMax = measurement
            }
            if measurement < depthMeasurementMin {
                depthMeasurementMin = measurement
            }
            depthMeasurementsLeftInLoop -= 1

            // let printStr = String(format: "Measurement %d: %.2f cm",
            //                       depthMeasurementRepeats - depthMeasurementsLeftInLoop, measurement)

            DispatchQueue.main.async { [weak self] in
                self?.distanceMeasurerViewModel?.distanceString = String(format: "%.2f", measurement)
            }
        }
    }
}
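getDepthValueFromFrame(fromFrame:atPoint:) is another helper that is not shown. Since the depth data is converted to kCVPixelFormatType_DepthFloat16 above, a minimal sketch (assuming iOS 14+ for the Float16 type and a result in metres, not my exact implementation) could be:

import CoreVideo

// Read a single DepthFloat16 value (in metres) at the given pixel coordinate.
func getDepthValueFromFrame(fromFrame pixelBuffer: CVPixelBuffer,
                            atPoint point: CGPoint) -> Float {
    CVPixelBufferLockBaseAddress(pixelBuffer, .readOnly)
    defer { CVPixelBufferUnlockBaseAddress(pixelBuffer, .readOnly) }

    guard let baseAddress = CVPixelBufferGetBaseAddress(pixelBuffer) else {
        return 0
    }
    let bytesPerRow = CVPixelBufferGetBytesPerRow(pixelBuffer)
    let rowPointer = baseAddress + Int(point.y) * bytesPerRow
    let depth = rowPointer.assumingMemoryBound(to: Float16.self)[Int(point.x)]
    return Float(depth)
}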
The whole camera setup happens in my CaptureSessionManager class:
import AVFoundation

final class CaptureSessionManager: CaptureSessionManaging {
    @Inject private var flashlightManager: FlashlightManaging

    private let captureSessionQueue = DispatchQueue(label: "captureSessionQueue")
    private let captureSessionDataOutputQueue = DispatchQueue(
        label: "captureSessionVideoDataOutput",
        qos: .userInitiated,
        attributes: [],
        autoreleaseFrequency: .workItem
    )

    private var sampleBufferOutput: AVCaptureVideoDataOutput = AVCaptureVideoDataOutput()
    private var sampleBufferDelegate: AVCaptureVideoDataOutputSampleBufferDelegate?
    private var depthDataOutput: AVCaptureDepthDataOutput = AVCaptureDepthDataOutput()
    private var depthDataOutputDelegate: AVCaptureDepthDataOutputDelegate?

    var cameraMode: CameraMode?
    private var desiredFrameRate: Double?

    private var videoDevice: AVCaptureDevice? = AVCaptureDevice.default(
        .builtInLiDARDepthCamera,
        for: .video,
        position: .back
    )

    var bufferSize: CGSize = .zero
    var captureSession: AVCaptureSession!

    func setUp(with sampleBufferDelegate: AVCaptureVideoDataOutputSampleBufferDelegate,
               and depthDataOutputDelegate: AVCaptureDepthDataOutputDelegate,
               for cameraMode: CameraMode,
               cameraPosition: AVCaptureDevice.Position,
               desiredFrameRate: Double,
               completion: @escaping () -> ()) {
        stopCaptureSession()
        self.sampleBufferDelegate = sampleBufferDelegate
        self.depthDataOutputDelegate = depthDataOutputDelegate
        self.cameraMode = cameraMode
        self.desiredFrameRate = desiredFrameRate
        authorizeCaptureSession {
            completion()
        }
    }

    func manageFlashlight(for sampleBuffer: CMSampleBuffer?,
                          force torchMode: AVCaptureDevice.TorchMode?) {
        flashlightManager.manageFlashlight(for: sampleBuffer,
                                           and: self.videoDevice,
                                           force: torchMode)
    }

    private func authorizeCaptureSession(completion: @escaping () -> ()) {
        switch AVCaptureDevice.authorizationStatus(for: .video) {
        case .authorized:
            setupCaptureSession {
                completion()
            }
        case .notDetermined:
            AVCaptureDevice.requestAccess(for: .video) { [weak self] granted in
                if granted {
                    self?.setupCaptureSession {
                        completion()
                    }
                }
            }
        default:
            return
        }
    }

    private func setupCaptureSession(completion: @escaping () -> ()) {
        captureSessionQueue.async { [unowned self] in
            var captureSession: AVCaptureSession = AVCaptureSession()
            captureSession.beginConfiguration()

            guard let videoDevice = videoDevice else {
                return
            }

            do {
                let captureDeviceInput = try AVCaptureDeviceInput(device: videoDevice)
                guard captureSession.canAddInput(captureDeviceInput) else {
                    return
                }
                captureSession.addInput(captureDeviceInput)
            } catch {
                return
            }

            let sessionPreset: SessionPreset = .hd1280x720

            guard let videoSetupedCaptureSession: AVCaptureSession = setupCaptureSessionForVideo(
                captureSession: captureSession,
                sessionPreset: sessionPreset
            ) else {
                return
            }

            guard let depthAndVideoSetupedCaptureSession = setupCaptureSessionForDepth(
                captureSession: videoSetupedCaptureSession
            ) else {
                return
            }

            captureSession = depthAndVideoSetupedCaptureSession
            captureSession.sessionPreset = sessionPreset.preset
            captureSession.commitConfiguration()

            self.captureSession = captureSession
            self.startCaptureSession()
            completion()
        }
    }
    private func setupCaptureSessionForVideo(captureSession: AVCaptureSession,
                                             sessionPreset: SessionPreset) -> AVCaptureSession? {
        let captureSessionVideoOutput: AVCaptureVideoDataOutput = AVCaptureVideoDataOutput()
        captureSessionVideoOutput.videoSettings = [
            kCVPixelBufferPixelFormatTypeKey as String: NSNumber(
                value: kCMPixelFormat_32BGRA
            )
        ]
        captureSessionVideoOutput.alwaysDiscardsLateVideoFrames = true
        captureSessionVideoOutput.setSampleBufferDelegate(
            self.sampleBufferDelegate,
            queue: captureSessionDataOutputQueue
        )

        guard let videoDevice = videoDevice else {
            return nil
        }
        guard let desiredFrameRate = desiredFrameRate else {
            return nil
        }

        // Pick a format that matches the preset dimensions and supports the desired frame rate.
        var formatToSet: AVCaptureDevice.Format = videoDevice.formats[0]
        for format in videoDevice.formats.reversed() {
            let ranges = format.videoSupportedFrameRateRanges
            let frameRates = ranges[0]
            if desiredFrameRate <= frameRates.maxFrameRate,
               format.formatDescription.dimensions.width == sessionPreset.formatWidth,
               format.formatDescription.dimensions.height == sessionPreset.formatHeight {
                formatToSet = format
                break
            }
        }

        do {
            try videoDevice.lockForConfiguration()
            if videoDevice.hasTorch {
                self.manageFlashlight(for: nil, force: .auto)
            }

            videoDevice.activeFormat = formatToSet

            // Record the dimensions of the format that is actually active.
            let dimensions = CMVideoFormatDescriptionGetDimensions(videoDevice.activeFormat.formatDescription)
            bufferSize.width = CGFloat(dimensions.width)
            bufferSize.height = CGFloat(dimensions.height)

            let timescale = CMTimeScale(desiredFrameRate)
            if videoDevice.activeFormat.videoSupportedFrameRateRanges[0].maxFrameRate >= desiredFrameRate {
                videoDevice.activeVideoMinFrameDuration = CMTime(value: 1, timescale: timescale)
                videoDevice.activeVideoMaxFrameDuration = CMTime(value: 1, timescale: timescale)
            }
            videoDevice.unlockForConfiguration()
        } catch {
            return nil
        }

        guard captureSession.canAddOutput(captureSessionVideoOutput) else {
            return nil
        }
        captureSession.addOutput(captureSessionVideoOutput)

        // The video connection only exists once the output has been added to the session.
        let captureConnection = captureSessionVideoOutput.connection(with: .video)
        captureConnection?.isEnabled = true
        if let cameraMode = self.cameraMode,
           CameraMode.modesWithPortraitVideoConnection.contains(cameraMode) {
            captureConnection?.videoOrientation = .portrait
        }

        return captureSession
    }
    private func setupCaptureSessionForDepth(captureSession: AVCaptureSession) -> AVCaptureSession? {
        guard let depthDataOutputDelegate = depthDataOutputDelegate else {
            return nil
        }

        if captureSession.canAddOutput(depthDataOutput) {
            captureSession.addOutput(depthDataOutput)
            depthDataOutput.isFilteringEnabled = false
        } else {
            return nil
        }

        if let connection = depthDataOutput.connection(with: .depthData) {
            connection.isEnabled = true
            depthDataOutput.setDelegate(
                depthDataOutputDelegate,
                callbackQueue: captureSessionDataOutputQueue
            )
        } else {
            return nil
        }

        guard let videoDevice = videoDevice else {
            return nil
        }

        // Use the largest available Float16 depth format.
        let availableFormats = videoDevice.activeFormat.supportedDepthDataFormats
        let availableHdepFormats = availableFormats.filter { format in
            CMFormatDescriptionGetMediaSubType(format.formatDescription) == kCVPixelFormatType_DepthFloat16
        }
        let selectedFormat = availableHdepFormats.max(by: { lower, higher in
            CMVideoFormatDescriptionGetDimensions(lower.formatDescription).width <
                CMVideoFormatDescriptionGetDimensions(higher.formatDescription).width
        })

        do {
            try videoDevice.lockForConfiguration()
            videoDevice.activeDepthDataFormat = selectedFormat
            videoDevice.unlockForConfiguration()
        } catch {
            return nil
        }

        return captureSession
    }

    func startCaptureSession() {
        self.captureSession?.startRunning()
    }

    func stopCaptureSession() {
        self.captureSession?.stopRunning()
    }
}
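For completeness, this is roughly how the view controller registers itself as both delegates. The .objectsRecognition case and setupPreviewLayer() below are placeholders, not names from the actual project:

// Hypothetical call site, e.g. in MainRecognizerViewController.viewDidLoad().
captureSessionManager.setUp(with: self,
                            and: self,
                            for: .objectsRecognition,   // placeholder CameraMode case
                            cameraPosition: .back,
                            desiredFrameRate: 30.0) { [weak self] in
    // The session is configured and running at this point.
    self?.setupPreviewLayer()   // placeholder
}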
The problem is that I receive depth data updates far too slowly: the captureOutput callback of AVCaptureVideoDataOutputSampleBufferDelegate fires much more often than the depthDataOutput callback of AVCaptureDepthDataOutputDelegate.

What could be the reason?
I managed to solve it. The cause of the problem was that CaptureSessionManager used a single queue for both the video data output and the depth data output, which led to the unexpected behaviour.

I added a second, dedicated queue:
private let captureSessionDepthDataOutputQueue = DispatchQueue(
    label: "captureSessionDepthDataOutput",
    qos: .userInitiated,
    attributes: [],
    autoreleaseFrequency: .workItem
)
and set it as the depth output's callback queue:
depthDataOutput.setDelegate(
    depthDataOutputDelegate,
    callbackQueue: captureSessionDepthDataOutputQueue
)
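With that change the two outputs deliver on independent serial queues, so the depth callbacks no longer have to wait behind the video callbacks that were sharing the old queue. The relevant wiring in CaptureSessionManager ends up as:

// Video frames and depth data now arrive on separate serial queues.
captureSessionVideoOutput.setSampleBufferDelegate(
    self.sampleBufferDelegate,
    queue: captureSessionDataOutputQueue               // video only
)
depthDataOutput.setDelegate(
    depthDataOutputDelegate,
    callbackQueue: captureSessionDepthDataOutputQueue  // depth only
)

If the two streams ever need to arrive as matched video/depth pairs instead of at independent rates, AVCaptureDataOutputSynchronizer is the AVFoundation class intended for that; for this use case, separate queues are sufficient.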