I have a ViewController that implements the AVCaptureVideoDataOutputSampleBufferDelegate and AVCaptureDepthDataOutputDelegate protocols. I want to collect both video and depth data: the video frames are used to perform Vision ML requests, and the depth data is used to compute the distance from the camera to a particular point in the scene.
extension MainRecognizerViewController: AVCaptureVideoDataOutputSampleBufferDelegate {
    func captureOutput(_ output: AVCaptureOutput,
                       didOutput sampleBuffer: CMSampleBuffer,
                       from connection: AVCaptureConnection) {
        DispatchQueue.main.async {
            self.captureSessionManager.manageFlashlight(for: sampleBuffer, force: nil)
        }

        guard let cvPixelBuffer = sampleBuffer.convertToPixelBuffer() else {
            return
        }

        let exifOrientation = exifOrientationFromDeviceOrientation()
        let handler = VNImageRequestHandler(cvPixelBuffer: cvPixelBuffer,
                                            orientation: exifOrientation)
        let objectsRecognitionRequest = prepareVisionRequestForObjectsRecognition(
            pixelBuffer: cvPixelBuffer
        )

        DispatchQueue.global().async {
            try? handler.perform([objectsRecognitionRequest])
            try? handler.perform(self.roadLightsRecognizerRequests)
            try? handler.perform(self.pedestrianCrossingRecognizerRequests)
        }
    }
}
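For context, convertToPixelBuffer() and exifOrientationFromDeviceOrientation() are small helpers that are not shown above. A minimal sketch of what they could look like (the orientation mapping follows the convention used in Apple's live-capture Vision samples for the back camera, so treat it as an assumption rather than my exact code):

import AVFoundation
import ImageIO
import UIKit

extension CMSampleBuffer {
    // Pull the pixel buffer out of the sample buffer, if it carries one.
    func convertToPixelBuffer() -> CVPixelBuffer? {
        CMSampleBufferGetImageBuffer(self)
    }
}

// Map the current device orientation to the EXIF orientation Vision expects.
func exifOrientationFromDeviceOrientation() -> CGImagePropertyOrientation {
    switch UIDevice.current.orientation {
    case .portraitUpsideDown:
        return .left
    case .landscapeLeft:
        return .upMirrored
    case .landscapeRight:
        return .down
    default:
        return .up
    }
}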
and here is the depth data delegate:
extension MainRecognizerViewController: AVCaptureDepthDataOutputDelegate {
    func depthDataOutput(_ output: AVCaptureDepthDataOutput,
                         didOutput depthData: AVDepthData,
                         timestamp: CMTime,
                         connection: AVCaptureConnection) {
        if depthMeasurementsLeftInLoop == 0 {
            depthMeasurementsCumul = 0.0
            depthMeasurementMin = 9999.9
            depthMeasurementMax = 0.0
            depthMeasurementsLeftInLoop = depthMeasurementRepeats
        }

        if depthMeasurementsLeftInLoop > 0 {
            let convertedDepthData: AVDepthData = depthData.converting(
                toDepthDataType: kCVPixelFormatType_DepthFloat16
            )
            let depthFrame = convertedDepthData.depthDataMap
            let depthPoint = CGPoint(x: CGFloat(CVPixelBufferGetWidth(depthFrame)) / 2,
                                     y: CGFloat(CVPixelBufferGetHeight(depthFrame)) / 2)
            let depthVal = getDepthValueFromFrame(fromFrame: depthFrame,
                                                  atPoint: depthPoint)
            print(depthVal)

            let measurement = depthVal * 100

            depthMeasurementsCumul += measurement
            if measurement > depthMeasurementMax {
                depthMeasurementMax = measurement
            }
            if measurement < depthMeasurementMin {
                depthMeasurementMin = measurement
            }
            depthMeasurementsLeftInLoop -= 1

            // let printStr = String(format: "Measurement %d: %.2f cm",
            //                       depthMeasurementRepeats - depthMeasurementsLeftInLoop, measurement)

            DispatchQueue.main.async { [weak self] in
                self?.distanceMeasurerViewModel?.distanceString = String(format: "%.2f", measurement)
            }
        }
    }
}
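getDepthValueFromFrame(fromFrame:atPoint:) is another helper that is not shown. Since the depth data is converted to kCVPixelFormatType_DepthFloat16 above, a minimal sketch (assuming iOS 14+ for the Float16 type and a result in metres, not my exact implementation) could be:

import CoreVideo

// Read a single DepthFloat16 value (in metres) at the given pixel coordinate.
func getDepthValueFromFrame(fromFrame pixelBuffer: CVPixelBuffer,
                            atPoint point: CGPoint) -> Float {
    CVPixelBufferLockBaseAddress(pixelBuffer, .readOnly)
    defer { CVPixelBufferUnlockBaseAddress(pixelBuffer, .readOnly) }

    guard let baseAddress = CVPixelBufferGetBaseAddress(pixelBuffer) else {
        return 0
    }
    let bytesPerRow = CVPixelBufferGetBytesPerRow(pixelBuffer)
    let rowPointer = baseAddress + Int(point.y) * bytesPerRow
    let depth = rowPointer.assumingMemoryBound(to: Float16.self)[Int(point.x)]
    return Float(depth)
}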
The whole camera setup happens in my CaptureSessionManager class:
import AVFoundation

final class CaptureSessionManager: CaptureSessionManaging {
    @Inject private var flashlightManager: FlashlightManaging

    private let captureSessionQueue = DispatchQueue(label: "captureSessionQueue")
    private let captureSessionDataOutputQueue = DispatchQueue(
        label: "captureSessionVideoDataOutput",
        qos: .userInitiated,
        attributes: [],
        autoreleaseFrequency: .workItem
    )

    private var sampleBufferOutput: AVCaptureVideoDataOutput = AVCaptureVideoDataOutput()
    private var sampleBufferDelegate: AVCaptureVideoDataOutputSampleBufferDelegate?
    private var depthDataOutput: AVCaptureDepthDataOutput = AVCaptureDepthDataOutput()
    private var depthDataOutputDelegate: AVCaptureDepthDataOutputDelegate?

    var cameraMode: CameraMode?
    private var desiredFrameRate: Double?

    private var videoDevice: AVCaptureDevice? = AVCaptureDevice.default(
        .builtInLiDARDepthCamera,
        for: .video,
        position: .back
    )

    var bufferSize: CGSize = .zero
    var captureSession: AVCaptureSession!

    func setUp(with sampleBufferDelegate: AVCaptureVideoDataOutputSampleBufferDelegate,
               and depthDataOutputDelegate: AVCaptureDepthDataOutputDelegate,
               for cameraMode: CameraMode,
               cameraPosition: AVCaptureDevice.Position,
               desiredFrameRate: Double,
               completion: @escaping () -> ()) {
        stopCaptureSession()
        self.sampleBufferDelegate = sampleBufferDelegate
        self.depthDataOutputDelegate = depthDataOutputDelegate
        self.cameraMode = cameraMode
        self.desiredFrameRate = desiredFrameRate
        authorizeCaptureSession {
            completion()
        }
    }

    func manageFlashlight(for sampleBuffer: CMSampleBuffer?,
                          force torchMode: AVCaptureDevice.TorchMode?) {
        flashlightManager.manageFlashlight(for: sampleBuffer,
                                           and: self.videoDevice,
                                           force: torchMode)
    }

    private func authorizeCaptureSession(completion: @escaping () -> ()) {
        switch AVCaptureDevice.authorizationStatus(for: .video) {
        case .authorized:
            setupCaptureSession {
                completion()
            }
        case .notDetermined:
            AVCaptureDevice.requestAccess(for: .video) { [weak self] granted in
                if granted {
                    self?.setupCaptureSession {
                        completion()
                    }
                }
            }
        default:
            return
        }
    }

    private func setupCaptureSession(completion: @escaping () -> ()) {
        captureSessionQueue.async { [unowned self] in
            var captureSession: AVCaptureSession = AVCaptureSession()
            captureSession.beginConfiguration()

            guard let videoDevice = videoDevice else {
                return
            }

            do {
                let captureDeviceInput = try AVCaptureDeviceInput(device: videoDevice)
                guard captureSession.canAddInput(captureDeviceInput) else {
                    return
                }
                captureSession.addInput(captureDeviceInput)
            } catch {
                return
            }

            let sessionPreset: SessionPreset = .hd1280x720

            guard let videoSetupedCaptureSession: AVCaptureSession = setupCaptureSessionForVideo(
                captureSession: captureSession,
                sessionPreset: sessionPreset
            ) else {
                return
            }

            guard let depthAndVideoSetupedCaptureSession = setupCaptureSessionForDepth(
                captureSession: videoSetupedCaptureSession
            ) else {
                return
            }

            captureSession = depthAndVideoSetupedCaptureSession
            captureSession.sessionPreset = sessionPreset.preset
            captureSession.commitConfiguration()

            self.captureSession = captureSession
            self.startCaptureSession()
            completion()
        }
    }
    private func setupCaptureSessionForVideo(captureSession: AVCaptureSession,
                                             sessionPreset: SessionPreset) -> AVCaptureSession? {
        let captureSessionVideoOutput: AVCaptureVideoDataOutput = AVCaptureVideoDataOutput()
        captureSessionVideoOutput.videoSettings = [
            kCVPixelBufferPixelFormatTypeKey as String: NSNumber(
                value: kCMPixelFormat_32BGRA
            )
        ]
        captureSessionVideoOutput.alwaysDiscardsLateVideoFrames = true
        captureSessionVideoOutput.setSampleBufferDelegate(
            self.sampleBufferDelegate,
            queue: captureSessionDataOutputQueue
        )

        guard let videoDevice = videoDevice else {
            return nil
        }
        guard let desiredFrameRate = desiredFrameRate else {
            return nil
        }

        // Pick a format that matches the preset dimensions and supports the desired frame rate.
        var formatToSet: AVCaptureDevice.Format = videoDevice.formats[0]
        for format in videoDevice.formats.reversed() {
            let ranges = format.videoSupportedFrameRateRanges
            let frameRates = ranges[0]
            if desiredFrameRate <= frameRates.maxFrameRate,
               format.formatDescription.dimensions.width == sessionPreset.formatWidth,
               format.formatDescription.dimensions.height == sessionPreset.formatHeight {
                formatToSet = format
                break
            }
        }

        do {
            try videoDevice.lockForConfiguration()
            if videoDevice.hasTorch {
                self.manageFlashlight(for: nil, force: .auto)
            }

            videoDevice.activeFormat = formatToSet

            // Record the dimensions of the format that is actually active.
            let dimensions = CMVideoFormatDescriptionGetDimensions(videoDevice.activeFormat.formatDescription)
            bufferSize.width = CGFloat(dimensions.width)
            bufferSize.height = CGFloat(dimensions.height)

            let timescale = CMTimeScale(desiredFrameRate)
            if videoDevice.activeFormat.videoSupportedFrameRateRanges[0].maxFrameRate >= desiredFrameRate {
                videoDevice.activeVideoMinFrameDuration = CMTime(value: 1, timescale: timescale)
                videoDevice.activeVideoMaxFrameDuration = CMTime(value: 1, timescale: timescale)
            }
            videoDevice.unlockForConfiguration()
        } catch {
            return nil
        }

        guard captureSession.canAddOutput(captureSessionVideoOutput) else {
            return nil
        }
        captureSession.addOutput(captureSessionVideoOutput)

        // The video connection only exists once the output has been added to the session.
        let captureConnection = captureSessionVideoOutput.connection(with: .video)
        captureConnection?.isEnabled = true
        if let cameraMode = self.cameraMode,
           CameraMode.modesWithPortraitVideoConnection.contains(cameraMode) {
            captureConnection?.videoOrientation = .portrait
        }

        return captureSession
    }
    private func setupCaptureSessionForDepth(captureSession: AVCaptureSession) -> AVCaptureSession? {
        guard let depthDataOutputDelegate = depthDataOutputDelegate else {
            return nil
        }

        if captureSession.canAddOutput(depthDataOutput) {
            captureSession.addOutput(depthDataOutput)
            depthDataOutput.isFilteringEnabled = false
        } else {
            return nil
        }

        if let connection = depthDataOutput.connection(with: .depthData) {
            connection.isEnabled = true
            depthDataOutput.setDelegate(
                depthDataOutputDelegate,
                callbackQueue: captureSessionDataOutputQueue
            )
        } else {
            return nil
        }

        guard let videoDevice = videoDevice else {
            return nil
        }

        // Use the largest available Float16 depth format.
        let availableFormats = videoDevice.activeFormat.supportedDepthDataFormats
        let availableHdepFormats = availableFormats.filter { format in
            CMFormatDescriptionGetMediaSubType(format.formatDescription) == kCVPixelFormatType_DepthFloat16
        }
        let selectedFormat = availableHdepFormats.max(by: { lower, higher in
            CMVideoFormatDescriptionGetDimensions(lower.formatDescription).width <
                CMVideoFormatDescriptionGetDimensions(higher.formatDescription).width
        })

        do {
            try videoDevice.lockForConfiguration()
            videoDevice.activeDepthDataFormat = selectedFormat
            videoDevice.unlockForConfiguration()
        } catch {
            return nil
        }

        return captureSession
    }

    func startCaptureSession() {
        self.captureSession?.startRunning()
    }

    func stopCaptureSession() {
        self.captureSession?.stopRunning()
    }
}
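For completeness, this is roughly how the view controller registers itself as both delegates. The .objectsRecognition case and setupPreviewLayer() below are placeholders, not names from the actual project:

// Hypothetical call site, e.g. in MainRecognizerViewController.viewDidLoad().
captureSessionManager.setUp(with: self,
                            and: self,
                            for: .objectsRecognition,   // placeholder CameraMode case
                            cameraPosition: .back,
                            desiredFrameRate: 30.0) { [weak self] in
    // The session is configured and running at this point.
    self?.setupPreviewLayer()   // placeholder
}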
The problem is that I receive depth data updates far too slowly: the captureOutput callback of AVCaptureVideoDataOutputSampleBufferDelegate fires much more often than the depthDataOutput callback of AVCaptureDepthDataOutputDelegate.

What could be the reason?
I managed to solve it. The cause of the problem was that CaptureSessionManager used a single queue for both the video data output and the depth data output, which led to the unexpected behaviour.

I added a second, dedicated queue:
private let captureSessionDepthDataOutputQueue = DispatchQueue(
    label: "captureSessionDepthDataOutput",
    qos: .userInitiated,
    attributes: [],
    autoreleaseFrequency: .workItem
)
and set it as the depth output's callback queue:
depthDataOutput.setDelegate(
    depthDataOutputDelegate,
    callbackQueue: captureSessionDepthDataOutputQueue
)
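With that change the two outputs deliver on independent serial queues, so the depth callbacks no longer have to wait behind the video callbacks that were sharing the old queue. The relevant wiring in CaptureSessionManager ends up as:

// Video frames and depth data now arrive on separate serial queues.
captureSessionVideoOutput.setSampleBufferDelegate(
    self.sampleBufferDelegate,
    queue: captureSessionDataOutputQueue               // video only
)
depthDataOutput.setDelegate(
    depthDataOutputDelegate,
    callbackQueue: captureSessionDepthDataOutputQueue  // depth only
)

If the two streams ever need to arrive as matched video/depth pairs instead of at independent rates, AVCaptureDataOutputSynchronizer is the AVFoundation class intended for that; for this use case, separate queues are sufficient.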