在 ROS 中对齐来自英特尔实感 D435i 的红外和 RGB 图像的问题

问题描述 投票:0回答:1

我正在开展一个项目,使用 ROS 对齐来自英特尔实感 D435i 摄像头的红外和 RGB 图像。我的目标是订阅 RGB 和红外图像主题并发布对齐的红外图像以匹配 RGB 图像帧。但是,我面临的问题是对齐的图像没有完全重叠。

这是我正在使用的代码:

#!/usr/bin/env python3
import rospy
import message_filters
from sensor_msgs.msg import Image, CameraInfo
from cv_bridge import CvBridge, CvBridgeError
import cv2
import numpy as np
import tf

class ImageAligner:
    def __init__(self):
        self.bridge = CvBridge()
        self.tf_listener = tf.TransformListener()
        
        self.rgb_image_sub = message_filters.Subscriber("/camera/color/image_raw", Image)
        self.infra_image_sub = message_filters.Subscriber("/camera/infra1/image_rect_raw", Image)
        self.rgb_info_sub = message_filters.Subscriber("/camera/color/camera_info", CameraInfo)
        self.infra_info_sub = message_filters.Subscriber("/camera/infra1/camera_info", CameraInfo)
        
        self.ts = message_filters.ApproximateTimeSynchronizer([self.rgb_image_sub, self.infra_image_sub, self.rgb_info_sub, self.infra_info_sub], 10, 0.1)
        self.ts.registerCallback(self.align_image)
        
        self.aligned_infra_pub = rospy.Publisher("/camera/aligned_infra1_to_color/image_raw", Image, queue_size=10)
        self.aligned_infra_info_pub = rospy.Publisher('camera/aligned_infra1_to_color/camera_info', CameraInfo, queue_size=10)
    
    def align_image(self, rgb_image, infra_image, rgb_info, infra_info):
        rospy.loginfo("Callback triggered.")
        try:
            rgb_cv_image = self.bridge.imgmsg_to_cv2(rgb_image, "bgr8")
            infra_cv_image = self.bridge.imgmsg_to_cv2(infra_image, "mono8")
            rospy.loginfo("Converted images to OpenCV format. RGB size: {}, Infra size: {}".format(rgb_cv_image.shape, infra_cv_image.shape))
            
            K_rgb = np.array(rgb_info.K).reshape((3, 3)).astype(np.float32)
            K_infra = np.array(infra_info.K).reshape((3, 3)).astype(np.float32)
            rospy.loginfo("Obtained camera matrices.")
            
            try:
                self.tf_listener.waitForTransform("camera_color_optical_frame", "camera_infra1_optical_frame", rospy.Time(0), rospy.Duration(1.0))
                (trans, rot) = self.tf_listener.lookupTransform("camera_color_optical_frame", "camera_infra1_optical_frame", rospy.Time(0))
                rospy.loginfo("Transformation from infrared to RGB camera obtained.")
            except (tf.Exception, tf.LookupException, tf.ConnectivityException) as e:
                rospy.logerr("Failed to get transformation from infrared to RGB camera: {}".format(e))
                return
            
            rot_matrix = tf.transformations.quaternion_matrix(rot)[:3, :3].astype(np.float32)
            trans_matrix = np.array(trans).reshape((3, 1)).astype(np.float32)
            rospy.loginfo("Rotation matrix: {}".format(rot_matrix))
            rospy.loginfo("Translation matrix: {}".format(trans_matrix))  
            
            n = rot_matrix[:, 2]
            d = np.linalg.norm(n)
            rospy.loginfo("Normal vector: {}".format(n))
            rospy.loginfo("Distance from camera: {}".format(d))
            
            h_matrix = np.dot(K_rgb, np.dot((rot_matrix-trans_matrix*np.transpose(n)/d), np.linalg.inv(K_infra)))
            rospy.loginfo("Computed homography matrix.")
            
            infra_aligned = cv2.warpPerspective(infra_cv_image, h_matrix, (rgb_cv_image.shape[1], rgb_cv_image.shape[0]))
            rospy.loginfo("Warped infrared image to RGB perspective. Size: {}".format(infra_aligned.shape))
            
            if infra_aligned is not None and infra_aligned.size > 0:
                infra_aligned_msg = self.bridge.cv2_to_imgmsg(infra_aligned, "mono8")
                rospy.loginfo("Converted aligned infrared image to ROS format.")
                self.aligned_infra_pub.publish(infra_aligned_msg)
                rospy.loginfo("Published aligned infrared image.")
            else:
                rospy.logwarn("Aligned infrared image is empty. Skipping publish.")
            
            self.aligned_camera_info(infra_info)
                
        except CvBridgeError as e:
            rospy.logerr("CvBridge Error: {0}".format(e))
        except Exception as e:
            rospy.logerr("Error in aligning images: {0}".format(e))
            
    def aligned_camera_info(self, infra_info):
        if infra_info is None:
            return

        aligned_camera_info = infra_info
        aligned_camera_info.header.frame_id = 'camera_color_optical_frame'
        
        self.aligned_infra_info_pub.publish(aligned_camera_info)

if __name__ == '__main__':
    rospy.init_node('image_aligner', anonymous=True)
    ImageAligner()
    try:
        rospy.spin()
    except KeyboardInterrupt:
        rospy.loginfo("Shutting down image aligner node.")

对齐后图像未完全重叠。我怀疑问题可能与单应矩阵计算或相机帧之间的转换有关。

RGB 图像

红外图像(原版)

对准的红外图像

详情:

  • 我对 RGB 图像使用 /camera/color/image_raw 主题,对红外图像使用 /camera/infra1/image_rect_raw 主题。
  • 单应性矩阵是使用相机矩阵以及红外和 RGB 相机之间的变换来计算的。

问题:

  1. 单应性矩阵计算是否有错误?
  2. 是否需要任何额外的步骤来确保图像完全重叠?
  3. 是否有更好的方法使用 ROS 和 OpenCV 来对齐这些图像?

任何解决此问题的指导或建议将不胜感激。

image-processing computer-vision ros homography realsense
1个回答
0
投票

为了对齐 2 个不同相机获取的图像,您需要以下信息:

  • RGB相机内在矩阵(3x3)
  • 红外相机固有矩阵(3x3)
  • 代表 RGB 和 IR 相机之间变换的外在矩阵 (4x4)
  • rgb图像大小

此Python代码将对齐IR图像,返回可与RGB图像重叠的图像:

import numpy as np

def align(
    intrinsics_ir, intrinsics_rgb, extrinsics_rgb2ir, ir_image, rgb_image_shape
) -> np.ndarray:
    width, height = rgb_image_shape
    out = np.zeros((height, width)) / 0
    y, x = np.meshgrid(
        np.arange(ir_image.shape[0]), np.arange(ir_image.shape[1]), indexing="ij"
    )
    x = x.reshape(1, -1)
    y = y.reshape(1, -1)
    z = ir_image.reshape(1, -1)
    x = (x - intrinsics_ir[0, 2]) / intrinsics_ir[0, 0]
    y = (y - intrinsics_ir[1, 2]) / intrinsics_ir[1, 1]
    pts = np.vstack((x * z, y * z, z))
    pts = extrinsics_rgb2ir[:3, :3] @ pts + extrinsics_rgb2ir[:3, 3:]
    pts = intrinsics_rgb @ pts
    px = np.round(pts[0, :] / pts[2, :])
    py = np.round(pts[1, :] / pts[2, :])
    mask = (px >= 0) * (py >= 0) * (px < width) * (py < height)
    out[py[mask].astype(int), px[mask].astype(int)] = pts[2, mask]
    return out
© www.soinside.com 2019 - 2024. All rights reserved.