Open3D 中不支持的图像格式从 RGBD 图像创建点云

问题描述 投票:0回答:1

我正在使用 PIL 加载 RGB 图像,然后将图像处理为 numpy 数组以便从 RGBD 图像创建点云,深度图通过使用 GLPN-Transformer 深度估计模型成功处理。我将深度图和输入图像都处理成 open3.geometry.Image() dtype。但是,我收到以下错误:

    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(image_3d, depth_3d, convert_rgb_to_intensity=False)
RuntimeError: [Open3D Error] (class std::shared_ptr<class open3d::geometry::RGBDImage> __cdecl open3d::geometry::RGBDImage::CreateFromColorAndDepth(const class open3d::geometry::Image &,const class open3d::geometry::Image &,double,double,bool)) D:\a\Open3D\Open3D\cpp\open3d\geometry\RGBDImageFactory.cpp:21: Unsupported image format.

它说它是一种不受支持的图像格式,但是,

create_from_color_and_depth
需要 o3d.geometry.Image 作为数据类型

完整代码:

import cv2
import torch
import open3d as o3d 
import numpy as np
import matplotlib.pyplot as plt

from transformers import GLPNImageProcessor, GLPNForDepthEstimation
from PIL import Image

def LoadImageCV(input_path):
    input_image = cv2.imread(input_path)
    input_image = cv2.resize(input_image, (1000, 1000), interpolation=cv2.INTER_LINEAR)
    return cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB)

def LoadImagePIL(input_path):
    input_image = Image.open(input_path)
    return input_image.resize((1000, 1000))

def DisplayImage(input, colormap):
    plt.imshow(input, cmap=colormap)
    plt.show()

def GenerateDepthMap(input_image):
    # load image processing and depth estimation models
    feature_extractor = GLPNImageProcessor.from_pretrained("vinvino02/glpn-nyu")
    model = GLPNForDepthEstimation.from_pretrained("vinvino02/glpn-nyu")

    inputs = feature_extractor(images=input_image, return_tensors="pt")

    # get predictions
    with torch.no_grad():
        outputs = model(**inputs)
        predicted_depth = outputs.predicted_depth

    # process output into optimal sizes
    padding = 16
    output = predicted_depth.squeeze().cpu().numpy() * 1000.0
    output = output[padding:-padding, padding:-padding]

    return output

def GeneratePointcloud3D(input_image, depth_map):
    # get sizes of input image and convert to numpy array for processing
    width, height = input_image.size
    input_image = np.array(input_image)

    # process depth map
    depth_map = (depth_map * 255 / np.max(depth_map)).astype("uint8")

    depth_3d = o3d.geometry.Image(depth_map)
    image_3d = o3d.geometry.Image(input_image)
    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(image_3d, depth_3d, convert_rgb_to_intensity=False)

    # camera settings 
    camera_intrinsic = o3d.camera.PinholeCameraIntrinsic()
    camera_intrinsic.set_intrinsic(width, height, 500, 500, width / 2, height / 2)

    # generate pointcloud 
    pointcloud = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, camera_intrinsic)

    return pointcloud

def GenerateMesh3D(pointcloud):
    # outliers removal
    cl, index = pointcloud.remove_statistical_outlier(nb_neighbors=20, std_ratio=20.0)
    pointcloud = pointcloud.select_by_index(index)

    # get normals from pointcloud
    pointcloud.estimate_normals()
    pointcloud.orient_normals_to_align_with_direction()

    # construct mesh
    mesh = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(pointcloud, depth=10, n_threads=1)[0]

    # rotate mesh
    rotation = mesh.get_rotation_matrix_from_xyz((np.pi, 0, 0))
    mesh.rotate(rotation, center=(0, 0, 0))

    return mesh

def main():
    # input image path
    INPUT_PATH = "src/Images/sample1.jpg"

    # load input image
    input_image = LoadImagePIL(INPUT_PATH)
    # input_image = LoadImageCV(INPUT_PATH)

    # get depth map
    depth_map = GenerateDepthMap(input_image)
    DisplayImage(depth_map, "plasma")

    # get Pointcloud
    pointcloud = GeneratePointcloud3D(input_image, depth_map)

    # get mesh
    mesh = GenerateMesh3D(pointcloud)

    # display pointcloud and mesh
    o3d.visualizations.draw_geometries([pointcloud])
    o3d.visualizations.draw_geometries([mesh], mesh_show_back_face=True)

    cv2.waitKey(0)

if __name__ == "__main__":
    main()

python image 3d open3d
1个回答
0
投票

主要问题

颜色和深度图像的分辨率不匹配。请参阅 Open3D 代码here.

不匹配的发生是由于模型输出与彩色图像的尺寸不同,而且通过以下方式进行了另一次裁剪:

output[padding:-padding, padding:-padding]
(不确定为什么需要这样做)。

您可以裁剪彩色图像,以便创建具有颜色和深度值的 RGBD 图像,或者保持彩色图像不变,并用 0/无穷大填充深度图像(无论您喜欢哪个,请注意 - 无穷大意味着尽可能远可能)。

裁剪彩色图像:-

# Assuming neural network lead to loss of margin on all sides equally
p = (width - depth_map.shape[0]) // 2  # p for padding
input_image = input_image[p:-p, p:-p]

填充深度图像:-

# Assuming neural network lead to loss of margin on all sides equally
p = padding = (width - depth_map.shape[0]) // 2  # p for padding
depth_map = np.pad(depth_map, [(p, p), (p, p)], mode="constant", constant_values=0)

其他小错误:

  1. 删除
    set_intrinsic
    并使用正确的构造函数。

camera_intrinsic = o3d.camera.PinholeCameraIntrinsic(width, height, 500, 500, width / 2, height / 2) 

  1. 修复

    o3d.visualization
    包名称。
    s
    中没有
    visualization

  2. 确保 numpy 数组在映射到 o3d 图像类型之前是连续的

depth_3d = o3d.geometry.Image(np.ascontiguousarray(depth_map))
  1. 深度图缩放成图像

不明白为什么

depth_map = (depth_map * 255 / np.max(depth_map)).astype("uint8")
完成了。 Open3D 需要实际的深度图,无需任何 uint8 转换。

完整代码

import cv2
import matplotlib.pyplot as plt
import numpy as np
import open3d as o3d
import torch
from PIL import Image
from transformers import GLPNImageProcessor, GLPNForDepthEstimation


def LoadImageCV(input_path):
    input_image = cv2.imread(input_path)
    input_image = cv2.resize(input_image, (1000, 1000), interpolation=cv2.INTER_LINEAR)
    return cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB)


def LoadImagePIL(input_path):
    input_image = Image.open(input_path)
    return input_image.resize((1000, 1000))


def DisplayImage(input, colormap=None):
    plt.imshow(input, cmap=colormap)
    plt.show()


def GenerateDepthMap(input_image):
    # load image processing and depth estimation models
    feature_extractor = GLPNImageProcessor.from_pretrained("vinvino02/glpn-nyu")
    model = GLPNForDepthEstimation.from_pretrained("vinvino02/glpn-nyu")

    inputs = feature_extractor(images=input_image, return_tensors="pt")

    # get predictions
    with torch.no_grad():
        outputs = model(**inputs)
        predicted_depth = outputs.predicted_depth

    # process output into optimal sizes
    padding = 16
    output = predicted_depth.squeeze().cpu().numpy() * 1000.0
    output = output[padding:-padding, padding:-padding]

    return output


def generate_pointcloud_3D(input_image, depth_map):
    # get sizes of input image and convert to numpy array for processing
    width, height = input_image.size
    input_image = np.array(input_image)

    # process depth map
    # Assuming neural network lead to loss of margin on all sides equally
    p = (width - depth_map.shape[0]) // 2  # p for padding
    # depth_map = np.pad(depth_map, [(p, p), (p, p)], mode="constant", constant_values=0)

    input_image = input_image[p:-p, p:-p]

    depth_3d = o3d.geometry.Image(np.ascontiguousarray(depth_map))
    image_3d = o3d.geometry.Image(np.ascontiguousarray(input_image))
    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(image_3d, depth_3d, convert_rgb_to_intensity=False)

    # camera settings
    camera_intrinsic = o3d.camera.PinholeCameraIntrinsic(
        input_image.shape[0], input_image.shape[1], 500, 500, input_image.shape[0] / 2, input_image.shape[1] / 2
    )

    # generate pointcloud
    pointcloud = o3d.geometry.PointCloud.create_from_rgbd_image(
        rgbd_image,
        camera_intrinsic,
    )
    pointcloud.transform([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])

    return pointcloud


def GenerateMesh3D(pointcloud):
    # outliers removal
    cl, index = pointcloud.remove_statistical_outlier(nb_neighbors=20, std_ratio=20.0)
    pointcloud = pointcloud.select_by_index(index)

    # get normals from pointcloud
    pointcloud.estimate_normals()
    pointcloud.orient_normals_to_align_with_direction()

    # construct mesh
    mesh = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(pointcloud, depth=10, n_threads=1)[0]

    # rotate mesh
    rotation = mesh.get_rotation_matrix_from_xyz((np.pi, 0, 0))
    mesh.rotate(rotation, center=(0, 0, 0))

    return mesh


def main():
    # input image path
    INPUT_PATH = "data/2023-03-13_14-07-39/image/00000.jpg"

    # load input image
    input_image = LoadImagePIL(INPUT_PATH)
    DisplayImage(input_image.copy(), colormap=None)

    # get depth map
    depth_map = GenerateDepthMap(input_image)
    DisplayImage(depth_map.copy(), colormap="plasma")

    # get Pointcloud
    pointcloud = generate_pointcloud_3D(input_image, depth_map)

    # get mesh
    mesh = GenerateMesh3D(pointcloud)

    # display pointcloud and mesh
    o3d.visualization.draw_geometries([pointcloud])
    o3d.visualization.draw_geometries([mesh], mesh_show_back_face=True)


if __name__ == "__main__":
    main()

结果:-

© www.soinside.com 2019 - 2024. All rights reserved.