I am loading an RGB image with PIL and then processing it into a numpy array so I can create a point cloud from an RGBD image; the depth map is produced successfully with the GLPN-Transformer depth estimation model. I convert both the depth map and the input image to the open3d.geometry.Image() dtype. However, I get the following error:
rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(image_3d, depth_3d, convert_rgb_to_intensity=False)
RuntimeError: [Open3D Error] (class std::shared_ptr<class open3d::geometry::RGBDImage> __cdecl open3d::geometry::RGBDImage::CreateFromColorAndDepth(const class open3d::geometry::Image &,const class open3d::geometry::Image &,double,double,bool)) D:\a\Open3D\Open3D\cpp\open3d\geometry\RGBDImageFactory.cpp:21: Unsupported image format.
It says the image format is unsupported, even though create_from_color_and_depth expects o3d.geometry.Image as the data type.
Full code:
import cv2
import torch
import open3d as o3d
import numpy as np
import matplotlib.pyplot as plt
from transformers import GLPNImageProcessor, GLPNForDepthEstimation
from PIL import Image
def LoadImageCV(input_path):
    input_image = cv2.imread(input_path)
    input_image = cv2.resize(input_image, (1000, 1000), interpolation=cv2.INTER_LINEAR)
    return cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB)
def LoadImagePIL(input_path):
    input_image = Image.open(input_path)
    return input_image.resize((1000, 1000))
def DisplayImage(input, colormap):
    plt.imshow(input, cmap=colormap)
    plt.show()
def GenerateDepthMap(input_image):
    # load image processing and depth estimation models
    feature_extractor = GLPNImageProcessor.from_pretrained("vinvino02/glpn-nyu")
    model = GLPNForDepthEstimation.from_pretrained("vinvino02/glpn-nyu")
    inputs = feature_extractor(images=input_image, return_tensors="pt")
    # get predictions
    with torch.no_grad():
        outputs = model(**inputs)
        predicted_depth = outputs.predicted_depth
    # process output into optimal sizes
    padding = 16
    output = predicted_depth.squeeze().cpu().numpy() * 1000.0
    output = output[padding:-padding, padding:-padding]
    return output
def GeneratePointcloud3D(input_image, depth_map):
    # get sizes of input image and convert to numpy array for processing
    width, height = input_image.size
    input_image = np.array(input_image)
    # process depth map
    depth_map = (depth_map * 255 / np.max(depth_map)).astype("uint8")
    depth_3d = o3d.geometry.Image(depth_map)
    image_3d = o3d.geometry.Image(input_image)
    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(image_3d, depth_3d, convert_rgb_to_intensity=False)
    # camera settings
    camera_intrinsic = o3d.camera.PinholeCameraIntrinsic()
    camera_intrinsic.set_intrinsic(width, height, 500, 500, width / 2, height / 2)
    # generate pointcloud
    pointcloud = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, camera_intrinsic)
    return pointcloud
def GenerateMesh3D(pointcloud):
    # outliers removal
    cl, index = pointcloud.remove_statistical_outlier(nb_neighbors=20, std_ratio=20.0)
    pointcloud = pointcloud.select_by_index(index)
    # get normals from pointcloud
    pointcloud.estimate_normals()
    pointcloud.orient_normals_to_align_with_direction()
    # construct mesh
    mesh = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(pointcloud, depth=10, n_threads=1)[0]
    # rotate mesh
    rotation = mesh.get_rotation_matrix_from_xyz((np.pi, 0, 0))
    mesh.rotate(rotation, center=(0, 0, 0))
    return mesh
def main():
    # input image path
    INPUT_PATH = "src/Images/sample1.jpg"
    # load input image
    input_image = LoadImagePIL(INPUT_PATH)
    # input_image = LoadImageCV(INPUT_PATH)
    # get depth map
    depth_map = GenerateDepthMap(input_image)
    DisplayImage(depth_map, "plasma")
    # get Pointcloud
    pointcloud = GeneratePointcloud3D(input_image, depth_map)
    # get mesh
    mesh = GenerateMesh3D(pointcloud)
    # display pointcloud and mesh
    o3d.visualizations.draw_geometries([pointcloud])
    o3d.visualizations.draw_geometries([mesh], mesh_show_back_face=True)
    cv2.waitKey(0)
if __name__ == "__main__":
    main()
The resolutions of the color and depth images do not match. See the Open3D source code here.
The mismatch occurs because the model output has different dimensions than the color image, and it is then cropped further by
output[padding:-padding, padding:-padding]
(not sure why that is needed).
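As a quick sanity check (a minimal sketch reusing the variable names from the question's code), you can print both shapes right before building the RGBD image; they have to be identical for create_from_color_and_depth to accept the pair:
# Hypothetical shape check; both prints must report the same height/width,
# otherwise Open3D rejects the pair with "Unsupported image format".
color_arr = np.asarray(input_image)   # (H, W, 3) color image after the resize
depth_arr = np.asarray(depth_map)     # (h, w) depth map after the model + crop
print("color:", color_arr.shape[:2])
print("depth:", depth_arr.shape[:2])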
You can either crop the color image so that the RGBD image is created only where both color and depth values exist, or leave the color image unchanged and pad the depth image with 0/infinity (whichever you prefer; note that infinity means as far away as possible).
Cropping the color image:
# Assuming the neural network loses an equal margin on all sides
p = (width - depth_map.shape[0]) // 2 # p for padding
input_image = input_image[p:-p, p:-p]
Padding the depth image:
# Assuming the neural network loses an equal margin on all sides
p = padding = (width - depth_map.shape[0]) // 2 # p for padding
depth_map = np.pad(depth_map, [(p, p), (p, p)], mode="constant", constant_values=0)
Replace set_intrinsic with the proper constructor:
camera_intrinsic = o3d.camera.PinholeCameraIntrinsic(width, height, 500, 500, width / 2, height / 2)
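For reference, a small sketch of both valid ways to build the intrinsics (the 500/500 focal lengths are just the placeholder values already used above); note that the instance method is spelled set_intrinsics, with a trailing s, which is presumably why the original call failed:
# Option 1: pass everything to the constructor
camera_intrinsic = o3d.camera.PinholeCameraIntrinsic(width, height, 500, 500, width / 2, height / 2)
# Option 2: create an empty object and fill it in; the method is
# set_intrinsics, not set_intrinsic as in the question's code
camera_intrinsic = o3d.camera.PinholeCameraIntrinsic()
camera_intrinsic.set_intrinsics(width, height, 500, 500, width / 2, height / 2)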
Fix the o3d.visualization package name: the module is visualization, without an "s" (the code above calls o3d.visualizations, which does not exist).
Make sure the numpy arrays are contiguous before wrapping them in the o3d Image type.
depth_3d = o3d.geometry.Image(np.ascontiguousarray(depth_map))
It is not clear why
depth_map = (depth_map * 255 / np.max(depth_map)).astype("uint8")
was done. Open3D expects the actual depth map, without any uint8 conversion.
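A minimal sketch of passing the float depth straight through instead (the depth_trunc value is illustrative, not from the original post); since the depth map was scaled by * 1000.0 above, the default depth_scale=1000.0 of create_from_color_and_depth converts it back to metres:
# Keep the depth map as float values instead of compressing it to uint8.
depth_float = np.ascontiguousarray(depth_map).astype(np.float32)
depth_3d = o3d.geometry.Image(depth_float)
image_3d = o3d.geometry.Image(np.ascontiguousarray(input_image))
# depth_scale=1000.0 (the default) maps the millimetre-style values back to
# metres; depth_trunc drops points further than 10 metres (illustrative value).
rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
    image_3d, depth_3d, depth_scale=1000.0, depth_trunc=10.0, convert_rgb_to_intensity=False
)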
import cv2
import matplotlib.pyplot as plt
import numpy as np
import open3d as o3d
import torch
from PIL import Image
from transformers import GLPNImageProcessor, GLPNForDepthEstimation
def LoadImageCV(input_path):
    input_image = cv2.imread(input_path)
    input_image = cv2.resize(input_image, (1000, 1000), interpolation=cv2.INTER_LINEAR)
    return cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB)
def LoadImagePIL(input_path):
    input_image = Image.open(input_path)
    return input_image.resize((1000, 1000))
def DisplayImage(input, colormap=None):
    plt.imshow(input, cmap=colormap)
    plt.show()
def GenerateDepthMap(input_image):
    # load image processing and depth estimation models
    feature_extractor = GLPNImageProcessor.from_pretrained("vinvino02/glpn-nyu")
    model = GLPNForDepthEstimation.from_pretrained("vinvino02/glpn-nyu")
    inputs = feature_extractor(images=input_image, return_tensors="pt")
    # get predictions
    with torch.no_grad():
        outputs = model(**inputs)
        predicted_depth = outputs.predicted_depth
    # process output into optimal sizes
    padding = 16
    output = predicted_depth.squeeze().cpu().numpy() * 1000.0
    output = output[padding:-padding, padding:-padding]
    return output
def generate_pointcloud_3D(input_image, depth_map):
    # get sizes of input image and convert to numpy array for processing
    width, height = input_image.size
    input_image = np.array(input_image)
    # process depth map
    # Assuming the neural network loses an equal margin on all sides
    p = (width - depth_map.shape[0]) // 2  # p for padding
    # depth_map = np.pad(depth_map, [(p, p), (p, p)], mode="constant", constant_values=0)
    input_image = input_image[p:-p, p:-p]
    depth_3d = o3d.geometry.Image(np.ascontiguousarray(depth_map))
    image_3d = o3d.geometry.Image(np.ascontiguousarray(input_image))
    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(image_3d, depth_3d, convert_rgb_to_intensity=False)
    # camera settings
    camera_intrinsic = o3d.camera.PinholeCameraIntrinsic(
        input_image.shape[0], input_image.shape[1], 500, 500, input_image.shape[0] / 2, input_image.shape[1] / 2
    )
    # generate pointcloud
    pointcloud = o3d.geometry.PointCloud.create_from_rgbd_image(
        rgbd_image,
        camera_intrinsic,
    )
    pointcloud.transform([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
    return pointcloud
def GenerateMesh3D(pointcloud):
    # outliers removal
    cl, index = pointcloud.remove_statistical_outlier(nb_neighbors=20, std_ratio=20.0)
    pointcloud = pointcloud.select_by_index(index)
    # get normals from pointcloud
    pointcloud.estimate_normals()
    pointcloud.orient_normals_to_align_with_direction()
    # construct mesh
    mesh = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(pointcloud, depth=10, n_threads=1)[0]
    # rotate mesh
    rotation = mesh.get_rotation_matrix_from_xyz((np.pi, 0, 0))
    mesh.rotate(rotation, center=(0, 0, 0))
    return mesh
def main():
    # input image path
    INPUT_PATH = "data/2023-03-13_14-07-39/image/00000.jpg"
    # load input image
    input_image = LoadImagePIL(INPUT_PATH)
    DisplayImage(input_image.copy(), colormap=None)
    # get depth map
    depth_map = GenerateDepthMap(input_image)
    DisplayImage(depth_map.copy(), colormap="plasma")
    # get Pointcloud
    pointcloud = generate_pointcloud_3D(input_image, depth_map)
    # get mesh
    mesh = GenerateMesh3D(pointcloud)
    # display pointcloud and mesh
    o3d.visualization.draw_geometries([pointcloud])
    o3d.visualization.draw_geometries([mesh], mesh_show_back_face=True)
if __name__ == "__main__":
    main()
Result: