如何将300度等距柱状全景图像转换为立方体面？

Question

我想使用 Python 中的 OpenCV 将 300 度等距柱状全景图像转换为立方体面。我找到了 360 度图像的代码。如何修改它以处理 300 度图像？

import cv2
import numpy as np


def equirectangular_to_cube(img, cube_size, fov_deg=360.0):
    """Resample an equirectangular panorama into six cube-map faces.

    The conversion is an inverse mapping: for every pixel of every cube face
    the viewing direction is computed, converted to longitude/latitude, and
    the panorama is sampled there with bilinear interpolation.

    The panorama is assumed to be horizontally centred on the forward
    direction and to span ``fov_deg`` degrees of longitude and 180 degrees of
    latitude.  With the default of 360 the first and last columns correspond
    to -pi and +pi, i.e. the same convention as
    ``np.linspace(-np.pi, np.pi, num=w)``.  For a narrower panorama
    (e.g. ``fov_deg=300``) directions outside the covered range are black.

    Args:
        img: Source panorama as a NumPy image array (rows, cols[, channels]).
        cube_size: Edge length of each cube face in pixels.
        fov_deg: Horizontal field of view covered by ``img``, in degrees.

    Returns:
        Array of shape ``(cube_size, cube_size * 6[, channels])`` holding the
        faces side by side in the order front, right, back, left, top, bottom.

    Raises:
        ValueError: If ``cube_size`` is not positive or ``fov_deg`` is not in
            the range (0, 360].
    """
    if cube_size <= 0:
        raise ValueError("cube_size must be a positive integer")
    if not 0.0 < fov_deg <= 360.0:
        raise ValueError("fov_deg must be in the range (0, 360]")

    h, w = img.shape[:2]

    # Pixel-centre coordinates on a face in [-1, 1]:
    # ``a`` grows left-to-right, ``b`` grows top-to-bottom.
    ticks = (np.arange(cube_size, dtype=np.float32) + 0.5) * (2.0 / cube_size) - 1.0
    a, b = np.meshgrid(ticks, ticks)
    one = np.ones_like(a)

    # Viewing direction (x right, y up, z forward) for every face pixel.
    # The orientations are chosen so neighbouring faces share their edges.
    directions = (
        (a, -b, one),     # front
        (one, -b, -a),    # right
        (-a, -b, -one),   # back
        (-one, -b, a),    # left
        (a, one, b),      # top (bottom edge touches the front face)
        (a, -one, -b),    # bottom (top edge touches the front face)
    )

    half_fov = np.radians(fov_deg) / 2.0
    cube_faces = np.zeros((cube_size, cube_size * 6) + img.shape[2:], dtype=img.dtype)

    for index, (x, y, z) in enumerate(directions):
        lon = np.arctan2(x, z)                 # -pi .. pi, 0 is straight ahead
        lat = np.arctan2(y, np.hypot(x, z))    # -pi/2 .. pi/2, positive is up

        # Source pixel coordinates; longitudes outside +-half_fov fall outside
        # [0, w - 1] and are filled with the constant border colour (black).
        map_x = ((lon + half_fov) / (2.0 * half_fov) * (w - 1)).astype(np.float32)
        map_y = ((np.pi / 2.0 - lat) / np.pi * (h - 1)).astype(np.float32)

        face = cv2.remap(img, map_x, map_y, cv2.INTER_LINEAR,
                         borderMode=cv2.BORDER_CONSTANT, borderValue=0)
        cube_faces[:, index * cube_size:(index + 1) * cube_size] = face

    return cube_faces


# Usage example
image_path = 'path/to/300_degree_image.jpg'
cube_size = 512
img = cv2.imread(image_path)
# cv2.imread reports failure by returning None instead of raising, so fail
# here with a clear message rather than later inside the conversion.
if img is None:
    raise FileNotFoundError("Could not read image: " + image_path)
cube_faces = equirectangular_to_cube(img, cube_size)

# Save the cube faces as separate images. The faces are stored side by side
# in the strip, each cube_size pixels wide, in this order.
face_names = ("front", "right", "back", "left", "top", "bottom")
for index, face_name in enumerate(face_names):
    face = cube_faces[:, index * cube_size:(index + 1) * cube_size]
    cv2.imwrite(face_name + ".jpg", face)

它使用

np.linspace(-np.pi, np.pi, num=w, dtype=np.float32)

定义 360 度图像的映射坐标。如何修改它以适应视野的缩小？

图片：

我尝试过的：

# convert using an inverse transformation
def convertBack(imgIn,imgOut):
    """Fill ``imgOut`` pixel by pixel by sampling ``imgIn`` (inverse mapping).

    Each visited output pixel is turned into an (x, y, z) point by
    ``outImgToXYZ``, the point is converted to the angles ``theta``/``phi``,
    and the angles are converted to source coordinates ``(uf, vf)``.  If
    ``uf`` lies left of the source width the source is sampled with bilinear
    interpolation, otherwise the output pixel is written as black.  Output
    pixels outside the visited row ranges are not written.

    Relies on ``outImgToXYZ``, ``atan2``, ``hypot``, ``pi``, ``floor`` and
    ``clip`` being defined elsewhere (not shown in this snippet).

    Args:
        imgIn: Source image; must provide ``.size`` and ``.load()``
            (presumably a PIL image -- confirm).  Pixels are read as
            3-component values.
        imgOut: Destination image with the same interface; written in place.

    Returns:
        None.
    """
    inSize = imgIn.size
    outSize = imgOut.size
    inPix = imgIn.load()
    outPix = imgOut.load()
    #
    # Source size with the width scaled by 360/300 (presumably the width the
    # panorama would have if it covered a full 360 degrees -- confirm).
    extendedSize = [0,0]
    extendedSize[0] = int(360/300 * inSize[0])
    extendedSize[1] = inSize[1]
    edge = int(extendedSize[0]/4)   # theoretical length of each edge
    # NOTE(review): edge is derived from the source width but is also used
    # below to split the *output* columns into faces (i/edge); that only lines
    # up if outSize[0] == 4*edge -- confirm against the caller.
    #
    for i in range(outSize[0]):
        face = int(i/edge) # 0 - back, 1 - left 2 - front, 3 - right
        # Only the face-2 columns span three edge-lengths of rows (top, middle,
        # bottom); all other columns cover the middle band only.
        if face==2:
            rng = range(0,edge*3)
        else:
            rng = range(edge,edge*2)
        #
        for j in rng:
            if j<edge:
                face2 = 4 # top
            elif j>=2*edge:
                face2 = 5 # bottom
            else:
                face2 = face
            #
            (x,y,z) = outImgToXYZ(i,j,face2,edge)
            theta = atan2(y,x) # range -pi to pi
            r = hypot(x,y)
            phi = atan2(z,r) # range -pi/2 to pi/2
            # source img coords
            uf = ( 2.0*edge*(theta + pi)/pi )
            vf = ( 2.0*edge * (pi/2 - phi)/pi)

            # Columns at or beyond the real source width have no data.
            if uf < inSize[0]:
                # Use bilinear interpolation between the four surrounding pixels
                ui = floor(uf)  # coord of pixel to bottom left
                vi = floor(vf)
                # NOTE(review): ui/vi themselves are not clipped here; only
                # the +1 neighbours are -- confirm vi cannot reach inSize[1].
                u2 = clip(ui+1, 0, inSize[0]-1) # Clip u2 to stay within the valid range
                v2 = clip(vi+1, 0, inSize[1]-1) # Clip v2 to stay within the valid range
                mu = uf-ui      # fraction of way across pixel
                nu = vf-vi
                # Pixel values of four corners
                A = inPix[ui,vi]
                B = inPix[u2,vi]
                C = inPix[ui,v2]
                D = inPix[u2,v2]
                # interpolate
                # (r is reused here for the red channel; the radius stored in
                # r above has already been consumed by phi.)
                (r,g,b) = (
                    A[0]*(1-mu)*(1-nu) + B[0]*(mu)*(1-nu) + C[0]*(1-mu)*nu+D[0]*mu*nu,
                    A[1]*(1-mu)*(1-nu) + B[1]*(mu)*(1-nu) + C[1]*(1-mu)*nu+D[1]*mu*nu,
                    A[2]*(1-mu)*(1-nu) + B[2]*(mu)*(1-nu) + C[2]*(1-mu)*nu+D[2]*mu*nu )
            else:
                (r,g,b) = (0,0,0)
            #
            outPix[i,j] = (int(round(r)),int(round(g)),int(round(b)))

这就是变化：

extendedSize = [0,0]
extendedSize[0] = int(360/300 * inSize[0])
extendedSize[1] = inSize[1]
edge = int(extendedSize[0]/4)   # theoretical length of each edge

更新：

我已经让它部分可以工作了。任何反馈都会非常有帮助。

我在下面添加了更新的代码


# convert using an inverse transformation
def convertBack(imgIn,imgOut):
    """Fill ``imgOut`` pixel by pixel by sampling ``imgIn`` (inverse mapping).

    Each visited output pixel is turned into an (x, y, z) point by
    ``outImgToXYZ``, the point is converted to the angles ``theta``/``phi``,
    and the angles are converted to source coordinates ``(uf, vf)``.  If
    ``uf`` lies left of the source width the source is sampled with bilinear
    interpolation, otherwise the output pixel is written as black.

    Relies on ``outImgToXYZ``, ``atan2``, ``hypot``, ``pi``, ``floor`` and
    ``clip`` being defined elsewhere (not shown in this snippet).

    Args:
        imgIn: Source image; must provide ``.size`` and ``.load()``
            (presumably a PIL image -- confirm).  Pixels are read as
            3-component values.
        imgOut: Destination image with the same interface; written in place.

    Returns:
        None.
    """
    inSize = imgIn.size
    outSize = imgOut.size
    inPix = imgIn.load()
    outPix = imgOut.load()
    # The first assignment is immediately overwritten by the second one.
    extendedSize = imgIn.size
    # Source size with the width scaled by 360/300 (presumably the width the
    # panorama would have if it covered a full 360 degrees -- confirm).
    extendedSize = (int(360/300 * inSize[0]), inSize[1])
    edge = extendedSize[0]/4   # theoretical length of edge
    # NOTE(review): edge is derived from the source width but is also used
    # below to split the *output* columns into faces (i/edge); that only lines
    # up if outSize[0] == 4*edge -- confirm against the caller.
    # edge = inSize[0]/4   # the length of each edge in pixels
    for i in range(outSize[0]):
        # print(i)
        face = int(i/edge) # 0 - back, 1 - left 2 - front, 3 - right
        # Only the face-2 columns span three edge-lengths of rows (top, middle,
        # bottom); all other columns cover the middle band only.
        if face==2:
            rng = range(0, int(edge*3))
        else:
            rng = range(int(edge), int(edge*2))
        # 
        for j in rng:
            if j<edge:
                face2 = 4 # top
            elif j>=2*edge:
                face2 = 5 # bottom
            else:
                face2 = face
            # 
            (x,y,z) = outImgToXYZ(i,j,face2,edge)
            theta = atan2(y,x) # range -pi to pi
            r = hypot(x,y)
            phi = atan2(z,r) # range -pi/2 to pi/2
            # source img coords
            uf = ( 2.0*edge*(theta + pi)/pi )
            vf = ( 2.0*edge * (pi/2 - phi)/pi)
            # 
            # Columns at or beyond the real source width have no data.
            if uf < inSize[0] :
                # Use bilinear interpolation between the four surrounding pixels
                ui = floor(uf)  # coord of pixel to bottom left
                vi = floor(vf)
                u2 = ui+1       # coords of pixel to top right
                v2 = vi+1
                mu = uf-ui      # fraction of way across pixel
                nu = vf-vi
                # 
                # Clip coordinates to stay within the valid range
                # (the range of the extended size, not of the real source).
                ui = max(0, min(ui, extendedSize[0]-1))
                u2 = max(0, min(u2, extendedSize[0]-1))
                vi = max(0, min(vi, extendedSize[1]-1))
                v2 = max(0, min(v2, extendedSize[1]-1))
                # # 
                # # Pixel values of four corners
                # Columns are reduced modulo the real source width, rows are
                # clipped to the real source height.
                # NOTE(review): u2 can equal inSize[0] here, which the modulo
                # maps to column 0 -- that wraps around to the opposite edge of
                # a panorama that does not cover 360 degrees; confirm intent.
                A = inPix[ui % inSize[0],clip(vi,0,inSize[1]-1)]
                B = inPix[u2 % inSize[0],clip(vi,0,inSize[1]-1)]
                C = inPix[ui % inSize[0],clip(v2,0,inSize[1]-1)]
                D = inPix[u2 % inSize[0],clip(v2,0,inSize[1]-1)]
                # interpolate
                # (r is reused here for the red channel; the radius stored in
                # r above has already been consumed by phi.)
                (r,g,b) = (
                    A[0]*(1-mu)*(1-nu) + B[0]*(mu)*(1-nu) + C[0]*(1-mu)*nu+D[0]*mu*nu,
                    A[1]*(1-mu)*(1-nu) + B[1]*(mu)*(1-nu) + C[1]*(1-mu)*nu+D[1]*mu*nu,
                    A[2]*(1-mu)*(1-nu) + B[2]*(mu)*(1-nu) + C[2]*(1-mu)*nu+D[2]*mu*nu )
                # 
                # (r,g,b) = (0,0,0) 
            else:
                (r,g,b) = (0,0,0)
            # 
            # Clip the coordinates to stay within the output image dimensions
            # (i comes from range(outSize[0]) so clipping it changes nothing;
            # a j beyond the output height is redirected to the last row).
            i_clipped = max(0, min(i, outSize[0]-1))
            j_clipped = max(0, min(j, outSize[1]-1))
            outPix[i_clipped,j_clipped] = (int(round(r)),int(round(g)),int(round(b)))

Answer 1

我认为问题来自于 edge 变量。它应该根据 outSize 而不是 inSize 计算。我会改用：

edge = outSize[0]/4   # theoretical length of edge

一旦解决了这个问题，几行之后计算的源坐标 uf 和 vf 就不能再依赖 edge 变量，必须改为：

# source img coords
uf = extendedSize[0] * (theta + pi) / (2 * pi)
vf = extendedSize[1] * (pi / 2 - phi) / pi

所以完整的代码就变成了

# convert using an inverse transformation
def convertBack(imgIn,imgOut,fov_deg=300):
    """Fill the cube-map layout ``imgOut`` by sampling the panorama ``imgIn``.

    The output is treated as a strip of four faces, each ``outSize[0]/4``
    wide (0 - back, 1 - left, 2 - front, 3 - right), with the top and bottom
    faces stacked above and below the front face.  Every visited output pixel
    is converted to a 3D point by ``outImgToXYZ``, then to the angles
    ``theta``/``phi``, then to source coordinates, and the source is sampled
    with bilinear interpolation.

    The source is assumed to cover ``fov_deg`` degrees horizontally, starting
    at ``theta = -pi`` in its first column; directions beyond the covered
    range are written as black.

    Relies on ``outImgToXYZ``, ``atan2``, ``hypot``, ``pi`` and ``floor``
    being defined elsewhere (not shown in this snippet).

    Args:
        imgIn: Source image; must provide ``.size`` and ``.load()``
            (presumably a PIL RGB image -- confirm).
        imgOut: Destination image with the same interface; written in place.
        fov_deg: Horizontal field of view of ``imgIn`` in degrees.

    Returns:
        None.

    Raises:
        ValueError: If ``fov_deg`` is not in the range (0, 360].
    """
    if not 0 < fov_deg <= 360:
        raise ValueError("fov_deg must be in the range (0, 360]")
    inSize = imgIn.size
    outSize = imgOut.size
    inPix = imgIn.load()
    outPix = imgOut.load()
    # Size the panorama would have if it covered the full 360 degrees.
    extendedSize = (int(360/fov_deg * inSize[0]), inSize[1])
    edge = outSize[0]/4   # theoretical length of edge
    lastCol = inSize[0]-1
    lastRow = inSize[1]-1
    # Only a full 360-degree panorama is continuous across its left/right edge.
    fullCircle = inSize[0] > 0 and extendedSize[0] == inSize[0]
    for i in range(outSize[0]):
        face = int(i/edge) # 0 - back, 1 - left 2 - front, 3 - right
        # Only the front-face columns also carry the top and bottom faces.
        # Rows beyond the output height are skipped rather than written.
        if face==2:
            rng = range(0, min(int(edge*3), outSize[1]))
        else:
            rng = range(min(int(edge), outSize[1]), min(int(edge*2), outSize[1]))
        for j in rng:
            if j<edge:
                face2 = 4 # top
            elif j>=2*edge:
                face2 = 5 # bottom
            else:
                face2 = face
            (x,y,z) = outImgToXYZ(i,j,face2,edge)
            theta = atan2(y,x) # range -pi to pi
            phi = atan2(z,hypot(x,y)) # range -pi/2 to pi/2
            # source img coords
            uf = extendedSize[0] * (theta + pi) / (2 * pi)
            vf = extendedSize[1] * (pi / 2 - phi) / pi
            if fullCircle:
                # theta == pi is the same direction as theta == -pi.
                uf %= inSize[0]
            if uf < inSize[0]:
                # Use bilinear interpolation between the four surrounding pixels
                uFloor = int(floor(uf))
                vFloor = int(floor(vf))
                mu = uf-uFloor  # fraction of way across pixel
                nu = vf-vFloor
                ui = min(uFloor, lastCol)
                vi = min(vFloor, lastRow)
                # The right-hand neighbour wraps around only for a full
                # panorama; otherwise it is clamped to the last real column so
                # the image edge is not blended with the opposite edge.
                if fullCircle:
                    u2 = (ui+1) % inSize[0]
                else:
                    u2 = min(ui+1, lastCol)
                v2 = min(vi+1, lastRow)
                # Pixel values of four corners
                A = inPix[ui,vi]
                B = inPix[u2,vi]
                C = inPix[ui,v2]
                D = inPix[u2,v2]
                # interpolate each of the three channels
                pixel = tuple(
                    int(round(A[c]*(1-mu)*(1-nu) + B[c]*mu*(1-nu)
                              + C[c]*(1-mu)*nu + D[c]*mu*nu))
                    for c in range(3))
            else:
                # Outside the part of the sphere covered by the source.
                pixel = (0,0,0)
            outPix[i,j] = pixel

如何将300度等距柱状全景图像转换为立方体面？

问题描述投票：0回答：1

1个回答

最新问题

如何将300度等距柱状全景图像转换为立方体面？

问题描述 投票：0回答：1

1个回答

最新问题

问题描述投票：0回答：1