我正在尝试从 URL 自动下载图像,并将其加载为 numpy 矩阵。为此,我需要使用 requests 和 opencv 库。对于 uint8 这类标准编码的图像,它可以正常工作;但对于 uint16 图像,结果会因数值溢出而损坏。下面是我目前使用的最小示例代码:
import requests
import cv2
import numpy as np
import sys
# Download the image; the URL comes from the first command-line argument.
# A timeout keeps the script from hanging forever on an unresponsive server.
r = requests.get(sys.argv[1], timeout=30)
# Fail loudly on HTTP errors instead of handing an error page to the decoder.
r.raise_for_status()
img_bin = r.content
# View the encoded bytes as a flat uint8 buffer without copying them first.
image_data = np.frombuffer(img_bin, dtype=np.uint8)
# Decode the in-memory file with the IMREAD_UNCHANGED flag.
image = cv2.imdecode(image_data, cv2.IMREAD_UNCHANGED)
# image.dtype -> np.uint8
从本地驱动器加载 uint16 图像工作正常:
import cv2
import sys
# Paths of the 16-bit and 8-bit PNG files, taken from the command line.
png_u16_filepath = sys.argv[1]
png_u8_filepath = sys.argv[2]
# Read the 16-bit file with the IMREAD_UNCHANGED flag.
image = cv2.imread(png_u16_filepath, cv2.IMREAD_UNCHANGED)
# image.dtype -> np.uint16
# Read the 8-bit file the same way (this rebinds `image`).
image = cv2.imread(png_u8_filepath, cv2.IMREAD_UNCHANGED)
# image.dtype -> np.uint8
但是,我需要从互联网下载图像并直接在内存中解码,无法先将它们写入本地图像文件。
经过一番思考,我直接解析了响应返回的字节。
所以我的问题的答案是肯定的:可以从字节判断图像是 uint16 还是 uint8,也可以将这些数据转换为具有正确数据类型的 np.ndarray。只需要阅读 PNG 规范即可。
因此,如果有人想解决类似的问题,下面演示如何借助 requests 处理通过 URL 获取的 uint16 PNG 图像:
import struct
import zlib
import numpy as np
# One row per PNG colour type code: (code, human-readable name, channel count).
_PNG_COLOR_TYPES = (
    (0, 'Grayscale', 1),
    (2, 'RGB', 3),
    (3, 'Indexed-color', 1),
    (4, 'Grayscale with alpha', 2),
    (6, 'RGBA', 4),
)

# Colour type code -> descriptive name.
color_type_mapping = {code: name for code, name, _ in _PNG_COLOR_TYPES}

# Colour type code -> number of samples stored per pixel.
color_type_channels = {code: count for code, _, count in _PNG_COLOR_TYPES}
def get_png_size(png_bytes):
    """Return the ``(width, height)`` stored in a PNG's IHDR chunk.

    Args:
        png_bytes: The complete PNG file as a bytes-like object.

    Returns:
        A ``(width, height)`` tuple of ints read from the IHDR chunk.

    Raises:
        ValueError: If the PNG signature is missing, no IHDR chunk is
            found, or the IHDR chunk is cut short.
    """
    # Verify the PNG signature
    png_signature = b'\x89PNG\r\n\x1a\n'
    if png_bytes[:8] != png_signature:
        raise ValueError("Not a valid PNG file")

    total = len(png_bytes)
    # Move past the signature
    offset = 8
    # Every chunk starts with a 4-byte big-endian length and a 4-byte type.
    # Requiring a full 8-byte header keeps truncated input from surfacing
    # as struct.error.
    while offset + 8 <= total:
        length, chunk_type = struct.unpack_from(">I4s", png_bytes, offset)
        offset += 8
        if chunk_type == b'IHDR':
            # Width and height are the first two 32-bit fields of IHDR.
            if offset + 8 > total:
                raise ValueError("IHDR chunk is truncated")
            width, height = struct.unpack_from(">II", png_bytes, offset)
            return width, height
        # Move to the next chunk (skip data + CRC)
        offset += length + 4
    raise ValueError("IHDR chunk not found")
def get_png_bit_info(png_bytes, flag):
    """Return the bit depth or colour type byte from a PNG's IHDR chunk.

    Args:
        png_bytes: The complete PNG file as a bytes-like object.
        flag: ``'depth'`` to read the bit depth, ``'color_type'`` to read
            the colour type.

    Returns:
        The requested IHDR field as an int.

    Raises:
        ValueError: If the PNG signature is missing, no IHDR chunk is
            found, or the IHDR chunk is cut short.
        KeyError: If ``flag`` is not one of the supported names.
    """
    # Byte position of each field inside the IHDR data, after the two
    # 4-byte width/height fields.
    offsets = {'depth': 8, 'color_type': 9}
    # Verify the PNG signature
    png_signature = b'\x89PNG\r\n\x1a\n'
    if png_bytes[:8] != png_signature:
        raise ValueError("Not a valid PNG file")
    # Resolve the field before scanning so a bad flag fails immediately.
    field_offset = offsets[flag]

    total = len(png_bytes)
    # Move past the signature
    offset = 8
    # Each chunk header is a 4-byte big-endian length plus a 4-byte type.
    while offset + 8 <= total:
        length, chunk_type = struct.unpack_from(">I4s", png_bytes, offset)
        offset += 8
        if chunk_type == b'IHDR':
            position = offset + field_offset
            if position >= total:
                raise ValueError("IHDR chunk is truncated")
            return png_bytes[position]
        # Move to the next chunk (skip data + CRC)
        offset += length + 4
    raise ValueError("IHDR chunk not found")
def _undo_average_filter(line, prior, bpp):
    """Reverse PNG filter type 3 (Average) for one scanline of bytes."""
    recon = bytearray(line)
    for i in range(len(recon)):
        left = recon[i - bpp] if i >= bpp else 0
        recon[i] = (recon[i] + ((left + prior[i]) >> 1)) & 0xFF
    return recon


def _undo_paeth_filter(line, prior, bpp):
    """Reverse PNG filter type 4 (Paeth) for one scanline of bytes."""
    recon = bytearray(line)
    for i in range(len(recon)):
        if i >= bpp:
            left, upper_left = recon[i - bpp], prior[i - bpp]
        else:
            left = upper_left = 0
        up = prior[i]
        # Paeth predictor: pick the neighbour closest to left + up - upper_left,
        # breaking ties in the order left, up, upper-left.
        estimate = left + up - upper_left
        dist_left = abs(estimate - left)
        dist_up = abs(estimate - up)
        dist_upper_left = abs(estimate - upper_left)
        if dist_left <= dist_up and dist_left <= dist_upper_left:
            predictor = left
        elif dist_up <= dist_upper_left:
            predictor = up
        else:
            predictor = upper_left
        recon[i] = (recon[i] + predictor) & 0xFF
    return recon


def _unfilter_scanlines(raw, height, stride, bpp):
    """Undo the per-scanline PNG filters; return a (height, stride) uint8 array."""
    view = memoryview(raw)
    recon = np.empty((height, stride), dtype=np.uint8)
    # The scanline "above" the first one is defined as all zeros.
    prior = np.zeros(stride, dtype=np.uint8)
    line_len = stride + 1  # one filter-type byte precedes every scanline
    for y in range(height):
        start = y * line_len
        filter_type = view[start]
        line = view[start + 1:start + line_len]
        current = recon[y]
        if filter_type == 0:  # None
            current[:] = np.frombuffer(line, dtype=np.uint8)
        elif filter_type == 1:  # Sub
            # Each byte adds the reconstructed byte one pixel to its left,
            # i.e. a running sum per byte lane; uint8 wraps modulo 256.
            lanes = np.frombuffer(line, dtype=np.uint8).reshape(-1, bpp)
            current[:] = np.cumsum(lanes, axis=0, dtype=np.uint8).reshape(-1)
        elif filter_type == 2:  # Up
            np.add(np.frombuffer(line, dtype=np.uint8), prior, out=current)
        elif filter_type == 3:  # Average
            restored = _undo_average_filter(line, prior.tobytes(), bpp)
            current[:] = np.frombuffer(restored, dtype=np.uint8)
        elif filter_type == 4:  # Paeth
            restored = _undo_paeth_filter(line, prior.tobytes(), bpp)
            current[:] = np.frombuffer(restored, dtype=np.uint8)
        else:
            raise ValueError(f"Unsupported PNG filter type: {filter_type}")
        prior = current
    return recon


def extract_png_data(png_bytes, bit_depth, color_type, width, height):
    """Decode the pixel data of a non-interlaced PNG into a numpy array.

    The IDAT chunks are concatenated and inflated, the per-scanline filters
    (None, Sub, Up, Average, Paeth) are reversed, and the samples are
    arranged into an image array.

    Args:
        png_bytes: The complete PNG file as a bytes-like object.
        bit_depth: Bits per sample (1, 2, 4, 8 or 16).
        color_type: PNG colour type code; must be a key of
            ``color_type_channels``.
        width: Image width in pixels.
        height: Image height in pixels.

    Returns:
        An ndarray of shape ``(height, width, channels)``, ``uint16`` for
        16-bit images and ``uint8`` otherwise. Sample values are returned
        unscaled, and no palette is applied, so indexed-colour images
        yield their raw index values.

    Raises:
        ValueError: If the signature is missing, the bit depth or a filter
            type is unsupported, the image is interlaced, no IDAT chunk is
            present, or the decompressed data is too short.
        KeyError: If ``color_type`` is not a known colour type code.
    """
    # Verify the PNG signature
    png_signature = b'\x89PNG\r\n\x1a\n'
    if png_bytes[:8] != png_signature:
        raise ValueError("Not a valid PNG file")
    if bit_depth not in (1, 2, 4, 8, 16):
        raise ValueError(f"Unsupported bit depth: {bit_depth}")
    # Number of samples per pixel for this colour type
    channels = color_type_channels[color_type]

    total = len(png_bytes)
    # Move past the signature
    offset = 8
    # Collect the pieces and join once; repeated += on bytes is quadratic.
    idat_parts = []
    while offset + 8 <= total:
        length, chunk_type = struct.unpack_from(">I4s", png_bytes, offset)
        offset += 8
        if chunk_type == b'IDAT':
            idat_parts.append(png_bytes[offset:offset + length])
        elif chunk_type == b'IHDR' and length >= 13 and offset + 13 <= total:
            # Byte 12 of IHDR is the interlace method. Interlaced data is
            # stored as several reduced passes, which this decoder does
            # not reassemble.
            if png_bytes[offset + 12] != 0:
                raise ValueError("Interlaced PNG images are not supported")
        # Move to the next chunk (skip data + CRC)
        offset += length + 4
    if not idat_parts:
        raise ValueError("IDAT chunk not found")

    # Decompress the IDAT data
    decompressed_data = zlib.decompress(b''.join(idat_parts))

    # Bytes per scanline (sub-byte samples are packed, so round up) and the
    # filter step: whole bytes per pixel, minimum 1.
    stride = (width * channels * bit_depth + 7) // 8
    bpp = max(1, (channels * bit_depth) // 8)
    if len(decompressed_data) < height * (stride + 1):
        raise ValueError("Decompressed image data is shorter than expected")

    scanlines = _unfilter_scanlines(decompressed_data, height, stride, bpp)

    if bit_depth == 16:
        # Samples are stored big-endian; convert to native-order uint16.
        samples = scanlines.view('>u2').astype(np.uint16)
    elif bit_depth == 8:
        samples = scanlines
    else:
        # Samples are packed most-significant-bit first; drop the padding
        # bits at the end of each scanline, then rebuild each sample.
        sample_count = width * channels
        bits = np.unpackbits(scanlines, axis=1)[:, :sample_count * bit_depth]
        weights = 1 << np.arange(bit_depth - 1, -1, -1)
        grouped = bits.reshape(height, sample_count, bit_depth)
        samples = grouped.dot(weights).astype(np.uint8)

    return samples.reshape((height, width, channels))
所以用法是:
# URL is a placeholder for the address of the PNG to download.
# A timeout keeps the call from hanging forever on an unresponsive server.
r = requests.get(URL, timeout=30)
# Fail loudly on HTTP errors instead of trying to parse an error page as PNG.
r.raise_for_status()
img_bin = r.content
# Read the IHDR fields needed to interpret the pixel data.
bit_depth = get_png_bit_info(img_bin, 'depth')
color_type = get_png_bit_info(img_bin, 'color_type')
# get_png_size returns (width, height), unpacked into the last two arguments.
image = extract_png_data(img_bin, bit_depth, color_type, *get_png_size(img_bin))
最终得到的图像是一个 ndarray,通常为 RGB 或 RGBA,并保留了原始(正确)的 dtype。