逐像素图像处理的性能问题（Pillow）

Question

我使用这种方法将普通图片转换为 ASCII 艺术。但在性能较弱的处理器上，即使处理很小的图像也会让处理器不堪重负。是否可以对此进行优化？我尝试过用 numpy 进行优化，但没有取得任何效果。任何有关优化的帮助都将不胜感激，谢谢。我失败的尝试附在下文。

算法取自这里：https://github.com/Akascape/Ascify-Art

我的代码：

from PIL import Image, ImageDraw, ImageFont
import math

def make_magic_old(photo, chars="01", char_size=15, char_width=10, char_height=18, scale=0.09):
    """Render *photo* as colored ASCII art and return it as a new RGB image.

    The input is converted to RGB and shrunk with nearest-neighbour
    resampling; every pixel of the shrunken image is then drawn as one
    character from ``chars`` (picked by the pixel's average channel value)
    in the pixel's own color on a black canvas.

    Args:
        photo: Pillow image to convert; it is not modified.
        chars: Characters to draw with, indexed from darkest to brightest.
        char_size: Font size passed to ``ImageFont.truetype``.
        char_width: Width in pixels of one character cell in the output.
        char_height: Height in pixels of one character cell in the output.
        scale: Shrink factor applied to the input (rounded to 3 decimals);
            the height is additionally multiplied by
            ``char_width / char_height``.

    Returns:
        A new RGB image of size ``(char_width * w, char_height * h)`` where
        ``(w, h)`` is the size of the shrunken input.
    """
    scale_factor = round(scale, 3)
    # Maps a brightness in 0..255 onto an index in 0..len(chars) - 1.
    interval = len(chars) / 256
    fnt = ImageFont.truetype("assets/fonts/FiraCode-Bold.ttf", char_size)

    photo = photo.convert("RGB")
    src_width, src_height = photo.size
    # Clamp to one pixel: for small inputs the scaled size truncates to 0,
    # which Image.resize rejects.
    target_size = (
        max(1, int(scale_factor * src_width)),
        max(1, int(scale_factor * src_height * (char_width / char_height))),
    )
    photo = photo.resize(target_size, Image.Resampling.NEAREST)
    width, height = photo.size
    pix = photo.load()

    outputImage = Image.new("RGB", (char_width * width, char_height * height), color="black")
    draw = ImageDraw.Draw(outputImage)

    for i in range(height):
        y = i * char_height  # loop-invariant for the whole row
        for j in range(width):
            r, g, b = pix[j, i]
            # Average channel value selects the character; the original
            # color is kept as the text color.
            h = int(r / 3 + g / 3 + b / 3)
            draw.text((j * char_width, y), chars[math.floor(h * interval)], font=fnt, fill=(r, g, b))

    return outputImage

def main():
    """Convert ``test.png`` to ASCII art and save it as ``result.jpg``."""
    # NOTE(review): make_magic is not defined above this point in the page;
    # it must be defined before main() is executed.
    # The context manager releases the source file handle as soon as the
    # conversion has produced its own, independent output image.
    with Image.open("test.png") as photo:
        result_photo = make_magic(photo)
    result_photo.save("result.jpg")
    print("Done!")


if __name__ == "__main__":
    main()

尝试使用 numpy 进行优化：

import numpy as np

def make_magic(photo, chars="01", char_size=15, char_width=10, char_height=18, scale=0.09):
    """Render *photo* as colored ASCII art, using numpy for the per-pixel math.

    The input is converted to RGB and shrunk with nearest-neighbour
    resampling. Brightness (mean of the three channels) and the resulting
    character index are computed for all pixels at once with numpy; the
    characters are then drawn one by one with ``ImageDraw.text``, each in
    the color of its source pixel, on a black canvas.

    Args:
        photo: Pillow image to convert; it is not modified.
        chars: Characters to draw with, indexed from darkest to brightest.
        char_size: Font size passed to ``ImageFont.truetype``.
        char_width: Width in pixels of one character cell in the output.
        char_height: Height in pixels of one character cell in the output.
        scale: Shrink factor applied to the input (rounded to 3 decimals);
            the height is additionally multiplied by
            ``char_width / char_height``.

    Returns:
        A new RGB image of size ``(char_width * w, char_height * h)`` where
        ``(w, h)`` is the size of the shrunken input.
    """
    scale_factor = round(scale, 3)
    fnt = ImageFont.truetype("assets/fonts/FiraCode-Bold.ttf", char_size)

    # Resize while still a Pillow image; converting to an array first only
    # to convert straight back for the resize is wasted work.
    rgb_image = photo.convert("RGB")
    src_width, src_height = rgb_image.size
    # Clamp to one pixel: for small inputs the scaled size truncates to 0,
    # which Image.resize rejects.
    target_size = (
        max(1, int(scale_factor * src_width)),
        max(1, int(scale_factor * src_height * (char_width / char_height))),
    )
    pixels = np.array(rgb_image.resize(target_size, Image.Resampling.NEAREST))
    height, width, _ = pixels.shape

    # Brightness per pixel, truncated to 0..255, then mapped onto an index
    # in 0..len(chars) - 1.
    grayscale_photo = np.mean(pixels, axis=2).astype(np.uint8)
    indices = (grayscale_photo * (len(chars) / 256)).astype(int)

    outputImage = Image.new("RGB", (char_width * width, char_height * height), color="black")
    draw = ImageDraw.Draw(outputImage)

    # tolist() converts everything to native Python ints once, instead of
    # creating numpy scalars for every pixel inside the drawing loop.
    for i, (index_row, color_row) in enumerate(zip(indices.tolist(), pixels.tolist())):
        y = i * char_height
        for j, (char_index, color) in enumerate(zip(index_row, color_row)):
            draw.text((j * char_width, y), chars[char_index], font=fnt, fill=tuple(color))
    return outputImage

之前的图片：

之后的图片：

Answer 1

原始算法确实没有那么糟糕，因此大量加速将非常具有挑战性。对于非常小的图像输出，这里提出的解决方案大约慢 2 倍，对于非常大的输出，这个解决方案大约快 30 倍。

在我的机器上进行测试时，即使使用 SSD，`PIL.Image.open` 和 `PIL.Image.save` 对运行时间的影响也非常大，尤其是对于较小的文件。这是不可避免的，所以我专注于图像创建部分。

该解决方案的基本思路是预先生成所有字符的图像，然后利用 `numpy` 和 `numba` 在基本矩阵运算上的出色配合，将它们平铺到整幅输出图像中。

字体步骤并不是特别适合编译，所以它被留在了 python 领域。如果需要处理很多文件，可以循环调用子函数，省去重复的字体步骤。

这是我想出的解决方案，带有一些额外的测试样板代码：

numba

给出以下结果：

import time
from PIL import Image, ImageDraw, ImageFont
import math
import numba
import numpy as np


def make_magic_old(photo, chars="01", char_size=15, char_width=10, char_height=18, scale=0.09):
    """Baseline: draw every character with ``ImageDraw.text``.

    Kept as posted in the question (apart from the font path) so the
    benchmark in ``main`` compares against the original algorithm.
    """
    # Rounding scale
    scaleFactor = round(scale, 3)
    # Calculate the length of the character list
    charLength = len(list(chars))
    # Calculate the interval for converting a pixel value into a character
    interval = charLength / 256
    # Convert the image to RGB
    photo = photo.convert("RGB")
    # Load font
    fnt = ImageFont.truetype("font.ttf", char_size)
    # Get size of the image
    width, height = photo.size
    # Scaling the image
    photo = photo.resize(
        (int(scaleFactor * width), int(scaleFactor * height * (char_width / char_height))),
        Image.Resampling.NEAREST,
    )
    # Getting the sizes in a new way after scaling
    width, height = photo.size
    # Load pixels
    pix = photo.load()
    # Create a new image to display the result
    outputImage = Image.new("RGB", (char_width * width, char_height * height), color="black")
    # Create a drawing tool
    draw = ImageDraw.Draw(outputImage)
    # Replace pixels with text
    for i in range(height):
        for j in range(width):
            r, g, b = pix[j, i]
            # Calculate the average color value
            h = int(r / 3 + g / 3 + b / 3)
            # Convert pixel colors
            pix[j, i] = (h, h, h)
            # Display a symbol instead of a pixel
            draw.text((j * char_width, i * char_height), chars[math.floor(h * interval)], font=fnt, fill=(r, g, b))
    return outputImage


def make_magic(photo, chars="01", char_size=15, char_width=10, char_height=18, scale=0.09):
    """Render *photo* as colored ASCII art by tiling pre-rendered characters.

    Each character of ``chars`` is drawn once, in white on black, into a
    ``char_height`` x ``char_width`` mask. The compiled helper
    ``_make_magic_sub`` then tiles those masks into the output, tinting
    each tile with the color of the corresponding source pixel.

    Args:
        photo: Pillow image to convert.
        chars: Characters to draw with, indexed from darkest to brightest.
        char_size: Font size passed to ``ImageFont.truetype``.
        char_width: Width in pixels of one character cell.
        char_height: Height in pixels of one character cell.
        scale: Shrink factor applied to the input before tiling.

    Returns:
        A new RGB Pillow image built from the tiled array.
    """
    # Convert the image to RGB
    photo = photo.convert("RGB")
    # Load font
    fnt = ImageFont.truetype("font.ttf", char_size)
    # Make character masks to tile into output image
    char_masks = np.empty((len(chars), char_height, char_width, 3), np.ubyte)
    for i, char in enumerate(chars):
        tim = Image.new('RGB', (char_width, char_height), color='black')
        draw = ImageDraw.Draw(tim)
        draw.text((0, 0), char, font=fnt, fill=(255, 255, 255))
        char_masks[i, :] = np.array(tim)
    # Call the numpy + numba optimized function
    new_img_array = _make_magic_sub(np.array(photo), char_masks, char_width, char_height, scale)
    return Image.fromarray(new_img_array, 'RGB')


@numba.njit(cache=True, parallel=True)
def _make_magic_sub(photo, char_masks, char_width, char_height, scale):
    """Tile tinted character masks into one output array.

    Args:
        photo: Source pixels, indexed as ``photo[row, col]`` -> 3 channels.
        char_masks: One ``(char_height, char_width, 3)`` mask per character.
        char_width: Width in pixels of one character cell.
        char_height: Height in pixels of one character cell.
        scale: Shrink factor; rows are additionally scaled by
            ``char_width / char_height``.

    Returns:
        An unsigned-byte array of shape
        ``(rows * char_height, cols * char_width, 3)``.
    """
    n_chars = char_masks.shape[0]
    interval = 1 / n_chars
    rows = int(photo.shape[0] * scale * char_width / char_height)
    cols = int(photo.shape[1] * scale)
    outimage = np.empty((rows * char_height, cols * char_width, 3), np.ubyte)
    for i in numba.prange(rows):
        for j in range(cols):
            # Nearest source pixel for this cell, channels scaled to 0..1.
            rgb = photo[int(i / rows * photo.shape[0]), int(j / cols * photo.shape[1])] / 255
            # A pure-white pixel has mean 1.0, which would give index
            # n_chars - one past the last mask. Clamp it so the lookup
            # never reads outside char_masks.
            char_num = min(int(np.floor(np.sum(rgb) / 3 / interval)), n_chars - 1)
            outimage[i * char_height: (i + 1) * char_height,
                     j * char_width: (j + 1) * char_width, :] = char_masks[char_num] * rgb
    return outimage


def _gt(s=0.0):
    """Return the ``time.perf_counter()`` reading minus *s*."""
    return time.perf_counter() - s


def main():
    """Time both implementations at several scales and save sample outputs."""
    photo = Image.open("test.png")
    N = 10
    for scale in [0.01, 0.05, 0.1, 0.2, 0.5, 1.0]:
        for fun in [make_magic_old, make_magic]:
            fun(photo)  # To skip any caching / compilation times
            s = _gt()
            for i in range(N):
                result_photo = fun(photo, scale=scale)
            e = _gt(s)
            print(f'{fun.__name__:16}{scale:4.2f} : {e / N * 1000:7.1f} ms')
        print()
    res_old = make_magic_old(photo, scale=0.2)
    res_new = make_magic(photo, scale=0.2)
    res_old.save('result_old.png')
    res_new.save('result_new.png')


if __name__ == "__main__":
    main()

在 Windows 10、i9-10900K、Python 3.11.4 上测试

您的结果可能会有很大差异：我使用的处理器远谈不上“性能较弱的处理器”，但我认为这在大多数多核处理器上都会对您有所帮助。可以看到，在最大的输出图像上我们获得了大约 32 倍的速度提升。

新代码的输出：

与旧代码的输出进行比较：

如果您有任何疑问，请告诉我。

逐像素图像处理的性能问题（Pillow）

问题描述投票：0回答：1

1个回答

最新问题

逐像素图像处理的性能问题（Pillow）

问题描述 投票：0回答：1

1个回答

最新问题

问题描述投票：0回答：1