我使用这种方法将普通图片转换为 ASCII 艺术。但在性能不强的处理器上,即使只是处理小图像也会让处理器不堪重负。是否可以对此进行优化?我尝试用 numpy 做了一些改动,但没有得到任何效果。任何有关优化的帮助我都将不胜感激,谢谢。我失败的尝试附在后面。
算法取自这里:https://github.com/Akascape/Ascify-Art
我的代码:
from PIL import Image, ImageDraw, ImageFont
import math
def make_magic_old(photo, chars="01", char_size=15, char_width=10, char_height=18, scale=0.09):
    """Render *photo* as coloured ASCII art and return it as a new RGB image.

    The picture is shrunk with nearest-neighbour sampling so that every
    remaining pixel becomes one character cell of ``char_width`` x
    ``char_height`` output pixels. The glyph for a cell is picked from
    *chars* by the pixel's brightness and drawn in the pixel's own colour
    on a black background.

    Args:
        photo: PIL image to convert; it is not modified.
        chars: Glyph palette; index 0 is used for the darkest pixels and the
            last entry for the brightest ones.
        char_size: Font size passed to ``ImageFont.truetype``.
        char_width: Width of one character cell in output pixels.
        char_height: Height of one character cell in output pixels.
        scale: Down-scaling factor for the source size (rounded to 3 decimals).

    Returns:
        A new RGB ``PIL.Image.Image`` of size
        ``(char_width * columns, char_height * rows)``.
    """
    scale_factor = round(scale, 3)
    font = ImageFont.truetype("assets/fonts/FiraCode-Bold.ttf", char_size)

    # Brightness (0-255) -> glyph table, built once so the per-pixel loop does
    # no index arithmetic. Dividing by 256 is an exact power-of-two scaling,
    # so the integer form equals floor(level * len(chars) / 256).
    palette_size = len(chars)
    glyph_for = [chars[level * palette_size // 256] for level in range(256)]

    rgb_photo = photo.convert("RGB")
    src_width, src_height = rgb_photo.size
    # Rows are squeezed by char_width / char_height because the output cells
    # are not square; this keeps the picture's aspect ratio.
    small = rgb_photo.resize(
        (int(scale_factor * src_width), int(scale_factor * src_height * (char_width / char_height))),
        Image.Resampling.NEAREST,
    )
    cols, rows = small.size
    pixels = small.load()

    output_image = Image.new("RGB", (char_width * cols, char_height * rows), color="black")
    draw = ImageDraw.Draw(output_image)
    for row in range(rows):
        y = row * char_height
        for col in range(cols):
            r, g, b = pixels[col, row]
            # Average of the three channels, truncated to an integer 0..255
            brightness = int(r / 3 + g / 3 + b / 3)
            draw.text((col * char_width, y), glyph_for[brightness], font=font, fill=(r, g, b))
    return output_image
def main():
    """Convert ``test.png`` to ASCII art, save it as ``result.jpg`` and report completion."""
    source = Image.open("test.png")
    ascii_art = make_magic(source)
    ascii_art.save("result.jpg")
    print("Done!")
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
尝试使用 numpy 进行优化:
import numpy as np
def make_magic(photo, chars="01", char_size=15, char_width=10, char_height=18, scale=0.09):
    """Render *photo* as coloured ASCII art, using numpy for the per-pixel maths.

    The image is shrunk with nearest-neighbour sampling, the glyph index of
    every remaining pixel is computed in one vectorised step, and each glyph
    is then drawn in its pixel's colour on a black canvas.

    Args:
        photo: PIL image to convert; it is not modified.
        chars: Glyph palette; index 0 is used for the darkest pixels and the
            last entry for the brightest ones.
        char_size: Font size passed to ``ImageFont.truetype``.
        char_width: Width of one character cell in output pixels.
        char_height: Height of one character cell in output pixels.
        scale: Down-scaling factor for the source size (rounded to 3 decimals).

    Returns:
        A new RGB ``PIL.Image.Image`` of size
        ``(char_width * columns, char_height * rows)``.
    """
    scale_factor = round(scale, 3)
    font = ImageFont.truetype("assets/fonts/FiraCode-Bold.ttf", char_size)

    # Shrink with PIL first and only then convert to numpy: this avoids
    # copying the full-resolution picture into an array and back again.
    rgb_photo = photo.convert("RGB")
    src_width, src_height = rgb_photo.size
    small = np.array(
        rgb_photo.resize(
            (int(scale_factor * src_width), int(scale_factor * src_height * (char_width / char_height))),
            Image.NEAREST,
        )
    )
    rows, cols, _ = small.shape

    # Channel average truncated to 0..255, then scaled onto the palette indices
    grayscale = np.mean(small, axis=2).astype(np.uint8)
    indices = (grayscale * (len(chars) / 256)).astype(int)

    # Convert to plain Python lists once: the drawing loop then handles
    # ordinary str/int objects instead of boxing a numpy scalar per pixel.
    glyph_rows = [[chars[k] for k in index_row] for index_row in indices.tolist()]
    colour_rows = small.tolist()

    output_image = Image.new("RGB", (char_width * cols, char_height * rows), color="black")
    draw = ImageDraw.Draw(output_image)
    for i, (glyphs, colours) in enumerate(zip(glyph_rows, colour_rows)):
        y = i * char_height
        for j, (glyph, colour) in enumerate(zip(glyphs, colours)):
            draw.text((j * char_width, y), glyph, font=font, fill=tuple(colour))
    return output_image
原始算法确实没有那么糟糕,因此大量加速将非常具有挑战性。对于非常小的图像输出,这里提出的解决方案大约慢 2 倍,对于非常大的输出,这个解决方案大约快 30 倍。
在我的机器上进行测试时,即使使用 SSD,`PIL.Image.open` 和 `PIL.Image.save` 对运行时间的影响也非常大,尤其是对于较小的文件。这是不可避免的,所以我专注于图像生成部分。
该解决方案的基本思路是预先生成所有字符的图像,然后利用 `numpy` 和 `numba` 在基本矩阵运算上的出色协同,将它们平铺到整幅输出图像中。
字体步骤并不是特别适合编译,所以它被留在了 python 领域。如果需要处理很多文件,可以循环调用子函数,省去重复的字体步骤。
这是我想出的解决方案,带有一些额外的测试样板代码:numba
给出以下结果:
import time
from PIL import Image, ImageDraw, ImageFont
import math
import numba
import numpy as np
def make_magic_old(photo, chars="01", char_size=15, char_width=10, char_height=18, scale=0.09):
    """Reference ASCII-art renderer that draws one glyph per pixel with PIL.

    The picture is shrunk with nearest-neighbour sampling; each remaining
    pixel becomes a ``char_width`` x ``char_height`` cell holding a glyph
    chosen from *chars* by brightness and drawn in the pixel's colour.

    Args:
        photo: PIL image to convert.
        chars: Glyph palette; index 0 is used for the darkest pixels.
        char_size: Font size passed to ``ImageFont.truetype``.
        char_width: Width of one character cell in output pixels.
        char_height: Height of one character cell in output pixels.
        scale: Down-scaling factor for the source size (rounded to 3 decimals).

    Returns:
        A new RGB ``PIL.Image.Image`` on a black background.
    """
    factor = round(scale, 3)
    # Multiplier that maps a brightness of 0..255 onto a palette index
    step = len(chars) / 256
    rgb_photo = photo.convert("RGB")
    font = ImageFont.truetype("font.ttf", char_size)

    src_width, src_height = rgb_photo.size
    # Height is additionally scaled by char_width / char_height because the
    # output cells are not square.
    target_size = (int(factor * src_width), int(factor * src_height * (char_width / char_height)))
    small = rgb_photo.resize(target_size, Image.Resampling.NEAREST)
    cols, rows = small.size
    pixels = small.load()

    canvas = Image.new("RGB", (char_width * cols, char_height * rows), color="black")
    pen = ImageDraw.Draw(canvas)
    for row in range(rows):
        for col in range(cols):
            red, green, blue = pixels[col, row]
            level = int(red / 3 + green / 3 + blue / 3)
            # The grey value is stored back into the shrunken working copy
            pixels[col, row] = (level, level, level)
            glyph = chars[math.floor(level * step)]
            pen.text((col * char_width, row * char_height), glyph, font=font, fill=(red, green, blue))
    return canvas
def make_magic(photo, chars="01", char_size=15, char_width=10, char_height=18, scale=0.09):
    """Render *photo* as coloured ASCII art via pre-rendered glyph tiles.

    Every character in *chars* is drawn once, in white on black, into a
    ``char_height`` x ``char_width`` tile. The tiles and the source pixels
    are then handed to ``_make_magic_sub``, which assembles the output array.

    Args:
        photo: PIL image to convert.
        chars: Glyph palette, one tile is rendered per character.
        char_size: Font size passed to ``ImageFont.truetype``.
        char_width: Width of one character cell in output pixels.
        char_height: Height of one character cell in output pixels.
        scale: Down-scaling factor forwarded to ``_make_magic_sub``.

    Returns:
        An RGB ``PIL.Image.Image`` built from the assembled array.
    """
    rgb_photo = photo.convert("RGB")
    font = ImageFont.truetype("font.ttf", char_size)

    def render_tile(char):
        # White glyph on black: multiplying by a colour later tints the glyph
        tile = Image.new('RGB', (char_width, char_height), color='black')
        ImageDraw.Draw(tile).text((0, 0), char, font=font, fill=(255, 255, 255))
        return np.array(tile)

    char_masks = np.empty((len(chars), char_height, char_width, 3), np.ubyte)
    for index, char in enumerate(chars):
        char_masks[index] = render_tile(char)

    # The heavy tiling work happens in the compiled helper
    assembled = _make_magic_sub(np.array(rgb_photo), char_masks, char_width, char_height, scale)
    return Image.fromarray(assembled, 'RGB')
@numba.njit(cache=True, parallel=True)
def _make_magic_sub(photo, char_masks, char_width, char_height, scale):
    """Tile pre-rendered glyph masks, tinted by pixel colour, into one array.

    Args:
        photo: Source pixels indexed as ``photo[row, col]`` giving an RGB
            triple; ``make_magic`` passes ``np.array`` of an RGB image.
        char_masks: Glyph tiles; ``char_masks[k]`` must have shape
            ``(char_height, char_width, 3)``.
        char_width: Width of one character cell in output pixels.
        char_height: Height of one character cell in output pixels.
        scale: Down-scaling factor applied to the source dimensions.

    Returns:
        ``np.ubyte`` array of shape
        ``(rows * char_height, cols * char_width, 3)``.
    """
    n_chars = char_masks.shape[0]
    interval = 1 / n_chars
    # Number of character rows / columns; rows are squeezed by
    # char_width / char_height because the cells are not square.
    new_size = (int(photo.shape[0] * scale * char_width / char_height), int(photo.shape[1] * scale), 3)
    outimage = np.empty((new_size[0] * char_height, new_size[1] * char_width, 3), np.ubyte)
    for i in numba.prange(new_size[0]):
        for j in range(new_size[1]):
            # Nearest-neighbour sample of the source, normalised to 0..1
            rgb = photo[int(i / new_size[0] * photo.shape[0]), int(j / new_size[1] * photo.shape[1])] / 255
            # A pure-white pixel has mean brightness exactly 1.0, which maps
            # to index n_chars, one past the last mask. Clamp it: compiled
            # code does not bounds-check, so it would read stray memory.
            char_num = min(int(np.floor(np.sum(rgb) / 3 / interval)), n_chars - 1)
            outimage[i * char_height: (i + 1) * char_height, j * char_width: (j + 1) * char_width, :] = char_masks[char_num] * rgb
    return outimage
def _gt(s=0.0):
    """Return the current ``time.perf_counter()`` reading minus *s*.

    Called without an argument it yields a start stamp; called with an
    earlier stamp it yields the seconds elapsed since that stamp.
    """
    now = time.perf_counter()
    return now - s
def main():
    """Time both renderers on ``test.png`` at several scales and save one sample of each."""
    photo = Image.open("test.png")
    N = 10
    scales = [0.01, 0.05, 0.1, 0.2, 0.5, 1.0]
    renderers = [make_magic_old, make_magic]
    for scale in scales:
        for render in renderers:
            # Warm-up call so caching / compilation time is not measured
            render(photo)
            started = _gt()
            for _ in range(N):
                render(photo, scale=scale)
            elapsed = _gt(started)
            print(f'{render.__name__:16}{scale:4.2f} : {elapsed / N * 1000:7.1f} ms')
        # Blank line between the result groups of two scales
        print()
    # Keep one output of each renderer for visual comparison
    sample_old = make_magic_old(photo, scale=0.2)
    sample_new = make_magic(photo, scale=0.2)
    sample_old.save('result_old.png')
    sample_new.save('result_new.png')
# Run the benchmark only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
在 Windows 10、i9-10900K、Python 3.11.4 上测试
您的结果可能会有很大差异。我测试所用的处理器远非“不强大的处理器”,但我认为这在大多数多线程处理器上都会对您有所帮助。可以看到,在最大的输出图像上我们获得了大约 32 倍的速度提升。
新代码的输出: