我在 Python 中使用 Wand 循环生成一张图像,在其中逐个绘制带黑色边框、内部填充文本的方框。这是我当前的代码:
import re
from unicodedata import normalize
from docx import Document
from wand.image import Image
from wand.drawing import Drawing
from wand.font import Font
# Read the Word document and flatten its paragraphs into one
# newline-separated string.
doc = Document("P.docx")
docText = [para.text for para in doc.paragraphs]
fullText = "\n".join(docText)
# Running number; the drawing loop further down increments it once per match
# and prints it under the match text.
ct = 242
def get(source, begin, end):
    """Return the part of ``source`` found between ``begin`` and ``end``.

    The first occurrence of ``begin`` is located, then the first occurrence
    of ``end`` after it; the text strictly between the two is returned.

    Args:
        source: String to search.
        begin: Delimiter that precedes the wanted text.
        end: Delimiter that follows the wanted text.

    Returns:
        The enclosed substring, or ``""`` when either delimiter is missing.
    """
    try:
        # Search for the delimiter strings themselves (not their lengths) and
        # start looking for ``end`` only after ``begin`` has been consumed.
        start = source.index(begin) + len(begin)
        finish = source.index(end, start)
    except ValueError:
        return ""
    return source[start:finish]
def capitalize(string):
    """Upper-case the first character of every item in ``string``.

    Despite the parameter name, ``string`` is an iterable of strings. Unlike
    ``str.capitalize`` the remainder of each item is left untouched.

    Args:
        string: Iterable of strings.

    Returns:
        A generator yielding the adjusted strings in order.
    """
    # Slicing with [:1] (instead of indexing [0]) keeps empty items from
    # raising IndexError; they simply come back as "".
    return (item[:1].upper() + item[1:] for item in string)
def find_matches(text):
    """Extract the clauses of ``text`` that follow a one-character line prefix.

    ``text`` is NFKD-normalized first. A match starts at the beginning of a
    line with a single non-digit character followed by whitespace; the
    captured clause is everything after that whitespace up to (not including)
    the first ``.`` or ``;``.

    Args:
        text: The full text to scan.

    Returns:
        Whatever ``capitalize`` returns for the list of captured clauses.
    """
    normalized = normalize("NFKD", text)
    # The capture group used to be ``([^.;]+\s*)+``. Because ``[^.;]`` already
    # matches whitespace, that nested quantifier captured exactly the same
    # text but backtracked exponentially whenever a clause had no terminating
    # "." or ";". A single ``[^.;]+`` yields identical matches in linear time.
    clauses = re.findall(r"^[^0-9]\s+([^.;]+)[.;]+", normalized, re.MULTILINE)
    return capitalize(clauses)
# Draw one fixed-size bordered box per match on a white canvas, stacked
# vertically, then crop the canvas and write it to disk.
with Image(width=400, height=1000, pseudo='xc:white') as canvas:
    left = 2
    top = 2
    width = 395
    height = 131
    for match in find_matches(text=fullText):
        ct += 1
        with Drawing() as context:
            # Border: a black rectangle with a white one inset by 2px on
            # every side.
            context.fill_color = 'black'
            context.rectangle(left=left, top=top, width=width, height=height)
            context.fill_color = 'white'
            context.rectangle(left=left + 2, top=top + 2,
                              width=width - 4, height=height - 4)
            # NOTE(review): no size is given to Font here; presumably that is
            # why caption() renders each box at a different size -- confirm
            # against the Wand documentation.
            canvas.font = Font('/System/Library/Fonts/timesnewroman.ttf')
            context(canvas)
            canvas.caption(match + '\n' + str(ct),
                           left=left + 5, top=top,
                           width=width - 10, height=height,
                           gravity='center')
        # Move down by one box height plus a 4px gap.
        top += 135
    canvas.crop(bottom=top)
    canvas.save(filename='patdrawTest.png')
使用此代码,我能够生成以下输出:
问题显然是字体大小缺乏统一性。以下是我的输出应该是什么样的(忽略框中的文本):
我唯一关心的是确保所有文本的字体大小相同(12 号就很好),并且那些边框框会根据文本调整大小。感谢您的帮助!
编辑
根据提供的答案,这是我当前的代码:
import re
from unicodedata import normalize
from docx import Document
from wand.image import Image
from wand.drawing import Drawing
# Read the Word document and flatten its paragraphs into one
# newline-separated string.
doc = Document("P.docx")
docText = [para.text for para in doc.paragraphs]
fullText = "\n".join(docText)
# Running number; the drawing loop further down increments it once per match
# and appends it to the match text.
ct = 242
def get(source, begin, end):
    """Return the part of ``source`` found between ``begin`` and ``end``.

    The first occurrence of ``begin`` is located, then the first occurrence
    of ``end`` after it; the text strictly between the two is returned.

    Args:
        source: String to search.
        begin: Delimiter that precedes the wanted text.
        end: Delimiter that follows the wanted text.

    Returns:
        The enclosed substring, or ``""`` when either delimiter is missing.
    """
    try:
        # Search for the delimiter strings themselves (not their lengths) and
        # start looking for ``end`` only after ``begin`` has been consumed.
        start = source.index(begin) + len(begin)
        finish = source.index(end, start)
    except ValueError:
        return ""
    return source[start:finish]
def capitalize(string):
    """Upper-case the first character of every item in ``string``.

    Despite the parameter name, ``string`` is an iterable of strings. Unlike
    ``str.capitalize`` the remainder of each item is left untouched.

    Args:
        string: Iterable of strings.

    Returns:
        A generator yielding the adjusted strings in order.
    """
    # Slicing with [:1] (instead of indexing [0]) keeps empty items from
    # raising IndexError; they simply come back as "".
    return (item[:1].upper() + item[1:] for item in string)
def find_matches(text):
    """Extract the clauses of ``text`` that follow a one-character line prefix.

    ``text`` is NFKD-normalized first. A match starts at the beginning of a
    line with a single non-digit character followed by whitespace; the
    captured clause is everything after that whitespace up to (not including)
    the first ``.`` or ``;``.

    Args:
        text: The full text to scan.

    Returns:
        Whatever ``capitalize`` returns for the list of captured clauses.
    """
    normalized = normalize("NFKD", text)
    # The capture group used to be ``([^.;]+\s*)+``. Because ``[^.;]`` already
    # matches whitespace, that nested quantifier captured exactly the same
    # text but backtracked exponentially whenever a clause had no terminating
    # "." or ";". A single ``[^.;]+`` yields identical matches in linear time.
    clauses = re.findall(r"^[^0-9]\s+([^.;]+)[.;]+", normalized, re.MULTILINE)
    return capitalize(clauses)
def to_chunks(words, size):
    """Yield consecutive slices of ``words`` holding at most ``size`` items.

    The last slice is shorter when ``len(words)`` is not a multiple of
    ``size``.
    """
    starts = range(0, len(words), size)
    yield from (words[begin:begin + size] for begin in starts)
def rebuild_text(words, size):
    """Re-wrap ``words`` into newline-separated lines of ``size`` words each.

    Words on one line are joined with single spaces; chunking is delegated to
    ``to_chunks``.
    """
    lines = (" ".join(chunk) for chunk in to_chunks(words, size))
    return "\n".join(lines)
# Layout settings, in pixels.
target_width = 396
target_height = 0
y_offset = 0
y_padding = 10
x_padding = 5
# Render every match as wrapped 16pt text inside a stroked box, then trim the
# canvas and save it.
with Image(width=1000, height=1000, pseudo='xc:white') as img:
    for match in find_matches(text=fullText):
        ct += 1
        with Drawing() as ctx:
            ctx.font_size = 16
            ctx.text_alignment = 'center'
            words = match.split(" ")
            words.append("\n" + str(ct))
            word_count = len(words)
            # Re-wrap with ever fewer words per line until the measured
            # width no longer exceeds target_width.
            while True:
                temp_text = rebuild_text(words, word_count)
                metrics = ctx.get_font_metrics(img, temp_text, multiline=True)
                if not metrics.text_width > target_width:
                    break
                word_count -= 1
            text = temp_text
            target_height = int(metrics.text_height + 0.5)
            box_top = y_offset + y_padding
            box_width = 2*x_padding + target_width
            box_height = 2*y_padding + target_height
            # push/pop keeps the box colours from leaking into the text.
            ctx.push()
            ctx.fill_color = 'white'
            ctx.stroke_width = 4
            ctx.stroke_color = 'black'
            ctx.rectangle(left=0, top=box_top, width=box_width, height=box_height)
            ctx.pop()
            ctx.text(x_padding + (target_width // 2), 4*y_padding + y_offset, text)
            ctx(img)
            # NOTE(review): this assigns instead of accumulating, so from the
            # third box on the offset depends only on the previous box's
            # height -- looks like the cause of the overlapping/missing boxes.
            y_offset = target_height + 4*y_padding
    img.trim()
    img.save(filename='patdrawdemoTest.png')
从这段代码中,我得到以下输出:
我不太确定如何解决这种奇怪的间距。第一个和第三个框看起来很好,但第一个和第二个框之间有奇怪的空白,两侧的边框不均匀,而且本应还有另外两个框(如原始帖子中的图所示)却没有显示出来。任何帮助都将不胜感激!
我唯一关心的是确保所有文本的字体大小相同(12 号就很好),并且那些边框框会根据文本调整大小。感谢您的帮助!
删除 `Image.caption` 方法(它的作用与您想要的相反),并改用 `Drawing.text` 方法。`Drawing` 类还有一个 `get_font_metrics` 方法,可用来计算最终的渲染大小。这允许您设置文本格式,检查它是否合适,然后绘制(或重新格式化并重复)。
举这个粗略的例子...
# Sample paragraphs to render; each ends with a newline followed by a
# three-digit tag.
content = [
    (
        "Donec pretium vulputate sapien nec sagittis aliquam malesuada. "
        "Neque aliquam vestibulum morbi blandit cursus risus at ultrices mi.\n111"
    ),
    (
        "Adipiscing elit ut aliquam purus sit amet luctus venenatis. "
        "Eget mauris pharetra et ultrices neque ornare aenean. "
        "Viverra orci sagittis eu volutpat odio facilisis mauris. "
        "Vitae proin sagittis nisl rhoncus mattis rhoncus. "
        "Sapien nec sagittis aliquam malesuada bibendum arcu vitae.\n222"
    ),
]
def to_chunks(words, size):
    """Yield consecutive slices of ``words`` holding at most ``size`` items.

    The last slice is shorter when ``len(words)`` is not a multiple of
    ``size``.
    """
    starts = range(0, len(words), size)
    yield from (words[begin:begin + size] for begin in starts)
def rebuild_text(words, size):
    """Re-wrap ``words`` into newline-separated lines of ``size`` words each.

    Words on one line are joined with single spaces; chunking is delegated to
    ``to_chunks``.
    """
    lines = (" ".join(chunk) for chunk in to_chunks(words, size))
    return "\n".join(lines)
# Layout settings, in pixels.
TARGET_WIDTH = 395       # widest a wrapped block of text may be
TARGET_HEIGHT = 0        # height of the most recently measured text block
BOX_Y_OFFSET = 0         # vertical position of the box being drawn
BOX_Y_PADDING = 10
BOX_X_PADDING = 5
BOX_STROKE_WIDTH = 4
# Strokes are centred on the outline, so a box starting at x=0 would lose
# half of its left border off-canvas. Shift everything right by half a stroke.
BOX_X_INSET = BOX_STROKE_WIDTH // 2
# Render every entry of ``content`` as wrapped 16pt text inside a stroked box,
# stacking the boxes vertically, then trim the canvas and save it.
with Image(width=1000, height=1000, background='white') as img:
    for text in content:
        with Drawing() as ctx:
            ctx.font_size = 16
            ctx.text_alignment = 'center'
            words = text.split(" ")
            word_count = len(words)
            # Re-wrap with ever fewer words per line until the text fits.
            while True:
                tmp_text = rebuild_text(words, word_count)
                metrics = ctx.get_font_metrics(img, tmp_text, multiline=True)
                # Stop at one word per line: the text cannot be wrapped any
                # further, and a chunk size of 0 would make range() raise.
                if metrics.text_width > TARGET_WIDTH and word_count > 1:
                    word_count -= 1
                else:
                    text = tmp_text
                    TARGET_HEIGHT = int(metrics.text_height + 0.5)
                    break
            # push/pop keeps the box colours from leaking into the text.
            ctx.push()
            ctx.fill_color = 'white'
            ctx.stroke_width = BOX_STROKE_WIDTH
            ctx.stroke_color = 'black'
            ctx.rectangle(BOX_X_INSET, BOX_Y_OFFSET + BOX_Y_PADDING,
                          width=2*BOX_X_PADDING + TARGET_WIDTH,
                          height=2*BOX_Y_PADDING + TARGET_HEIGHT)
            ctx.pop()
            ctx.text(BOX_X_INSET + BOX_X_PADDING + (TARGET_WIDTH // 2),
                     4*BOX_Y_PADDING + BOX_Y_OFFSET, text)
            ctx(img)
            # Accumulate the offset. Plain assignment only positioned the
            # second box correctly; every later box was placed relative to the
            # top of the canvas and overlapped its predecessors.
            BOX_Y_OFFSET += TARGET_HEIGHT + 4*BOX_Y_PADDING
    img.trim()
    img.save(filename='output.png')
这一切的作用是:
请记住。
我已经能够测试上面的解决方案,虽然它有效,但它会在不同字长的集合上引起问题,因此最好的方法是单独考虑每一行。
我借此机会与社区分享一个实现:这个版本解决了循环的问题,只是 Unicode 我不知道如何处理,如果有人想解决它,非常欢迎。
from wand.color import Color
from wand.drawing import Drawing
from wand.image import Image
def get_text_width(context: Drawing, img: Image, text: str) -> float:
    """Return the rendered width of ``text`` as reported by the font metrics.

    An empty string short-circuits to ``0.0`` without querying the metrics.
    """
    if text == '':
        return .0
    metrics = context.get_font_metrics(img, text, multiline=True)
    return metrics.text_width
def join_words(words: list[str]) -> str:
    """Glue ``words`` back together with a single space between neighbours."""
    separator = " "
    return separator.join(words)
def split_text_by_with(context: Drawing, img: Image, text: str, width_max: int):
    """Greedily wrap ``text`` so that rendered lines stay within ``width_max``.

    ``text`` is split on single spaces and words are packed onto a line for as
    long as the measured width of that line does not exceed ``width_max``.

    Args:
        context: Drawing whose current font settings are used for measuring.
        img: Image passed through to the font-metrics query.
        text: Text to wrap.
        width_max: Maximum rendered line width.

    Returns:
        The wrapped text, lines separated by ``"\\n"``. A single word that is
        wider than ``width_max`` on its own is placed alone on a line.
    """
    lines = []
    current = []  # words already committed to the line being built
    for word in text.split(" "):
        candidate = current + [word]
        # Break only when there is something to break off. An over-wide word
        # on an empty line is accepted as-is; the earlier version kept
        # emitting empty lines for it and never terminated.
        if current and get_text_width(context, img, join_words(candidate)) > width_max:
            lines.append(join_words(current))
            current = [word]
        else:
            current = candidate
    lines.append(join_words(current))
    return "\n".join(lines)
# Sample texts to render.
content = [
    """Donec pretium vulputate sapien nec sagittis aliquam malesuada. Neque aliquam vestibulum morbi blandit cursus risus at ultrices mi.\n111""",
    """rhoncus mattis rhoncus. Sapien nec sagittis aliquam malesuada bibendum arcu vitae.\n222""",
    # Code points above U+FFFF need the 8-digit \U escape; the 4-digit form
    # "\u1FAB4" is read as U+1FAB followed by a literal "4".
    """\U0001FAB4🪴""",  # https://symbl.cc/en/1FAB4/
]
# Canvas and layout settings, in pixels.
IMG_WIDTH = 1000
IMG_HEIGHT = 2800
STROKE_WIDTH = 4
BOX_Y_PADDING = 10
LIST_MARGIN_BOTTOM = 20
MAX_WIDTH = 800
# Vertical position at which the next text block is drawn.
offset_y = BOX_Y_PADDING
# Render every sample text in each font size / family combination, outline
# each rendered block, and mark the allowed width with two vertical lines.
with Image(width=IMG_WIDTH, height=IMG_HEIGHT, background='white') as img:
    with Drawing() as context:
        for font_size in [16, 21, 32]:
            for font_family in ['DejaVu Sans', 'Arial', "Verdana"]:
                context.font_size = font_size
                context.font_family = font_family
                for text in content:
                    text = split_text_by_with(context, img, text, MAX_WIDTH)
                    if text == "":
                        # get_font_metrics raises on an empty string
                        continue
                    metrics = context.get_font_metrics(img, text, multiline=True)
                    # centre the block horizontally on the image
                    x = int((img.width - metrics.text_width) / 2)
                    y = offset_y
                    baseline = int(y + metrics.ascender + .5)
                    context.push()
                    context.text(x, baseline, text)
                    context.pop()
                    # outline the measured text area for verification
                    context.push()
                    context.fill_color = "transparent"
                    context.stroke_width = STROKE_WIDTH
                    context.stroke_color = 'grey'
                    context.rectangle(left=x, top=y,
                                      width=metrics.text_width,
                                      height=metrics.text_height)
                    context.pop()
                    block_height = 2*STROKE_WIDTH + metrics.text_height + LIST_MARGIN_BOTTOM
                    offset_y += int(block_height + .5)
        context(img)
    # show that the text never goes beyond MAX_WIDTH
    with Drawing() as context:
        context.stroke_color = Color('red')
        context.stroke_width = 1
        x = int((IMG_WIDTH - MAX_WIDTH)/2 + .5)
        for guide_x in (x, x + MAX_WIDTH):
            context.line((guide_x, 0), (guide_x, IMG_HEIGHT))
        context(img)
    img.save(filename='example_text_width.png')
```
[![result][1]][1]
[1]: https://i.sstatic.net/IYQbN28W.png