我有一个Python脚本,旨在解析
docx Doc
以用带有超链接的文本替换代码(可能是无意中写错的)。我已经从文档中找到的字符串中获得了原始值和清理后的值:
extracted_tv = [{'raw': 'TV-FRM-05496', 'cleaned': 'TV-FRM-05496'}, {'raw': 'TV-SOP-25663', 'cleaned': 'TV-SOP-25663'}, {'raw': 'TV--REF----66655', 'cleaned': 'TV-REF-66655'}, {'raw': 'TV-FRM- 054912', 'cleaned': 'TV-FRM-054912'}, {'raw': 'TVXFRM01496', 'cleaned': 'TV-XFRM-01496'}, {'raw': 'TV WI 05496', 'cleaned': 'TV-WI-05496'}]
该 docx 包含以下内容: 电视-FRM-05496 ASDASTV-SOP-25663aS 电视--REF----66655 电视-FRM-054912 电视-XFRM-01496 电视-WI-05496
但是,当执行下面的脚本时,上面除了“TV--REF----66655”之外的所有字符串都被超链接正确替换。
我尝试删除 lower() 函数并尝试连接运行。有什么想法我可能做错了什么或有任何指示吗?我在下面提供了我的尝试。
import docx
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
from docx.enum.dml import MSO_THEME_COLOR_INDEX
import docx
def add_hyperlink(document, run, url, name):
"""
Add a hyperlink to a run while preserving the original font size and style,
but applying the hyperlink color and no underline.
:param document: The Document being edited.
:param run: The Run the hyperlink is being added to.
:param url: The url to be added to the link.
:param name: The text for the link to be displayed in the paragraph
:return: None
"""
# Preserve original font size and style
font = run.font
original_size = font.size
original_bold = font.bold
original_italic = font.italic
original_name = font.name
part = document.part
rId = part.relate_to(url, docx.opc.constants.RELATIONSHIP_TYPE.HYPERLINK, is_external=True)
# Create the hyperlink element
hyperlink = OxmlElement('w:hyperlink')
hyperlink.set(qn('r:id'), rId)
hyperlink.set(qn('w:history'), '1')
# Create a new run element
new_run = OxmlElement('w:r')
rPr = OxmlElement('w:rPr')
# Set the hyperlink style
rStyle = OxmlElement('w:rStyle')
rStyle.set(qn('w:val'), 'Hyperlink')
rPr.append(rStyle)
# Apply the original font properties to the hyperlink
if original_size:
size = OxmlElement('w:sz')
size.set(qn('w:val'), str(int(original_size.pt * 2))) # Font size in half-points
rPr.append(size)
if original_bold:
bold = OxmlElement('w:b')
bold.set(qn('w:val'), 'true')
rPr.append(bold)
if original_italic:
italic = OxmlElement('w:i')
italic.set(qn('w:val'), 'true')
rPr.append(italic)
if original_name:
rFonts = OxmlElement('w:rFonts')
rFonts.set(qn('w:ascii'), original_name)
rFonts.set(qn('w:hAnsi'), original_name)
rPr.append(rFonts)
# Apply the hyperlink color and remove underline
color = OxmlElement('w:color')
color.set(qn('w:val'), "0000FF") # Blue color
rPr.append(color)
u = OxmlElement('w:u')
u.set(qn('w:val'), 'none') # Remove underline
rPr.append(u)
new_run.append(rPr)
# Set the text for the new run
new_run_text = OxmlElement('w:t')
new_run_text.text = name
new_run.append(new_run_text)
# Append the new run to the hyperlink element
hyperlink.append(new_run)
# Insert the hyperlink before the original run and clear the original run
run._r.addprevious(hyperlink)
run.clear()
return None
# Example usage
intermediate_docx_file = "intermediate_file.docx"
doc = docx.Document(intermediate_docx_file)
# Process paragraphs
for paragraph in doc.paragraphs:
for run in paragraph.runs:
for tv_item in extracted_tv:
raw_tv_number = tv_item['raw']
cleaned_tv_number = tv_item['cleaned']
if raw_tv_number.lower() in run.text.lower():
hyperlink_url = f'https://hyperlink.com/DocNum={cleaned_tv_number}'
add_hyperlink(doc, run, hyperlink_url, cleaned_tv_number)
run.text = run.text.replace(cleaned_tv_number, '') # Remove the TV number from the run
output_docx_file = 'document_with_hyperlinks.docx'
doc.save(output_docx_file)
如果您愿意切换到商业库,您可以考虑使用 Aspose.Words 用超链接替换文档中的文本。我们的想法是用占位符本身替换占位符,使其表示为单个
Run
,然后找到 Run
并插入超链接。这是演示该技术的简单代码:
doc = aw.Document("C:\\Temp\\in.docx")
builder = aw.DocumentBuilder(doc)
# This is placeholder we would like to replace with hyperlink.
word = "test"
hyperlink = "https://www.aspose.com"
# Use Range.replace method to make each searched word a separate Run node.
opt = aw.replacing.FindReplaceOptions()
opt.use_substitutions = True
doc.range.replace(word, "$0", opt)
# Get all runs
runs = doc.get_child_nodes(aw.NodeType.RUN, True).to_array()
for r in runs :
run = r.as_run()
# process the runs with text that matches the searched word.
if run.text == word:
# Inset hyperlink
builder.move_to(run)
builder.insert_hyperlink(word, hyperlink, False)
# remove the matched run, since we inserted hyperlink with the same text
run.remove()
doc.save("C:\\Temp\\out.docx")