如何正确完成标题和图像的注释？

Question

在我的 python 代码中，我添加了函数

process_docx

来注释 header 和段落，还添加了一个函数

annotate_images

来注释 images。

我在输出文档中发现一个问题：header（应以黄色注释）和 images（应以浅蓝色注释）都没有被注释，只有段落以灰色注释。

在输入的Word文档中，我使用插入标题设置添加了1个标题、2个段落和1张从网络获取的图像。在输出的Word文档中，只有段落被注释。

这是我使用过的输入文档。

这是输出文档。

from docx import Document
from docx.shared import RGBColor
from docx.oxml import OxmlElement
from docx.oxml.ns import qn

def annotate_images(doc):
    """Try to shade paragraphs that hold an embedded image light blue.

    For each relationship of the main document part whose target reference
    contains the substring "image", every body paragraph's XML subtree is
    walked looking for an element whose tag ends in ``}inline`` and whose
    ``r:embed`` attribute equals the relationship id. A matching paragraph
    is passed to ``set_paragraph_bg_color``.

    NOTE(review): the surrounding question reports that no image gets
    shaded by this function; see the inline notes for likely causes.
    """
    for rel in doc.part.rels.values():
        if "image" in rel.target_ref:
            image = rel.target_part  # assigned but never read in this function
            for paragraph in doc.paragraphs:
                # iter() visits the paragraph element and all of its descendants.
                for elem in paragraph._element.iter():
                    if elem.tag.endswith('}inline'):
                        # NOTE(review): presumably r:embed is carried by a nested
                        # blip element rather than by the inline element itself,
                        # in which case this lookup returns None -- confirm
                        # against the document XML.
                        # NOTE(review): verify that the relationship object has a
                        # `rel_id` attribute (python-docx may expose it as `rId`).
                        if elem.attrib.get(qn('r:embed')) == rel.rel_id:    
                            set_paragraph_bg_color(paragraph, 'ADD8E6') # light blue
                            # Leaves only the innermost element loop; the
                            # paragraph loop continues with the next paragraph.
                            break

def process_docx(input_path, output_path):
    """Shade the paragraphs of a .docx file by kind and save a copy.

    Paragraphs whose style name starts with 'Heading' are filled yellow,
    any other paragraph with non-blank text is filled light grey, and
    blank paragraphs are left alone. ``annotate_images`` then runs on the
    document before it is written to *output_path*.
    """
    document = Document(input_path)

    for para in document.paragraphs:
        is_heading = para.style.name.startswith('Heading')
        # Skip blank paragraphs unless they carry a heading style.
        if not is_heading and not para.text.strip():
            continue
        fill = 'FFFF00' if is_heading else 'D3D3D3'  # yellow / light grey
        set_paragraph_bg_color(para, fill)

    annotate_images(document)

    document.save(output_path)

如果你们不明白，我将提供完整的代码。我看到有一个针对最小的、可重现的示例的全站规则，因此我简洁地问了我的问题。我也在codeproject问过这个问题，但没有结果。

我对标题无能为力，对于图像注释，我通过添加哈希技术尝试了此代码，但它不起作用。

import hashlib

def annotate_images(doc):
    """Attempt to find image paragraphs by comparing SHA-256 digests.

    For each relationship of the main document part whose target reference
    contains "image", the digest of the image bytes is compared with the
    digest of every body paragraph's text; on equality the paragraph is
    passed to ``annotate_paragraph`` with the fill 'ADD8E6'.

    NOTE(review): the surrounding question states that this attempt does
    not work; see the inline notes.
    """
    for rel in doc.part.rels.values():
        if "image" in rel.target_ref:
            image = rel.target_part
            # Hex digest of the raw image bytes.
            image_hash = hashlib.sha256(image.blob).hexdigest()
            for paragraph in doc.paragraphs:
                # Hex digest of the paragraph's text, UTF-8 encoded.
                paragraph_text_hash = hashlib.sha256(paragraph.text.encode('utf-8')).hexdigest()
                # NOTE(review): the two digests are computed over different
                # data (image bytes vs. paragraph text), so this equality is
                # presumably never true -- confirm the intended matching rule.
                if image_hash == paragraph_text_hash:
                    # NOTE(review): annotate_paragraph is not defined in the
                    # visible code -- confirm it exists elsewhere.
                    annotate_paragraph(paragraph, 'ADD8E6')

Answer 1

我发现 headers 和 images 未被注释的原因，在于我将它们插入 Word 文档时所用的方法。

对于标题，只需从样式部分选择标题，对于图像，您必须通过单击插入>插图>图片中的图片来添加它。我已经完成了代码，并在其中添加了方程注释。

这是完整的代码和 IO 快照。我希望这对将来遇到文档注释问题的人有所帮助。

输入文档

输出文档

from docx import Document
from docx.shared import RGBColor
from docx.oxml import OxmlElement
from docx.oxml.ns import qn

def set_paragraph_bg_color(paragraph, color_hex): #Function which adds background colors
    """Set the background fill of *paragraph* to *color_hex*.

    Args:
        paragraph: A python-docx paragraph whose ``_element`` provides
            ``get_or_add_pPr()``.
        color_hex: Fill colour as a hex string without '#', e.g. 'FFFF00'.

    Any shading element already present in the paragraph properties is
    removed first, so calling this function repeatedly on the same
    paragraph leaves exactly one ``w:shd`` holding the most recent colour
    instead of piling up duplicates.
    """
    pPr = paragraph._element.get_or_add_pPr()

    # Drop earlier shading so the new fill replaces it rather than
    # sitting next to it as a second w:shd child.
    for stale_shd in pPr.findall(qn('w:shd')):
        pPr.remove(stale_shd)

    shading_elm = OxmlElement('w:shd')
    shading_elm.set(qn('w:val'), 'clear')
    shading_elm.set(qn('w:color'), 'auto')
    shading_elm.set(qn('w:fill'), color_hex)
    pPr.append(shading_elm)

def insert_label(paragraph, label): #Labelling on top of the doc element
    """Insert a new paragraph reading ``[label]`` directly before *paragraph*.

    The label text is added as a single run that is bold and coloured
    red (RGB FF0000).
    """
    label_paragraph = paragraph.insert_paragraph_before()
    tag_run = label_paragraph.add_run(f'[{label}]')
    red = RGBColor(0xFF, 0x00, 0x00)
    tag_run.bold = True
    tag_run.font.color.rgb = red

def annotate_images(doc):
    """Label and shade every body paragraph that contains a picture.

    A paragraph counts as an image paragraph when at least one of its runs
    contains a ``w:drawing`` or ``w:pict`` element. Each such paragraph
    gets exactly one '[This is an image]' label above it and a light-blue
    fill, no matter how many of its runs hold pictures.

    Args:
        doc: The python-docx document whose ``paragraphs`` are scanned.
    """
    for paragraph in doc.paragraphs:
        # Decide once per paragraph; annotating inside the run loop would
        # repeat the label and shading for every image-bearing run.
        has_image = any(
            run.element.xpath('.//w:drawing') or run.element.xpath('.//w:pict')
            for run in paragraph.runs
        )
        if has_image:
            insert_label(paragraph, 'This is an image')
            set_paragraph_bg_color(paragraph, 'ADD8E6')  # Light blue for images

def process_docx(input_path, output_path): #for heading, equation and paragraphs
    """Annotate a .docx file with labels and background colours and save it.

    Body paragraphs are classified by the first rule that matches:
    style name starting with 'Heading' (yellow), text containing
    "Equation:" (green), otherwise any non-blank text (light grey).
    Each classified paragraph receives a label above it and a fill.
    Non-blank paragraphs inside table cells are filled white, then
    ``annotate_images`` is run and the result is written to *output_path*.

    Any exception raised along the way is caught and reported with
    ``print``; nothing is re-raised.
    """
    try:
        document = Document(input_path)

        for para in document.paragraphs:
            # Choose (label, fill) first, then apply both in one place.
            if para.style.name.startswith('Heading'):
                marker = ('This is a Header', 'FFFF00')  # yellow
            elif "Equation:" in para.text:
                marker = ('This is an Equation', '00FF00')
            elif para.text.strip():
                marker = ('This is a Paragraph', 'D3D3D3')  # light grey
            else:
                continue  # blank paragraph: leave untouched

            label_text, fill = marker
            insert_label(para, label_text)
            set_paragraph_bg_color(para, fill)

        for table in document.tables:
            for row in table.rows:
                for cell in row.cells:
                    non_blank = [p for p in cell.paragraphs if p.text.strip()]
                    for cell_para in non_blank:
                        set_paragraph_bg_color(cell_para, 'FFFFFF')  # white

        annotate_images(document)

        document.save(output_path)
        print(f"Annotated Document saved to {output_path}")

    except Exception as err:
        print(f"Error processing document: {err}")
    

# Source document to annotate, and the file the annotated copy is saved as.
output_path = 'annotated_document.docx'
input_path = 'C://Users//Arnav//Desktop//Document annotate tool//MILKY_WAY.docx'

# Runs the annotation as soon as this file is executed.
process_docx(input_path=input_path, output_path=output_path)

如何正确完成标题和图像的注释？

问题描述投票：0回答：1

1个回答

最新问题

如何正确完成标题和图像的注释？

问题描述 投票：0回答：1

1个回答

最新问题

问题描述投票：0回答：1