如何使用 Python-Docx 创建也适用于 Google Docs 的多级数字列表?

问题描述 投票:0回答:1

我的目标是使用

python-docx
创建嵌套的编号列表并上传到 Google 文档。目前,只有 0 级的列表可以正常工作;一旦列表包含多个级别就会出现问题,级别文本(编号)会不正确(有些尝试中编号为空,有些尝试中始终为 1,等等)。

将文件上传到 Google 文档时同样会出问题:由于文件中的编号定义不完整,转换效果不佳。最坏的情况下,文件在 Word 中显示正常,但上传到 Google 文档后,子项目没有级别文本(编号)。

我还尝试直接用 MS Word 创建一个 Word 文件并上传到 Google 文档,效果完美,这说明问题出在我使用以下库构建列表的方式上:

python-docx

我知道有一些关于数字列表的类似问题,但同时大多数问题都没有精确解决问题的答案,而且我的问题是关于嵌套列表+ Google Docs。

这是重现我的问题的最简单方法:

from docx import Document
from docx.shared import Inches
from docx.oxml import OxmlElement
from docx.oxml.ns import qn

# Indentation added per nesting level; the value is passed through Inches() where it is used.
LIST_INDENT = 0.5
MAX_INDENT = 5.5 # To stop indents going off the page
# Most recently numbered list paragraph; set to None so the next item starts a new list.
prev_li = None

def list_number(doc, paragraph=None, prev=None, level=None):
    """
    Make ``paragraph`` an item of a multi-level decimal numbered list.

    If ``prev`` already carries numbering (a ``w:numPr`` with a ``w:numId``),
    ``paragraph`` is attached to the same numbering instance so counting
    continues. Otherwise a new abstract numbering definition and a new
    numbering instance are created, which starts a fresh list.

    The abstract numbering defines every level (0..8), not just the level of
    the first item. A paragraph whose ``w:ilvl`` has no matching ``w:lvl``
    definition has no level text to render, which is why sub-items lost their
    numbers after conversion.

    :param doc: python-docx document that owns ``paragraph``.
    :param paragraph: paragraph to number. It has a ``None`` default but must
        be supplied; the function dereferences it unconditionally.
    :param prev: previous paragraph of the same list, or ``None`` to start a
        new list.
    :param level: zero-based nesting level. When ``None``, the level of
        ``prev`` is reused (0 if ``prev`` has no level or a new list is
        started). Only levels 0..8 get a definition.
    :return: None. ``paragraph`` is modified in place.
    """
    level_count = 9  # WordprocessingML numbering definitions hold levels 0..8

    def next_abstract_num_id(numbering):
        """
        Return an abstractNumId not yet used in the numbering part.
        """
        used = [
            int(el.get(qn('w:abstractNumId')))
            for el in numbering.findall(qn('w:abstractNum'))
        ]
        return max(used) + 1 if used else 0

    def add_val_child(parent, tag, val):
        """
        Append a ``<tag w:val="val"/>`` child to ``parent``.
        """
        child = OxmlElement(tag)
        child.set(qn('w:val'), val)
        parent.append(child)

    def create_abstract_num(numbering):
        """
        Add a multi-level decimal abstract numbering definition and return its id.
        """
        abstract_num_id = next_abstract_num_id(numbering)
        abstract_num = OxmlElement('w:abstractNum')
        abstract_num.set(qn('w:abstractNumId'), str(abstract_num_id))
        add_val_child(abstract_num, 'w:multiLevelType', 'multilevel')

        for ilvl in range(level_count):
            lvl = OxmlElement('w:lvl')
            lvl.set(qn('w:ilvl'), str(ilvl))
            # Children are appended in schema order: start, numFmt, lvlText, lvlJc.
            add_val_child(lvl, 'w:start', '1')
            add_val_child(lvl, 'w:numFmt', 'decimal')
            # "%N" refers to the counter of level N (1-based), so each level
            # shows its own counter: level 0 -> "%1.", level 1 -> "%2.", ...
            add_val_child(lvl, 'w:lvlText', '%{}.'.format(ilvl + 1))
            add_val_child(lvl, 'w:lvlJc', 'left')
            # No w:pStyle link: the paragraph references the numbering directly
            # through its own w:numPr, and 'LIST_NUMBER' is not a style id.
            abstract_num.append(lvl)

        # The schema requires every w:abstractNum to precede the w:num
        # elements, so insert before the first one instead of appending.
        anchor = numbering.find(qn('w:num'))
        if anchor is None:
            anchor = numbering.find(qn('w:numIdMacAtCleanup'))
        if anchor is None:
            numbering.append(abstract_num)
        else:
            anchor.addprevious(abstract_num)
        return abstract_num_id

    def create_num(numbering, abstract_num_id):
        """
        Create a numbering instance linked to the abstract numbering definition.
        """
        # add_num() picks an unused numId and inserts the element in the
        # correct position. A hand-rolled id of 0 would mean "no list".
        return numbering.add_num(abstract_num_id).numId

    numbering = doc.part.numbering_part.numbering_definitions._numbering

    prev_num_pr = None
    if prev is not None and prev._p.pPr is not None:
        prev_num_pr = prev._p.pPr.numPr

    if prev_num_pr is None or prev_num_pr.numId is None:
        # Nothing to continue from: start a new list.
        if level is None:
            level = 0
        num_id = create_num(numbering, create_abstract_num(numbering))
    else:
        if level is None:
            # A numPr without an explicit ilvl means level 0.
            level = prev_num_pr.ilvl.val if prev_num_pr.ilvl is not None else 0
        num_id = prev_num_pr.numId.val

    # Apply the numbering to the paragraph
    num_pr = paragraph._p.get_or_add_pPr().get_or_add_numPr()
    num_pr.get_or_add_numId().val = num_id
    num_pr.get_or_add_ilvl().val = level

def add_new_list_item(doc, text, level, list_style):
    """
    Append ``text`` to ``doc`` as a paragraph styled ``list_style``.

    The paragraph is indented by ``level * LIST_INDENT`` inches, capped at
    ``MAX_INDENT``, with line spacing 1. For the 'List Number' style the
    paragraph is also passed to ``list_number`` together with the previously
    numbered paragraph, and then remembered in the global ``prev_li``.
    """
    global prev_li

    item = doc.add_paragraph(text, style=list_style)

    indent_inches = min(level * LIST_INDENT, MAX_INDENT)
    fmt = item.paragraph_format
    fmt.left_indent = Inches(indent_inches)
    fmt.line_spacing = 1

    # Other styles (e.g. bullets) keep whatever numbering the style provides.
    if list_style != 'List Number':
        return

    list_number(doc=doc, paragraph=item, prev=prev_li, level=level)
    prev_li = item

document = Document()

# First list: three top-level numbered items.
document.add_heading('First Header')
for _text in ('First Item', 'Second Item', 'Third Item'):
    add_new_list_item(doc=document, text=_text, level=0, list_style='List Number')

# Forget the last numbered paragraph so the next item starts a new list.
prev_li = None
document.add_heading('Second Header')

# Second list: (text, level, style) for each item, in document order.
_second_list = (
    ('First Item', 0, 'List Number'),
    ('First SubItem', 1, 'List Number'),
    ('Second SubItem', 1, 'List Number'),
    ('Second Item', 0, 'List Number'),
    ('Third Item', 0, 'List Number'),
    ('Third SubItem', 1, 'List Bullet'),
    ('Fourth Item', 0, 'List Number'),
)
for _text, _level, _style in _second_list:
    add_new_list_item(doc=document, text=_text, level=_level, list_style=_style)

document.save('lists_test.docx')

您可以在此处检查脚本的结果: Docx file result from script 如您所见,最初看起来不错,因为两个列表看起来都有自己的计数器,但是当上传到 Google 文档时,子列表会丢失计数器,如下所示: enter image description here

我想我错过了一些关于

numbering
的要点。我找不到一种在不重新开始计数的情况下设置段落级别文本的方法;我尝试手动创建一个 lvl XML 元素,并设置所有与
create_abstract_num()
相同的属性,但似乎不起作用。

任何人都可以帮我找到这个问题的解决方案吗?如何为其他嵌套列表正确设置正确的数值?

python python-3.x docx python-docx
1个回答
0
投票

Python-docx 对编号的支持并不完整。它支持的编号都是单级的,而不是多级的。要添加多级编号,就必须编写底层代码,通过 XML 方法至少添加一个多级抽象编号。在您的情况下甚至需要两个,因为您希望在第二级同时使用十进制编号和项目符号,仅用一个抽象编号是无法实现这一点的。

我要做的是使用内置编号

List Number 2
以及
List Bullet 2
作为第二级。这几乎完全符合您想要的结果,而且至少在我这里,它在 Google Docs 中也能正常工作。为了在第二个标题之后重新开始编号,代码采用了用 python-docx 创建多个编号列表的方法。

代码:

from docx import Document
from docx.text.paragraph import Paragraph

def prepare_numberings(document: Document, style_name: str) -> int:
    """
    Create a numbering instance that restarts the list used by a style.

    Looks up the numId that the style named ``style_name`` links to, then adds
    a new ``w:num`` pointing at the same abstract numbering with a start
    override of 1 on level 0.

    :param document: python-docx document whose styles and numbering part are used.
    :param style_name: name of the list style, e.g. 'List Number'.
    :return: the numId of the new numbering instance, or -1 when no style with
        that name links to a numbering.
    """
    # Get the numId to which the style links. Styles without paragraph
    # properties or without a numbering link are skipped rather than
    # dereferenced, so asking for a non-list style returns -1.
    style_num_id = -1
    for style in document.styles:
        if style.name != style_name:
            continue
        pPr = style._element.pPr
        numPr = pPr.numPr if pPr is not None else None
        numId = numPr.numId if numPr is not None else None
        if numId is not None:
            style_num_id = numId.val

    if style_num_id < 0:
        return -1

    # Add a new numbering linking to the same abstractNumId, with a
    # startOverride so that counting begins again at 1.
    ct_numbering = document.part.numbering_part.numbering_definitions._numbering
    abstract_num_id = ct_numbering.num_having_numId(style_num_id).abstractNumId.val
    new_num = ct_numbering.add_num(abstract_num_id)
    start_override = new_num.add_lvlOverride(0)._add_startOverride()
    start_override.val = 1
    return new_num.numId
    
def set_link_to_numId(paragraph: Paragraph, num_id: int):
    """
    Link ``paragraph`` to the numbering instance ``num_id``.

    A negative ``num_id`` is treated as "no numbering available" and leaves
    the paragraph untouched.

    :param paragraph: paragraph to attach to the numbering instance.
    :param num_id: numId of the numbering instance, or a negative value.
    """
    if num_id < 0:
        return
    # get_or_add_* creates missing pPr/numPr/numId elements and reuses existing
    # ones. This avoids failing on a paragraph without pPr and avoids adding a
    # second numPr to a paragraph that already has one.
    numPr = paragraph._element.get_or_add_pPr().get_or_add_numPr()
    numPr.get_or_add_numId().val = num_id

def add_new_list_item(doc: Document, text: str, list_style: str, start_new: bool = False):
    """
    Append ``text`` to ``doc`` as a paragraph styled ``list_style``.

    :param doc: document to append to.
    :param text: text of the list item.
    :param list_style: paragraph style name, e.g. 'List Number' or 'List Bullet 2'.
    :param start_new: when True, the item is linked to a new numbering
        instance so that counting restarts at this paragraph.
    """
    p = doc.add_paragraph(text, style=list_style)
    if start_new:
        # Use the document passed in, not a module-level global, so the
        # function also works for documents bound to other names.
        new_num_id = prepare_numberings(doc, list_style)
        set_link_to_numId(p, new_num_id)

# Main program logic

document = Document()

# Each section is (heading, items); each item is (text, style, start_new).
_first_section = (
    ('First Item', 'List Number', True),
    ('Second Item', 'List Number', False),
    ('Third Item', 'List Number', False),
)
_second_section = (
    ('First Item', 'List Number', True),
    ('First SubItem', 'List Number 2', False),
    ('Second SubItem', 'List Number 2', False),
    ('Second Item', 'List Number', False),
    ('Third Item', 'List Number', False),
    ('Third SubItem', 'List Bullet 2', False),
    ('Fourth Item', 'List Number', False),
)

document.add_heading('First Header')
for _text, _style, _start_new in _first_section:
    add_new_list_item(doc=document, text=_text, list_style=_style, start_new=_start_new)

# Not read by any function in this script; kept so module state is unchanged.
prev_li = None
document.add_heading('Second Header')
for _text, _style, _start_new in _second_section:
    add_new_list_item(doc=document, text=_text, list_style=_style, start_new=_start_new)

document.save('lists_test.docx')

结果:

enter image description here

© www.soinside.com 2019 - 2024. All rights reserved.