我的目标是使用
python-docx
创建嵌套数字列表并上传到 Google 文档。目前,我可以处理 0 级列表并且工作正常,当列表上有多个级别时就会出现问题,这会导致级别文本不准确(一些尝试为空,其他尝试为 1 等)。
尝试将文件上传到 Google 文档时这也是一个问题:由于文件中的列表定义不明确,转换效果不佳。在最坏的情况下,文件在 Word(Docx)中可以正常显示,但上传到 Google 文档后,子项目会丢失级别文本(即编号)。
我还尝试从 MS Word 本身创建一个 Word 文件并上传到 Google 文档,效果完美,这确保问题出在我的列表构建上
python-docx
。
我知道有一些关于数字列表的类似问题,但同时大多数问题都没有精确解决问题的答案,而且我的问题是关于嵌套列表+ Google Docs。
这是重现我的问题的最简单方法:
from docx import Document
from docx.shared import Inches
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
# Indent step per nesting level; multiplied by the level and passed to Inches().
LIST_INDENT = 0.5
MAX_INDENT = 5.5 # To stop indents going off the page
# Paragraph remembered by add_new_list_item() and handed to list_number() as
# `prev`; None means there is no previous item, so a new numbering is created.
prev_li = None
def list_number(doc, paragraph=None, prev=None, level=None):
    """Attach multilevel decimal numbering to ``paragraph``.

    When ``prev`` carries no direct numbering (or is None), a new abstract
    numbering definition and a new ``w:num`` instance are created, which starts
    a fresh list. Otherwise ``paragraph`` joins the list ``prev`` belongs to,
    so its counter continues.

    Args:
        doc: python-docx document that owns the numbering part.
        paragraph: paragraph to number. Required despite the ``None`` default.
        prev: previously numbered paragraph of the same list, or None to start
            a new list.
        level: zero-based nesting level. Defaults to the level of ``prev``, or
            0 when there is no usable ``prev``. Clamped to 0..8, the range of
            levels an abstract numbering definition can hold.
    """
    # An abstract numbering definition holds at most nine levels (ilvl 0..8).
    level_count = 9

    def get_next_abstractNumId(numbering):
        """Return the smallest abstractNumId above every id already in use."""
        existing_ids = [
            int(abstract_num.get(qn('w:abstractNumId')))
            for abstract_num in numbering.findall(qn('w:abstractNum'))
        ]
        return max(existing_ids) + 1 if existing_ids else 0

    def create_abstract_num(numbering):
        """Add an abstract numbering defining every level; return its id."""
        abstract_num_id = get_next_abstractNumId(numbering)
        abstract_num = OxmlElement('w:abstractNum')
        abstract_num.set(qn('w:abstractNumId'), str(abstract_num_id))

        multi_level_type = OxmlElement('w:multiLevelType')
        multi_level_type.set(qn('w:val'), 'multilevel')
        abstract_num.append(multi_level_type)

        # Define all levels up front. A paragraph whose ilvl has no matching
        # w:lvl gets no usable level text, which is why nested items lost
        # their numbers when only one level was defined.
        for ilvl in range(level_count):
            lvl = OxmlElement('w:lvl')
            lvl.set(qn('w:ilvl'), str(ilvl))
            # Children are appended in schema order:
            # start, numFmt, lvlText, lvlJc.
            # No w:pStyle is written: the former value 'LIST_NUMBER' is not
            # the id of any style, so it was a dangling reference.
            level_settings = (
                ('w:start', '1'),
                ('w:numFmt', 'decimal'),
                # '%N' refers to the counter of level N (1-based), so each
                # level must reference its own counter rather than '%1'.
                ('w:lvlText', f'%{ilvl + 1}.'),
                ('w:lvlJc', 'left'),
            )
            for tag, value in level_settings:
                child = OxmlElement(tag)
                child.set(qn('w:val'), value)
                lvl.append(child)
            abstract_num.append(lvl)

        # All w:abstractNum elements have to precede the w:num elements, so
        # insert in front of the first w:num instead of appending at the end.
        trailing_tags = (qn('w:num'), qn('w:numIdMacAtCleanup'))
        anchor = next(
            (child for child in numbering if child.tag in trailing_tags),
            None,
        )
        if anchor is None:
            numbering.append(abstract_num)
        else:
            anchor.addprevious(abstract_num)
        return abstract_num_id

    numbering = doc.part.numbering_part.numbering_definitions._numbering

    # Direct numbering properties of the previous paragraph, if it has any.
    prev_numPr = None
    if prev is not None and prev._p.pPr is not None:
        prev_numPr = prev._p.pPr.numPr

    if prev_numPr is None or prev_numPr.numId is None:
        # No list to continue: build a new definition and a new instance.
        level = 0 if level is None else level
        abstract_num_id = create_abstract_num(numbering)
        # add_num() picks an unused numId starting at 1; numId 0 must be
        # avoided because it means "no numbering".
        num_id = numbering.add_num(abstract_num_id).numId
    else:
        if level is None:
            prev_ilvl = prev_numPr.ilvl
            level = prev_ilvl.val if prev_ilvl is not None else 0
        num_id = prev_numPr.numId.val

    level = max(0, min(level, level_count - 1))

    # Apply the numbering to the paragraph
    numPr = paragraph._p.get_or_add_pPr().get_or_add_numPr()
    numPr.get_or_add_numId().val = num_id
    numPr.get_or_add_ilvl().val = level
def add_new_list_item(doc, text, level, list_style):
    """Append a list paragraph to ``doc`` and number it when required.

    Args:
        doc: python-docx document to append to.
        text: text of the new list item.
        level: zero-based nesting level; also drives the left indent.
        list_style: paragraph style name. Only 'List Number' items receive
            explicit numbering through list_number().

    The module-level ``prev_li`` is updated only for numbered items, so an
    interleaved paragraph of another style (e.g. 'List Bullet') does not break
    the chain and make the following numbered item restart at 1.
    """
    global prev_li
    paragraph = doc.add_paragraph(text, style=list_style)
    paragraph_format = paragraph.paragraph_format
    paragraph_format.left_indent = Inches(min(level * LIST_INDENT, MAX_INDENT))
    paragraph_format.line_spacing = 1
    if list_style == 'List Number':
        list_number(doc=doc, paragraph=paragraph, prev=prev_li, level=level)
        prev_li = paragraph
# Demo: two headed sections, each followed by a list, saved to lists_test.docx.
document = Document()

document.add_heading('First Header')
for item_text in ('First Item', 'Second Item', 'Third Item'):
    add_new_list_item(doc=document, text=item_text, level=0, list_style='List Number')

# Forget the previous item so the list under the next heading is a new one.
prev_li = None
document.add_heading('Second Header')
second_section_items = (
    ('First Item', 0, 'List Number'),
    ('First SubItem', 1, 'List Number'),
    ('Second SubItem', 1, 'List Number'),
    ('Second Item', 0, 'List Number'),
    ('Third Item', 0, 'List Number'),
    ('Third SubItem', 1, 'List Bullet'),
    ('Fourth Item', 0, 'List Number'),
)
for item_text, item_level, item_style in second_section_items:
    add_new_list_item(doc=document, text=item_text, level=item_level, list_style=item_style)

document.save('lists_test.docx')
您可以在此处检查脚本的结果: 如您所见,最初看起来不错,因为两个列表看起来都有自己的计数器,但是当上传到 Google 文档时,子列表会丢失计数器,如下所示:
我想我错过了一些关于
numbering
的要点:我找不到一种在不重新开始计数的情况下设置段落级别文本的方法。我尝试手动创建一个 lvl XML 元素,并设置与 create_abstract_num()
相同的所有属性,但似乎不起作用。
任何人都可以帮我找到这个问题的解决方案吗?如何为其他嵌套列表正确设置正确的数值?
python-docx 对编号的支持并不完整:它支持的所有编号都只是单级的,而不是多级的。要添加多级编号,就必须编写底层代码,通过 XML 方法添加至少一个多级抽象编号。在您的情况下甚至需要两个,因为您希望第二级既使用十进制编号,又使用项目符号,而这无法用同一个抽象编号实现。
我要做的是使用内置编号
List Number 2
以及 List Bullet 2
作为第二级。这几乎完全符合您想要的结果,而且至少在我这里,它在 Google Docs 中也能正常工作。下面的代码沿用了“使用 python-docx 创建多个编号列表”的做法,使编号在第二个标题之后重新开始。
代码:
from docx import Document
from docx.text.paragraph import Paragraph
def prepare_numberings(document: Document, style_name: str) -> int:
    """Create a numbering instance that restarts the list used by a style.

    Looks up the numId that the style named ``style_name`` links to, then adds
    a new ``w:num`` pointing at the same abstract numbering with a start
    override of 1 on level 0.

    Args:
        document: python-docx document whose styles and numbering part are used.
        style_name: name of the paragraph style, e.g. 'List Number'.

    Returns:
        The numId of the newly added numbering instance, or -1 when no style
        with that name links to a numbering.
    """
    # Find the numId the style links to. A style without numbering properties
    # is skipped rather than raising AttributeError.
    style_num_id = -1
    for style in document.styles:
        if style.name != style_name:
            continue
        pPr = style._element.pPr
        numPr = pPr.numPr if pPr is not None else None
        if numPr is not None and numPr.numId is not None:
            style_num_id = numPr.numId.val

    if style_num_id < 0:
        return -1

    # Add a new numbering linking to the same abstractNumId, with a
    # startOverride on level 0 so counting begins at 1 again.
    ct_numbering = document.part.numbering_part.numbering_definitions._numbering
    linked_num = ct_numbering.num_having_numId(style_num_id)
    new_num = ct_numbering.add_num(linked_num.abstractNumId.val)
    start_override = new_num.add_lvlOverride(0)._add_startOverride()
    start_override.val = 1
    return new_num.numId
def set_link_to_numId(paragraph: Paragraph, num_id: int):
    """Link ``paragraph`` directly to the numbering instance ``num_id``.

    Args:
        paragraph: paragraph whose numbering properties are set.
        num_id: numId to link to; negative values mean "no numbering found"
            and leave the paragraph untouched.
    """
    if num_id < 0:
        return
    # get_or_add_* reuses existing pPr/numPr/numId elements, so a paragraph
    # without pPr does not raise and existing children are not duplicated.
    numPr = paragraph._element.get_or_add_pPr().get_or_add_numPr()
    numPr.get_or_add_numId().val = num_id
def add_new_list_item(doc: Document, text: str, list_style: str, start_new: bool = False):
    """Append a paragraph styled ``list_style`` and optionally restart its list.

    Args:
        doc: python-docx document to append to.
        text: text of the new list item.
        list_style: paragraph style name, e.g. 'List Number' or 'List Number 2'.
        start_new: when True, the paragraph is linked to a fresh numbering
            instance so counting starts again at 1.
    """
    paragraph = doc.add_paragraph(text, style=list_style)
    if start_new:
        # Use the document passed in, not the module-level `document`, so the
        # function also works for documents other than the global one.
        restart_num_id = prepare_numberings(doc, list_style)
        set_link_to_numId(paragraph, restart_num_id)
# Main program logic: two headed sections, each with a list that restarts at 1.
document = Document()

document.add_heading('First Header')
first_section_items = (
    ('First Item', 'List Number', True),
    ('Second Item', 'List Number', False),
    ('Third Item', 'List Number', False),
)
for item_text, item_style, restart in first_section_items:
    add_new_list_item(doc=document, text=item_text, list_style=item_style, start_new=restart)

# NOTE(review): nothing in this snippet appears to read prev_li; confirm before removing.
prev_li = None
document.add_heading('Second Header')
second_section_items = (
    ('First Item', 'List Number', True),
    ('First SubItem', 'List Number 2', False),
    ('Second SubItem', 'List Number 2', False),
    ('Second Item', 'List Number', False),
    ('Third Item', 'List Number', False),
    ('Third SubItem', 'List Bullet 2', False),
    ('Fourth Item', 'List Number', False),
)
for item_text, item_style, restart in second_section_items:
    add_new_list_item(doc=document, text=item_text, list_style=item_style, start_new=restart)

document.save('lists_test.docx')
结果: