使用 Python 的 docx 模块勾选 .docx 表单中的复选框

问题描述 投票:0回答:3

我正在尝试使用 Python 2.7 的 docx 模块填写 Word 文档表单。我可以很好地修改文本元素,但我很难弄清楚如何勾选“是”或“否”复选框。

我该如何勾选表单中的复选框?我尝试了几种不同的方法,但我认为问题都出在我不了解 docx 的 XML 中复选框是如何构造的。

我是否可以使用书签属性来查找特定的复选框并选中它,如下图所示?

enter image description here

我已将测试表的副本上传到 Google 云端硬盘此处

python python-2.7 arcgis python-docx
3个回答
7
投票

好吧,经过多次挫折后,我终于弄清楚了如何选中复选框。 复选框元素内有一个元素表示该框是否被选中。 我基本上能够使用以下函数创建该元素。

from docx.oxml import OxmlElement
from docx.oxml.ns import qn

def checkedElement():
    """Build a ``w:checked`` element whose ``w:val`` attribute is "true".

    The returned element is meant to be appended to a ``w:checkBox`` element
    to mark that checkbox as ticked.
    """
    checked = OxmlElement('w:checked')
    checked.set(qn('w:val'), "true")
    return checked

我可以使用以下函数找到表格单元格中的所有复选框。由于“是”始终是每个单元格中的第一个复选框,因此“是”复选框的索引为 0,“否”复选框的索引为 1,然后我就可以把表示选中的元素附加到对应的复选框元素中:

def yesNoCheck(yes_no,tableIdx,coords):
    """Tick the 'Yes' or 'No' checkbox inside one table cell of the global ``doc``.

    yes_no   -- 'y' ticks the first checkbox found in the cell, 'n' the
                second; any other value only prints a message.
    tableIdx -- index into ``doc.tables``.
    coords   -- [row, column] of the cell that holds the checkbox pair.

    Raises IndexError when the cell holds fewer checkboxes than the index
    being ticked.
    """
    # %-formatting prints the same text as the old ``print coords, yes_no``
    # statement while also being valid syntax on Python 3.
    print("%s %s" % (coords, yes_no))
    if yes_no == 'y':
        index = 0
    elif yes_no == 'n':
        index = 1
    else:
        print("value was neither yes or no")
        return

    cell = doc.tables[tableIdx].cell(coords[0], coords[1])
    box = cell._element.xpath('.//w:checkBox')[index]
    # Remove any w:checked child already present (e.g. a template box saved
    # as unchecked) so the checkbox never ends up with two of them.
    for stale in box.findall(qn('w:checked')):
        box.remove(stale)
    box.append(checkedElement())

这是我迄今为止编写的完整代码。 我有很多重构要做,但到目前为止效果很好。我的 .docx 模板中有两个表,字典 table1 和 table2 包含单元格行和列坐标。 该脚本用于使用 ESRI 的 Survey123 发布的数据填写所需的表格。

from docx import Document
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
from docx.shared import Inches
from docx.enum.text import WD_ALIGN_PARAGRAPH
import arcpy
import datetime
import os

# Cell coordinates, as [row, column], of each field in the first table of the
# .docx template, keyed by field name.
table1 = {
    # Text fields
    'BusinessName': [2, 3],
    'LicenseNumber': [2, 14],
    'OwnerName': [3, 3],
    'PhoneNumber': [3, 14],
    'BusinessAddress': [4, 5],
    'County': [4, 14],
    'City': [5, 1],
    'St': [5, 8],
    'Zip': [5, 15],
    'LicenceExpired': [6, 1],  # checkbox
    'DateExpired': [6, 15],
    # Everything below is a checkbox cell
    'LicenceRenewal': [7, 1],
    'NumberDisplayed': [8, 1],
    'NameAddDisplayed': [10, 1],
    'VehicleInfoMatches': [12, 1],
    'DischargeValveCapped': [14, 1],
    'DischargeValveCapChained': [15, 1],
    'HoseDisinfectCarried': [16, 1],
    'VehicleAndTankClean': [17, 1],
    'FreeOfLeaks': [18, 1],
}

# Cell coordinates, as [row, column], of each field in the second table of the
# .docx template, keyed by field name.  Unless marked as text, a cell holds a
# Yes/No checkbox pair.
table2 = {
    'LandApplyWaste': [1, 1],
    'LocationDescriptionAccurate': [6, 1],
    'LocationDescriptionAccDesc': [6, 5],  # text
    'Slope': [7, 1],
    'DistanceNearestResidence': [8, 1],
    'DistanceNearestWell': [9, 1],
    'DistanceNearestStreamLakeEtc': [10, 1],
    'SeptageIncorporated': [11, 1],
    # Checkbox that depends on SeptageIncorporated being yes
    'InjectedIncorporated': [12, 3],
    'SeptageStabilized': [13, 1],
    'HowIsLimeMixed': [14, 3],  # text, depends on whether lime was used
    'ConfiningLayerOrGroundwater': [15, 1],
    'ConfiningLayerOrGroundwaterDesc': [16, 3],  # text
    'CropGrown': [17, 1],
    'CropGrownHowVerified': [19, 3],  # text
    'LandAppCompliance': [20, 1],
    'AdditionalComments': [22, 3],
    'SignDate': [22, 13],
}

def checkedElement():
    """Build a ``w:checked`` element whose ``w:val`` attribute is "true".

    The returned element is meant to be appended to a ``w:checkBox`` element
    to mark that checkbox as ticked.
    """
    checked = OxmlElement('w:checked')
    checked.set(qn('w:val'), "true")
    return checked

def yesNoCheck(yes_no,tableIdx,coords):
    """Tick the 'Yes' or 'No' checkbox inside one table cell of the global ``doc``.

    yes_no   -- 'y' ticks the first checkbox found in the cell, 'n' the
                second; any other value only prints a message.
    tableIdx -- index into ``doc.tables``.
    coords   -- [row, column] of the cell that holds the checkbox pair.

    Raises IndexError when the cell holds fewer checkboxes than the index
    being ticked.
    """
    # %-formatting prints the same text as the old ``print coords, yes_no``
    # statement while also being valid syntax on Python 3.
    print("%s %s" % (coords, yes_no))
    if yes_no == 'y':
        index = 0
    elif yes_no == 'n':
        index = 1
    else:
        print("value was neither yes or no")
        return

    cell = doc.tables[tableIdx].cell(coords[0], coords[1])
    box = cell._element.xpath('.//w:checkBox')[index]
    # Remove any w:checked child already present (e.g. a template box saved
    # as unchecked) so the checkbox never ends up with two of them.
    for stale in box.findall(qn('w:checked')):
        box.remove(stale)
    box.append(checkedElement())

def disposalMethodCheck(method, locationDec):
    """Tick the checkbox of the given disposal method and fill in its location.

    method      -- one of the keys of ``vals`` below; None means "nothing to
                   tick" and the function returns immediately.
    locationDec -- text written into column 6 of the method's row.

    Operates on the first table of the global ``doc``.  Raises KeyError for a
    method name that is not listed in ``vals``.
    """
    if method is None:
        return

    # [row, column] of each disposal method's checkbox cell.
    vals = {
        'WastewaterTreatmentFacility':[20,1],
        'LandApplication':[22,1],
        'SanitaryLandfill':[24,1],
        'SeptageLagoonOrDryingBed':[26,1]
    }
    row, col = vals[method]
    checkBoxElm = doc.tables[0].cell(row, col)._element.xpath('.//w:checkBox')[0]
    # Parenthesised single-argument print behaves the same on Python 2.7
    # and is valid syntax on Python 3.
    print("{0} Checked!".format(method))
    checkBoxElm.append(checkedElement())
    editTxt(locationDec, 0, [row, 6])

def editTxt(text, tblIdx, coords, alignment = WD_ALIGN_PARAGRAPH.LEFT, bold=True):
    """Replace the text of the first paragraph in one table cell of the global ``doc``.

    text      -- new paragraph text.
    tblIdx    -- index into ``doc.tables``.
    coords    -- [row, column] of the target cell.
    alignment -- paragraph alignment to apply (left by default).
    bold      -- bold setting applied to the paragraph's first run.
    """
    # %-formatting prints the same text as the old ``print text, coords``
    # statement while also being valid syntax on Python 3.
    print("%s %s" % (text, coords))
    paragraph = doc.tables[tblIdx].cell(coords[0], coords[1]).paragraphs[0]
    paragraph.text = text
    paragraph.alignment = alignment
    # Only the first run is styled.
    paragraph.runs[0].font.bold = bold

def addSig(sigJpgPath):
    """Insert the signature image, centred, into the global ``doc``.

    The picture goes into the first paragraph of the first cell in row 23 of
    the second table and is sized to 1.34 x 0.35 inches.

    sigJpgPath -- path of the image file to insert.
    """
    sig_paragraph = doc.tables[1].row_cells(23)[0].paragraphs[0]
    sig_paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
    sig_paragraph.add_run().add_picture(
        sigJpgPath, width=Inches(1.34), height=Inches(.35))

# Feature class that the search cursor further down reads inspection rows from.
fc = r"E:\PumperTruckInspectionFeatureClass"
# NOTE(review): the "PumperTruckInspections" layer created here is not
# referenced again in the visible script -- confirm whether it is still needed.
arcpy.MakeFeatureLayer_management (fc, "PumperTruckInspections")
attach = r"PumperTruckInspection__ATTACH" #Where signatures are stored

def rows_as_dicts(cursor):
    """Yield every row of *cursor* as a dict keyed by the cursor's field names.

    cursor -- iterable of row sequences exposing a ``fields`` attribute with
              the column names in row order.
    """
    field_names = cursor.fields
    for values in cursor:
        yield {field: value for field, value in zip(field_names, values)}

def dateString(date):
    """Format *date* as MM/DD/YYYY.

    date -- object with a ``strftime`` method, or None.

    Returns the formatted string; for None it prints "no date" and returns
    an empty string.
    """
    if date is None:
        # Parenthesised single-argument print behaves the same on Python 2.7
        # and is valid syntax on Python 3.
        print("no date")
        return ''
    return date.strftime('%m/%d/%Y')

def checkBusName(name):
    """Return *name* unchanged, or the placeholder 'unknown' when it is None."""
    return 'unknown' if name is None else name

from pprint import pprint

# Fill one copy of the .docx template per inspection record and save it under
# "<business name><YYYY-MM-DD>.docx".
with arcpy.da.SearchCursor(fc, '*') as sc:
    for row in rows_as_dicts(sc):
        # A fresh template is loaded for every record; the helper functions
        # operate on this module-level ``doc``.
        doc = Document(r"path\to\TEMPLATE.docx")

        t = datetime.datetime.now().strftime('%Y-%m-%d')
        newDocName = checkBusName(row['BusinessName']) + t + '.docx'

        # --- Table 1: business details (text cells) ---
        for key in ('BusinessName', 'LicenseNumber', 'OwnerName',
                    'PhoneNumber', 'BusinessAddress', 'County', 'City',
                    'St', 'Zip'):
            editTxt(row[key], 0, table1[key])
        editTxt(dateString(row['DateExpired']),0,table1['DateExpired'])

        # --- Table 1: yes/no checkbox cells ---
        for key in ('LicenceExpired', 'LicenceRenewal', 'NumberDisplayed',
                    'NameAddDisplayed', 'VehicleInfoMatches',
                    'DischargeValveCapped', 'DischargeValveCapChained',
                    'HoseDisinfectCarried', 'VehicleAndTankClean',
                    'FreeOfLeaks'):
            yesNoCheck(row[key], 0, table1[key])

        disposalMethodCheck(row['DisposalMethod'],row['DisposalLocation'])

        # --- Table 2: only filled in for land application ---
        if row['DisposalMethod'] == 'LandApplication':
            yesNoCheck(row['LandApplyWaste'],1,table2['LandApplyWaste'])
            yesNoCheck(row['LocationDescriptionAccurate'],1,table2['LocationDescriptionAccurate'])
            editTxt(row['LocationDescriptionAccDesc'],1,table2['LocationDescriptionAccDesc'])
            yesNoCheck(row['Slope'],1,table2['Slope'])
            yesNoCheck(row['DistanceNearestResidence'],1,table2['DistanceNearestResidence'])

            yesNoCheck(row['DistanceNearestWell'],1,table2['DistanceNearestWell'])
            yesNoCheck(row['DistanceNearestStreamLakeEtc'],1,table2['DistanceNearestStreamLakeEtc'])
            yesNoCheck(row['SeptageIncorporated'],1,table2['SeptageIncorporated'])
            yesNoCheck(row['InjectedIncorporated'],1,table2['InjectedIncorporated']) #might need a new method since its not yes/no
            yesNoCheck(row['SeptageStabilized'],1,table2['SeptageStabilized'])
            editTxt(row['HowIsLimeMixed'],1,table2['HowIsLimeMixed'])
            yesNoCheck(row['ConfiningLayerOrGroundwater'],1,table2['ConfiningLayerOrGroundwater'])
            # table2 names this cell 'ConfiningLayerOrGroundwaterDesc'; looking
            # it up under the longer row field name raised KeyError.
            editTxt(row['ConfiningLayerOrGroundwaterDescript'],1,table2['ConfiningLayerOrGroundwaterDesc'])
            yesNoCheck(row['CropGrown'],1,table2['CropGrown'])
            editTxt(row['CropGrownHowVerified'],1,table2['CropGrownHowVerified'])
            yesNoCheck(row['LandAppCompliance'],1,table2['LandAppCompliance'])
        editTxt(row['AdditionalComments'],1,table2['AdditionalComments'],bold=False)

        # --- Signature: first attachment named "InspectorSignature_..." ---
        where = "REL_GLOBALID = '{0}'".format(row['GlobalID'])
        with arcpy.da.SearchCursor(attach,['DATA', 'ATT_NAME', 'ATTACHMENTID'],where_clause=where) as cursor:
            for r in rows_as_dicts(cursor):
                pprint(r)
                name = r['ATT_NAME']
                attachment = r['DATA']
                if name.split('_')[0] == 'InspectorSignature':
                    # The attachment bytes go through a scratch file because
                    # addSig() takes a file path.  The unfinished, unused
                    # ``imagePath = os.path.join(...)`` statement that stood
                    # here was a syntax error and has been dropped.
                    with open("sig.jpeg", 'wb') as sigFile:
                        sigFile.write(attachment.tobytes())
                    addSig("sig.jpeg")

                    break

        editTxt(dateString(row['SignDate']),1,table2['SignDate'],alignment = WD_ALIGN_PARAGRAPH.CENTER,bold=False)
        doc.save(newDocName)
        del doc

2
投票

我先在 Word 中创建了一个已选中的复选框,然后用代码重新生成了对应的 XML。我把整个过程封装成了一个函数,您只需将段落作为参数传入即可。

import docx
from docx import Document
from docx.shared import Inches
from docx.oxml import OxmlElement
from docx.oxml.ns import qn


def addCheckedbox(para, box_id, name):
    """Append a legacy form-field checkbox, checked by default, to a paragraph.

    para   -- python-docx paragraph that receives the checkbox.
    box_id -- integer written as the ``w:id`` of the bookmark wrapping the field.
    name   -- name given to both the form field and its bookmark.

    The markup follows the WordprocessingML complex-field layout:

        <w:r><w:fldChar w:fldCharType="begin"><w:ffData>...</w:ffData></w:fldChar></w:r>
        <w:bookmarkStart w:id=".." w:name=".."/>
        <w:r><w:instrText> FORMCHECKBOX </w:instrText></w:r>
        <w:r><w:fldChar w:fldCharType="end"/></w:r>
        <w:bookmarkEnd w:id=".."/>

    Returns None.
    """
    # Form-field data: the checkbox definition lives inside w:ffData, which is
    # a child of the field's "begin" character.
    check_box = OxmlElement('w:checkBox')
    check_box.append(OxmlElement('w:sizeAuto'))
    default_state = OxmlElement('w:default')
    default_state.set(qn('w:val'), '1')  # '1' = checked by default
    check_box.append(default_state)

    field_name = OxmlElement('w:name')
    field_name.set(qn('w:val'), name)

    ff_data = OxmlElement('w:ffData')
    ff_data.append(field_name)
    ff_data.append(OxmlElement('w:enabled'))
    ff_data.append(check_box)

    fld_begin = OxmlElement('w:fldChar')
    fld_begin.set(qn('w:fldCharType'), 'begin')
    fld_begin.append(ff_data)
    # Previously the "begin" character was created but never attached,
    # leaving the field without a start marker.
    para.add_run()._r.append(fld_begin)

    # The bookmark markers are siblings of the runs (children of w:p), share
    # one id, and only the start marker carries the name.
    bookmark_start = OxmlElement('w:bookmarkStart')
    bookmark_start.set(qn('w:id'), str(box_id))
    bookmark_start.set(qn('w:name'), name)
    para._p.append(bookmark_start)

    instr = OxmlElement('w:instrText')
    instr.set(qn('xml:space'), 'preserve')
    instr.text = ' FORMCHECKBOX '
    para.add_run()._r.append(instr)

    fld_end = OxmlElement('w:fldChar')
    fld_end.set(qn('w:fldCharType'), 'end')
    para.add_run()._r.append(fld_end)

    bookmark_end = OxmlElement('w:bookmarkEnd')
    bookmark_end.set(qn('w:id'), str(box_id))
    para._p.append(bookmark_end)

    return

0
投票

这是适用于较新版本 Word 的更新写法。选中/取消选中的关键在于以下这一行:

checked.set(qn('w14:val'), '1')
其中“1”表示选中,“0”表示取消选中。

import docx

# Create an empty Word document, save it (overwriting an existing document of
# the same name) and reopen it from disk.  The original repeated the
# create-and-save step in two identical try blocks; one pass is enough.
try:

    # Create an instance of a Word document
    doc = docx.Document()
    # Save the empty Word document (overwrites an existing document of the same name)
    doc.save(r"C:\My Documents\checkbox_test_document.docx")

    # Create a new instance of the empty Word document that has been created
    doc = docx.Document(r"C:\My Documents\checkbox_test_document.docx")

# Raised if a previously generated Word document is still open
except PermissionError:

    print("Need to close the open document")

# Labels of the checklist entries to add to the document.
check1 = "text for checkbox1"
check2 = "text for checkbox2"

checklist = [check1, check2]

def add_checlist_to_docx(document, checklist_item, is_checked=True):
    """Append a paragraph holding a checkbox content control followed by a label.

    document       -- python-docx document that receives the new paragraph.
    checklist_item -- label text placed after the checkbox.
    is_checked     -- initial state of the checkbox (checked by default, as
                      before).

    The ``w14:checked`` flag and the glyph shown in the control's content are
    kept in step: previously the flag said "checked" while the displayed
    character was the unchecked box.

    Returns None.
    """
    from docx.oxml.shared import OxmlElement, qn

    paragraph = document.add_paragraph()
    tag = paragraph._p

    # --- Content-control properties: <w:sdtPr><w14:checkbox>...</w14:checkbox></w:sdtPr>
    checked = OxmlElement('w14:checked')
    checked.set(qn('w14:val'), '1' if is_checked else '0')

    # Hex code points of the characters used for each state.
    checkedState = OxmlElement('w14:checkedState')
    checkedState.set(qn('w14:val'), '2612')
    checkedState.set(qn('w14:font'), 'MS Gothic')
    uncheckedState = OxmlElement('w14:uncheckedState')
    uncheckedState.set(qn('w14:val'), '2610')
    uncheckedState.set(qn('w14:font'), 'MS Gothic')

    checkbox = OxmlElement('w14:checkbox')
    checkbox.append(checked)
    checkbox.append(checkedState)
    checkbox.append(uncheckedState)

    sdtPr = OxmlElement('w:sdtPr')
    sdtPr.append(checkbox)

    # --- Content-control content: one run showing the current glyph
    rFonts = OxmlElement('w:rFonts')
    rFonts.set(qn('w:ascii'), 'MS Gothic')
    rFonts.set(qn('w:eastAsia'), 'MS Gothic')
    rFonts.set(qn('w:hAnsi'), 'MS Gothic')
    rFonts.set(qn('w:hint'), 'eastAsia')
    rPr = OxmlElement('w:rPr')
    rPr.append(rFonts)

    t_box = OxmlElement('w:t')
    # U+2612 / U+2610 match the checkedState / uncheckedState values above.
    t_box.text = '☒' if is_checked else '☐'

    r_box = OxmlElement('w:r')
    r_box.append(rPr)
    r_box.append(t_box)

    sdtContent = OxmlElement('w:sdtContent')
    sdtContent.append(r_box)

    sdt = OxmlElement('w:sdt')
    sdt.append(sdtPr)
    sdt.append(sdtContent)
    tag.append(sdt)

    # --- Label run placed after the checkbox
    t_text = OxmlElement('w:t')
    t_text.set(qn('xml:space'), 'preserve')
    t_text.text = checklist_item
    r_text = OxmlElement('w:r')
    r_text.append(t_text)
    tag.append(r_text)
    return
    
# Add one checkbox paragraph per checklist entry.
for check in checklist:
    add_checlist_to_docx(doc, check)

# Write the populated document to disk.
doc.save(r"C:\My Documents\checkbox_test_document.docx")
© www.soinside.com 2019 - 2024. All rights reserved.