以正确的顺序/顺序将元素插入到 XML 代码中? Python代码

问题描述 投票:0回答:1

我是 Python 新手,我正在尝试创建一个脚本来打开 XML 文件并将 XML 元素插入到 XML 元素序列中的正确位置。

到目前为止,我有这个代码:

import os
import xml.etree.ElementTree as ET


def process_xml_files(source_dir, target_dir):
    # Create the target directory if it doesn't exist
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    # Iterate through all files in the source directory
    for filename in os.listdir(source_dir):
        if filename.endswith('.xml'):
            file_path = os.path.join(source_dir, filename)
            tree = ET.parse(file_path)
            root = tree.getroot()

            # Process each platformID group
            for platformID in ['0', '1', '3']:
                elements = root.findall(f".//namerecord[@platformID='{platformID}']")
                if elements:
                    nameID_elements = {int(el.get('nameID')): el for el in elements}
                    if 8 not in nameID_elements:
                        # Create a new element with nameID="8"
                        new_element = ET.Element('namerecord', attrib={
                            'nameID': '8',
                            'platformID': platformID,
                            'platEncID': elements[0].get('platEncID'),
                            'langID': elements[0].get('langID'),

                        })
                        new_element.text = "\r" "    " "New Entry" "\r" "    "

                        # Insert the new element in the correct position
                        sorted_nameIDs = sorted(list(nameID_elements.keys()) + [8])
                        insert_index = sorted_nameIDs.index(8)

                        # Find the parent element
                        parent = root.find(".//name")
                        parent.insert(insert_index, new_element)

            # Write the modified XML to the target directory
            target_file_path = os.path.join(target_dir, filename)
            tree.write(target_file_path, encoding='utf-8', xml_declaration=True)


source_directory = input("Enter the source directory: ")
target_directory = input("Enter the target directory: ")
process_ttx_files(source_directory, target_directory)

这是需要插入的输入文件的示例。 请注意,platformID 分组在一起并按 nameID 的顺序排列:

<?xml version="1.0" encoding="UTF-8"?>
<xmlalter xmlaVersion="Alpha">

  <name>
    <namerecord nameID="0" platformID="0" platEncID="0" langID="0x0">
      Safe
    </namerecord>
    <namerecord nameID="1" platformID="0" platEncID="0" langID="0x0">
      Kitten
    </namerecord>
    <namerecord nameID="2" platformID="0" platEncID="0" langID="0x0">
      Calico
    </namerecord>
    <namerecord nameID="3" platformID="0" platEncID="0" langID="0x0">
      KittenCalico:1135918450
    </namerecord>
    <namerecord nameID="4" platformID="0" platEncID="0" langID="0x0">
      KittenCalico
    </namerecord>
    <namerecord nameID="5" platformID="0" platEncID="0" langID="0x0">
      1.00
    </namerecord>
    <namerecord nameID="6" platformID="0" platEncID="0" langID="0x0">
      KittenCalico
    </namerecord>
        
    <namerecord nameID="0" platformID="1" platEncID="0" langID="0x0" unicode="True">
      Safe
    </namerecord>
    <namerecord nameID="1" platformID="1" platEncID="0" langID="0x0" unicode="True">
      Kitten
    </namerecord>
    <namerecord nameID="2" platformID="1" platEncID="0" langID="0x0" unicode="True">
      Calico
    </namerecord>
    <namerecord nameID="3" platformID="1" platEncID="0" langID="0x0" unicode="True">
      KittenCalico:1135918450
    </namerecord>
    <namerecord nameID="4" platformID="1" platEncID="0" langID="0x0" unicode="True">
      KittenCalico
    </namerecord>
    <namerecord nameID="5" platformID="1" platEncID="0" langID="0x0" unicode="True">
      1.00
    </namerecord>
    <namerecord nameID="6" platformID="1" platEncID="0" langID="0x0" unicode="True">
      KittenCalico
    </namerecord>
        
    <namerecord nameID="0" platformID="3" platEncID="1" langID="0x409">
      Safe
    </namerecord>
    <namerecord nameID="1" platformID="3" platEncID="1" langID="0x409">
      Kitten Calico
    </namerecord>
    <namerecord nameID="2" platformID="3" platEncID="1" langID="0x409">
      Vanilla
    </namerecord>
    <namerecord nameID="3" platformID="3" platEncID="1" langID="0x409">
      KittenCalico:1135918450
    </namerecord>
    <namerecord nameID="4" platformID="3" platEncID="1" langID="0x409">
      KittenCalico
    </namerecord>
    <namerecord nameID="5" platformID="3" platEncID="1" langID="0x409">
      1.00
    </namerecord>
    <namerecord nameID="6" platformID="3" platEncID="1" langID="0x409">
      KittenCalico
    </namerecord>
  </name>

</xmlalter>

此 Python 代码生成了以下内容。 请注意,新条目并未按 nameID 按顺序分组在 platformID 中:

<?xml version='1.0' encoding='utf-8'?>
<xmlalter xmlaVersion="Alpha">

  <name>
    <namerecord nameID="0" platformID="0" platEncID="0" langID="0x0">
      Safe
    </namerecord>
    <namerecord nameID="1" platformID="0" platEncID="0" langID="0x0">
      Kitten
    </namerecord>
    <namerecord nameID="2" platformID="0" platEncID="0" langID="0x0">
      Calico
    </namerecord>
    <namerecord nameID="3" platformID="0" platEncID="0" langID="0x0">
      KittenCalico:1135918450
    </namerecord>
    <namerecord nameID="4" platformID="0" platEncID="0" langID="0x0">
      KittenCalico
    </namerecord>
    <namerecord nameID="5" platformID="0" platEncID="0" langID="0x0">
      1.00
    </namerecord>
    <namerecord nameID="6" platformID="0" platEncID="0" langID="0x0">
      KittenCalico
    </namerecord>
        
    <namerecord nameID="8" platformID="3" platEncID="1" langID="0x409">
    New Entry
    </namerecord><namerecord nameID="8" platformID="1" platEncID="0" langID="0x0">
    New Entry
    </namerecord><namerecord nameID="8" platformID="0" platEncID="0" langID="0x0">
    New Entry
    </namerecord><namerecord nameID="0" platformID="1" platEncID="0" langID="0x0" unicode="True">
      Safe
    </namerecord>
    <namerecord nameID="1" platformID="1" platEncID="0" langID="0x0" unicode="True">
      Kitten
    </namerecord>
    <namerecord nameID="2" platformID="1" platEncID="0" langID="0x0" unicode="True">
      Calico
    </namerecord>
    <namerecord nameID="3" platformID="1" platEncID="0" langID="0x0" unicode="True">
      KittenCalico:1135918450
    </namerecord>
    <namerecord nameID="4" platformID="1" platEncID="0" langID="0x0" unicode="True">
      KittenCalico
    </namerecord>
    <namerecord nameID="5" platformID="1" platEncID="0" langID="0x0" unicode="True">
      1.00
    </namerecord>
    <namerecord nameID="6" platformID="1" platEncID="0" langID="0x0" unicode="True">
      KittenCalico
    </namerecord>
        
    <namerecord nameID="0" platformID="3" platEncID="1" langID="0x409">
      Safe
    </namerecord>
    <namerecord nameID="1" platformID="3" platEncID="1" langID="0x409">
      Kitten Calico
    </namerecord>
    <namerecord nameID="2" platformID="3" platEncID="1" langID="0x409">
      Vanilla
    </namerecord>
    <namerecord nameID="3" platformID="3" platEncID="1" langID="0x409">
      KittenCalico:1135918450
    </namerecord>
    <namerecord nameID="4" platformID="3" platEncID="1" langID="0x409">
      KittenCalico
    </namerecord>
    <namerecord nameID="5" platformID="3" platEncID="1" langID="0x409">
      1.00
    </namerecord>
    <namerecord nameID="6" platformID="3" platEncID="1" langID="0x409">
      KittenCalico
    </namerecord>
  </name>

</xmlalter>

我试图获得如下所示的 XML 输出:

<?xml version='1.0' encoding='utf-8'?>
<xmlalter xmlaVersion="Alpha">

  <name>
    <namerecord nameID="0" platformID="0" platEncID="0" langID="0x0">
     Safe
    </namerecord>
    <namerecord nameID="1" platformID="0" platEncID="0" langID="0x0">
     Kitten
    </namerecord>
    <namerecord nameID="2" platformID="0" platEncID="0" langID="0x0">
     Calico
    </namerecord>
    <namerecord nameID="3" platformID="0" platEncID="0" langID="0x0">
     KittenCalico:1135918450
    </namerecord>
    <namerecord nameID="4" platformID="0" platEncID="0" langID="0x0">
     KittenCalico
    </namerecord>
    <namerecord nameID="5" platformID="0" platEncID="0" langID="0x0">
     1.00
    </namerecord>
    <namerecord nameID="6" platformID="0" platEncID="0" langID="0x0">
    KittenCalico
    </namerecord>
    <namerecord nameID="8" platformID="0" platEncID="0" langID="0x0">
    New Entry
    </namerecord>
    
    <namerecord nameID="0" platformID="1" platEncID="0" langID="0x0" unicode="True">
     Safe
    </namerecord>
    <namerecord nameID="1" platformID="1" platEncID="0" langID="0x0" unicode="True">
     Kitten
    </namerecord>
    <namerecord nameID="2" platformID="1" platEncID="0" langID="0x0" unicode="True">
     Calico
    </namerecord>
    <namerecord nameID="3" platformID="1" platEncID="0" langID="0x0" unicode="True">
     KittenCalico:1135918450
    </namerecord>
    <namerecord nameID="4" platformID="1" platEncID="0" langID="0x0" unicode="True">
     KittenCalico
    </namerecord>
    <namerecord nameID="5" platformID="1" platEncID="0" langID="0x0" unicode="True">
     1.00
    </namerecord>
    <namerecord nameID="6" platformID="1" platEncID="0" langID="0x0" unicode="True">
     KittenCalico
    </namerecord>
    <namerecord nameID="8" platformID="1" platEncID="0" langID="0x0" unicode="True">
    New Entry
    </namerecord>

    <namerecord nameID="0" platformID="3" platEncID="1" langID="0x409">
     Safe
    </namerecord>
    <namerecord nameID="1" platformID="3" platEncID="1" langID="0x409">
     Kitten Calico
    </namerecord>
    <namerecord nameID="2" platformID="3" platEncID="1" langID="0x409">
     Vanilla
    </namerecord>
    <namerecord nameID="3" platformID="3" platEncID="1" langID="0x409">
     KittenCalico:1135918450
    </namerecord>
    <namerecord nameID="4" platformID="3" platEncID="1" langID="0x409">
     KittenCalico
    </namerecord>
    <namerecord nameID="5" platformID="3" platEncID="1" langID="0x409">
     1.00
    </namerecord>
    <namerecord nameID="6" platformID="3" platEncID="1" langID="0x409">
     KittenCalico
    </namerecord>
    <namerecord nameID="8" platformID="3" platEncID="1" langID="0x409">
    New Entry
    </namerecord>
  </name>

</xmlalter>

此外,一些 XML 文件将包含

nameID="7"
和超过 8 的 nameID,如
nameID="9"
nameID="10"
等,因此
nameID=8
不一定位于每个 platformID 的末尾,而是位于连续的 nameID 元素之间.

我不知道如何编写代码来实现这一点。 任何帮助将不胜感激,谢谢!

python xml insert sequential
1个回答
0
投票

文档已经排序,因此插入索引应该是具有 nameID 的最后一个元素的索引 < 8. Additionally,

element.tail
字段用于添加适当的缩进。

import xml.etree.ElementTree as ET
doc = ET.parse("/home/lmc/tmp/tmp2.xml")
elements = doc.getroot().findall(".//namerecord[@platformID='0']")

nameID_elements = [int(el.get('nameID')) for el in elements if int(el.get('nameID')) < 8]

elpar = doc.getroot().find(".//namerecord[@platformID='0']...")

print(elpar)
new_element = ET.Element('namerecord', attrib={
    'nameID': '8',
    'platformID': '0',
    'platEncID': elements[0].get('platEncID'),
    'langID': elements[0].get('langID'),

})

new_element.text = elements[0].tail + " some text" + elements[0].tail
new_element.tail = elements[0].tail

#idx = len(elements)
#idx = elements.index(elements[-1]) + 1
idx = nameID_elements.index(nameID_elements[-1]) + 1

elpar.insert(idx, new_element)

print(ET.tostring(elpar).decode('utf-8'))

显示相关片段

<name>
    <namerecord nameID="0" platformID="0" platEncID="0" langID="0x0">
      Safe
    </namerecord>
    <namerecord nameID="1" platformID="0" platEncID="0" langID="0x0">
      Kitten
    </namerecord>
    <namerecord nameID="2" platformID="0" platEncID="0" langID="0x0">
      Calico
    </namerecord>
    <namerecord nameID="3" platformID="0" platEncID="0" langID="0x0">
      KittenCalico:1135918450
    </namerecord>
    <namerecord nameID="4" platformID="0" platEncID="0" langID="0x0">
      KittenCalico
    </namerecord>
    <namerecord nameID="5" platformID="0" platEncID="0" langID="0x0">
      1.00
    </namerecord>
    <namerecord nameID="6" platformID="0" platEncID="0" langID="0x0">
      KittenCalico
    </namerecord>
    <namerecord nameID="8" platformID="0" platEncID="0" langID="0x0">
     some text
    </namerecord>
    <namerecord nameID="9" platformID="0" platEncID="0" langID="0x0">
      KittenCalico
    </namerecord>
...
  </name>

新元素插入到 6 和 9 之间

相关源文档片段:

    <namerecord nameID="6" platformID="0" platEncID="0" langID="0x0">
      KittenCalico
    </namerecord>
    <namerecord nameID="9" platformID="0" platEncID="0" langID="0x0">
      KittenCalico
    </namerecord>
© www.soinside.com 2019 - 2024. All rights reserved.