代码从 XML 文件读取但找不到任何内容

问题描述 投票:0回答:3

我想读取 XML 文件并将信息写入 MySQL 数据库(本地主机)。

桌子:

CREATE TABLE ClimateMonitoring (
    id INT PRIMARY KEY,
    name VARCHAR(255),
    label VARCHAR(255),
    value_type INT,
    offset FLOAT,
    gain FLOAT,
    precision_level INT,
    value VARCHAR(255),
    unit VARCHAR(50),
    sensor_date DATETIME,
    insert_date DATETIME DEFAULT CURRENT_TIMESTAMP
);

代码:

from lxml import etree
import mysql.connector

def parse_and_insert_to_mysql(file_path, db_config):
    conn = None
    cursor = None
    data_found = False
    try:
        # Parse the XML file using lxml
        print("Parsing XML file using lxml...")
        tree = etree.parse(file_path)
        root = tree.getroot()
        print("XML file parsed successfully.")

        # Print the root tag to verify successful parsing
        print(f"Root tag: {root.tag}")

        # Define the namespace
        namespace = {'icom': 'something'}

        # Connect to the MySQL database
        print("Connecting to MySQL database...")
        conn = mysql.connector.connect(**db_config)
        cursor = conn.cursor()
        print("Connected to MySQL database.")

        # Extract data and insert into the database
        for item in root.findall('icom:Item', namespaces=namespace):
            data_found = True  # Set flag to true if at least one item is found
            item_id = item.attrib['id']
            name = item.attrib['name']
            label = item.find('icom:Label', namespaces=namespace).text
            value_element = item.find('icom:Value', namespaces=namespace)
            value_type = value_element.attrib['valueType']
            offset = value_element.attrib['offset']
            gain = value_element.attrib['gain']
            precision_level = value_element.attrib['precision']
            value = value_element.text
            unit = item.find('icom:Unit', namespaces=namespace).text if item.find('icom:Unit', namespaces=namespace) is not None else ''
            sensor_date = '2024-10-05 00:00:00'

            # Debugging: Print each value to verify correctness
            print(f"Preparing to insert: id={item_id}, name={name}, label={label}, value_type={value_type}, offset={offset}, gain={gain}, precision_level={precision_level}, value={value}, unit={unit}, sensor_date={sensor_date}")

            insert_query = (
                "INSERT INTO ClimateMonitoring (id, name, label, value_type, offset, gain, precision_level, value, unit, sensor_date) "
                "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
            )
            try:
                cursor.execute(insert_query, (item_id, name, label, value_type, offset, gain, precision_level, value, unit, sensor_date))
                print(f"Inserted row with id={item_id}")
            except mysql.connector.Error as e:
                print(f"Failed to insert row with id={item_id}: {e}")
            except Exception as e:
                print(f"Unexpected error when inserting row with id={item_id}: {e}")

        # Check if no data was found
        if not data_found:
            raise ValueError("No insertable data found in the XML file.")

        # Commit the transaction
        print("Committing transaction...")
        conn.commit()
        print("Transaction committed successfully.")

    except etree.XMLSyntaxError as e:
        print(f"Error parsing the XML file: {e}")
    except mysql.connector.Error as e:
        print(f"Error with MySQL database: {e}")
    except ValueError as e:
        print(e)
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        # Close the database connection
        if cursor:
            cursor.close()
        if conn:
            conn.close()
        print("Database connection closed.")

# Path to your XML file
file_path = r"path"

# MySQL database configuration
db_config = {
    'user': 'root',
    'password': 'password',  # The password you set during installation
    'host': 'localhost',     # Localhost
    'database': 'climate'  # The database you created
}

parse_and_insert_to_mysql(file_path, db_config)

XML 文件:

<?xml version="1.0" encoding="UTF-8" ?>
<?xml-stylesheet type="text/xsl" href="/simon.xsl" ?>
<icom:something schemaVersion="1.00" appVersion="1" xmlns:icom="a1234">
<Item id="354" name="SinglState">
<Label>Unit Status</Label>
<Value valueType="262144" offset="0.000" gain="1.000" precision="0">Unit On</Value>
<Unit></Unit>
</Item>
<Item id="361" name="LocTemp">
<Label>Return Air Temperature</Label>
<Value valueType="393216" offset="0.000" gain="1.000" precision="1">22.1</Value>
<Unit>&#176;C</Unit>
</Item>
<Item id="379" name="Std. Sensor Humidity">
<Label>Return Air Humidity</Label>
<Value valueType="393216" offset="0.000" gain="1.000" precision="1">35.1</Value>
<Unit>%rH</Unit>
</Item>
<Item id="380" name="Supply Air Temperature">
<Label>Supply Air Temperature</Label>
<Value valueType="393216" offset="0.000" gain="1.000" precision="1">---</Value>
<Unit></Unit>
</Item>
<Item id="356" name="Actual Temperature Setpoint">
<Label>Return Air Temperature Setpoint</Label>
<Value valueType="393216" offset="0.000" gain="1.000" precision="1">21.5</Value>
<Unit>&#176;C</Unit>
</Item>
<Item id="547" name="HuSetActDi">
<Label>Return Air Humidity Setpoint</Label>
<Value valueType="393216" offset="0.000" gain="1.000" precision="0">No</Value>
<Unit></Unit>
</Item>
<Item id="401" name="Supply Setpoint">
<Label>Supply Air Temperature Setpoint</Label>
<Value valueType="393216" offset="0.000" gain="1.000" precision="1">5.0</Value>
<Unit>&#176;C</Unit>
</Item>
<Item id="1110" name="Show_Fan">
<Label>Fan Status</Label>
<Value valueType="0" offset="0.000" gain="1.000" precision="0">On</Value>
<Unit></Unit>
</Item>
<Item id="1111" name="Show_Cool">
<Label>Cooling Status</Label>
<Value valueType="0" offset="0.000" gain="1.000" precision="0">On</Value>
<Unit></Unit>
</Item>
<Item id="1112" name="Show_FC">
<Label>Freecooling Status</Label>
<Value valueType="0" offset="0.000" gain="1.000" precision="0">Off</Value>
<Unit></Unit>
</Item>
<Item id="1114" name="Show_EHeat">
<Label>Electrical Heater Status</Label>
<Value valueType="0" offset="0.000" gain="1.000" precision="0">Off</Value>
<Unit></Unit>
</Item>
<Item id="1113" name="Show_HotW">
<Label>Hot Water Status</Label>
<Value valueType="0" offset="0.000" gain="1.000" precision="0">Off</Value>
<Unit></Unit>
</Item>
<Item id="1116" name="Show_Dehum">
<Label>Dehumdification Status</Label>
<Value valueType="0" offset="0.000" gain="1.000" precision="0">Off</Value>
<Unit></Unit>
</Item>
<Item id="1115" name="Show_Hum">
<Label>Humidifier Status</Label>
<Value valueType="0" offset="0.000" gain="1.000" precision="0">Off</Value>
<Unit></Unit>
</Item>
<Item id="1118" name="Show_Maint">
<Label>Maintenance Status</Label>
<Value valueType="0" offset="0.000" gain="1.000" precision="0">Off</Value>
<Unit></Unit>
</Item>
</icom:something>

代码连接到数据库,但没有从 XML 文件中获取数据。我需要它来用这些信息填充表格。

python xml xml-parsing lxml
3个回答
2
投票

XML 具有名称空间声明

xmlns:icom="a1234"
,因此您的 Python 名称空间字典应该是例如
namespace = {'icom': 'a1234'}

此外,大多数后代元素都不在命名空间中,因此请使用例如

item.find('Label', ..)
而不是
item.find('icom:Label', ..)

完整的lxml/etree特定代码:

for item in tree.findall('Item', namespaces=namespace):
    data_found = True  # Set flag to true if at least one item is found
    item_id = item.attrib['id']
    name = item.attrib['name']
    label = item.find('Label', namespaces=namespace).text
    value_element = item.find('Value', namespaces=namespace)
    value_type = value_element.attrib['valueType']
    offset = value_element.attrib['offset']
    gain = value_element.attrib['gain']
    precision_level = value_element.attrib['precision']
    value = value_element.text
    unit = item.find('Unit', namespaces=namespace).text if item.find('Unit',
                                                                          namespaces=namespace) is not None else ''
    sensor_date = '2024-10-05 00:00:00'

    # Debugging: Print each value to verify correctness
    print(
        f"Preparing to insert: id={item_id}, name={name}, label={label}, value_type={value_type}, offset={offset}, gain={gain}, precision_level={precision_level}, value={value}, unit={unit}, sensor_date={sensor_date}")

喷出

Preparing to insert: id=354, name=SinglState, label=Unit Status, value_type=262144, offset=0.000, gain=1.000, precision_level=0, value=Unit On, unit=None, sensor_date=2024-10-05 00:00:00
Preparing to insert: id=361, name=LocTemp, label=Return Air Temperature, value_type=393216, offset=0.000, gain=1.000, precision_level=1, value=22.1, unit=°C, sensor_date=2024-10-05 00:00:00
Preparing to insert: id=379, name=Std. Sensor Humidity, label=Return Air Humidity, value_type=393216, offset=0.000, gain=1.000, precision_level=1, value=35.1, unit=%rH, sensor_date=2024-10-05 00:00:00
Preparing to insert: id=380, name=Supply Air Temperature, label=Supply Air Temperature, value_type=393216, offset=0.000, gain=1.000, precision_level=1, value=---, unit=None, sensor_date=2024-10-05 00:00:00
Preparing to insert: id=356, name=Actual Temperature Setpoint, label=Return Air Temperature Setpoint, value_type=393216, offset=0.000, gain=1.000, precision_level=1, value=21.5, unit=°C, sensor_date=2024-10-05 00:00:00
Preparing to insert: id=547, name=HuSetActDi, label=Return Air Humidity Setpoint, value_type=393216, offset=0.000, gain=1.000, precision_level=0, value=No, unit=None, sensor_date=2024-10-05 00:00:00
Preparing to insert: id=401, name=Supply Setpoint, label=Supply Air Temperature Setpoint, value_type=393216, offset=0.000, gain=1.000, precision_level=1, value=5.0, unit=°C, sensor_date=2024-10-05 00:00:00
Preparing to insert: id=1110, name=Show_Fan, label=Fan Status, value_type=0, offset=0.000, gain=1.000, precision_level=0, value=On, unit=None, sensor_date=2024-10-05 00:00:00
Preparing to insert: id=1111, name=Show_Cool, label=Cooling Status, value_type=0, offset=0.000, gain=1.000, precision_level=0, value=On, unit=None, sensor_date=2024-10-05 00:00:00
Preparing to insert: id=1112, name=Show_FC, label=Freecooling Status, value_type=0, offset=0.000, gain=1.000, precision_level=0, value=Off, unit=None, sensor_date=2024-10-05 00:00:00
Preparing to insert: id=1114, name=Show_EHeat, label=Electrical Heater Status, value_type=0, offset=0.000, gain=1.000, precision_level=0, value=Off, unit=None, sensor_date=2024-10-05 00:00:00
Preparing to insert: id=1113, name=Show_HotW, label=Hot Water Status, value_type=0, offset=0.000, gain=1.000, precision_level=0, value=Off, unit=None, sensor_date=2024-10-05 00:00:00
Preparing to insert: id=1116, name=Show_Dehum, label=Dehumdification Status, value_type=0, offset=0.000, gain=1.000, precision_level=0, value=Off, unit=None, sensor_date=2024-10-05 00:00:00
Preparing to insert: id=1115, name=Show_Hum, label=Humidifier Status, value_type=0, offset=0.000, gain=1.000, precision_level=0, value=Off, unit=None, sensor_date=2024-10-05 00:00:00
Preparing to insert: id=1118, name=Show_Maint, label=Maintenance Status, value_type=0, offset=0.000, gain=1.000, precision_level=0, value=Off, unit=None, sensor_date=2024-10-05 00:00:00

对我来说肯定能在 XML 中找到数据


1
投票

更新:创建 MariaDB 数据库表并使用

xml.etree.ElementTree
解析 XML。我根据这个side使用了便携式MariaDB安装。 MariaDB 与 MySQL 非常相似,因此它应该无需更改或只需很少的更改即可工作。 为什么这次从
pandas.to_sql()
更新回
xml.etree.ElementTree
? ->
pd.to_sql()
sqlachemy
将数据库列类型定义更改回文本!

import xml.etree.ElementTree as ET
import mariadb
import sys

class Parse_xml:
    def __init__(self,item):
        self.id = self.item_attr(item, "id", "i")
        self.name = self.item_attr(item, "name", "s")
        self.label = self.tag_text(item, "Label")
        self.value = self.tag_text(item, "Value")
        self.v_valueType = self.item_attr(item, "valueType", "i")
        self.v_offset =  self.item_attr(item, "offset", "f")
        self.v_gain = self.item_attr(item, "gain", "f")
        self.v_precision = self.item_attr(item, "precision", "i")
        self.unit = self.tag_text(item, "Unit")
        print(self.id, self.name, self.label, self.v_valueType, self.v_offset, self.v_gain, self.v_precision, self.value, self.unit)

    def tag_text(self, elem_obj, tn):
        tex = []
        for e in elem_obj.iter():
            if e.tag == tn:
                tex.append(e.text)
        return tex[0]

    def item_attr(self, elem_obj, attr,  ty):
        d = []
        for a in elem_obj.iter():
            if a.get(attr) is not None:
                if ty == 'i':
                    d.append(int(a.get(attr)))
                if ty == 's':
                    d.append(a.get(attr))
                if ty == 'f':
                    d.append(float(a.get(attr)))
        return d[0]      
            
if __name__ == "__main__":
    try:
        with mariadb.connect(
            host = "localhost",
            user = input("Enter username: "),
            password = input("Enter password: "),
            port = 3306,
            database = "climate",
        ) as conn:
            
            # Connect Message
            cur = conn.cursor()
            cur.execute("SELECT version();")
            version = cur.fetchall()
            print("Connected to Database Version", version[0][0])
            
            sql = '''CREATE OR REPLACE TABLE climate.climate_monitoring (
                  id int(11) NOT NULL,
                  name varchar(255) DEFAULT NULL,
                  label varchar(255) DEFAULT NULL,
                  values_type int(11) DEFAULT NULL,
                  v_offset float DEFAULT NULL,
                  v_gain float DEFAULT NULL,
                  v_precision int(11) DEFAULT NULL,
                  value varchar(255) DEFAULT NULL,
                  unit varchar(50) DEFAULT NULL,
                  sensor_data DATETIME DEFAULT now(6),
                  insert_date TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP(6),
                  updated_at TIMESTAMP NOT NULL ON UPDATE CURRENT_TIMESTAMP(6),
                  PRIMARY KEY (id));'''
            cur.execute(sql)
            print("climate.climate_monitoring table created or replaced!")
            
            filename = "Example.xml"
            for event, elem in ET.iterparse("Climate_data.xml", events = ('start',)):
                if elem.tag == 'Item':
                    data = Parse_xml(elem)
                    insert_data = (data.id, data.name, data.label, data.v_valueType, data.v_offset, data.v_gain, data.v_precision, data.value, data.unit)
                    cur = conn.cursor()
                    insert_sql = f"INSERT INTO climate_monitoring (id, name, label, values_type, v_offset, v_gain, v_precision, value, unit) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)"
                    cur.execute(insert_sql, insert_data)
                    conn.commit()
                    print("Data inserted")
                    elem.clear()
    
    except mariadb.Error as e:
        print(f"Error connecting to MariaDB Platform: {e}")
        sys.exit(1)

DBeaver 中的输出: enter image description here


0
投票

您的 XML 是嵌套的。所以你必须将它加载到2个单独的表中。

将 XML 文件上传到 MySQL 可以从中导入该文件的目录(请参阅会话

@@secure_file_priv
设置)。然后使用:

CREATE TABLE ClimateMonitoring (
    id INT PRIMARY KEY,
    name VARCHAR(255),
    Label VARCHAR(255),
    Unit VARCHAR(50),
    sensor_date DATETIME,
    insert_date DATETIME DEFAULT CURRENT_TIMESTAMP
);
LOAD XML INFILE 'C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/test.xml' 
INTO TABLE ClimateMonitoring 
ROWS IDENTIFIED BY '<Item>'
SET insert_date = DEFAULT;

CREATE TABLE ClimateMonitoringValue (
    id INT PRIMARY KEY,
    valueType INT,
    offset FLOAT,
    gain FLOAT,
    `precision` INT,
    Value VARCHAR(255),
    FOREIGN KEY (id) REFERENCES ClimateMonitoring (id)
);
LOAD XML INFILE 'C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/test.xml' 
INTO TABLE ClimateMonitoringValue 
ROWS IDENTIFIED BY '<Value>';

注意 - 列名称区分大小写。

您还必须显式地将值分配给自动利用的 DATETIME 列。

现在您可以使用

查看和使用整个数据
SELECT *
FROM ClimateMonitoring
JOIN ClimateMonitoringValue USING (id);

enter image description here

© www.soinside.com 2019 - 2024. All rights reserved.