我想读取 XML 文件并将信息写入 MySQL 数据库(本地主机)。
桌子:
CREATE TABLE ClimateMonitoring (
id INT PRIMARY KEY,
name VARCHAR(255),
label VARCHAR(255),
value_type INT,
offset FLOAT,
gain FLOAT,
precision_level INT,
value VARCHAR(255),
unit VARCHAR(50),
sensor_date DATETIME,
insert_date DATETIME DEFAULT CURRENT_TIMESTAMP
);
代码:
from lxml import etree
import mysql.connector
def parse_and_insert_to_mysql(file_path, db_config):
conn = None
cursor = None
data_found = False
try:
# Parse the XML file using lxml
print("Parsing XML file using lxml...")
tree = etree.parse(file_path)
root = tree.getroot()
print("XML file parsed successfully.")
# Print the root tag to verify successful parsing
print(f"Root tag: {root.tag}")
# Define the namespace
namespace = {'icom': 'something'}
# Connect to the MySQL database
print("Connecting to MySQL database...")
conn = mysql.connector.connect(**db_config)
cursor = conn.cursor()
print("Connected to MySQL database.")
# Extract data and insert into the database
for item in root.findall('icom:Item', namespaces=namespace):
data_found = True # Set flag to true if at least one item is found
item_id = item.attrib['id']
name = item.attrib['name']
label = item.find('icom:Label', namespaces=namespace).text
value_element = item.find('icom:Value', namespaces=namespace)
value_type = value_element.attrib['valueType']
offset = value_element.attrib['offset']
gain = value_element.attrib['gain']
precision_level = value_element.attrib['precision']
value = value_element.text
unit = item.find('icom:Unit', namespaces=namespace).text if item.find('icom:Unit', namespaces=namespace) is not None else ''
sensor_date = '2024-10-05 00:00:00'
# Debugging: Print each value to verify correctness
print(f"Preparing to insert: id={item_id}, name={name}, label={label}, value_type={value_type}, offset={offset}, gain={gain}, precision_level={precision_level}, value={value}, unit={unit}, sensor_date={sensor_date}")
insert_query = (
"INSERT INTO ClimateMonitoring (id, name, label, value_type, offset, gain, precision_level, value, unit, sensor_date) "
"VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
)
try:
cursor.execute(insert_query, (item_id, name, label, value_type, offset, gain, precision_level, value, unit, sensor_date))
print(f"Inserted row with id={item_id}")
except mysql.connector.Error as e:
print(f"Failed to insert row with id={item_id}: {e}")
except Exception as e:
print(f"Unexpected error when inserting row with id={item_id}: {e}")
# Check if no data was found
if not data_found:
raise ValueError("No insertable data found in the XML file.")
# Commit the transaction
print("Committing transaction...")
conn.commit()
print("Transaction committed successfully.")
except etree.XMLSyntaxError as e:
print(f"Error parsing the XML file: {e}")
except mysql.connector.Error as e:
print(f"Error with MySQL database: {e}")
except ValueError as e:
print(e)
except Exception as e:
print(f"An error occurred: {e}")
finally:
# Close the database connection
if cursor:
cursor.close()
if conn:
conn.close()
print("Database connection closed.")
# Path to your XML file
file_path = r"path"
# MySQL database configuration
db_config = {
'user': 'root',
'password': 'password', # The password you set during installation
'host': 'localhost', # Localhost
'database': 'climate' # The database you created
}
parse_and_insert_to_mysql(file_path, db_config)
XML 文件:
<?xml version="1.0" encoding="UTF-8" ?>
<?xml-stylesheet type="text/xsl" href="/simon.xsl" ?>
<icom:something schemaVersion="1.00" appVersion="1" xmlns:icom="a1234">
<Item id="354" name="SinglState">
<Label>Unit Status</Label>
<Value valueType="262144" offset="0.000" gain="1.000" precision="0">Unit On</Value>
<Unit></Unit>
</Item>
<Item id="361" name="LocTemp">
<Label>Return Air Temperature</Label>
<Value valueType="393216" offset="0.000" gain="1.000" precision="1">22.1</Value>
<Unit>°C</Unit>
</Item>
<Item id="379" name="Std. Sensor Humidity">
<Label>Return Air Humidity</Label>
<Value valueType="393216" offset="0.000" gain="1.000" precision="1">35.1</Value>
<Unit>%rH</Unit>
</Item>
<Item id="380" name="Supply Air Temperature">
<Label>Supply Air Temperature</Label>
<Value valueType="393216" offset="0.000" gain="1.000" precision="1">---</Value>
<Unit></Unit>
</Item>
<Item id="356" name="Actual Temperature Setpoint">
<Label>Return Air Temperature Setpoint</Label>
<Value valueType="393216" offset="0.000" gain="1.000" precision="1">21.5</Value>
<Unit>°C</Unit>
</Item>
<Item id="547" name="HuSetActDi">
<Label>Return Air Humidity Setpoint</Label>
<Value valueType="393216" offset="0.000" gain="1.000" precision="0">No</Value>
<Unit></Unit>
</Item>
<Item id="401" name="Supply Setpoint">
<Label>Supply Air Temperature Setpoint</Label>
<Value valueType="393216" offset="0.000" gain="1.000" precision="1">5.0</Value>
<Unit>°C</Unit>
</Item>
<Item id="1110" name="Show_Fan">
<Label>Fan Status</Label>
<Value valueType="0" offset="0.000" gain="1.000" precision="0">On</Value>
<Unit></Unit>
</Item>
<Item id="1111" name="Show_Cool">
<Label>Cooling Status</Label>
<Value valueType="0" offset="0.000" gain="1.000" precision="0">On</Value>
<Unit></Unit>
</Item>
<Item id="1112" name="Show_FC">
<Label>Freecooling Status</Label>
<Value valueType="0" offset="0.000" gain="1.000" precision="0">Off</Value>
<Unit></Unit>
</Item>
<Item id="1114" name="Show_EHeat">
<Label>Electrical Heater Status</Label>
<Value valueType="0" offset="0.000" gain="1.000" precision="0">Off</Value>
<Unit></Unit>
</Item>
<Item id="1113" name="Show_HotW">
<Label>Hot Water Status</Label>
<Value valueType="0" offset="0.000" gain="1.000" precision="0">Off</Value>
<Unit></Unit>
</Item>
<Item id="1116" name="Show_Dehum">
<Label>Dehumdification Status</Label>
<Value valueType="0" offset="0.000" gain="1.000" precision="0">Off</Value>
<Unit></Unit>
</Item>
<Item id="1115" name="Show_Hum">
<Label>Humidifier Status</Label>
<Value valueType="0" offset="0.000" gain="1.000" precision="0">Off</Value>
<Unit></Unit>
</Item>
<Item id="1118" name="Show_Maint">
<Label>Maintenance Status</Label>
<Value valueType="0" offset="0.000" gain="1.000" precision="0">Off</Value>
<Unit></Unit>
</Item>
</icom:something>
代码连接到数据库,但没有从 XML 文件中获取数据。我需要它来用这些信息填充表格。
XML 具有名称空间声明
xmlns:icom="a1234"
,因此您的 Python 名称空间字典应该是例如namespace = {'icom': 'a1234'}
。
此外,大多数后代元素都不在命名空间中,因此请使用例如
item.find('Label', ..)
而不是item.find('icom:Label', ..)
。
完整的lxml/etree特定代码:
for item in tree.findall('Item', namespaces=namespace):
data_found = True # Set flag to true if at least one item is found
item_id = item.attrib['id']
name = item.attrib['name']
label = item.find('Label', namespaces=namespace).text
value_element = item.find('Value', namespaces=namespace)
value_type = value_element.attrib['valueType']
offset = value_element.attrib['offset']
gain = value_element.attrib['gain']
precision_level = value_element.attrib['precision']
value = value_element.text
unit = item.find('Unit', namespaces=namespace).text if item.find('Unit',
namespaces=namespace) is not None else ''
sensor_date = '2024-10-05 00:00:00'
# Debugging: Print each value to verify correctness
print(
f"Preparing to insert: id={item_id}, name={name}, label={label}, value_type={value_type}, offset={offset}, gain={gain}, precision_level={precision_level}, value={value}, unit={unit}, sensor_date={sensor_date}")
喷出
Preparing to insert: id=354, name=SinglState, label=Unit Status, value_type=262144, offset=0.000, gain=1.000, precision_level=0, value=Unit On, unit=None, sensor_date=2024-10-05 00:00:00
Preparing to insert: id=361, name=LocTemp, label=Return Air Temperature, value_type=393216, offset=0.000, gain=1.000, precision_level=1, value=22.1, unit=°C, sensor_date=2024-10-05 00:00:00
Preparing to insert: id=379, name=Std. Sensor Humidity, label=Return Air Humidity, value_type=393216, offset=0.000, gain=1.000, precision_level=1, value=35.1, unit=%rH, sensor_date=2024-10-05 00:00:00
Preparing to insert: id=380, name=Supply Air Temperature, label=Supply Air Temperature, value_type=393216, offset=0.000, gain=1.000, precision_level=1, value=---, unit=None, sensor_date=2024-10-05 00:00:00
Preparing to insert: id=356, name=Actual Temperature Setpoint, label=Return Air Temperature Setpoint, value_type=393216, offset=0.000, gain=1.000, precision_level=1, value=21.5, unit=°C, sensor_date=2024-10-05 00:00:00
Preparing to insert: id=547, name=HuSetActDi, label=Return Air Humidity Setpoint, value_type=393216, offset=0.000, gain=1.000, precision_level=0, value=No, unit=None, sensor_date=2024-10-05 00:00:00
Preparing to insert: id=401, name=Supply Setpoint, label=Supply Air Temperature Setpoint, value_type=393216, offset=0.000, gain=1.000, precision_level=1, value=5.0, unit=°C, sensor_date=2024-10-05 00:00:00
Preparing to insert: id=1110, name=Show_Fan, label=Fan Status, value_type=0, offset=0.000, gain=1.000, precision_level=0, value=On, unit=None, sensor_date=2024-10-05 00:00:00
Preparing to insert: id=1111, name=Show_Cool, label=Cooling Status, value_type=0, offset=0.000, gain=1.000, precision_level=0, value=On, unit=None, sensor_date=2024-10-05 00:00:00
Preparing to insert: id=1112, name=Show_FC, label=Freecooling Status, value_type=0, offset=0.000, gain=1.000, precision_level=0, value=Off, unit=None, sensor_date=2024-10-05 00:00:00
Preparing to insert: id=1114, name=Show_EHeat, label=Electrical Heater Status, value_type=0, offset=0.000, gain=1.000, precision_level=0, value=Off, unit=None, sensor_date=2024-10-05 00:00:00
Preparing to insert: id=1113, name=Show_HotW, label=Hot Water Status, value_type=0, offset=0.000, gain=1.000, precision_level=0, value=Off, unit=None, sensor_date=2024-10-05 00:00:00
Preparing to insert: id=1116, name=Show_Dehum, label=Dehumdification Status, value_type=0, offset=0.000, gain=1.000, precision_level=0, value=Off, unit=None, sensor_date=2024-10-05 00:00:00
Preparing to insert: id=1115, name=Show_Hum, label=Humidifier Status, value_type=0, offset=0.000, gain=1.000, precision_level=0, value=Off, unit=None, sensor_date=2024-10-05 00:00:00
Preparing to insert: id=1118, name=Show_Maint, label=Maintenance Status, value_type=0, offset=0.000, gain=1.000, precision_level=0, value=Off, unit=None, sensor_date=2024-10-05 00:00:00
对我来说肯定能在 XML 中找到数据
更新:创建 MariaDB 数据库表并使用
xml.etree.ElementTree
解析 XML。我根据这个side使用了便携式MariaDB安装。 MariaDB 与 MySQL 非常相似,因此它应该无需更改或只需很少的更改即可工作。
为什么这次从 pandas.to_sql()
更新回 xml.etree.ElementTree
? -> pd.to_sql()
和 sqlachemy
将数据库列类型定义更改回文本!
import xml.etree.ElementTree as ET
import mariadb
import sys
class Parse_xml:
def __init__(self,item):
self.id = self.item_attr(item, "id", "i")
self.name = self.item_attr(item, "name", "s")
self.label = self.tag_text(item, "Label")
self.value = self.tag_text(item, "Value")
self.v_valueType = self.item_attr(item, "valueType", "i")
self.v_offset = self.item_attr(item, "offset", "f")
self.v_gain = self.item_attr(item, "gain", "f")
self.v_precision = self.item_attr(item, "precision", "i")
self.unit = self.tag_text(item, "Unit")
print(self.id, self.name, self.label, self.v_valueType, self.v_offset, self.v_gain, self.v_precision, self.value, self.unit)
def tag_text(self, elem_obj, tn):
tex = []
for e in elem_obj.iter():
if e.tag == tn:
tex.append(e.text)
return tex[0]
def item_attr(self, elem_obj, attr, ty):
d = []
for a in elem_obj.iter():
if a.get(attr) is not None:
if ty == 'i':
d.append(int(a.get(attr)))
if ty == 's':
d.append(a.get(attr))
if ty == 'f':
d.append(float(a.get(attr)))
return d[0]
if __name__ == "__main__":
try:
with mariadb.connect(
host = "localhost",
user = input("Enter username: "),
password = input("Enter password: "),
port = 3306,
database = "climate",
) as conn:
# Connect Message
cur = conn.cursor()
cur.execute("SELECT version();")
version = cur.fetchall()
print("Connected to Database Version", version[0][0])
sql = '''CREATE OR REPLACE TABLE climate.climate_monitoring (
id int(11) NOT NULL,
name varchar(255) DEFAULT NULL,
label varchar(255) DEFAULT NULL,
values_type int(11) DEFAULT NULL,
v_offset float DEFAULT NULL,
v_gain float DEFAULT NULL,
v_precision int(11) DEFAULT NULL,
value varchar(255) DEFAULT NULL,
unit varchar(50) DEFAULT NULL,
sensor_data DATETIME DEFAULT now(6),
insert_date TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP(6),
updated_at TIMESTAMP NOT NULL ON UPDATE CURRENT_TIMESTAMP(6),
PRIMARY KEY (id));'''
cur.execute(sql)
print("climate.climate_monitoring table created or replaced!")
filename = "Example.xml"
for event, elem in ET.iterparse("Climate_data.xml", events = ('start',)):
if elem.tag == 'Item':
data = Parse_xml(elem)
insert_data = (data.id, data.name, data.label, data.v_valueType, data.v_offset, data.v_gain, data.v_precision, data.value, data.unit)
cur = conn.cursor()
insert_sql = f"INSERT INTO climate_monitoring (id, name, label, values_type, v_offset, v_gain, v_precision, value, unit) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)"
cur.execute(insert_sql, insert_data)
conn.commit()
print("Data inserted")
elem.clear()
except mariadb.Error as e:
print(f"Error connecting to MariaDB Platform: {e}")
sys.exit(1)
您的 XML 是嵌套的。所以你必须将它加载到2个单独的表中。
将 XML 文件上传到 MySQL 可以从中导入该文件的目录(请参阅会话
@@secure_file_priv
设置)。然后使用:
CREATE TABLE ClimateMonitoring (
id INT PRIMARY KEY,
name VARCHAR(255),
Label VARCHAR(255),
Unit VARCHAR(50),
sensor_date DATETIME,
insert_date DATETIME DEFAULT CURRENT_TIMESTAMP
);
LOAD XML INFILE 'C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/test.xml'
INTO TABLE ClimateMonitoring
ROWS IDENTIFIED BY '<Item>'
SET insert_date = DEFAULT;
CREATE TABLE ClimateMonitoringValue (
id INT PRIMARY KEY,
valueType INT,
offset FLOAT,
gain FLOAT,
`precision` INT,
Value VARCHAR(255),
FOREIGN KEY (id) REFERENCES ClimateMonitoring (id)
);
LOAD XML INFILE 'C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/test.xml'
INTO TABLE ClimateMonitoringValue
ROWS IDENTIFIED BY '<Value>';
注意 - 列名称区分大小写。
您还必须显式地将值分配给自动利用的 DATETIME 列。
现在您可以使用
查看和使用整个数据SELECT *
FROM ClimateMonitoring
JOIN ClimateMonitoringValue USING (id);