下面的示例演示如何使用 BeautifulSoup 解析 XML 片段,并据此创建 pandas 数据框(DataFrame):
import pandas as pd
from bs4 import BeautifulSoup
# Sample XML fragment used as input: a single <ESpace> root holding an empty
# <Actions> element and an <Entities> list with two <Entity> elements
# ("Document" and "DateFormat"), each carrying its own <Attribute> children.
# The backslash right after the opening quotes keeps the literal from
# starting with a blank line.
xml_doc = """\
<ESpace Key="ESpace:y7vmLUGpAMoLhw4MvpnAPg" Name="Onboarding_Core" UserProviderEspace="Users" DefaultThemeNewRuntime="ThemeReference:rQ3WUJw0o0+BC9ZDjXD7uA" DefaultTransition="Fade" UseCookies="Yes" WebScreenRenderingMode="HTML5" ModuleType="CrossDevice">
<Actions>
</Actions>
<Entities>
<Entity Key="Entity:X2oBACCeRUeSQxJFoyn1hA" ExposeReadOnly="Yes" Identifier="Attribute:_EXIewz0o024UgjslFt7_g" Public="Yes" Name="Document">
<Attributes>
<Attribute Key="Attribute:_EXIewz0o024UgjslFt7_g" IsMandatory="Yes" Name="Id" DataType="Long Integer" IsAutoNumber="Yes"/>
<Attribute Key="Attribute:ypRA49SJBkexc+ICR12zdw" Length="50" Name="Name" DataType="Text"/>
<Attribute Key="Attribute:mdWB6OaTkE26BwxQRRfbBg" Name="CreatedOn" DataType="Date Time"/>
<Attribute Key="Attribute:W0foZDFa5kme6QySnTNEMA" Name="Binary" DataType="Binary Data"/>
</Attributes>
</Entity>
<Entity Key="Entity:6NuECH7xe06M250tQsPsJA" IsStaticEntity="Yes" IsMultitenant="No" ExposeReadOnly="Yes" Identifier="Attribute:jBExnTjCy0GOtV5fZZG0pw" Public="Yes" Name="DateFormat" Folder="Folder:SErnhCWKOUKpp39VDQVVcg">
<Attributes>
<Attribute Key="Attribute:jBExnTjCy0GOtV5fZZG0pw" IsMandatory="Yes" Name="Id" DataType="Integer" IsAutoNumber="Yes" DeleteRule="Ignore"/>
<Attribute Key="Attribute:Q0S8yDbafEi+YMF0rXnbvg" IsMandatory="Yes" Length="50" Name="Format" DataType="Text" DeleteRule="Ignore"/>
<Attribute Key="Attribute:XUw_zMWoZki2qWuZcYRizw" IsMandatory="Yes" Length="50" Name="Example" DataType="Text" DeleteRule="Ignore"/>
<Attribute Key="Attribute:YpcYFT5YNUKxqaYLZOe6qA" IsMandatory="Yes" Name="Order" DataType="Integer" DeleteRule="Ignore"/>
<Attribute Key="Attribute:3QRHk0Wl7kuwg4gmzIs8yw" IsMandatory="Yes" Name="IsActive" DataType="Boolean" DeleteRule="Ignore"/>
</Attributes>
</Entity>
</Entities>
</ESpace>
"""
# Parse with the "xml" feature (not an HTML parser) so the mixed-case tag
# names used in the selectors below are matched as written.
soup = BeautifulSoup(xml_doc, "xml")

# Build one row per <Attribute> element: the owning entity's Name plus every
# XML attribute found on that element.
out = []
for entity in soup.select("Entity"):
    for attribute in entity.select("Attribute"):
        out.append({"Entity Name": entity["Name"], **attribute.attrs})

# Rows do not all share the same keys (e.g. Length, DeleteRule); pandas fills
# the gaps with NaN when it builds the frame from the list of dicts.
df = pd.DataFrame(out)
print(df)
运行后输出如下:
   Entity Name                               Key  IsMandatory       Name      DataType  IsAutoNumber  Length  DeleteRule
0     Document  Attribute:_EXIewz0o024UgjslFt7_g          Yes         Id  Long Integer           Yes     NaN         NaN
1     Document  Attribute:ypRA49SJBkexc+ICR12zdw          NaN       Name          Text           NaN      50         NaN
2     Document  Attribute:mdWB6OaTkE26BwxQRRfbBg          NaN  CreatedOn     Date Time           NaN     NaN         NaN
3     Document  Attribute:W0foZDFa5kme6QySnTNEMA          NaN     Binary   Binary Data           NaN     NaN         NaN
4   DateFormat  Attribute:jBExnTjCy0GOtV5fZZG0pw          Yes         Id       Integer           Yes     NaN      Ignore
5   DateFormat  Attribute:Q0S8yDbafEi+YMF0rXnbvg          Yes     Format          Text           NaN      50      Ignore
6   DateFormat  Attribute:XUw_zMWoZki2qWuZcYRizw          Yes    Example          Text           NaN      50      Ignore
7   DateFormat  Attribute:YpcYFT5YNUKxqaYLZOe6qA          Yes      Order       Integer           NaN     NaN      Ignore
8   DateFormat  Attribute:3QRHk0Wl7kuwg4gmzIs8yw          Yes   IsActive       Boolean           NaN     NaN      Ignore