下面的示例演示如何使用 BeautifulSoup 将 HTML 表格的各行解析为一个普通字典（键为条目名称，值为该条目的定义段落列表）。
之后您可以将该字典转换为您需要的任何对象。
from bs4 import BeautifulSoup
# Sample markup: every <tr> holds a term cell followed by a cell whose <p>
# children are the definition paragraphs for that term.
html_text = """\
<tr>
<td>
<p>Item 1</p>
</td>
<td>
<p>Definition for Item 1.</p>
<p>This may contain several paragraphs.</p>
</td>
</tr>
<tr>
<td>
<p>Item 2</p>
</td>
<td>
<p>Definition for Item 2.</p>
<p>This may contain several paragraphs.</p>
<p>And another paragraph here.</p>
</td>
</tr>"""

soup = BeautifulSoup(html_text, "html.parser")

# Maps each term (stripped text of the first cell) to the list of stripped
# paragraph texts found in the second cell.
out = {}
for row in soup.select("tr"):
    cells = row.select("td")
    # A row needs both a term cell and a definition cell. Rows without them
    # (for example a header row built only from <th>) are skipped instead of
    # raising while unpacking or indexing.
    if len(cells) < 2:
        continue
    header = cells[0].get_text(strip=True)
    paragraphs = [p.get_text(strip=True) for p in cells[1].select("p")]
    # Create the key only when there is something to store, and extend rather
    # than overwrite so that a term repeated in several rows accumulates.
    if paragraphs:
        out.setdefault(header, []).extend(paragraphs)

print(out)
输出（为便于阅读已重新排版；`print(out)` 实际输出为单行，且字符串使用单引号）：
{
"Item 1": ["Definition for Item 1.", "This may contain several paragraphs."],
"Item 2": [
"Definition for Item 2.",
"This may contain several paragraphs.",
"And another paragraph here.",
],
}