如果您可以将 HTML 保存到文本文件中，就可以用一个简单的脚本提取您想要的任何内容。我建议使用 Beautiful Soup（Python 的 HTML 解析库）：
pip3 install beautifulsoup4
from bs4 import BeautifulSoup
# Extract the "data-link" attribute from every <div> in an HTML document.

# Load the HTML into a variable (placeholder: replace with the real markup,
# e.g. the contents of the saved text file).
html_content = "Loaded HTML here"
# Parse the HTML with the parser bundled with the standard library.
soup = BeautifulSoup(html_content, 'html.parser')
# Find all <div> elements.
all_div = soup.find_all('div')
# Loop over the elements and report each one's "data-link" value.
for my_div in all_div:
    # Tag.get() returns None for a missing attribute instead of raising,
    # so test the result explicitly rather than relying on try/except.
    data_link = my_div.get("data-link")
    if data_link is None:
        print("Attribute not found")
    else:
        print(data_link)