# Download the consolidated company page and parse its first data table
# into a DataFrame.
url = 'https://www.screener.in/company/TATAPOWER/consolidated/'
print(url)

# A browser-like User-Agent is sent with the request.
req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})

# Use the response as a context manager so the connection is always closed,
# even if reading fails part-way through.
with urlopen(req) as response:
    page = response.read()

soup = BeautifulSoup(page, 'html.parser')

# Only the first table whose class attribute matches is used; an IndexError
# here means the page contained no such table.
table = soup.find_all("table", {"class": "data-table responsive-text-nowrap"})[0]

# read_html expects a file-like object, hence the StringIO wrapper around
# the table's HTML.
df = pd.read_html(StringIO(str(table)))[0]
df
上面的代码工作正常,但是我无法获取表格中展开后才显示的其他信息。
有人可以帮我吗?
正如评论中已经指出的,这些内容是按需动态加载的,不过加载它们的请求同样可以被复现,从而获取相应内容。
要做到这一点,您需要遍历表格的每一行,并在必要时发出相应的请求。
import requests
import pandas as pd
from bs4 import BeautifulSoup
# Build a DataFrame from the "#quarters" table of the company page.
# Rows that carry an expand button are fetched from the schedules API
# instead of being read from the static HTML.
url = 'https://www.screener.in/company/TATAPOWER/consolidated/'
headers = {'User-Agent': 'Mozilla/5.0'}
timeout = 30  # seconds; without a timeout a stalled server blocks forever

# NOTE(review): 3371 is presumably the site's internal id for TATAPOWER;
# confirm before reusing this script for another company.
schedules_url = 'https://www.screener.in/api/company/3371/schedules/'

# Name the parser explicitly so the result does not depend on which optional
# parsers happen to be installed.
soup = BeautifulSoup(
    requests.get(url, headers=headers, timeout=timeout).text,
    'html.parser',
)

# Column names: a label for the first column plus the header row's cells.
keys = ['Item'] + list(soup.select_one('#quarters thead tr').stripped_strings)

data = []
# The last body row is skipped on purpose.
for row in soup.select('#quarters tbody tr')[:-1]:
    if row.td.button:
        # The button text (minus surrounding spaces and '+') is sent as the
        # ``parent`` query parameter. Passing the query as ``params`` lets
        # requests URL-encode it and sends a real ``section`` parameter
        # (the hand-built URL had ``&section`` corrupted into ``§ion``).
        d = requests.get(
            schedules_url,
            params={
                'parent': row.td.button.text.strip(" +"),
                'section': 'quarters',
                'consolidated': '',
            },
            headers=headers,
            timeout=timeout,
        ).json()
        # Only the first entry of the JSON object is turned into a row.
        first_key = next(iter(d))
        data.append({"Item": first_key, **d[first_key]})
    else:
        # Plain row: pair its cell texts with the column names.
        data.append(dict(zip(keys, row.stripped_strings)))

pd.DataFrame(data)