我试图从fareham.gov.uk网页上删除一个应用程序,每次我尝试它都会返回错误而不是参考号。有人可以帮我解决这个问题吗?我是网络抓取的新手,每当我尝试谷歌修复一切都没有用。
错误:
Traceback (most recent call last):
File "C:\Users\DBaldwin\Desktop\sel.py", line 39, in <module>
div = soup.select('<div Class="docGridRow"><div Class="detailsCells detailsFieldNames">Reference</div><div Class="detailsCells detailsValues">')
File "C:\Users\DBaldwin\Anaconda3\lib\site-packages\bs4\element.py", line 1477, in select
'Unsupported or invalid CSS selector: "%s"' % token)
ValueError: Unsupported or invalid CSS selector: "<div"
码:
import time
import urllib.request
from bs4 import BeautifulSoup
from selenium import webdriver
url = "http://www.fareham.gov.uk/casetrackerplanning/applicationsearch.aspx"
driver = webdriver.Chrome(executable_path=r"C:\Users\DBaldwin\Desktop\chromedriver.exe")
driver.get(url)
driver.find_element_by_id("lnkAllowCookies").click()
def rerun():
driver.find_element_by_id("BodyPlaceHolder_uxLinkButtonShowAdvancedSearch").click()
time.sleep(3)
driver.find_element_by_id("uxStartDateDecisionTextBox").click()
driver.find_element_by_id("uxStartDateDecisionTextBox").clear()
driver.find_element_by_id("uxStartDateDecisionTextBox").send_keys("1/8/2018")
driver.find_element_by_id("uxStopDateDecisionTextBox").click()
driver.find_element_by_id("uxStopDateDecisionTextBox").clear()
driver.find_element_by_id("uxStopDateDecisionTextBox").send_keys("308/2018")
driver.find_element_by_id("BodyPlaceHolder_uxButtonSearch").click()
time.sleep(3)
rerun()
elements = driver.find_elements_by_class_name("searchResultsCell")
for e in elements:
e.click()
newUrl = driver.current_url
go = urllib.request.urlopen(newUrl)
soup = BeautifulSoup(go.read(), "html.parser")
div = soup.select('<div Class="docGridRow"><div Class="detailsCells detailsFieldNames">Reference</div><div Class="detailsCells detailsValues">')
test = div[0].get_text()
print(test)
driver.back()
rerun()
print("Worked???")
尝试使用下面的代码来获取所需的值
elements = driver.find_elements_by_css_selector(".searchResultsCell a")
links = [link.get_attribute('href') for link in elements]
for link in links:
driver.get(link)
print(driver.find_element_by_css_selector('div.docGridRow').text)