我想从动态网站上的表中检索所有信息,我有以下代码:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
import sys
reload(sys)
import re
import csv
from time import sleep
sys.setdefaultencoding('utf-8') #added since it would give error for certain values when using str(i)
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
prefs = {'profile.managed_default_content_settings.images':2}
chrome_options.add_experimental_option("prefs", prefs)
driver = webdriver.Chrome(chrome_options=chrome_options)
maxcr = 1379
listofrows = []
url = "http://biggestbook.com/ui/catalog.html#/itemDetail?itemId=HERY4832YER01&uom=CT"
print(url)
driver.get(url)
wait = WebDriverWait(driver,10)
# Trying to get the table
tableloadwait = (wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".panel-body"))))
table = driver.find_elements_by_css_selector(".panel-body")
print(table)
RowsOfTable = table.get_attribute("tr")
但是,我一直在收到错误,但到目前为止还没有。如何检索表格的信息?非常感谢!
错误:RowsOfTable = table.get_attribute(“tr”)AttributeError:'list'对象没有属性'get_attribute'
以下是获取产品详细信息的代码
tableloadwait = (wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".panel-body"))))
driver.find_element_by_xpath("//span[contains(.,'Product Details')]").click()
rows = driver.find_elements_by_xpath("//span[contains(.,'Product Details')]/ancestor::div[@class='accordion-top-border']//tr[(@ng-repeat='attr in attributes' or @ng-repeat='field in fields') and @class='visible-xs']")
for rowNum in range(len(rows)):
print(rows[rowNum].get_attribute('innerText'))
driver.quit()
我们必须根据您的要求修剪值或打破值。
如果您想根据行文本获取数据,请使用以下内容。
upcData = driver.find_element_by_xpath("//strong[.='UPC']/parent::td").get_attribute('innerText').replace('UPC','').replace('\n','').replace(' ','')
首先使用相应的+按钮展开手风琴,然后选择表格。添加等待项目存在。如果您想要另一个表,请将expandSigns
索引更改为2。
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
url = 'http://biggestbook.com/ui/catalog.html#/itemDetail?itemId=HERY4832YER01&uom=CT'
driver = webdriver.Chrome()
driver.get(url)
expandSigns = WebDriverWait(driver,10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".glyphicon-plus")))
expandSigns[1].click()
WebDriverWait(driver,10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "td")))
table = driver.find_element_by_css_selector('table')
html = table.get_attribute('outerHTML')
df = pd.read_html(html)
print(df)
driver.quit()
如果你需要刮,而不是测试,你可以使用请求来获取数据。下面的代码是如何从页面获取数据的示例。
import requests
import re
# Return header page(html) to get token and list key
response = requests.get("http://biggestbook.com/ui/catalog.html#/itemDetail?itemId=HERY4832YER01&uom=CT")
# Get token using regular expression
productRecommToken = re.search("'productRecommToken','(.+)'", response.text)[1]
# Get list of keys using regular expression
listKey = re.search("'listKey',\\['(.*?)'\\]", response.text)[1].split("','")
# Create header with token
headers = {
'Accept': 'application/json, text/plain, */*',
'Referer': 'http://biggestbook.com/ui/catalog.html',
'Origin': 'http://biggestbook.com',
'DNT': '1',
'token': productRecommToken,
'BiggestBook-Handle-Errors-Generically': 'true',
}
# Create parameters with list keys and search values
params = (
('listKey', listKey),
('uom', 'CT'),
('vc', 'n'),
('win', 'HERY4832YER01'),
)
# Return json with all details about product
response = requests.get('https://api.essendant.com/digital/digitalservices/search/v1/items',
headers=headers,
params=params)
data = response.json()
# Get items from json, probably could be more than one
items = data["items"]
# Iterate and get details you need. Check "data" to see all possible details you can get
for i in items:
print(i["manufacturer"])
print(i["description"])
print(i["actualPrice"])
# Get attributes
attributes = i["attributes"]
# Example hot you can get specific one attribute.
thickness = list(filter(lambda d: d['name'] == 'Thickness', attributes))[0]["value"]
# Print all attributes as name = value
for a in attributes:
print(f"{a['name']} = {a['value']}")