我正在从亚马逊网站上抓取数据，目的是获得产品名称和相应的 ASIN（亚马逊标准识别号）。有人可以建议我如何解决这个错误吗？提前致谢！
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from time import sleep
import bs4
from urllib.request import urlopen as url_req
from bs4 import BeautifulSoup as soup
# Scrape the first search result for each keyword from amazon.in.
#
# For every keyword the script opens the search page in Chrome, re-downloads
# the resulting URL with urllib, parses it with BeautifulSoup and prints the
# text of the first matching result item.

# Raw string: the Windows path contains backslashes that must stay literal.
driver = webdriver.Chrome(executable_path=r'C:\Webdrivers\chromedriver.exe')

# NOTE(review): 'addias' looks like a typo for 'adidas' -- confirm with author.
my_lists = ['nike', 'addias']

# The search URL is assembled as url1 + keyword + url2; both parts are
# constant, so they are defined once outside the loop.
url1 = "https://www.amazon.in/s/ref=nb_sb_noss_2?url=search-alias%3Daps&field-keywords="
url2 = "&rh=i%3Aaps%2Ck%3A"

try:
    # `keyword` instead of `list`, so the builtin is not shadowed.
    for keyword in my_lists:
        driver.get(url1 + str(keyword) + url2)
        new_url = driver.current_url

        # Fetch the page the browser ended up on; the context manager closes
        # the response even if reading fails.
        with url_req(new_url) as uclient:
            page_html = uclient.read()

        # html parser
        page_soup = soup(page_html, "html.parser")
        try:
            title = page_soup.findAll("li", {"class": "s-result-item s-result-card-for-container a-declarative celwidget"})
            # An empty result list raises IndexError here, which is reported
            # below instead of stopping the run.
            print(title[0].text)
        except Exception as e:
            print("Exceptions_Found",format(e))

        sleep(5)
        print ("Query link: ",driver.current_url)
        driver.back()
        sleep(1)
finally:
    # Always shut the browser down, even when a request or parse step fails.
    driver.quit()
问题出在您的 HTML 解析部分，下面是解决方案：
import re

# Parse the downloaded page and print the product titles found in it.
# `page_html` (raw page content) and `soup` (the BeautifulSoup class) are
# expected to be defined earlier in the script.
page_soup = soup(page_html, "html.parser")
try:
    # Collect every <li> element whose id starts with "result".
    results_list = page_soup.findAll("li", id=re.compile("^result"))

    # for getting title of all results
    for result in results_list:
        heading = result.find('h2')
        # Skip entries without an <h2> rather than aborting the whole listing.
        if heading is not None:
            print(heading.text)

    # for getting title of only first result
    # Use the first result itself: iterating over it would walk its child
    # nodes, and text children do not support tag lookups.
    if results_list:
        first_heading = results_list[0].find('h2')
        if first_heading is not None:
            print(first_heading.text)
except Exception as e:
    print("Exceptions_Found",format(e))