代码的想法很简单。它将转到搜索页面,“查看”那里的所有产品,逐一单击,抓取所有数据,返回搜索页面并单击下一个产品的链接。所以我正在做这样的事情:
# Scrapes every product reachable from the listing page at `url_line`: opens
# each product, reads its name, price, code and sizes, records the values for
# a spreadsheet and a PDF, then returns to the listing.
#
# NOTE(review): the loop nesting below is reconstructed from the statement
# order. `driver`, `actions`, `url_line`, `SHEET_COLUMNS`, `cotacao_libra`,
# `calculate_price_in_reais` and `calculate_sale_price` are assumed to be
# defined elsewhere.
sheet_data = []
pdf_data = []
# NOTE(review): created once and never cleared, so the sizes of every product
# accumulate in this single list, and each 'Sizes' entry stored below refers
# to that same list object.
sizes = []
cotacao_libra() # Assuming this function is already defined
i = 0  # incremented once per product; not read anywhere in this snippet
driver.get(url_line)

# Cookies: accept the cookie banner, then close the 'zonos' and 'global-popup'
# dialogs. The close buttons are clicked through JavaScript.
WebDriverWait(driver, 100).until(EC.element_to_be_clickable((By.XPATH,"//button[@title='Accept all cookies']"))).click()
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH,"//div[@id='zonos']")))
close_button = driver.find_element(By.XPATH, "//a[@class = 'z-close']")
driver.execute_script("arguments[0].click();", close_button)
WebDriverWait(driver, 100).until(EC.element_to_be_clickable((By.XPATH,"//div[@class='global-popup']")))
button = driver.find_element(By.XPATH,"//button[@aria-label='Close']")
driver.execute_script("arguments[0].click();", button)
#driver.implicitly_wait(500000)

df_sheet = pd.DataFrame(columns=SHEET_COLUMNS)  # not used again in this snippet
pdf = FPDF()
pdf.add_page()
pdf.set_font('Arial', 'B', 12)
driver.maximize_window()

product_list = WebDriverWait(driver, 20).until(
    EC.presence_of_all_elements_located((By.XPATH, "//div[@class = 'product-listing__grid']//div[@class = '_root_129ai_6 product-listing__grid-item']/a"))
)
while True:
    # NOTE(review): enumerate() walks the list object fetched before this
    # loop started; rebinding `product_list` inside the body does not change
    # the number of iterations, and `product_list[index]` raises IndexError
    # if the refreshed list is shorter.
    for index, value in enumerate(product_list):
        # Look the links up again: the listing page is reloaded after every
        # product visit.
        product_list = WebDriverWait(driver, 10).until(
            EC.presence_of_all_elements_located((By.XPATH, "//div[@class = 'product-listing__grid']//div[@class = '_root_129ai_6 product-listing__grid-item']/a"))
        )
        driver.execute_script("arguments[0].scrollIntoView();", product_list[index])
        driver.save_screenshot("window.png")
        # NOTE(review): this is the first <img> found anywhere on the page,
        # not necessarily the thumbnail of product_list[index].
        image_element = WebDriverWait(driver, 50).until(EC.presence_of_element_located((By.TAG_NAME, "img")))
        image = image_element.get_attribute("src")
        i += 1
        actions.move_to_element(product_list[index]).perform()
        driver.execute_script("arguments[0].click();", product_list[index])
        # NOTE(review): implicitly_wait() does not pause the script. It sets
        # the session-wide element lookup timeout in seconds, and the Selenium
        # documentation warns that combining it with WebDriverWait leads to
        # unpredictable wait times.
        driver.implicitly_wait(50)
        #driver.save_screenshot("screenproduct.png")

        # Product page: read the title, the price and the product code.
        product_name = driver.find_element(By.CLASS_NAME, "ml-meta__title").text
        print(product_name)
        price = driver.find_element(By.CLASS_NAME, "ml-prices__price").text
        price = re.sub("£", "", price)
        product_price = float(price)
        print(product_price)
        # The code is taken as the text after the last '-' of the product URL.
        product_code = driver.current_url.split('-')[-1]
        print(product_code)

        # NOTE(review): find_element() raises NoSuchElementException when
        # nothing matches, so this `if` can never see a falsy value.
        size_list = driver.find_element(By.XPATH, "//div[@class = 'ml-size__sizes']")
        if size_list:
            driver.execute_script("arguments[0].scrollIntoView();", size_list)
            # NOTE(review): an XPath starting with '//' searches the whole
            # document even when called on an element; './/' would restrict
            # it to `size_list`.
            for size in size_list.find_elements(By.XPATH, "//button[@class='ml-size__size qa-size-item']"):
                # The raw label is appended first; when it contains '-' or
                # '(' the trimmed label is appended as well, so such sizes
                # are stored twice.
                product_size = size.text
                sizes.append(product_size)
                if "-" in size.text:
                    product_size = size.text.split('-')[0]
                    sizes.append(product_size)
                elif "(" in size.text:
                    product_size = size.text.split('(')[0]
                    sizes.append(product_size)
        print(sizes)

        price_in_reais = calculate_price_in_reais(product_price) # Function needs to be defined
        sale_price = calculate_sale_price(product_price) # Function needs to be defined
        sheet_data.append({
            'Photo': image,
            'Code': product_code,
            'Description': product_name,
            'Purchase': price_in_reais,
            'Sale': sale_price,
            'Sizes': sizes
        })
        pdf_data.append({
            'Photo': image,
            'Code': product_code,
            'Description': product_name,
            'Price': product_price
        })
        # NOTE(review): this writes every row collected so far on each
        # product, so earlier products are repeated in the PDF. It also
        # rebinds `index`, which the enumerate() above resets on the next
        # iteration.
        for index, row in pd.DataFrame(pdf_data).iterrows():
            for data in row.values:
                pdf.cell(1.6, 0.5, str(data))
            pdf.ln()

        # Back to the listing. The only synchronisation afterwards is the
        # link lookup at the top of the next iteration.
        driver.execute_script("window.history.go(-1)")
        print("exited")
        driver.implicitly_wait(5000)

    # NOTE(review): find_element() raises instead of returning a falsy value,
    # so the `else: break` branch below is never reached.
    botao_vermais = driver.find_element(By.XPATH, "//div[@class = 'product-listing__view-more']/button")
    if botao_vermais:
        botao_vermais.click()
        driver.implicitly_wait(50)
    else:
        break
有时它能抓取大约 5 个产品,在最后一个产品上仍然会打印“exited”,但随后抛出超时异常。更常见的情况是,它卡在第一个或第二个产品上,打印“exited”之后抛出超时异常。所以我猜问题出在返回(go back)命令和下一次循环开始之间。另外,有人能告诉我自动滚动的做法是否正确吗?
尝试避免超时异常并进行一些自动滚动。
一些建议:
确保元素已完全加载
使用 try/except 块捕获异常
显式等待
刷新产品列表
# Visits every product on the listing page using explicit waits only: open a
# product, read its data into `sheet_data`, go back, and re-locate the links
# before touching the next one. When the visible products are done, the
# "view more" button is clicked until it is no longer available.
#
# `driver`, `sheet_data`, `calculate_price_in_reais` and
# `calculate_sale_price` are assumed to be defined earlier in the script.

# Locators shared by the waits below, so each XPath is written only once.
PRODUCT_LINKS = (
    By.XPATH,
    "//div[@class='product-listing__grid']"
    "//div[@class='_root_129ai_6 product-listing__grid-item']/a",
)
VIEW_MORE_BUTTON = (
    By.XPATH,
    "//div[@class='product-listing__view-more']/button",
)
SIZE_BUTTONS = (By.XPATH, "//button[@class='ml-size__size qa-size-item']")

# hrefs of the links that were already opened, so that a product is not
# scraped again when the listing is walked once more after "view more".
# NOTE(review): assumes each product link carries a distinct href - confirm.
visited_links = set()

while True:
    try:
        product_list = WebDriverWait(driver, 20).until(
            EC.presence_of_all_elements_located(PRODUCT_LINKS)
        )
        for index in range(len(product_list)):
            # Remembered so the error handler can tell whether the click
            # actually left the listing page.
            listing_url = driver.current_url
            try:
                # References obtained before a navigation are stale, so the
                # links are located again on every iteration.
                product_list = WebDriverWait(driver, 10).until(
                    EC.presence_of_all_elements_located(PRODUCT_LINKS)
                )
                if index >= len(product_list):
                    # The refreshed listing is shorter than the one the loop
                    # started with; nothing left to open in this pass.
                    break
                link = product_list[index]

                href = link.get_attribute("href")
                if href:
                    if href in visited_links:
                        continue
                    visited_links.add(href)

                # The photo has to be read before leaving the listing.
                # NOTE(review): presumably the thumbnail <img> sits inside
                # the product link - verify against the page markup.
                thumbnails = link.find_elements(By.TAG_NAME, "img")
                image = thumbnails[0].get_attribute("src") if thumbnails else None

                driver.execute_script("arguments[0].scrollIntoView();", link)
                driver.execute_script("arguments[0].click();", link)

                # Wait for the product page instead of sleeping.
                WebDriverWait(driver, 20).until(
                    EC.presence_of_element_located((By.CLASS_NAME, "ml-meta__title"))
                )
                product_name = driver.find_element(By.CLASS_NAME, "ml-meta__title").text
                price = driver.find_element(By.CLASS_NAME, "ml-prices__price").text
                # Drop the currency sign and thousands separators before
                # converting to a number.
                product_price = float(price.replace("£", "").replace(",", "").strip())
                product_code = driver.current_url.split('-')[-1]

                # find_elements() returns an empty list when the product has
                # no size buttons, so no existence check is needed. Only the
                # text before the first '-' or '(' of each label is kept.
                size_list = driver.find_elements(*SIZE_BUTTONS)
                sizes = [
                    size.text.split('-')[0].split('(')[0].strip()
                    for size in size_list
                ]

                sheet_data.append({
                    'Photo': image,
                    'Code': product_code,
                    'Description': product_name,
                    'Purchase': calculate_price_in_reais(product_price),
                    'Sale': calculate_sale_price(product_price),
                    'Sizes': sizes
                })

                driver.execute_script("window.history.go(-1)")
                WebDriverWait(driver, 20).until(
                    EC.presence_of_all_elements_located(PRODUCT_LINKS)
                )
                print("exited")
            except TimeoutException:
                print(f"TimeoutException on product {index}. Skipping...")
                # Only go back if the click really navigated away; otherwise
                # history.go(-1) would leave the listing page itself.
                if driver.current_url != listing_url:
                    driver.execute_script("window.history.go(-1)")
                # A timeout here propagates to the outer handler and ends
                # the scrape.
                WebDriverWait(driver, 20).until(
                    EC.presence_of_all_elements_located(PRODUCT_LINKS)
                )

        try:
            botao_vermais = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable(VIEW_MORE_BUTTON)
            )
        except TimeoutException:
            # No "view more" button left: every product has been listed.
            break

        shown_before = len(product_list)
        first_link = product_list[0]
        botao_vermais.click()
        # Wait until the click has had an effect - either more links are
        # present or the old grid was replaced - so the next pass does not
        # walk the unchanged listing. A timeout here ends the scrape through
        # the outer handler.
        WebDriverWait(driver, 20).until(
            lambda d: len(d.find_elements(*PRODUCT_LINKS)) > shown_before
            or EC.staleness_of(first_link)(d)
        )
    except TimeoutException:
        print("TimeoutException on product list. Exiting loop.")
        break