使用 Selenium 和 Python 时出现超时异常:这段页面导航逻辑有什么问题?

问题描述 投票:0回答:1

代码的想法很简单。它将转到搜索页面,“查看”那里的所有产品,逐一单击,抓取所有数据,返回搜索页面并单击下一个产品的链接。所以我正在做这样的事情:

    # Scrape every product reachable from the listing page at `url_line`:
    # open each product, read its name/price/code/sizes, record the data,
    # go back to the listing and continue; then press "view more" and repeat.

    # Accumulators: one dict per product for the spreadsheet and for the PDF,
    # plus the list of size labels.
    # NOTE(review): `sizes` is created once and never reset, so it keeps
    # growing across products and the same list object is stored in every
    # `sheet_data` entry.
    sheet_data = []
    pdf_data = []
    sizes = []

    cotacao_libra()  # Assuming this function is already defined
    i = 0
    driver.get(url_line)
    
    # Dismiss the overlays shown on first load: accept the cookie banner,
    # then close the element with id "zonos" and the "global-popup" dialog.
    # The two close buttons are clicked through JavaScript rather than
    # WebElement.click().
    WebDriverWait(driver, 100).until(EC.element_to_be_clickable((By.XPATH,"//button[@title='Accept all cookies']"))).click()
    WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH,"//div[@id='zonos']")))
    close_button = driver.find_element(By.XPATH, "//a[@class = 'z-close']")
    driver.execute_script("arguments[0].click();", close_button)
    WebDriverWait(driver, 100).until(EC.element_to_be_clickable((By.XPATH,"//div[@class='global-popup']")))
    button = driver.find_element(By.XPATH,"//button[@aria-label='Close']")
    driver.execute_script("arguments[0].click();", button)
    #driver.implicitly_wait(500000)
    
    # Output targets: an empty DataFrame with the sheet columns and a PDF
    # document with one page and a bold 12pt Arial font.
    df_sheet = pd.DataFrame(columns=SHEET_COLUMNS)
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font('Arial', 'B', 12)
    
    driver.maximize_window()

    # Initial snapshot of the product links in the listing grid; it is only
    # used to drive the enumerate() below.
    product_list = WebDriverWait(driver, 20).until(
            EC.presence_of_all_elements_located((By.XPATH, "//div[@class = 'product-listing__grid']//div[@class = '_root_129ai_6 product-listing__grid-item']/a"))
        )
    while True:
        
        # NOTE(review): every pass of the outer loop enumerates from index 0
        # again, so products handled before "view more" was pressed are
        # visited a second time.
        for index, value in enumerate(product_list):
            # Re-query the links on every iteration and use the fresh list by
            # index (presumably because the elements found before navigating
            # away are stale after coming back).
            # NOTE(review): this is the first explicit wait that runs after
            # "exited" is printed, so it is a likely source of the reported
            # TimeoutException — confirm with the traceback.
            product_list = WebDriverWait(driver, 10).until(
            EC.presence_of_all_elements_located((By.XPATH, "//div[@class = 'product-listing__grid']//div[@class = '_root_129ai_6 product-listing__grid-item']/a"))
        )

            driver.execute_script("arguments[0].scrollIntoView();", product_list[index])
            
            driver.save_screenshot("window.png")
            # NOTE(review): By.TAG_NAME "img" matches the first <img> in the
            # whole document, not necessarily this product's picture.
            image_element = WebDriverWait(driver, 50).until(EC.presence_of_element_located((By.TAG_NAME, "img")))
            image = image_element.get_attribute("src")
            i += 1
            
            actions.move_to_element(product_list[index]).perform()
            
            # Open the product page with a JavaScript click.
            driver.execute_script("arguments[0].click();", product_list[index])
            # NOTE(review): implicitly_wait() does not pause; it sets the
            # timeout used by later find_element calls for the rest of the
            # session. Selenium's docs advise against mixing implicit and
            # explicit waits.
            driver.implicitly_wait(50)
            #driver.save_screenshot("screenproduct.png")
            product_name = driver.find_element(By.CLASS_NAME, "ml-meta__title").text
            print(product_name)
            # Strip the pound sign so the price text can be parsed as a float.
            price = driver.find_element(By.CLASS_NAME, "ml-prices__price").text
            price = re.sub("£", "", price)
            product_price = float(price)
            print(product_price)

            # The product code is taken as the text after the last '-' in the URL.
            product_code = driver.current_url.split('-')[-1]
            print(product_code)
            
            # NOTE(review): find_element raises NoSuchElementException when
            # nothing matches, so `if size_list:` is always true when reached.
            size_list = driver.find_element(By.XPATH, "//div[@class = 'ml-size__sizes']")
            
            if size_list:
                driver.execute_script("arguments[0].scrollIntoView();", size_list)
                # NOTE(review): an XPath starting with "//" is evaluated
                # against the whole document even when called on an element.
                for size in size_list.find_elements(By.XPATH, "//button[@class='ml-size__size qa-size-item']"):
                    # The full label is always appended; when it contains '-'
                    # or '(' the part before that character is appended as
                    # well, so such sizes appear twice.
                    product_size = size.text
                    sizes.append(product_size)
                    if "-" in size.text:
                        product_size = size.text.split('-')[0] 
                        sizes.append(product_size)
                    elif "(" in size.text:
                        product_size = size.text.split('(')[0]
                        sizes.append(product_size)
            print(sizes)

            price_in_reais = calculate_price_in_reais(product_price)  # Function needs to be defined
            sale_price = calculate_sale_price(product_price)  # Function needs to be defined

            sheet_data.append({
                    'Photo': image,
                    'Code': product_code,
                    'Description': product_name,
                    'Purchase': price_in_reais,
                    'Sale': sale_price,
                    'Sizes': sizes
                        })
            pdf_data.append({
                    'Photo': image,
                    'Code': product_code,
                    'Description': product_name,
                    'Price': product_price
                        })

            # NOTE(review): this writes every row collected so far on each
            # product, so earlier products are repeated in the PDF. It also
            # rebinds `index`; enumerate() reassigns it on the next iteration.
            for index, row in pd.DataFrame(pdf_data).iterrows():
                for data in row.values:
                    pdf.cell(1.6, 0.5, str(data))
                pdf.ln()
            
            # Return to the listing page through the browser history.
            driver.execute_script("window.history.go(-1)")
            
            print("exited")
            # NOTE(review): this sets the implicit-wait timeout to 5000
            # seconds; it does not wait for the listing to reload.
            driver.implicitly_wait(5000)
            # NOTE(review): find_element raises NoSuchElementException when
            # the button is absent, so the `else: break` below is never
            # reached through a falsy value.
            botao_vermais = driver.find_element(By.XPATH, "//div[@class = 'product-listing__view-more']/button")        
            
        # After all listed products: press "view more" to load further items.
        if botao_vermais:
             botao_vermais.click()
             driver.implicitly_wait(50)
            
        else:
             break
            


有时它能抓取大约 5 个产品,在最后一个产品上仍会打印 "exited",但随后抛出超时异常。更常见的情况是,它卡在第一个或第二个产品上:先打印 "exited",然后抛出超时异常。所以我猜错误出在返回上一页(go back)的命令和下一轮循环开始之间。另外,能否有人就我的自动滚动做法是否正确给一些反馈?

尝试避免超时异常并进行一些自动滚动。

python selenium-webdriver timeout settimeout
1个回答
0
投票

一些建议:

  • 确保元素已完全加载

  • 使用 try/except 块捕获异常

  • 显式等待

  • 每次返回列表页后重新获取产品列表,避免使用已过期(stale)的元素引用

    # Suggested replacement for the question's main loop: wait explicitly for
    # the page you expect after every navigation, re-fetch the product links
    # after each return to the listing, and handle TimeoutException per
    # product so one slow page does not abort the whole run.
    # NOTE: `image` is not assigned anywhere in this snippet; capture it
    # before clicking the product, as the question's code does.
    while True:
        try:
            # Wait until the listing grid contains product links. The result
            # only drives enumerate(); a fresh list is fetched per iteration.
            product_list = WebDriverWait(driver, 20).until(
                EC.presence_of_all_elements_located((By.XPATH, "//div[@class='product-listing__grid']//div[@class='_root_129ai_6 product-listing__grid-item']/a"))
            )

            for index, value in enumerate(product_list):
                try:
                    # Elements located before navigating away cannot be
                    # reused, so look the links up again and pick by index.
                    product_list = WebDriverWait(driver, 10).until(
                        EC.presence_of_all_elements_located((By.XPATH, "//div[@class='product-listing__grid']//div[@class='_root_129ai_6 product-listing__grid-item']/a"))
                    )

                    # execute_script passes the element as arguments[0].
                    driver.execute_script("arguments[0].scrollIntoView();", product_list[index])
                    driver.execute_script("arguments[0].click();", product_list[index])
                    # Explicitly wait for the product page title instead of
                    # relying on implicitly_wait().
                    WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.CLASS_NAME, "ml-meta__title")))

                    product_name = driver.find_element(By.CLASS_NAME, "ml-meta__title").text
                    price = driver.find_element(By.CLASS_NAME, "ml-prices__price").text
                    price = re.sub("£", "", price)
                    product_price = float(price)
                    product_code = driver.current_url.split('-')[-1]

                    # Build a fresh size list per product, keeping only the
                    # text before the first '-' or '(' of each label.
                    size_list = driver.find_elements(By.XPATH, "//button[@class='ml-size__size qa-size-item']")
                    sizes = [size.text.split('-')[0].split('(')[0] for size in size_list]

                    sheet_data.append({
                        'Photo': image,
                        'Code': product_code,
                        'Description': product_name,
                        'Purchase': calculate_price_in_reais(product_price),
                        'Sale': calculate_sale_price(product_price),
                        'Sizes': sizes
                    })

                    # Go back and wait until the listing is present again
                    # before starting the next iteration.
                    driver.execute_script("window.history.go(-1)")
                    WebDriverWait(driver, 20).until(
                        EC.presence_of_all_elements_located((By.XPATH, "//div[@class='product-listing__grid']//div[@class='_root_129ai_6 product-listing__grid-item']/a"))
                    )
                    print("exited")

                except TimeoutException:
                    # A wait for this product expired: return to the listing
                    # and move on to the next product.
                    print(f"TimeoutException on product {index}. Retrying...")
                    driver.execute_script("window.history.go(-1)")
                    WebDriverWait(driver, 20).until(
                        EC.presence_of_all_elements_located((By.XPATH, "//div[@class='product-listing__grid']//div[@class='_root_129ai_6 product-listing__grid-item']/a"))
                    )
                    continue

            try:
                # Load more products; if the button never becomes clickable
                # there is nothing more to load, so stop.
                botao_vermais = WebDriverWait(driver, 10).until(
                    EC.element_to_be_clickable((By.XPATH, "//div[@class='product-listing__view-more']/button"))
                )
                botao_vermais.click()
            except TimeoutException:
                break

        except TimeoutException:
            # The listing itself did not load in time (or the recovery wait
            # in the per-product handler expired).
            print("TimeoutException on product list. Exiting loop.")
            break
    
© www.soinside.com 2019 - 2024. All rights reserved.