我试图从 Allen Solly 网站获取 100 件衬衫的清单。因此,我添加了“加载更多”按钮的逻辑,因为网站加载时仅显示 30 到 32 个产品。但是,当单击“加载更多”按钮时,它显示一条错误,指出该项目“无法滚动到视图中”。


堆栈跟踪: RemoteError@chrome://remote/content/shared/RemoteError.sys.mjs:8:8 WebDriverError@chrome://remote/content/shared/webdriver/Errors.sys.mjs:193:5 ElementNotInteractableError@chrome://remote/content/shared/webdriver/Errors.sys.mjs:353:5 webdriverClickElement@chrome://remote/content/marionette/interaction.sys.mjs:166:11 interaction.clickElement@chrome://remote/content/marionette/interaction.sys.mjs:135:11 clickElement@chrome://remote/content/marionette/actors/MarionetteCommandsChild.sys.mjs:204:29 receiveMessage@chrome://remote/content/marionette/actors/MarionetteCommandsChild.sys.mjs:84:31


import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import time

# Scrape up to 100 men's shirts from the Allen Solly listing page, open each
# product to read its description, and export the result to Excel.
#
# NOTE: this is the script as posted in the question, with the structural
# lines that were lost (try/break/closing parentheses/tab switching) restored
# so that it parses. The scraping strategy itself is intentionally unchanged.

# Set up the Selenium WebDriver for Firefox
driver = webdriver.Firefox()

try:
    # Navigate to the main shirts page
    url = 'https://allensolly.abfrl.in/c/men-shirts'
    driver.get(url)

    # Wait for the page to load completely
    WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.CLASS_NAME, 'ProductCard_productInfo__uZhFN')))

    # Ensure we have at least 100 shirts by clicking the "LOAD MORE" button if needed
    product_info = []
    while len(product_info) < 100:
        # Scroll down to load more products
        last_height = driver.execute_script("return document.body.scrollHeight")
        while True:
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(5)  # Wait for new products to load

            # Click the "LOAD MORE" button
            load_more_button = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="__next"]/main/div/div[7]/div/div[2]/button')))
            # NOTE: a native WebDriver click; per the description and stack
            # trace accompanying this script, this is the step that raises
            # ElementNotInteractableError ("could not be scrolled into view").
            load_more_button.click()
            print("Clicked 'LOAD MORE' button.")
            # Wait for new content to load
            WebDriverWait(driver, 10).until(EC.staleness_of(load_more_button))

            # Stop scrolling once the page height no longer grows
            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height

        # Get the updated page source
        page_source = driver.page_source
        soup = BeautifulSoup(page_source, 'html.parser')

        # Find all elements with the product title class and add to the list
        products = soup.find_all('div', class_='ProductCard_productInfo__uZhFN')
        for product in products:
            title_element = product.find('div', class_='ProductCard_title__9M6wy')
            detail_element = product.find('div', class_='ProductCard_description__BQzle')  # Assuming this class holds details
            if title_element and detail_element:
                title = title_element.text.strip()
                detail = detail_element.text.strip()
                product_info.append((title, detail))

        # If we have reached 100 unique products, break the loop
        if len(product_info) >= 100:
            break

    # Print the number of unique products found
    print(f"Number of unique products found: {len(product_info)}")

    # Click on each product detail to open it in a new tab
    data = []  # List to store scraped data
    for idx, (title, detail) in enumerate(product_info[:100], start=1):
        print(f"{idx}. Title: {title}\n   Product: {detail}\n")
        try:
            # Find the product description element using the product title
            description_element = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.XPATH, f'//div[contains(@class, "ProductCard_title__9M6wy") and contains(text(), "{title}")]/ancestor::div[contains(@class, "ProductCard_productInfo__uZhFN")]'))
            )

            # Scroll the element into view and click it using JavaScript
            driver.execute_script("arguments[0].scrollIntoView(true);", description_element)
            driver.execute_script("arguments[0].click();", description_element)
            print("Opened product description.")
            # Wait for the new tab to open
            WebDriverWait(driver, 10).until(EC.number_of_windows_to_be(2))

            # Switch to the new tab
            driver.switch_to.window(driver.window_handles[1])
            print("Switched to new tab.")

            # Scraping the product description from the new tab using CSS selector
            product_description_element = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, '#__next > main > div > div:nth-child(1) > div > div.col-sm-5.col-md-5.col-lg-5 > div > div.PDPDetails_prodDescDetails__D7ddV > div.ProductDetails_container__0vRlj.ProductDetails_AS__WcrW_ > p.MuiTypography-root.MuiTypography-body1.ProductDetails_description__7hqm9.css-12a9y8i'))
            )
            product_description = product_description_element.text.strip()

            # Append scraped data to the list
            data.append(('Allen Solly', 'Men', title, detail, product_description))
            # Close the new tab
            driver.close()
            print("Closed new tab.")

            # Switch back to the original tab
            driver.switch_to.window(driver.window_handles[0])
            print("Switched back to the original tab.")

            # Wait for the page to reload
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.XPATH, f'//div[contains(@class, "ProductCard_title__9M6wy") and contains(text(), "{title}")]'))
            )
        except Exception as e:
            # Best effort: report the failure and continue with the next product
            print(f"Error occurred while processing product detail: {str(e)}")

    # Convert the scraped data into a pandas DataFrame
    df = pd.DataFrame(data, columns=['Brand', 'Category', 'Title', 'Product Detail', 'Product Description'])

    # Export the DataFrame to an Excel file
    df.to_excel('scraped_data.xlsx', index=False)
finally:
    # Always release the browser, even when scraping fails part-way
    driver.quit()


  1. 我删除了


  2. 我删除了


  3. 我更改了“加载更多”按钮的处理。我添加了一个变量 `max_pages`,用于跟踪要加载的页面数。每页包含 32 个产品。

  4. 我更改了页面滚动方法:原来的做法是一次性滚动到页面底部,这样不会触发每个产品的加载。您需要每次滚动一屏(一个视口高度),才能强制加载所有产品。



import time
import pandas as pd

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Set up the Selenium WebDriver for Chrome
driver = webdriver.Chrome()

# Navigate to the main shirts page. Without the explicit get() the browser
# stays on a blank page and every wait below times out.
url = 'https://allensolly.abfrl.in/c/men-shirts'
driver.get(url)
wait = WebDriverWait(driver, 10)  # shared explicit wait, 10 s timeout

# Wait for the popup to appear and close it. The close button is looked up
# through the popup element's shadow root and clicked via JavaScript.
popup_root = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "ct-web-popup-imageonly"))).shadow_root
close_button = popup_root.find_element(By.ID, "close")
driver.execute_script("arguments[0].click()", close_button)

# Click LOAD MORE until enough listing pages are loaded.
# The script later processes up to 100 products and each page contains 32,
# so three pages (96 products) are not enough; derive the page count instead.
target_products = 100
products_per_page = 32  # each page contains 32 products
max_pages = (target_products + products_per_page - 1) // products_per_page  # ceiling division

load_more_xpath = "//button[text()='LOAD MORE']"
load_more = wait.until(EC.visibility_of_all_elements_located((By.XPATH, load_more_xpath)))
print(f"first len(load_more): {len(load_more)}")
count = 1  # the first page is already displayed
while load_more and count < max_pages:
    # JavaScript click avoids the "could not be scrolled into view" failure
    # of a native click on this button.
    driver.execute_script("arguments[0].click()", load_more[0])
    # The loader overlay appears while the next page is fetched and
    # disappears once it has been rendered.
    wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div.loader-wrapper")))
    wait.until(EC.invisibility_of_element_located((By.CSS_SELECTOR, "div.loader-wrapper")))
    # Look the button up again without an explicit wait: find_elements
    # returns an empty list when the button is gone (no more pages), which
    # ends the loop cleanly instead of raising a timeout.
    load_more = [
        button
        for button in driver.find_elements(By.XPATH, load_more_xpath)
        if button.is_displayed()
    ]
    count += 1

# Scroll to the bottom one viewport at a time so that every product card
# scrolls through the visible area.
previous_position = -1
current_position = driver.execute_script("return window.scrollY + window.innerHeight")
height = driver.execute_script("return document.body.clientHeight")
# Stop at the bottom, or as soon as a scroll no longer moves the page; the
# second condition prevents an endless loop when the reported body height
# can never be reached by the scroll position.
while current_position < height and current_position != previous_position:
    previous_position = current_position
    driver.execute_script("window.scrollBy(0, window.innerHeight);")
    current_position = driver.execute_script("return window.scrollY + window.innerHeight")
    # Re-read the height: the page may grow while content is being loaded
    height = driver.execute_script("return document.body.clientHeight")

# Parse the rendered listing and collect a (title, detail) pair per product card
listing_soup = BeautifulSoup(driver.page_source, 'html.parser')

product_info = []
for card in listing_soup.find_all('div', class_='ProductCard_productInfo__uZhFN'):
    title_node = card.find('div', class_='ProductCard_title__9M6wy')
    # Assuming this class holds details
    detail_node = card.find('div', class_='ProductCard_description__BQzle')
    # Skip cards that lack either piece of information
    if not (title_node and detail_node):
        continue
    product_info.append((title_node.text.strip(), detail_node.text.strip()))

# Report how many products were collected
print(f"Number of unique products found: {len(product_info)}")

# Open each product from the listing, read its description from the tab that
# opens, and collect one row per product in `data`.
data = []  # List to store scraped data
main_window = driver.current_window_handle  # listing tab we always return to
for idx, (title, detail) in enumerate(product_info[:100], start=1):
    print(f"{idx}. Title: {title}\n   Product: {detail}\n")
    try:
        # Find the product card using the product title
        description_element = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, f'//div[contains(@class, "ProductCard_title__9M6wy") and contains(text(), "{title}")]/ancestor::div[contains(@class, "ProductCard_productInfo__uZhFN")]'))
        )

        # Scroll the element into view and click it using JavaScript
        driver.execute_script("arguments[0].scrollIntoView(true);", description_element)
        driver.execute_script("arguments[0].click();", description_element)
        print("Opened product description.")

        # Wait for the new tab to open (the product page is expected to open
        # in a second window), then switch to it
        WebDriverWait(driver, 10).until(EC.number_of_windows_to_be(2))
        new_window = next(handle for handle in driver.window_handles if handle != main_window)
        driver.switch_to.window(new_window)
        print("Switched to new tab.")

        # Scraping the product description from the new tab using CSS selector
        product_description_element = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, '#__next > main > div > div:nth-child(1) > div > div.col-sm-5.col-md-5.col-lg-5 > div > div.PDPDetails_prodDescDetails__D7ddV > div.ProductDetails_container__0vRlj.ProductDetails_AS__WcrW_ > p.MuiTypography-root.MuiTypography-body1.ProductDetails_description__7hqm9.css-12a9y8i'))
        )
        product_description = product_description_element.text.strip()

        # Append scraped data to the list
        data.append(('Allen Solly', 'Men', title, detail, product_description))

        # Close the new tab
        driver.close()
        print("Closed new tab.")

        # Switch back to the original tab
        driver.switch_to.window(main_window)
        print("Switched back to the original tab.")

        # Wait until the listing is available again
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, f'//div[contains(@class, "ProductCard_title__9M6wy") and contains(text(), "{title}")]'))
        )
    except Exception as exc:
        # Best effort: report the failure and continue with the next product
        print(f"Error occurred while processing product detail: {str(exc)}")
        # Recover to a known state: close any leftover product tab and return
        # to the listing, otherwise every following lookup would run against
        # the wrong page.
        for handle in driver.window_handles:
            if handle != main_window:
                driver.switch_to.window(handle)
                driver.close()
        driver.switch_to.window(main_window)

# The browser is no longer needed once all details are collected
driver.quit()

# Build a table from the scraped rows; one column per tuple position
column_names = ['Brand', 'Category', 'Title', 'Product Detail', 'Product Description']
df = pd.DataFrame(data, columns=column_names)

# Write the table to an Excel workbook without the index column
df.to_excel('scraped_data.xlsx', index=False)

