我正在尝试使用 Selenium 的 Chrome WebDriver 从亚马逊商品详情页(PDP)https://www.amazon.co.uk/SheaMoisture-Treatment-silicone-sulfate-transitioning/dp/B01HOD3ZVQ/ 提取价格。
尝试过:
我指的是下方所附截图中显示的价格。
[Buybox价格SS][1]
# NOTE(review): fragment quoted from the question. The except/finally clause
# that must follow this try is not shown, and the indentation has been lost.
try:
# Wait up to 20 seconds for the span with class "a-offscreen" nested directly
# under the "apexPriceToPay" span to be present in the DOM.
price_element = WebDriverWait(driver, 20).until(
EC.presence_of_element_located((By.XPATH, '//span[contains(@class, "apexPriceToPay")]/span[contains(@class, "a-offscreen")]'))
# Alternative locators, currently commented out:
#EC.presence_of_element_located((By.XPATH, '//*[@id="corePriceDisplay_desktop_feature_div"]/div/div/span[1]/span[1]'))
#EC.presence_of_element_located((By.XPATH, '//*[@id="corePrice_feature_div"]/div/div/span[1]/span[1]'))
)
[1]: https://i.stack.imgur.com/PIn1M.png
这里我使用的是 Edge;你也可以改用 Chrome,只需把 Edge 相关的导入和驱动替换为 Chrome 的对应项即可。
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.edge.options import Options
from selenium.webdriver.edge.service import Service
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.microsoft import EdgeChromiumDriverManager
# Scrape the RRP, current price, savings and unit price from one Amazon
# product page using Edge, then print them.

# Upper bound (seconds) on how long to wait for each price element to render.
WAIT_TIMEOUT_SECONDS = 10


def _wait_for_visible(wait, by, selector):
    """Block until the element matching (by, selector) is visible; return it.

    Raises selenium's TimeoutException if the element does not become
    visible within the wait's timeout.
    """
    return wait.until(EC.visibility_of_element_located((by, selector)))


options = Options()
driver_service = Service(EdgeChromiumDriverManager().install())
driver = webdriver.Edge(service=driver_service, options=options)
try:
    driver.get("https://www.amazon.co.uk/SheaMoisture-Treatment-silicone-sulphate-transitioning/dp/B01HOD3ZVQ/")
    # Explicit waits return as soon as each element is rendered instead of
    # always pausing for a fixed 10 seconds, and fail loudly on timeout.
    wait = WebDriverWait(driver, WAIT_TIMEOUT_SECONDS)

    # Extract RRP
    rrp_element = _wait_for_visible(
        wait, By.CSS_SELECTOR, "span.a-text-price[data-a-color='secondary']"
    )
    rrp = rrp_element.text

    # Extract Price
    price_element = _wait_for_visible(wait, By.CSS_SELECTOR, "span.apexPriceToPay")
    price = price_element.text

    # Extract Savings
    savings_element = _wait_for_visible(
        wait,
        By.XPATH,
        "//td[contains(text(), 'You Save:')]/following-sibling::td/span[@class='a-color-price']",
    )
    savings = savings_element.text

    # Extract Price per 100 ml
    price_per_100ml_element = _wait_for_visible(
        wait, By.CSS_SELECTOR, "span.aok-relative span.a-color-price"
    )
    price_per_100ml = price_per_100ml_element.text
finally:
    # Always shut the browser and driver process down, even if a lookup fails.
    driver.quit()

print(f"RRP: {rrp}")
print(f"Price: {price}")
print(f"You Save: {savings}")
print(f"Price per 100 ml: {price_per_100ml}")