我正在尝试抓取网站 https://support.sw.siemens.com/en-US/ 。登录网站后,会弹出一个 cookie 同意模态窗口(modal,见截图 Screenshot)。
如何处理?
我尝试用 XPath 定位到弹窗中的一个按钮并单击它,结果出现以下错误:
2024-10-02 22:24:13,090 - 错误 - 导航错误:消息:元素单击被拦截:元素 在点 (465, 110) 处不可单击。其他元素将收到点击:
import logging
import time # For time tracking
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
import os
from bs4 import BeautifulSoup
from docx import Document
# Sign-in entry point and the local directory name used for downloaded pages.
LOGIN_URL = "https://support.sw.siemens.com/signin"
DOWNLOAD_DIR = 'downloaded_pages'

# Root logger setup: INFO and above go both to "web_scraping.log" and to the
# console stream, each record prefixed with timestamp and level.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler("web_scraping.log"),
        logging.StreamHandler(),
    ],
)
def init_web_driver():
    """
    Build a Chrome WebDriver and hand it back to the caller.

    The chromedriver binary path is obtained from ``ChromeDriverManager().install()``
    and wrapped in a ``Service``. The browser is started with default
    ``ChromeOptions`` (headless mode is not enabled).

    :return: The ``webdriver.Chrome`` instance.
    """
    chrome_options = webdriver.ChromeOptions()
    # To run without a visible window, add the "--headless" argument to
    # chrome_options here.
    driver_path = ChromeDriverManager().install()
    chrome_service = Service(driver_path)
    return webdriver.Chrome(service=chrome_service, options=chrome_options)
def login(driver, email, password):
    """
    Sign in through the two-step form at ``LOGIN_URL``.

    The email is typed into the element with id ``username`` and the password
    into the element named ``password``; after each one the element named
    ``action`` is clicked. The login counts as successful once the current URL
    contains ``support.sw.siemens.com``.

    :param driver: The Selenium WebDriver instance.
    :param email: Account email typed into the first form step.
    :param password: Account password typed into the second form step.
    :raises Exception: Any error raised during the steps is logged and
        re-raised after the driver has been quit.
    """
    driver.get(LOGIN_URL)
    try:
        # Each step: wait (up to 20 s) for the field to be clickable, type the
        # value, then submit via the "action" button.
        form_steps = (
            ((By.ID, "username"), email),
            ((By.NAME, "password"), password),
        )
        for field_locator, value in form_steps:
            field = WebDriverWait(driver, 20).until(
                EC.element_to_be_clickable(field_locator)
            )
            field.send_keys(value)
            submit_button = driver.find_element(By.NAME, "action")
            submit_button.click()

        # Give the post-login redirect up to 30 s to land on the support site.
        landed_on_support = EC.url_contains("support.sw.siemens.com")
        WebDriverWait(driver, 30).until(landed_on_support)
        logging.info("Login successful")
    except Exception as err:
        logging.error(f"Error during login: {err}")
        driver.quit()
        raise
def navigate(driver):
    """
    Dismiss the cookie consent banner, then click the products sub-navigation button.

    The consent banner's buttons live inside the shadow root attached to the
    ``usercentrics-root`` element, so a document-level XPath/CSS lookup cannot
    reach them. While the banner is open it receives the clicks aimed at the
    page, which is what produces "element click intercepted" on the
    navigation button. The banner is therefore accepted first, through
    ``shadow_root``.

    Dismissing the banner is best-effort: if it never shows up (for example
    because consent was already stored) a warning is logged and navigation
    continues.

    :param driver: The Selenium WebDriver instance (expected to be logged in).
    :raises TimeoutException: If ``button#products-subnav`` does not become
        clickable within 20 seconds.
    """
    # Local import keeps this function self-contained; selenium is already a
    # dependency of this file.
    from selenium.common.exceptions import (
        NoSuchElementException,
        NoSuchShadowRootException,
        TimeoutException,
        WebDriverException,
    )

    def find_accept_button(drv):
        # The host element is in the regular DOM; the button is only
        # reachable through the host's shadow root.
        shadow_host = drv.find_element(By.ID, "usercentrics-root")
        return shadow_host.shadow_root.find_element(
            By.CSS_SELECTOR, "button[data-testid='uc-accept-all-button']"
        )

    # An explicit wait replaces the former driver.implicitly_wait(10):
    # Selenium advises against mixing implicit and explicit waits, and an
    # implicit wait would stay active on the driver after this call.
    banner_wait = WebDriverWait(
        driver,
        10,
        ignored_exceptions=(NoSuchElementException, NoSuchShadowRootException),
    )
    try:
        banner_wait.until(find_accept_button).click()
    except TimeoutException:
        logging.warning("Accept All Cookies button not found")
    except WebDriverException as err:
        # Keep the original best-effort behaviour, but say what went wrong
        # instead of swallowing every exception with a bare ``except``.
        logging.warning("Could not dismiss the cookie banner: %s", err)

    WebDriverWait(driver, 20).until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, "button#products-subnav"))
    ).click()
def main(email, password):
    """
    Run the scraping session end to end and log how long it took.

    Creates the driver, logs in and navigates. Any ``Exception`` raised by
    those steps is logged rather than propagated, and the driver is quit in
    all cases before the elapsed time is logged.

    :param email: Account email passed to ``login``.
    :param password: Account password passed to ``login``.
    """
    started_at = time.time()
    browser = init_web_driver()
    try:
        login(browser, email, password)
        navigate(browser)
        # scrape_links(browser) would run here; it is currently disabled.
    except Exception as err:
        logging.error(f"An error occurred: {err}")
    finally:
        browser.quit()
        elapsed = time.time() - started_at
        logging.info(f"Total time taken: {elapsed:.2f} seconds")
if __name__ == "__main__":
    # Script entry point: fill in both credentials below before running.
    email = ""     # Replace with your email
    password = ""  # Replace with your password
    main(email, password)
我知道该网站需要登录;不过,在隐私(无痕)模式下打开 https://support.sw.siemens.com/en-US/ 时,也能看到同样的弹出窗口。
我需要一段能够自动单击“接受 cookie”按钮的代码。
def handle_cookies(driver, action, timeout=10):
    """
    Handle the cookies consent dialog.

    The dialog's buttons live in the shadow root attached to the
    ``usercentrics-root`` element, so they are looked up through
    ``shadow_root`` rather than from the document.

    :param driver: The Selenium WebDriver instance.
    :param action: The action to perform on the cookies dialog ('accept' or 'deny').
    :param timeout: Maximum number of seconds to wait for the dialog button
        to become available before giving up.
    :raises ValueError: If ``action`` is neither 'accept' nor 'deny'.
    :raises NoSuchElementException: If the button cannot be located within
        ``timeout`` seconds.
    """
    # Validate the argument before touching the browser so that a bad call
    # fails fast and never depends on the page state.
    if action == 'accept':
        selector = "button[data-testid='uc-accept-all-button']"
    elif action == 'deny':
        selector = "button[data-testid='uc-deny-all-button']"
    else:
        raise ValueError("Invalid cookies action. Use 'accept' or 'deny'.")

    # Local import keeps this function self-contained; selenium is already a
    # dependency of this file.
    from selenium.common.exceptions import (
        NoSuchElementException,
        NoSuchShadowRootException,
        TimeoutException,
    )

    def locate_button(drv):
        shadow_host = drv.find_element(By.ID, "usercentrics-root")
        shadow_root = shadow_host.shadow_root
        return shadow_root.find_element(By.CSS_SELECTOR, selector)

    # The dialog may not be rendered yet when this is called, so poll until
    # the host, its shadow root and the button all exist.
    wait = WebDriverWait(
        driver,
        timeout,
        ignored_exceptions=(NoSuchElementException, NoSuchShadowRootException),
    )
    try:
        cookies_button = wait.until(locate_button)
    except TimeoutException as err:
        # Keep the exception type callers already had to handle when the
        # dialog is missing.
        raise NoSuchElementException(
            f"Cookies dialog button not found within {timeout} seconds: {selector}"
        ) from err
    cookies_button.click()
cookie 弹出窗口是在 Shadow DOM 中渲染的,并且会拦截文档上的所有点击事件。Shadow DOM 中的元素是被封装的,无法通过常规的 DOM 遍历方法(例如文档级的 XPath)访问,因此需要先通过 shadow_host.shadow_root 取得 shadow root,再在其中查找按钮。