如何自动从网页下载数据?

问题描述 投票:0回答:1

我正在尝试使用 Python 和 Selenium 自动从网站下载 Excel 文件。我尝试过用 XPath 定位元素,但页面结构似乎比我的代码所假设的更复杂,例如按钮里还嵌有选择器。任何建议都会很有帮助。

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
import time

# Locators for the controls that are clicked, in order, to reach the export.
xpath_ship_movements = "//*[@id='Traffic']"
# The author also recorded an absolute XPath for this link
# (/html/body/form/div/div[2]/div[1]/div[2]/ul/li[2]/a); the id-based
# locator above is used instead because absolute paths are brittle.
xpath_days = '//*[@id="rptGrid"]/div/div[2]/div[1]/div[2]/a[1]'
xpath_tools = '//*[@id="rptGrid"]/div/div[2]/div[1]/div[2]/a[3]'
xpath_export = '//*[@id="MSQ-WEB-0001"]'
url = "https://qships.tmr.qld.gov.au/webx/#"


driver = webdriver.Edge()

# Everything after the browser is started runs under try/finally so the
# browser and its driver process are shut down even when a wait times out.
try:
    # Open the webpage
    driver.get(url)
    driver.maximize_window()

    # Each wait below gives up after 20 seconds.
    wait = WebDriverWait(driver, 20)

    # Wait for and click the "Ship Movements" button
    ship_movements_button = wait.until(
        EC.element_to_be_clickable((By.XPATH, xpath_ship_movements))
    )
    ship_movements_button.click()

    # Wait for and click the "Next 7 days" button
    next_7_days_button = wait.until(
        EC.element_to_be_clickable((By.XPATH, xpath_days))
    )
    next_7_days_button.click()

    # Wait for and click the Tools button
    tools_button = wait.until(
        EC.element_to_be_clickable((By.XPATH, xpath_tools))
    )
    tools_button.click()

    # Wait for and click the Export to Excel option
    export_to_excel = wait.until(
        EC.element_to_be_clickable((By.XPATH, xpath_export))
    )
    export_to_excel.click()

    # Fixed pause to give the browser time to finish the download; nothing
    # here verifies that the file actually arrived.
    time.sleep(10)
finally:
    # Close the browser
    driver.quit()
python selenium-webdriver automation download
1个回答
0
投票

我认为您需要在本地计算机上安装用于浏览器的 WebDriver。例如,如果您使用的是 MS Edge,请从 Microsoft Edge WebDriver 页面下载并安装 WebDriver。

安装完成后,尝试下面修改后的代码(记得替换为已安装驱动的路径):

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

def download_excel_file(url, xpath_ship_movements, xpath_days, xpath_tools,
                        xpath_export, driver_path=None):
    """Drive Edge through the page's export workflow to download a file.

    Opens ``url``, then waits for and clicks four elements in order: the
    "Ship Movements" button, the "Next 7 days" button, the Tools button and
    the "Export to Excel" option. Afterwards it pauses for a fixed ten
    seconds so the browser can finish the download, then closes the browser.

    Args:
        url: Address of the page to open.
        xpath_ship_movements: XPath of the "Ship Movements" button.
        xpath_days: XPath of the "Next 7 days" button.
        xpath_tools: XPath of the Tools button.
        xpath_export: XPath of the "Export to Excel" option.
        driver_path: Optional path to ``msedgedriver``. When omitted,
            Selenium locates (or fetches) a matching driver itself.

    Returns:
        None. Any exception is caught and reported on stdout, so callers
        cannot tell success from failure by the return value. The success
        message only means every click went through; the downloaded file
        itself is not checked.
    """
    # Click targets in the order the workflow requires.
    click_sequence = (
        xpath_ship_movements,  # "Ship Movements" button
        xpath_days,            # "Next 7 days" button
        xpath_tools,           # Tools button
        xpath_export,          # "Export to Excel" option
    )

    try:
        # The ``executable_path`` keyword of ``webdriver.Edge`` no longer
        # exists in current Selenium 4 releases; an explicit driver location
        # has to be supplied through a Service object instead.
        if driver_path is None:
            driver = webdriver.Edge()
        else:
            from selenium.webdriver.edge.service import Service

            driver = webdriver.Edge(service=Service(executable_path=driver_path))

        try:
            # Open the webpage
            driver.get(url)
            driver.maximize_window()

            # Each wait below gives up after 20 seconds.
            wait = WebDriverWait(driver, 20)

            for xpath in click_sequence:
                target = wait.until(
                    EC.element_to_be_clickable((By.XPATH, xpath))
                )
                target.click()

            # Wait for the export to complete (adjust the sleep as needed)
            time.sleep(10)
        finally:
            # Always close the browser, including when a step above failed,
            # so no Edge or driver process is left behind.
            driver.quit()

        print("Excel file downloaded successfully.")
    except Exception as e:
        print(f"Error occurred: {str(e)}")

# Example usage: the target page plus the XPath of each control that has to
# be clicked, listed in click order.
url = "https://qships.tmr.qld.gov.au/webx/#"
xpath_ship_movements = "//*[@id='Traffic']"
xpath_days = '//*[@id="rptGrid"]/div/div[2]/div[1]/div[2]/a[1]'
xpath_tools = '//*[@id="rptGrid"]/div/div[2]/div[1]/div[2]/a[3]'
xpath_export = '//*[@id="MSQ-WEB-0001"]'

# Run the export workflow with the settings above.
download_excel_file(
    url=url,
    xpath_ship_movements=xpath_ship_movements,
    xpath_days=xpath_days,
    xpath_tools=xpath_tools,
    xpath_export=xpath_export,
)
© www.soinside.com 2019 - 2024. All rights reserved.