使用Python从网站下载csv数据

问题描述 投票:0回答:1

我正在尝试从此页面下载 CSV 数据:https://bancadatistatisticaoas.inail.it/analytics/saw.dll?Portal&PortalPath=%2Fshared%2FBDS%2F_portal%2FINF_Denunciati_totale_gestioni 。页面底部有一个“Esporta”(导出)链接,点击后依次选择“Dati”和“CSV”。我在 Python 中使用了下面的代码,但唯一下载到的是一个 HTML 文件。我哪里做错了?谢谢。

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Setup Chrome options: set the default download directory and turn off the
# per-download prompt via Chrome preferences.
chrome_options = Options()
chrome_options.add_experimental_option("prefs", {
  "download.default_directory": r"C:\Users\ColameoMarcello\Downloads",
  "download.prompt_for_download": False,
})

# Initialize the WebDriver
driver = webdriver.Chrome(options=chrome_options)

try:
    # Navigate to the page
    driver.get("https://bancadatistatisticaoas.inail.it/analytics/saw.dll?Portal&PortalPath=%2Fshared%2FBDS%2F_portal%2FINF_Denunciati_totale_gestioni")

    # Wait (up to 200 s) for the "Esporta" link to become clickable and click it.
    # NOTE(review): the id inside this XPath looks auto-generated by the
    # portal -- confirm it is stable across sessions/page versions.
    esporta_button = WebDriverWait(driver, 200).until(
        EC.element_to_be_clickable((By.XPATH, '//*[@id="d:dashboard~p:uiqor2i5pr25913u~r:cadc28dh9ejqrg6oLinks"]/tbody/tr/td/a'))
    )
    esporta_button.click()

    # Wait (up to 40 s) for the "Dati" menu item and click it
    dati_button = WebDriverWait(driver, 40).until(
        EC.element_to_be_clickable((By.XPATH, '//*[@id="popupMenuItem"]/table/tbody/tr/td[2]'))
    )
    dati_button.click()

    # Wait (up to 40 s) for the "CSV" menu item and click it.
    # NOTE(review): this XPath is character-for-character the same as the one
    # used for "Dati" above, so nothing in the locator distinguishes the CSV
    # entry from the previous menu item. This is likely why the CSV export is
    # never triggered -- verify, and prefer a locator that identifies the CSV
    # entry specifically (e.g. by its visible text).
    csv_button = WebDriverWait(driver, 40).until(
        EC.element_to_be_clickable((By.XPATH, '//*[@id="popupMenuItem"]/table/tbody/tr/td[2]'))
    )
    csv_button.click()

    # Wait for the download to finish (adjust time as needed)
    # This is a simple sleep; for a more robust solution, check the download directory for a new file
    # NOTE: the fixed 10-second pause does not verify that any file was
    # actually written before the browser is closed in the `finally` below.
    import time
    time.sleep(10)

finally:
    # Clean up by closing the browser, even if one of the waits above timed out
    driver.quit()
python selenium-webdriver
1个回答
0
投票

这是重构后可以正常运行的代码:

# Refactored export flow: open the dashboard, walk the "Esporta" menu down to
# the CSV entry, then block until the exported file has been written to disk.
# Relies on `driver`, `WebDriverWait`, `EC` and `By` from the script above.

# Local imports keep this snippet self-contained when pasted into the script.
import time
from pathlib import Path

# Must point at the same folder as the "download.default_directory" Chrome
# preference that was configured when the driver was created.
download_dir = Path(r"C:\Users\ColameoMarcello\Downloads")
# Remember what is already in the folder so the new export can be told apart.
files_before = set(download_dir.iterdir())

# Navigate to the page
driver.get("https://bancadatistatisticaoas.inail.it/analytics/saw.dll?Portal&PortalPath=%2Fshared%2FBDS%2F_portal%2FINF_Denunciati_totale_gestioni")
wait = WebDriverWait(driver, 90)

# Wait for the "Esporta" link and click it (located by name instead of a
# long, page-specific id path).
wait.until(EC.element_to_be_clickable((By.NAME, 'ReportLinkMenu'))).click()

# Wait for the data menu item and click it. The entry is labelled "Data" in
# the original snippet and "Dati" in the question, so accept either label.
# NOTE(review): presumably the portal localises this label -- confirm.
wait.until(EC.element_to_be_clickable((By.XPATH, "//td[text()='Data' or text()='Dati']"))).click()

# Wait for the "CSV" menu item and click it
wait.until(EC.element_to_be_clickable((By.XPATH, "//td[text()='CSV']"))).click()

# Wait for the download to finish before the caller closes the browser;
# quitting the driver right after the click can abort the transfer.
# Chrome keeps in-progress downloads under temporary names (".crdownload",
# sometimes ".tmp"), so the export is done once a new file without such a
# suffix exists and no new temporary file is left.
temp_suffixes = {".crdownload", ".tmp"}
download_timeout = 120  # seconds
deadline = time.monotonic() + download_timeout
while time.monotonic() < deadline:
    new_files = set(download_dir.iterdir()) - files_before
    if new_files and all(path.suffix not in temp_suffixes for path in new_files):
        break
    time.sleep(0.5)
else:
    # The loop ran out of time without ever seeing a finished file.
    raise TimeoutError(
        f"No finished download appeared in {download_dir} "
        f"within {download_timeout} seconds"
    )
© www.soinside.com 2019 - 2024. All rights reserved.