我尝试通过文件的 URL 下载一个 Excel 文件,但得到的只是一段 JS 代码。我不知道怎样才能拿到真正的文件,而不只是 JS 代码。
我的代码:
# -*- coding: utf-8 -*-
"""Open an .xls URL in Chrome via Selenium and print the page source.

This is the script from the question: it navigates to the file URL and
prints ``client.page_source`` immediately afterwards.
"""
from selenium import webdriver
import io
import re

# Location of the chromedriver executable on the local machine.
path = 'C:/Program Files (x86)/Google/Chrome/Application/chromedriver.exe'
# Direct URL of the Excel file to fetch.
download_url = "http://samr.cfda.gov.cn/directory/web/WS01/images/localgov/gov_1540501658076.xls"

chrome_options = webdriver.ChromeOptions()
# chrome_options.add_argument('--headless')  # headless mode
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--no-sandbox')
# Chrome profile preferences: the popups setting is presumably meant to
# suppress the download prompt; downloads are directed to d:\new.
prefs = {'profile.default_content_settings.popups': 0, 'download.default_directory': 'd:\\new'}
chrome_options.add_experimental_option('prefs', prefs)
client = webdriver.Chrome(path, chrome_options=chrome_options)

try:
    client.get(download_url)
# NOTE(review): this is the builtin TimeoutError; Selenium reports timeouts
# with selenium.common.exceptions.TimeoutException, so this handler is
# unlikely to fire -- confirm which exception was intended.
except TimeoutError:
    print("time too long")

# Printed right after navigation, with no wait for a download to complete.
print(client.page_source)
client.quit()
非常感谢任何帮助。
虽然打印的输出不会改变,但可以在加载页面之后添加一个短暂的等待,以便文件有时间完成下载:
# -*- coding: utf-8 -*-
"""Download an .xls file through Chrome via Selenium.

Navigates to the file URL, then pauses briefly so the browser has time to
write the download into the configured directory before it is closed.
"""
import time

from selenium import webdriver
from selenium.common.exceptions import TimeoutException

# Location of the chromedriver executable on the local machine.
path = 'C:/Program Files (x86)/Google/Chrome/Application/chromedriver.exe'
# Direct URL of the Excel file to fetch.
download_url = "http://samr.cfda.gov.cn/directory/web/WS01/images/localgov/gov_1540501658076.xls"

chrome_options = webdriver.ChromeOptions()
# chrome_options.add_argument('--headless')  # headless mode
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--no-sandbox')
# Chrome profile preferences: the popups setting is presumably meant to
# suppress the download prompt; downloads are directed to d:\new.
prefs = {'profile.default_content_settings.popups': 0, 'download.default_directory': 'd:\\new'}
chrome_options.add_experimental_option('prefs', prefs)
# NOTE(review): newer Selenium releases expect `options=` instead of
# `chrome_options=`; adjust if the installed version rejects this keyword.
client = webdriver.Chrome(path, chrome_options=chrome_options)

try:
    try:
        client.get(download_url)
        # Fixed grace period so the download can finish before quit().
        time.sleep(5)
    # Selenium raises its own TimeoutException; the builtin TimeoutError is
    # still caught so the original handler's behavior is preserved.
    except (TimeoutError, TimeoutException):
        print("time too long")
    # The page source is unchanged by the wait; the file lands on disk.
    print(client.page_source)
finally:
    # Always shut the browser/driver down, even if something above raised.
    client.quit()