Download multiple files from an array using Python 3

Problem description · votes: 0 · answers: 2
# Import desired libraries -- make HTTP requests / query DOM elements

import requests
from bs4 import BeautifulSoup as bs
import zipfile

# Make a request to the NGA site; the response (DOM) is stored in r
r = requests.get('https://earth-info.nga.mil/index.php?dir=coordsys&action=gars-20x20-dloads')

# Parse the response using the Beautiful Soup library and the default HTML parser
soup = bs(r.content, 'html.parser')
# Output is pure RAW HTML DOM
# print(soup)

# Scan the DOM tree and collect the desired zip-file hrefs into a list for later downloading -- files list
files = ['https://earth-info.nga.mil/' + i['href'] for i in soup.select('area')]
# print(files)



# Download a single file from the list
# firstUrl = files[0]

# Download multiple files from the list
for file in files:
    r = requests.get(file, stream=True)
    # Build the file name from the last URL segment, not from the response object
    save_path = '/Users/iga0779/Downloads/%s.zip' % file.split('/')[-1]
    filex = open(save_path, 'wb')
    filex.write(r.content)  # the response body holds the zip bytes
    filex.close()
I'm currently stuck on this next step. I picked my Downloads directory as the place I want the files to go, but I'm fairly new to this and not sure I'm writing to the directory correctly.
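For reference, a minimal sketch of deriving a file name from each URL and joining it onto a download directory, using only the standard library. It assumes the URLs end in a clean name segment (as the answers below suggest) and reuses the Downloads path from the question; save_path_for is a hypothetical helper, not part of the original code:

import os
from urllib.parse import urlparse

download_dir = '/Users/iga0779/Downloads'  # destination from the question

def save_path_for(url):
    # Take the last path segment of the URL as the file name,
    # e.g. '.../180W60N' -> '/Users/iga0779/Downloads/180W60N.zip'
    name = os.path.basename(urlparse(url).path)
    return os.path.join(download_dir, name + '.zip')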

python python-3.x web-scraping beautifulsoup python-requests
2 Answers
0 votes
You could also try this:

# Import desired libraries -- make HTTP requests / query DOM elements
import requests
from bs4 import BeautifulSoup as bs
import zipfile
import os
from io import BytesIO

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0",
    "Accept-Encoding": "*",
    "Connection": "keep-alive"
}

# Make a request to the NGA site; the response is stored in r
r = requests.get('https://earth-info.nga.mil/index.php?dir=coordsys&action=gars-20x20-dloads')

# Parse the response using Beautiful Soup and the default HTML parser
soup = bs(r.content, 'html.parser')

# Collect the desired zip-file hrefs into a list for downloading
files = ['https://earth-info.nga.mil/' + i['href'] for i in soup.select('area')]
# print(files)

mydirname = r'C:\Users\User\Documents\Downloads'

for url in files:
    r = requests.get(url, headers=headers, stream=True)
    if r.status_code == 200:
        newfoldername = r.url.split('/')[-1]
        path_ = os.path.join(mydirname, newfoldername)
        # Create the per-file folder under the download directory if needed
        if not os.path.exists(path_):
            os.mkdir(path_)
        # Extract the zip straight from memory into the target folder
        zipfile.ZipFile(BytesIO(r.content)).extractall(path_)

print('Finished...')

You can give this a go.
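Since the answer above unpacks each response directly from memory, a cheap validity check before extracting can catch error pages that come back with status 200 but are not real zip archives. A minimal sketch (extract_zip is a hypothetical helper, not part of the answer):

from io import BytesIO
import zipfile

def extract_zip(content, dest):
    # content: raw bytes of the HTTP response; dest: target folder
    payload = BytesIO(content)
    if not zipfile.is_zipfile(payload):  # cheap check that the bytes look like a zip archive
        return False
    zipfile.ZipFile(payload).extractall(dest)
    return True

Inside the loop above, zipfile.ZipFile(BytesIO(r.content)).extractall(path_) would then become extract_zip(r.content, path_).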

0 votes
And to also download your files:

for file in files:
    with requests.get(file, stream=True) as r:
        r.raise_for_status()

        with open(f'tmpZip/{file.split("/")[-1]}.zip', 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
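Note that open() raises FileNotFoundError if the tmpZip/ folder does not exist yet; the answer does not create it, so a small standard-library guard before the loop helps:

import os

os.makedirs('tmpZip', exist_ok=True)  # create the download folder once; no-op if it already exists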

Example
import requests
from bs4 import BeautifulSoup as bs
import zipfile

# Make a request to the NGA site; the response (DOM) is stored in r
r = requests.get('https://earth-info.nga.mil/index.php?dir=coordsys&action=gars-20x20-dloads')

# Parse the response using the Beautiful Soup library and the default HTML parser
soup = bs(r.content, 'html.parser')
# Output is pure RAW HTML DOM
# print(soup)

# Scan the DOM tree and collect the desired zip-file hrefs into a list for later downloading -- files list
files = ['https://earth-info.nga.mil/' + i['href'] for i in soup.select('area')]
# print(files)

def download_file(file):
    with requests.get(file, stream=True) as r:
        r.raise_for_status()
        with open(f'tmpZip/{file.split("/")[-1]}.zip', 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192): 
                f.write(chunk)
    return f'File: {file.split("/")[-1]}.zip -> downloaded'

# files sliced to the first three URLs from the result; delete [:3] to get all
for file in files[:3]:
    print(download_file(file))

Output
File: 180W60N.zip -> downloaded
File: 180W40N.zip -> downloaded
File: 180W20N.zip -> downloaded
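Because download_file is self-contained, one possible extension (an assumption on my part, not something the answer shows) is to run the downloads in parallel with the standard-library thread pool:

from concurrent.futures import ThreadPoolExecutor

# Map download_file over the URLs with a few worker threads
with ThreadPoolExecutor(max_workers=4) as pool:
    for result in pool.map(download_file, files[:3]):
        print(result)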

