我多次尝试将图像下载并保存为 PDF、PPT 或 ZIP 文件,但始终没有找到正确的解决方案,希望您能帮助我。下面附上完整代码,您可以直接修改。
我正在尝试抓取 slideshare.net,并将图像分别输出为 PDF、PPT 和 ZIP 文件,但在实现过程中遇到了很多错误。我是在 Flask 应用程序中执行此操作的。
我不确定 Stack Overflow 是否鼓励提供完整的代码。这里的抓取部分我已经使用 BeautifulSoup 模块完成,其余的解释以注释的形式写在代码中。
import os
import zipfile as zp
from urllib.parse import urljoin, urlparse

import requests
from bs4 import BeautifulSoup as bs
from fpdf import FPDF
from PIL import Image
url = '<Your URL here>'

# Directory the downloaded images are stored in before being exported.
IMAGE_DIR = 'images'
# Seconds to wait for each HTTP request so a stalled server cannot hang the script.
REQUEST_TIMEOUT = 30


def file_name_from_url(img_url):
    """Return the last path component of *img_url*.

    The query string and fragment are ignored and trailing slashes are
    stripped, so ``https://host/a/b.jpg?x=1`` yields ``b.jpg``. A URL with
    no slash at all (``logo.png``) is returned unchanged. The result is an
    empty string when the URL has no path.
    """
    path = urlparse(img_url).path.rstrip('/')
    return os.path.basename(path)


def unique_file_names(names):
    """Return *names* with duplicates renamed to ``stem_1.ext``, ``stem_2.ext``, ...

    Two images with the same base name would otherwise overwrite each other
    on disk. Order and length of the input are preserved.
    """
    used = set()
    unique = []
    for name in names:
        stem, ext = os.path.splitext(name)
        candidate = name
        suffix = 0
        while candidate in used:
            suffix += 1
            candidate = f'{stem}_{suffix}{ext}'
        used.add(candidate)
        unique.append(candidate)
    return unique


def collect_images(page_url):
    """Fetch *page_url* and return ``(img_urls, file_names)`` for its <img> tags.

    ``img_urls`` holds absolute URLs (relative ``src`` values are resolved
    against the page URL); ``file_names`` holds one unique local file name
    per URL, in the same order.

    Raises:
        requests.HTTPError: if the page itself cannot be fetched.
    """
    response = requests.get(page_url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    soup = bs(response.content, 'html.parser')

    img_urls = []
    file_names = []
    for tag in soup.find_all('img'):
        src = tag.get('src')
        # Skip tags without a source and inline data URIs (nothing to download).
        if not src or src.startswith('data:'):
            continue
        img_urls.append(urljoin(page_url, src))
        # Fall back to a generated name when the URL has no usable path part.
        file_names.append(file_name_from_url(src) or f'image_{len(img_urls)}')
    return img_urls, unique_file_names(file_names)


def download_images(img_urls, file_names, directory=IMAGE_DIR):
    """Download every URL into *directory* and return the saved file paths.

    The directory is created BEFORE any file is written. Downloads that
    return an HTTP error status are skipped with a message instead of being
    written to disk as if they were images.
    """
    os.makedirs(directory, exist_ok=True)
    saved_paths = []
    for img_url, file_name in zip(img_urls, file_names):
        response = requests.get(img_url, timeout=REQUEST_TIMEOUT)
        if not response.ok:
            print(f'Skipping {img_url}: HTTP {response.status_code}')
            continue
        path = os.path.join(directory, file_name)
        with open(path, 'wb') as img_file:
            img_file.write(response.content)
        saved_paths.append(path)
    return saved_paths


def save_pdf(image_paths, pdf_path='images.pdf'):
    """Write a PDF with one page per image, scaled to the page width minus margins."""
    pdf = FPDF()
    for path in image_paths:
        pdf.add_page()
        pdf.image(path, x=10, y=10, w=pdf.w - 20)
    pdf.output(pdf_path, 'F')


def save_pptx(image_paths, pptx_path='images.pptx'):
    """Write a PowerPoint file with one slide per image.

    Requires the python-pptx package. The original script used
    ``Presentation`` and ``Inches`` without importing them; they are imported
    here so that the dependency is only needed when a PPTX is produced.
    """
    from pptx import Presentation
    from pptx.util import Inches

    prs = Presentation()
    # Index 6 is the blank layout in the default template (5 is "Title Only").
    blank_layout = prs.slide_layouts[6]
    for path in image_paths:
        slide = prs.slides.add_slide(blank_layout)
        slide.shapes.add_picture(
            path, Inches(1), Inches(1), width=Inches(8), height=Inches(6)
        )
    prs.save(pptx_path)


def save_zip(image_paths, zip_path='images.zip'):
    """Write a ZIP archive containing every image, stored by base name only."""
    with zp.ZipFile(zip_path, 'w') as zipf:
        for path in image_paths:
            zipf.write(path, os.path.basename(path))


def main():
    """Scrape the images from ``url`` and export them as PDF, PPTX and ZIP."""
    img_urls, file_names = collect_images(url)
    image_paths = download_images(img_urls, file_names)
    if not image_paths:
        print('No images were downloaded; nothing to export.')
        return
    save_pdf(image_paths)
    save_pptx(image_paths)
    save_zip(image_paths)


if __name__ == '__main__':
    main()