这是代码:
import os
from PIL import Image
import fitz
def pdf_to_tiffs(pdf_path: str, output_folder: str, *, dpi: int = 300,
                 threshold: int = 128) -> None:
    """Render every page of a PDF to a bilevel, CCITT T.6 compressed TIFF.

    One file per page is written to *output_folder*, named
    ``page_<n>.tif`` where ``<n>`` is the 1-based page number zero-padded
    to the width of the total page count.

    Args:
        pdf_path: Path of the PDF file to convert.
        output_folder: Directory that receives the TIFF files; it is
            created if it does not exist.
        dpi: Resolution used both to rasterize the page and to tag the
            TIFF (horizontal and vertical). Defaults to 300.
        threshold: Gray level (0-255) at or above which a pixel becomes
            white in the 1-bit output. Defaults to 128.

    Raises:
        ValueError: If *dpi* is not positive.
    """
    if dpi <= 0:
        raise ValueError(f"dpi must be positive, got {dpi!r}")

    # exist_ok avoids the check-then-create race of os.path.exists().
    os.makedirs(output_folder, exist_ok=True)

    # PDF user space is 72 units per inch, so this zoom factor makes the
    # rasterized pixmap genuinely `dpi` pixels per inch (merely tagging a
    # 72 DPI render as 300 DPI would not change its pixel dimensions).
    zoom = dpi / 72.0
    render_matrix = fitz.Matrix(zoom, zoom)

    pdf_document = fitz.open(pdf_path)
    try:
        # Width of the zero-padded page number in the file names.
        num_digits = len(str(len(pdf_document)))

        for page_number, page in enumerate(pdf_document, start=1):
            # Render directly to 8-bit grayscale without an alpha channel so
            # the sample buffer is exactly one byte per pixel.
            pix = page.get_pixmap(
                matrix=render_matrix, colorspace=fitz.csGRAY, alpha=False
            )
            gray = Image.frombytes("L", (pix.width, pix.height), pix.samples)

            # CCITT Group 4 (T.6) can only encode 1-bit images; use a hard
            # threshold rather than dithering to keep text edges crisp.
            bilevel = gray.point(
                lambda value: 255 if value >= threshold else 0, mode="1"
            )

            filename = f"page_{page_number:0{num_digits}d}.tif"
            tiff_path = os.path.join(output_folder, filename)

            # Pillow's name for CCITT T.6 is "group4" ("tiff_ccitt" selects
            # the older modified-Huffman RLE scheme instead).
            bilevel.save(
                tiff_path, format="TIFF", compression="group4", dpi=(dpi, dpi)
            )
    finally:
        # Release the document even if rendering or saving a page fails.
        pdf_document.close()
# Example usage: convert the sample PDF and write the TIFFs into the
# same directory that holds the PDF.
pdf_to_tiffs(
    pdf_path="C:\\Users\\ftp\\flask\\output\\90000026.pdf",
    output_folder="C:\\Users\\ftp\\flask\\output",
)
TIFF 的提取是完美的,但我无法实现 CCITT T.6 压缩,以及水平和垂直方向均为 300 DPI 的分辨率。