目前,我和我的搭档正在编写一个函数,用于将 LaTeX 数学表达式转换为 PNG 文件,编程语言是 Python,LaTeX 编译器是 XeLaTeX。下面是我的代码(其中有一些标识符是用我的母语写的,所以看起来可能有点奇怪):
\`from sympy import \*
import random
import os
import subprocess
#import pdf2image
from pdf2image import convert_from_path
import cv2
from concurrent.futures import ProcessPoolExecutor, as_completed
import pdfplumber
import pymupdf
import wand
from wand.image import Image
#PRACTICE MODE:
#1.TOPIC: TRIGONOMETRY
#Question type 1: Theory
#LaTeX -> PDF: self-written function that compiles a .tex file into the corresponding PDF
def compile_latex(file_path):
    """Compile a LaTeX file with XeLaTeX and return the path of the PDF.

    XeLaTeX is started inside the directory that contains ``file_path`` so
    the PDF and the auxiliary files are written next to the source. The
    working directory of the calling process is left untouched.

    Args:
        file_path: Path of the ``.tex`` file to compile.

    Returns:
        ``file_path`` with its extension replaced by ``.pdf``.

    Raises:
        SystemExit: With exit code 1 when XeLaTeX cannot be started, exits
            with a non-zero status, or the expected PDF does not exist
            afterwards.
    """
    tex_path = os.path.abspath(file_path)
    try:
        subprocess.run(
            # nonstopmode + halt-on-error: never wait for keyboard input
            # when the document contains an error; fail immediately instead.
            ['xelatex', '-interaction=nonstopmode', '-halt-on-error', tex_path],
            # Run in the source directory without calling os.chdir(), which
            # would change the working directory of the whole process.
            cwd=os.path.dirname(tex_path),
            check=True,
        )
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing xelatex executable or source directory.
        print("Error compiling LaTeX file", e)
        raise SystemExit(1) from e
    # splitext only touches the extension; str.replace would also rewrite
    # any other ".tex" occurring inside the path.
    pdf_file_path = os.path.splitext(file_path)[0] + '.pdf'
    if not os.path.exists(pdf_file_path):
        print("PDF file was not created")
        raise SystemExit(1)
    return pdf_file_path
def convert_pdf_to_png(pdf_file_path, resolution=240, quality=80, trim=False):
    """Render a PDF file to a PNG file stored next to it.

    Conversion is best-effort: any error is printed and the intended PNG
    path is still returned, so callers that need certainty should check
    that the file exists.

    Args:
        pdf_file_path: Path of the PDF to render.
        resolution: Resolution passed to Wand's ``Image`` when reading the
            PDF.
        quality: Value assigned to ``Image.compression_quality`` before
            saving.
        trim: When true, call ``Image.trim()`` before saving so the uniform
            border around the content is cropped away instead of keeping
            the whole page.

    Returns:
        ``pdf_file_path`` with its extension replaced by ``.png``.
    """
    # splitext only swaps the extension; str.replace would also rewrite a
    # ".pdf" occurring elsewhere in the path (e.g. in a directory name).
    png_file_path = os.path.splitext(pdf_file_path)[0] + '.png'
    try:
        with Image(filename=pdf_file_path, resolution=resolution) as img:
            if trim:
                img.trim()
            img.compression_quality = quality
            img.save(filename=png_file_path)
        print("PDF successfully converted into PNG:", png_file_path)
    except Exception as e:
        # Deliberately broad: report the problem and let a batch continue.
        print("Error converting PDF to PNG: ", e)
    return png_file_path
def convert_multiple_pdf_to_png(pdf_files):
    """Convert several PDF files to PNG using a pool of worker processes.

    Args:
        pdf_files: Iterable of PDF paths; each one is handed to
            ``convert_pdf_to_png`` in a separate task.

    Returns:
        List of the values returned by ``convert_pdf_to_png``, in the order
        in which the tasks finished (not the order of ``pdf_files``).
    """
    with ProcessPoolExecutor() as pool:
        pending = [pool.submit(convert_pdf_to_png, path) for path in pdf_files]
        # as_completed yields each future as soon as its task is done.
        finished = [job.result() for job in as_completed(pending)]
    return finished
#Note!: This is the function I wrote myself to compile individual image files. Please read the operating principle.
def MakeFile(phuongan, modauchofilelatex, endtext, count,
             folder="C:/Users/HP/Downloads/001_QO_folder"):
    """Render one question and its answer options as separate PNG images.

    ``phuongan[0]`` is written to ``001_Q0_<count>_QUES.tex`` and every
    following entry ``phuongan[i]`` to ``001_Q0_<count>_ANS<i>.tex``. Each
    file is compiled with ``compile_latex`` and the resulting PDF is turned
    into a PNG with ``convert_pdf_to_png``.

    Args:
        phuongan: Sequence of LaTeX snippets; the question first, then the
            answer options.
        modauchofilelatex: Text written before each snippet (the LaTeX
            preamble / document opening).
        endtext: Text written after each snippet (the document closing).
        count: Question number used in the generated file names.
        folder: Directory in which the ``.tex``, ``.pdf`` and ``.png`` files
            are created.
    """
    for i, content in enumerate(phuongan):
        # Index 0 is the question, every later index is an answer option.
        if i == 0:
            tex_name = "001_Q0_%s_QUES.tex" % (count,)
        else:
            tex_name = "001_Q0_%s_ANS%s.tex" % (count, i)
        tex_path = os.path.join(folder, tex_name)
        with open(tex_path, 'w', encoding='utf-8') as tex_file:
            tex_file.write(modauchofilelatex + content + endtext)
        pdf_path = compile_latex(tex_path)
        # Answers go through the same converter as the question; calling
        # pdf2image's convert_from_path here only built images in memory and
        # never wrote a PNG file.
        convert_pdf_to_png(pdf_path)
我尝试运行了几次。代码确实能把 PDF 转换为 PNG(这是我目前能想到的唯一方法),但它输出的是整页图像。我期望得到的是[这样只包含公式本身的图片](https://i.sstatic.net/BMr9Qvzu.png),却一直做不到。总而言之,有什么办法可以解决这个问题吗?
也许可以看看 pdfCropMargins:“pdfCropMargins 程序是一个命令行应用程序,用于自动裁剪 PDF 文件的边距。”不过它也可以在 Python 中直接调用,如下面取自其文档的示例所示。您可能需要根据自己的情况调整函数参数。
from pdfCropMargins import crop
# Each list holds the same arguments the pdfCropMargins command line accepts.
# Per the pdfCropMargins documentation (verify against the installed version):
# "-p" is the percentage of the existing margin to retain, "-u" crops all
# pages uniformly, "-s" gives all pages the same size, "-gui" opens the GUI.
paper1_args = ["-p", "20", "-u", "-s", "paper1.pdf"]
paper2_args = ["-p", "0", "-gui", "paper2.pdf"]
crop(paper1_args)
crop(paper2_args)