我用PDF版本2.0.27写了一个去除水印的程序,但是这个方法只能去除内容下面的水印,不能去除上面的水印。是否可以从内容中删除水印并将删除的水印另存为新的 PDF 文件?非常感谢!
我想找到一份可供学习的正确代码。
我尝试了以下代码。它基于 PyMuPDF 的 GitHub 讨论"查找并删除 PDF 文件中的水印"中给出的代码,并做了一些修改。它在当前的 PyMuPDF 版本上工作正常。
pip install PyMuPDF
from pathlib import Path

import pymupdf
def process_page(page: "pymupdf.Page"):
    """Blank out image / XObject invocations inside watermark artifacts.

    The page's content stream is split into lines.  Every line that starts
    with ``/Artifact`` and contains ``/Watermark`` is treated as the start of
    a watermark section, which extends up to the next line that is exactly
    ``EMC``.  Inside such a section each line ending in ``Do`` is replaced by
    an empty line.  If anything was removed, the modified stream is written
    back to the page's /Contents object.

    Args:
        page: The page to process.  Its owning document is taken from
            ``page.parent``.

    Returns:
        The number of ``Do`` lines that were emptied (0 if the page was left
        untouched).
    """
    # Use the page's own document rather than relying on a global ``doc``.
    doc = page.parent

    xrefs = page.get_contents()
    if len(xrefs) > 1:
        # read_contents() returns all streams concatenated, but only one
        # xref is written back below.  Merge the streams into a single
        # /Contents object first so nothing is duplicated.
        page.clean_contents()
        xrefs = page.get_contents()
    if not xrefs:
        return 0  # page has no content stream, nothing to remove
    xref = xrefs[0]

    changed = 0  # number of removed invocations; this is returned
    cont_lines = page.read_contents().splitlines()

    for i, line in enumerate(cont_lines):
        if not (line.startswith(b"/Artifact") and b"/Watermark" in line):
            continue  # not the start of a watermark section

        # Line i starts the section; the next standalone "EMC" ends it.
        try:
            j = cont_lines.index(b"EMC", i)
        except ValueError:
            # No terminating line found: skip instead of crashing or
            # blanking unrelated content up to the end of the stream.
            continue

        for k in range(i, j):
            if cont_lines[k].endswith(b"Do"):  # invokes an image / xobject
                cont_lines[k] = b""  # empty the line to drop the invocation
                changed += 1

    if changed > 0:  # only rewrite /Contents if something was removed
        doc.update_stream(xref, b"\n".join(cont_lines))
    return changed
# Script driver: strip watermark invocations from every page of the input
# file and, if anything was removed, save the result next to the original
# with "-nowm" appended to the file stem.
fpath = 'your_pdf_file_path/file_name.pdf'
with pymupdf.open(fpath) as doc:  # ensures the document is closed afterwards
    changed = 0  # total number of removals across all pages
    for page in doc:
        changed += process_page(page)  # add this page's removals
    if changed > 0:
        x = "s" if doc.page_count > 1 else ""
        print(f"{changed} watermarks have been removed on {doc.page_count} page{x}.")
        # Build the output name from stem and suffix.  A plain
        # str.replace(".pdf", ...) would also rewrite ".pdf" inside
        # directory names and would leave names such as "X.PDF" unchanged,
        # making the save target the source file itself.
        src_path = Path(doc.name)
        out_path = src_path.with_name(f"{src_path.stem}-nowm{src_path.suffix}")
        doc.ez_save(str(out_path))
    else:
        print("Nothing to change")