我遇到了 openpyxl 的问题,该问题似乎导致我的 Excel 工作簿损坏。我使用的是 openpyxl 版本 3.0.10。
这是我的代码的概述:
import openpyxl
import glob
from tkinter import Tk
from tkinter.filedialog import askdirectory
from openpyxl import __version__
def get_existing_data_sources(file_path):
    """Load a workbook and collect the targets of its external links.

    Args:
        file_path: Path of the ``.xlsx`` file to open.

    Returns:
        A ``(data_sources, workbook)`` tuple. ``data_sources`` maps each
        external link's position in ``workbook._external_links`` to its
        target with every ``file:///`` removed and ``%20`` turned back into
        spaces. ``workbook`` is the loaded workbook, or ``None`` when the
        file could not be loaded.
    """
    data_sources = {}
    # Bound up front so the return below can never hit an unbound local
    # when load_workbook raises.
    workbook = None
    try:
        # Open the path that was passed in instead of depending on a
        # module-level ``excel_file_path`` that only exists in the script.
        workbook = openpyxl.load_workbook(file_path)
        for index, link in enumerate(workbook._external_links):
            target = link.file_link.Target.replace("file:///", "")
            data_sources[index] = target.replace("%20", " ")
    except Exception as e:
        # Best effort: report the problem and hand back whatever was
        # collected so the caller can decide to skip this file.
        print(f"An error occurred while extracting data sources: {str(e)}")
    return (data_sources, workbook)
def modify_data_source_links(data_sources, workbook,
                             old_pattern='<OLD LINK>',
                             new_pattern='<NEW LINK>'):
    """Re-point the workbook's external links from one location to another.

    Args:
        data_sources: Mapping of link index (position in
            ``workbook._external_links``) to the readable link string, i.e.
            with spaces instead of ``%20``.
        workbook: Object exposing ``_external_links``; each entry's
            ``file_link.Target`` is overwritten in place when it changes.
        old_pattern: Substring to look for in each readable link string.
        new_pattern: Text that replaces ``old_pattern``.

    Prints a summary line; any exception is reported on stdout rather than
    propagated.
    """
    try:
        modified = False
        for index, link in enumerate(workbook._external_links):
            current = data_sources.get(index)
            if not current:
                continue
            updated = current.replace(old_pattern, new_pattern)
            # Leave links that do not contain the old pattern untouched, so
            # their stored target is not rewritten and the summary message
            # reflects what actually happened.
            if updated == current:
                continue
            # Targets are stored with spaces percent-encoded.
            # NOTE(review): the written target is built from the readable
            # string, so a "file:///" prefix stripped upstream is not
            # restored here - confirm this is what Excel expects.
            link.file_link.Target = updated.replace(" ", "%20")
            modified = True
        if modified:
            print("Data source links modified successfully.")
        else:
            print("No data source links matching the old data source found.")
    except Exception as e:
        print(f"An error occurred: {str(e)}")
if __name__ == "__main__":
    # Create and hide the Tk root explicitly; otherwise the dialog spawns an
    # empty root window that stays open next to it.
    root = Tk()
    root.withdraw()
    # Show the folder picker; the result is falsy when the user cancels.
    path = askdirectory(title='Select Folder')
    root.destroy()
    print(path)
    if path:
        # Only the selected folder itself is searched (the pattern has no
        # "**", so a recursive flag would have no effect). The folder name is
        # escaped so characters such as "[" are matched literally.
        excel_file_paths = glob.glob('{}/*.xlsx'.format(glob.escape(path)))
    else:
        # Cancelled dialog: do not fall through to globbing "/*.xlsx".
        print('No folder selected, nothing to do.')
        excel_file_paths = []
    # Process each file
    for excel_file_path in excel_file_paths:
        existing_data_sources, workbook = get_existing_data_sources(excel_file_path)
        # The workbook could not be opened: nothing to relink or save.
        if workbook is None:
            print('Could not open {}, skipping...'.format(excel_file_path))
            continue
        # No external links: leave the file on disk untouched instead of
        # rewriting it for no reason.
        if not existing_data_sources:
            print("No data source links found in the workbook. Moving on...")
            workbook.close()
            continue
        print("Existing data source links:")
        # Iterate the link strings (the dict values), not the index keys.
        for i, data_source in enumerate(existing_data_sources.values(), 1):
            print(f"{i}. {data_source}")
        modify_data_source_links(existing_data_sources, workbook)
        # Save the workbook back over the original file.
        excel_file_path = excel_file_path.replace('\\', '/')
        workbook.save(excel_file_path)
        workbook.close()
        print('Modified the links in: {}, moving on...'.format(excel_file_path))
    print('Script completed!')
代码运行没有出现任何错误,并且工作簿已成功生成。但是,当我尝试写入工作簿时,Excel 显示一条错误消息,指出文件已损坏并提示进行恢复。在 Excel 工作簿中处理外部链接时,是否有其他人遇到过 openpyxl 的此问题?我是否在代码中遗漏了某些内容,或者是否有解决方法可以防止使用 openpyxl 修改外部链接时工作簿损坏?任何见解或解决方案将不胜感激。谢谢!
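试试这个方法:`workbook = load_workbook(filename=file_path, data_only=True, keep_links=False)`。注意:`keep_links=False` 会在加载时丢弃工作簿中的外部链接,`data_only=True` 会读取公式的缓存值而不是公式本身,因此保存后公式会丢失;只有在不需要保留外部链接和公式时才适用此方法。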