我有一个 Excel 文件,它必须是启用宏的工作簿 (.xlsm),其中包含一个名为 `names_to_match` 的命名范围。另一个函数会读取这些公司名称,将它们与数据库进行匹配,并输出每个名称的前 5 个匹配项,以及每个匹配项的匹配分数、匹配名称和匹配索引。
目标是按 .xlsm 文件中的名称(假设名称位于 A 列)进行查找:如果最高匹配分数为 100,则 B 列填入得分最高的匹配名称,C 列填入匹配分数 100。
棘手的部分是,如果匹配分数小于 100,则 B 列应该是一个数据验证下拉列表,其中包含所有匹配名称,外加一个名为“创建新条目”的选项。
我遇到的问题是:在用 `openpyxl` 折腾了好几天之后,我最终改用 `xlsxwriter` 才让它在我的示例文件中正常工作。但是,当我尝试在实际文件上使用它时却行不通,因为 xlsxwriter 不支持 .xlsm 扩展名。
下面是我用 `openpyxl` 尝试过的代码。用 `openpyxl` 可以做到这一点吗?如果不能,针对 .xlsm 文件扩展名是否还有其他解决办法?
from sqlupdate import data_frame_from_xlsx_range
import pandas as pd
from openpyxl import Workbook
def read_data():
    """Load the two input tables used by the matching pipeline.

    Returns:
        A ``(df_names, df_db_pull)`` tuple. The first frame comes from
        ``data_frame_from_xlsx_range`` applied to ``names_to_match`` in
        ``excel_write_test.xlsm``; the second is ``test_with_db_pull.xlsx``
        read with ``pandas.read_excel``.
    """
    names_workbook = 'excel_write_test.xlsm'
    names_range = 'names_to_match'
    db_pull_workbook = 'test_with_db_pull.xlsx'

    # Tuple elements are evaluated left to right, so the names file is
    # still read before the database pull.
    return (
        data_frame_from_xlsx_range(names_workbook, names_range),
        pd.read_excel(db_pull_workbook),
    )
def process_row(row, df_db_pull):
    """Fill in the candidate names and match score for one tracker row.

    Rows of ``df_db_pull`` whose ``'original_name'`` equals
    ``row['Tracker_Name']`` are looked up, and two entries of ``row`` are
    overwritten:

    * ``'Possible Matches'`` -- a list of names. When ``score_0`` of the
      first matching row is exactly 100 the list holds only
      ``match_name_0``. Otherwise it holds every distinct non-null
      ``match_name_0`` .. ``match_name_4`` value in column order, followed
      by ``'Create New Database Entry'``.
    * ``'Match Score'`` -- 100 for an exact match, otherwise the largest
      non-null value found in ``score_0`` .. ``score_4``.

    If no row of ``df_db_pull`` matches the name, the only option offered is
    ``'Create New Database Entry'`` and the score is ``None``.

    Args:
        row: A pandas Series that already has the labels ``'Tracker_Name'``,
            ``'Possible Matches'`` and ``'Match Score'``.
        df_db_pull: DataFrame with the columns ``'original_name'``,
            ``'match_name_0'``..``'match_name_4'`` and
            ``'score_0'``..``'score_4'``.

    Returns:
        The same ``row`` object, updated in place, so the function can be
        passed to ``DataFrame.apply(..., axis=1)``.
    """
    name_columns = ['match_name_0', 'match_name_1', 'match_name_2',
                    'match_name_3', 'match_name_4']
    score_columns = ['score_0', 'score_1', 'score_2', 'score_3', 'score_4']
    new_entry = 'Create New Database Entry'

    match_row = df_db_pull.loc[
        df_db_pull['original_name'] == row['Tracker_Name'], :]

    # Guard: indexing element 0 of an empty selection would raise IndexError.
    if match_row.empty:
        row['Possible Matches'] = [new_entry]
        row['Match Score'] = None
        return row

    top_score = match_row['score_0'].iloc[0]
    if top_score == 100:
        row['Possible Matches'] = [match_row['match_name_0'].iloc[0]]
        row['Match Score'] = top_score
        return row

    # dict.fromkeys removes duplicates while keeping first-seen order, so the
    # dropdown lists candidates in column order instead of arbitrary set order.
    candidates = [
        name
        for name in match_row[name_columns].to_numpy().ravel()
        if pd.notnull(name)
    ]
    row['Possible Matches'] = list(dict.fromkeys(candidates)) + [new_entry]

    # Row-wise max, then max over rows: the true highest score across all
    # score columns (NaN cells are skipped by pandas' default skipna=True).
    row['Match Score'] = match_row[score_columns].max(axis=1).max()
    return row
def _plain_cell_value(value):
    """Convert one DataFrame value into something a worksheet cell can hold."""
    if isinstance(value, (list, tuple, set)):
        return ', '.join(str(item) for item in value)
    if pd.isna(value):
        # Missing values become empty cells rather than a literal NaN.
        return None
    if hasattr(value, 'item'):
        # numpy scalar -> plain Python scalar
        return value.item()
    return value


def _option_list(value):
    """Return the dropdown choices stored in a 'Possible Matches' entry."""
    if isinstance(value, str):
        return [value]
    if isinstance(value, (list, tuple)):
        return [str(item) for item in value if pd.notnull(item)]
    return []


def create_dropdowns(df, path='excel_write_test.xlsm', sheet_name=None,
                     options_sheet='_match_options'):
    """Write ``df`` into an existing workbook and add per-row dropdowns.

    The workbook at ``path`` is opened with ``keep_vba=True`` so its VBA
    content is carried through when the file is saved again as ``.xlsm``.
    The column names of ``df`` are written to row 1 of the target sheet and
    the data rows follow from row 2, starting at column A. Existing cell
    contents in that area are overwritten.

    The ``'Possible Matches'`` column is treated specially:

    * an entry with exactly one option is written as the cell value;
    * an entry with several options leaves the cell value untouched and
      attaches a list-type data validation to it. The choices are stored in
      one row of a hidden helper sheet and referenced by range, so names
      containing commas and long lists are handled.

    Args:
        df: DataFrame containing a ``'Possible Matches'`` column whose
            entries are lists of option strings.
        path: Workbook to open and save back to.
        sheet_name: Sheet to write to; the active sheet when ``None``.
        options_sheet: Title of the hidden sheet that stores the choices.
            Any existing sheet with this title is replaced.

    Raises:
        ValueError: If ``df`` has no ``'Possible Matches'`` column.
    """
    # Function-scope imports: these names are not imported at file level.
    from openpyxl import load_workbook
    from openpyxl.utils import get_column_letter, quote_sheetname
    from openpyxl.worksheet.datavalidation import DataValidation

    # A fresh Workbook() would discard the macros; load the real file instead.
    workbook = load_workbook(path, keep_vba=True)
    worksheet = workbook.active if sheet_name is None else workbook[sheet_name]

    # Rebuild the helper sheet on every run so stale choices never linger.
    if options_sheet in workbook.sheetnames:
        workbook.remove(workbook[options_sheet])
    helper = workbook.create_sheet(options_sheet)
    helper.sheet_state = 'hidden'
    helper_ref = quote_sheetname(options_sheet)

    columns = list(df.columns)
    matches_column = columns.index('Possible Matches') + 1

    for col_idx, header in enumerate(columns, start=1):
        worksheet.cell(row=1, column=col_idx, value=header)

    # NOTE(review): validations left by an earlier run are not removed from
    # the target sheet -- confirm whether re-runs on the same file matter.
    records = df.itertuples(index=False, name=None)
    for row_idx, record in enumerate(records, start=2):
        for col_idx, value in enumerate(record, start=1):
            cell = worksheet.cell(row=row_idx, column=col_idx)
            if col_idx != matches_column:
                cell.value = _plain_cell_value(value)
                continue

            options = _option_list(value)
            if len(options) == 1:
                cell.value = options[0]
            elif options:
                # Helper row N holds the choices for worksheet row N.
                for opt_idx, option in enumerate(options, start=1):
                    helper.cell(row=row_idx, column=opt_idx, value=option)
                last_column = get_column_letter(len(options))
                source = '{0}!$A${1}:${2}${1}'.format(
                    helper_ref, row_idx, last_column)
                validation = DataValidation(
                    type='list', formula1=source, allow_blank=True)
                worksheet.add_data_validation(validation)
                validation.add(cell.coordinate)

    workbook.save(path)
def main():
    """Run the pipeline: read the inputs, match each row, write the workbook."""
    tracker_df, db_pull_df = read_data()

    # Pre-create the output columns so process_row has labels to assign to.
    for column in ('Possible Matches', 'Match Score'):
        tracker_df[column] = None

    matched_df = tracker_df.apply(
        process_row, axis=1, df_db_pull=db_pull_df)

    create_dropdowns(matched_df)
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()