我的目标是创建一个方法,该方法采用产品名称并检索包含该产品的所有账单。
这是我的代码:
from openpyxl import Workbook, load_workbook
# Workbook that holds the bills; only its active sheet is parsed.
file_path = 'C:/Users/Mohamed Hamdi/Desktop/bills.xlsx'
total_bills = []  # kept from the original script; nothing in this section fills it
bills_workbook = load_workbook(file_path)
bills_sheet = bills_workbook.active

# ``products`` collects one entry per worksheet row: a list of single-key
# dicts (an empty list for a blank row).  The original rebound ``products``
# inside the loop, so only the last row was left once the loop finished.
products = []
for row in range(1, bills_sheet.max_row + 1):
    row_items = []
    for col in range(1, 7):
        raw_value = bills_sheet.cell(row=row, column=col).value
        # Check for a genuinely empty cell rather than searching the text
        # for 'None', which also skipped cells merely containing "None".
        if raw_value is None:
            continue
        cell_value = str(raw_value)
        if "Bill Number" in cell_value:
            # A label cell keeps its value in the next column to the right.
            row_items.append({'Bill Number': bills_sheet.cell(row=row, column=col + 1).value})
        elif "History" in cell_value:
            row_items.append({'History': bills_sheet.cell(row=row, column=col + 1).value})
        elif "Total price" in cell_value:
            row_items.append({'Total price': bills_sheet.cell(row=row, column=col + 1).value})
        elif col == 6:
            # A non-label value in column 6 is treated as a product line;
            # columns 3-6 are read as name, count, price and line total.
            row_items.extend([
                {'product name': bills_sheet.cell(row=row, column=3).value},
                {'count': bills_sheet.cell(row=row, column=4).value},
                {'price': bills_sheet.cell(row=row, column=5).value},
                {'total price': bills_sheet.cell(row=row, column=6).value},
            ])
    products.append(row_items)
def _cells_with_key(row, key):
    """Yield the dict cells of *row* that contain *key*.

    Rows that are not lists/tuples and cells that are not dicts are ignored,
    so blank or malformed rows never raise.
    """
    if not isinstance(row, (list, tuple)):
        return
    for cell in row:
        if isinstance(cell, dict) and key in cell:
            yield cell


def find_rows_between_start_and_end(sheet_data, search_key, search_value, start_element, end_element):
    """Return every bill that contains the searched value.

    ``sheet_data`` is a flat list of rows; each row is a list of single-key
    dicts, e.g. ``[{'product name': 'product1'}, {'count': 1}, ...]``.  A bill
    is the run of rows from a row holding the key ``start_element`` up to and
    including the next row holding the key ``end_element``.

    The original implementation looked for ``start_element`` / ``end_element``
    among the dict *values* and scanned inside a single row, so with this row
    layout it never found a bill.  Markers are now matched against dict keys
    and the scan runs across rows.

    Args:
        sheet_data: List of rows as described above.
        search_key: Key of the cell to test (e.g. ``'product name'``).
        search_value: Value that cell must equal (exact, case-sensitive).
        start_element: Key that marks the first row of a bill.
        end_element: Key that marks the last row of a bill.

    Returns:
        A list of bills; each bill is the list of its rows in original order.
        Bills without a closing ``end_element`` row are not returned.
    """
    result_rows_list = []
    current_bill = None   # rows of the bill being collected; None outside a bill
    bill_matches = False  # whether the current bill contains the searched value

    for row in sheet_data:
        if any(True for _ in _cells_with_key(row, start_element)):
            # A new header always opens a fresh bill; an unfinished previous
            # bill (no end row seen) is discarded.
            current_bill = []
            bill_matches = False
        if current_bill is None:
            continue  # rows between bills (e.g. blank separator rows)

        current_bill.append(row)
        if any(cell[search_key] == search_value
               for cell in _cells_with_key(row, search_key)):
            bill_matches = True

        if any(True for _ in _cells_with_key(row, end_element)):
            if bill_matches:
                result_rows_list.append(current_bill)
            current_bill = None

    return result_rows_list
# What to look for: the cell key/value identifying the product, and the keys
# that mark the first and last row of a bill.
search_key = "product name"
search_value = "product"
# Spelled exactly like the key the loader stores for the bill header row
# ('Bill Number', capital N); the previous "Bill number" differed in case.
start_element = "Bill Number"
end_element = "Total price"
result_rows_list = find_rows_between_start_and_end(
    products, search_key, search_value, start_element, end_element
)
# Print every row of every bill that was returned, one row per line.
for result_rows in result_rows_list:
    for row in result_rows:
        print(row)
这就是数组 products 的样子:
[{'Bill number': 31}, {'Date': '2024-04-16 04:39:44'}]
[{'product name': 'product1'}, {'count': 1}, {'price': 10}, {'total price': 10}]
[{'product name': 'product1'}, {'count': 1}, {'price': 10}, {'total price': 10}]
[{'Total price': 20}]
[]
[{'Bill number': 32}, {'Date': '2024-04-16 04:41:51'}]
[{'product name': 'product'}, {'count': 1}, {'price': 10}, {'total price': 10}]
[{'product name': 'product'}, {'count': 1}, {'price': 10}, {'total price': 10}]
[{'product name': 'product'}, {'count': 1}, {'price': 10}, {'total price': 10}]
[{'Total price': 30}]
[]
[{'Bill number': 33}, {'Date': '2024-04-16 04:44:10'}]
[{'product name': 'product'}, {'count': 1}, {'price': 10}, {'total price': 10}]
[{'product name': 'product'}, {'count': 1}, {'price': 10}, {'total price': 10}]
[{'product name': 'product'}, {'count': 1}, {'price': 10}, {'total price': 10}]
[{'Total price': 30}]
当我尝试检索账单时，它要么不返回任何数据，要么返回的数据不完整。
如果您有像我想要的那样直接检索数据的方法,请分享。
如果我搜索 product1 关键字,我期望的输出:
[{'Bill number': 31}, {'Date': '2024-04-16 04:39:44'}]
[{'product name': 'product1'}, {'count': 1}, {'price': 10}, {'total price': 10}]
[{'product name': 'product1'}, {'count': 1}, {'price': 10}, {'total price': 10}]
[{'Total price': 20}]
我已更新示例数据，使其包含“账单编号”31、32、33 的部分。为了使提取的数据更加清晰，我更改了某些行中的值，以便相互区分。
假设产品名称是唯一的,您可以轻松使用 Pandas 提取产品“表”,使用该名称作为 Pandas 数据帧中的搜索词。
例如：搜索“product1”，获取匹配行的索引，然后将这些行提取为数据框，同时包括其上方的“账单编号”行和下方的“总价格”行。
然后可以使用 Pandas 'to_excel' 将此提取的表直接写入新的或现有的 Excel 工作表
但是,当您希望数据作为使用特定键名称的字典列表时,您可以根据要求处理数据帧的每一行并放入字典列表中。
将输出的数据是这样的(记住一些值已被更改以确保唯一性)。
Data List
[{'Bill Number': '31.0'}, {'Date': '2024-04-16 04:39:44'}]
[{'product name': 'product1'}, {'count': '1'}, {'price': '10.0'}, {'total price': '20.0'}]
[{'product name': 'product1'}, {'count': '2'}, {'price': '20.0'}, {'total price': '40.0'}]
[{'Total price': '20'}]
代码示例
import pandas as pd
def search_df_table(xl_file, sheet, search):
    """Extract the table of rows surrounding a product from an Excel sheet.

    The sheet is read with its first row as column labels, and ``search`` is
    compared with the ``'History'`` column.  The returned frame spans from the
    first to the last matching row plus the row directly below the last match,
    and uses the row directly above the first match as its column header.

    Args:
        xl_file: Path of the workbook to read.
        sheet: Name (or index) of the worksheet.
        search: Value to look for in the ``'History'`` column.

    Returns:
        A DataFrame holding the matching rows and the row below them, with
        the row above them as column labels.

    Raises:
        IndexError: If ``search`` does not occur in the ``'History'`` column.
    """
    dframe = pd.read_excel(xl_file, sheet_name=sheet)

    # Positions of the rows whose 'History' cell equals the search term.
    idx_list = dframe.index[dframe['History'] == search].tolist()
    if not idx_list:
        # Same exception type the original raised via ``idx_list[0]``, but
        # with a message that states the actual problem.
        raise IndexError(f"search term {search!r} not found in column 'History'")

    first, last = idx_list[0], idx_list[-1]
    if first == 0:
        # The row above the first match was already consumed by read_excel as
        # the column labels.  ``first - 1`` would be -1 here, which selects
        # from the end of the frame and produced an empty slice.  Keep the
        # existing labels and take the matches plus the row below them.
        return dframe.iloc[: last + 2]

    # Include one row above (header) and one below the matches.
    dframe = dframe.iloc[first - 1: last + 2]
    # Promote that first row to the header, then drop it from the data.
    dframe.columns = dframe.iloc[0]
    dframe = dframe[1:]
    return dframe
def _trim_empty_edges(fields):
    """Return *fields* without its leading and trailing empty strings."""
    start, end = 0, len(fields)
    while start < end and fields[start] == '':
        start += 1
    while end > start and fields[end - 1] == '':
        end -= 1
    return fields[start:end]


def format_dataframe(dframe):
    """Convert an extracted bill table into a list of rows of single-key dicts.

    The frame is serialised to CSV (header included) and every CSV record is
    turned into one output row after its leading and trailing empty fields
    are dropped:

    * a record containing ``'Bill Number'`` is read as alternating key/value
      fields; the key ``'History'`` is renamed to ``'Date'``;
    * a record containing ``'Total price'`` becomes ``[{label: value}]``;
    * any other record is a product line whose first four fields are mapped
      to ``'product name'``, ``'count'``, ``'price'`` and ``'total price'``.

    All values are the strings produced by the CSV serialisation.  Records
    are parsed with the ``csv`` module, so values containing commas or line
    breaks stay intact.  A missing trailing value is reported as ``''``
    instead of raising ``IndexError``, and completely empty records are
    skipped.

    Args:
        dframe: DataFrame whose column labels are the bill header.

    Returns:
        List of rows; each row is a list of single-key dicts.
    """
    import csv
    import io

    product_keys = ['product name', 'count', 'price', 'total price']
    csv_text = dframe.to_csv(header=True, index=False)

    data_list = []
    # newline='' lets csv.reader handle '\n' and '\r\n' terminators itself.
    for record in csv.reader(io.StringIO(csv_text, newline='')):
        fields = _trim_empty_edges(record)
        if not fields:
            continue

        if any('Bill Number' in field for field in fields):
            # Even positions are keys, odd positions their values; pad so a
            # key whose value was empty still gets an entry.
            keys = fields[0::2]
            values = fields[1::2]
            values += [''] * (len(keys) - len(values))
            data_list.append(
                [{('Date' if key == 'History' else key): value}
                 for key, value in zip(keys, values)]
            )
        elif any('Total price' in field for field in fields):
            total_value = fields[1] if len(fields) > 1 else ''
            data_list.append([{fields[0]: total_value}])
        else:
            padded = fields + [''] * (len(product_keys) - len(fields))
            data_list.append(
                [{key: value} for key, value in zip(product_keys, padded)]
            )
    return data_list
# Workbook, worksheet and product to look up.
file_name = 'bills.xlsx'
worksheet = 'Sheet1'
search_term = 'product1'

# Extract the table that belongs to the requested product.
df = search_df_table(xl_file=file_name, sheet=worksheet, search=search_term)

# Option 1: store the extracted table unchanged in a new workbook.
print("\nWrite data to Excel sheet")
df.to_excel('new.xlsx', header=True, index=False)

# Option 2: convert the table into lists of single-key dicts and print them
# one row per line.
print("Data List")
for row in format_dataframe(df):
    print(row)