我有一个 Excel 文件,其中包含若干部分,每个部分都有自己的标题(标题名称见下面的列表,标题所在行的第二列还带有一个数字)。作为初步尝试,我想读取每个部分的单元格并把它们打印到屏幕上。这是我使用的代码:
# Titles that mark the start of each section in the Excel sheet.
section_headers = [
    "LIBROS",
    "CAPÍTULOS DE LIBROS",
    "ARTÍCULOS CIENTÍFICOS (ORIGINALES Y DE REVISIÓN)",
    "ARTÍCULOS DE DIVULGACIÓN",
    "PONENCIAS PRESENTADAS",
    "CARTELES PRESENTADOS EN CONGRESOS",
    "CURSOS IMPARTIDOS",
    "SEMINARIOS IMPARTIDOS",
    "PROGRAMAS DE RADIO Y TV",
    "PODCASTS",
    "SOFTWARE",
    "PATENTES",
    "MODELOS DE UTILIDAD",
    "PROTOTIPO",
    "PRÁCTICAS PROFESIONALES",
    "SERVICIO SOCIAL",
    "TESIS",
    "OTROS (Ej. Manuales, procesos)",
]
def _is_section_header(row, section_headers):
    """Return True when *row* opens a new section.

    A header row carries one of the known section titles inside its first
    cell and a real number in its second cell.
    """
    if len(row) < 2:
        return False
    # Use .iloc: integer keys on a label-indexed Series are a deprecated
    # positional fallback in recent pandas versions.
    title, count = str(row.iloc[0]), row.iloc[1]
    # A bare isinstance(count, (int, float)) accepts NaN (a float) and bool
    # (an int) and can reject numpy integer scalars, so test explicitly.
    has_count = (
        pd.api.types.is_number(count)
        and not isinstance(count, bool)
        and not pd.isna(count)
    )
    return has_count and any(header in title for header in section_headers)


def process_excel(file_path, section_headers):
    """Split a sectioned Excel sheet into per-section tables and print them.

    Args:
        file_path: Anything ``pandas.read_excel`` accepts (path, URL,
            buffer). An already loaded ``DataFrame`` is also accepted and
            used as is.
        section_headers: Titles that mark the start of a section.

    Returns:
        None. Every section found is printed; if none is found a short
        diagnostic message is printed instead of staying silent.
    """
    if isinstance(file_path, pd.DataFrame):
        df = file_path
    else:
        # NOTE: with skiprows=2 and the default header=0, the third sheet row
        # is consumed as column names, so a section title sitting on that row
        # is never seen by the loop below.
        df = pd.read_excel(file_path, skiprows=2)

    sections = {}
    current_section = None
    section_data = []
    for _, row in df.iterrows():
        if _is_section_header(row, section_headers):
            # Close the section that was being collected, even if it is empty.
            if current_section is not None:
                sections[current_section] = pd.DataFrame(section_data)
            current_section = row.iloc[0]  # The first column is the section name
            section_data = []
        elif current_section is not None:
            # Rows that appear before the first header belong to no section.
            section_data.append(row)
    if current_section is not None:
        sections[current_section] = pd.DataFrame(section_data)

    if not sections:
        print("No section headers were found in the sheet.")
        return
    for section, data in sections.items():
        print(f"\nSection: {section}")
        print(data)
现在,当我调用该函数时:
# Both arguments are passed by keyword: a positional argument may not follow
# a keyword argument, and the path literal needs matching quotes.
process_excel(file_path='wherever the file is', section_headers=section_headers)
我什么也没得到。我正在 Google Colab 上运行它,并且已经导入了 pandas 和 openpyxl。为什么我看不到任何输出(甚至没有错误消息)?有什么想法吗?
import pandas as pd
# Titles that mark the start of each section in the Excel sheet. They must be
# spelled exactly as they appear in the workbook, so they stay in Spanish.
section_headers = [
    'LIBROS',
    'CAPÍTULOS DE LIBROS',
    'ARTÍCULOS CIENTÍFICOS (ORIGINALES Y DE REVISIÓN)',
    'ARTÍCULOS DE DIVULGACIÓN',
    'PONENCIAS PRESENTADAS',
    'CARTELES PRESENTADOS EN CONGRESOS',
    'CURSOS IMPARTIDOS',
    'SEMINARIOS IMPARTIDOS',
    'PROGRAMAS DE RADIO Y TV',
    'PODCASTS',
    'SOFTWARE',
    'PATENTES',
    'MODELOS DE UTILIDAD',
    'PROTOTIPO',
    'PRÁCTICAS PROFESIONALES',
    'SERVICIO SOCIAL',
    'TESIS',
    'OTROS (Ej. Manuales, procesos)',
]
def _is_section_header(row, section_headers):
    """Return True when *row* opens a new section.

    A header row carries one of the known section titles inside its first
    cell and a real number in its second cell.
    """
    if len(row) < 2:
        return False
    # Use .iloc: integer keys on a label-indexed Series are a deprecated
    # positional fallback in recent pandas versions.
    title, count = str(row.iloc[0]), row.iloc[1]
    # A bare isinstance(count, (int, float)) accepts NaN (a float) and bool
    # (an int) and can reject numpy integer scalars, so test explicitly.
    has_count = (
        pd.api.types.is_number(count)
        and not isinstance(count, bool)
        and not pd.isna(count)
    )
    return has_count and any(header in title for header in section_headers)


def process_excel(file_path, section_headers):
    """Print the non-empty sections of a sectioned Excel sheet.

    Args:
        file_path: Anything ``pandas.read_excel`` accepts (path, URL,
            buffer). An already loaded ``DataFrame`` is also accepted and
            used as is.
        section_headers: Titles that mark the start of a section.

    Returns:
        None. Sections that collected at least one non-empty row are
        printed; if there are none a short diagnostic message is printed.
    """
    if isinstance(file_path, pd.DataFrame):
        df = file_path
    else:
        # Read the Excel file, skipping the unnecessary rows.
        # NOTE: with the default header=0 the third sheet row becomes the
        # column names, so a section title on that row is never iterated.
        df = pd.read_excel(file_path, skiprows=2)

    # Initialize storage for sections
    sections = {}
    current_section = None
    section_data = []

    for _, row in df.iterrows():
        if _is_section_header(row, section_headers):
            # Save the previous section's data (sections without rows are dropped)
            if current_section is not None and section_data:
                sections[current_section] = pd.DataFrame(section_data, columns=df.columns)
            # Start a new section
            current_section = row.iloc[0]
            section_data = []
        elif current_section is not None and not row.isnull().all():
            # Keep the row unless it is completely empty or precedes the
            # first header (such rows belong to no section).
            section_data.append(row.tolist())

    # Save the last section
    if current_section is not None and section_data:
        sections[current_section] = pd.DataFrame(section_data, columns=df.columns)

    if not sections:
        print("No non-empty sections were found in the sheet.")
        return
    # Print each section
    for section, data in sections.items():
        print(f"\nSection: {section}")
        print(data)
# Replace the placeholder with the real path of the workbook.
file_path = "your_excel_file.xlsx"
process_excel(file_path, section_headers)