嗨,我正在尝试解析复杂的 csv。 以下是 csv 示例:
start/stop,read/write,address,data
Start,,,
, Write,,
,,22,
,,,0
Start repeat,,,
, Read,,
,,22,
,,, E8
,,,22
Stop,,,
我希望从上面的 csv 示例中获得如下所示的输出:
{
'Start': {
'Write': {
'Address': ['22'],
'Data': ['0']
},
'Read': {
'Address': ['22'],
'Data': ['E8', '22']
}
},
'Start repeat': {
'Read': {
'Address': ['22'],
'Data': []
}
}
}
逻辑尝试:
import pandas as pd
import csv
def extract_start_stop_lines(input_csv_path, output_txt_path):
    """Dump the bus-trace CSV to a text file, one formatted line per row.

    The CSV must contain the columns ``start/stop``, ``read/write``,
    ``address`` and ``data``.  Each row is printed and written to the output
    file as ``Start/Stop: ..., Operation: ..., Address: ..., Data: ...``;
    empty cells are rendered as ``None`` and surrounding whitespace is
    stripped from the others.

    Args:
        input_csv_path: Path of the CSV file to read.
        output_txt_path: Path of the text file to create or overwrite.

    Raises:
        KeyError: If one of the four expected columns is missing.
    """
    # dtype=str keeps every cell exactly as written in the file.  Without
    # it pandas infers numeric types, so a sparse column holding "22" turns
    # into float64 and is emitted as "22.0", and values such as "00" or
    # "1E8" are silently rewritten as numbers.
    df = pd.read_csv(input_csv_path, dtype=str)

    # Extract the desired columns
    selected_columns = ['start/stop', 'read/write', 'address', 'data']
    selected_data = df[selected_columns]

    # Display header information
    header = selected_data.columns.tolist()
    print("Header:", header)

    def clean(value):
        # Missing cells come back as NaN; report them as None.
        return str(value).strip() if pd.notna(value) else None

    with open(output_txt_path, 'w', newline='', encoding='utf-8') as outputfile:
        # Write header information to the output text file
        outputfile.write(f"Header: {', '.join(header)}\n")

        for _, row in selected_data.iterrows():
            start_stop_value = clean(row['start/stop'])
            operation = clean(row['read/write'])
            address = clean(row['address'])
            data = clean(row['data'])

            line = (
                f"Start/Stop: {start_stop_value}, Operation: {operation}, "
                f"Address: {address}, Data: {data}"
            )
            # Echo to the console and persist the same text to the file.
            print(line)
            outputfile.write(line + "\n")
# Example usage: run the extraction on the sample CSV and write the text dump.
input_csv_path, output_txt_path = 'C:\\Test\\test.csv', 'start_stop_lines.txt'
extract_start_stop_lines(
    input_csv_path=input_csv_path,
    output_txt_path=output_txt_path,
)
运行上述代码后,会生成一个新的文本文件,以下是该文件中的示例行:
Header: start/stop, read/write, address, data
Start/Stop: Start, Operation: None, Address: None, Data: None
Start/Stop: None, Operation: Write, Address: None, Data: None
Start/Stop: None, Operation: None, Address: 22, Data: None
Start/Stop: None, Operation: None, Address: None, Data: 0
Start/Stop: Start repeat, Operation: None, Address: None, Data: None
Start/Stop: None, Operation: Read, Address: None, Data: None
Start/Stop: None, Operation: None, Address: 22, Data: None
Start/Stop: None, Operation: None, Address: None, Data: E8
Start/Stop: None, Operation: None, Address: None, Data: 22
Start/Stop: Stop, Operation: None, Address: None, Data: None
Start/Stop: Start, Operation: None, Address: None, Data: None
Start/Stop: None, Operation: Write, Address: None, Data: None
Start/Stop: None, Operation: None, Address: 22, Data: None
Start/Stop: None, Operation: None, Address: None, Data: 2
Start/Stop: None, Operation: None, Address: None, Data: FF
Start/Stop: None, Operation: None, Address: None, Data: FF
Start/Stop: Stop, Operation: None, Address: None, Data: None
总的来说,我想实现的是:每当出现“Start”时,找出它对应的操作(“Write”或“Read”)、该操作的地址,以及随后的数据值。
请帮忙。
给定以下 input.csv 文件:
col1,col2,col3,col4
Start,,,
, Write,,
,,22,
,,,0
Start repeat,,,
, Read,,
,,22,
,,, E8
,,,22
Stop,,,
您可以获得以下结果
{
"start": {
"write": {
"address": [
"22"
],
"data": [
"0"
]
}
},
"start repeat": {
"read": {
"address": [
"22"
],
"data": [
"e8",
"22"
]
}
}
}
使用以下代码
import csv
import json


def parse_transactions(rows):
    """Group sparse bus-trace rows into a nested dictionary.

    Each row carries up to four cells: start/stop marker, operation,
    address and data.  Cells are stripped and lower-cased.  A row whose
    marker contains ``"start"`` opens a new section; the other cells on that
    row are ignored.  Inside a section, a non-empty operation cell opens a
    new ``{'address': [], 'data': []}`` entry, and subsequent address/data
    cells are appended to the most recent operation.

    Args:
        rows: Iterable of rows, each a sequence of string cells (for
            example the output of ``csv.reader``).  Rows shorter than four
            cells are treated as if the missing cells were empty; blank
            rows and rows before the first start marker are skipped.

    Returns:
        ``{marker: {operation: {'address': [...], 'data': [...]}}}`` in
        order of first appearance.

    Raises:
        ValueError: If an address or data value appears inside a section
            before any operation has been named.
    """
    transactions = {}
    current_section = None    # dict of operations for the open marker
    current_operation = None  # {'address': [...], 'data': [...]} being filled

    for row_number, row in enumerate(rows, start=1):
        cells = [cell.strip().lower() for cell in row[:4]]
        if not any(cells):
            # Blank line or a row of empty cells: nothing to record.
            continue
        # Pad short rows so a missing trailing column reads as empty
        # instead of raising IndexError.
        cells += [''] * (4 - len(cells))
        marker, operation, address, data = cells

        if 'start' in marker:
            # NOTE: a repeated marker (e.g. a second "Start") replaces the
            # earlier section of the same name.
            current_section = {}
            transactions[marker] = current_section
            current_operation = None
            continue

        if current_section is None:
            # Rows before the first start marker belong to no section.
            continue

        # Any other marker (such as "stop") does not close the section;
        # the section stays open until the next start marker.
        if operation:
            current_operation = {'address': [], 'data': []}
            current_section[operation] = current_operation

        if address or data:
            if current_operation is None:
                raise ValueError(
                    f"row {row_number}: address/data value found before "
                    f"any operation in the current section"
                )
            if address:
                current_operation['address'].append(address)
            if data:
                current_operation['data'].append(data)

    return transactions


if __name__ == "__main__":
    # newline='' is what the csv module expects from a text-mode file.
    with open('input.csv', 'r', newline='') as file:
        reader = csv.reader(file)
        next(reader, None)  # skip the header line
        mydict = parse_transactions(reader)

    pretty_json = json.dumps(mydict, indent=4)
    print(pretty_json)