Python数据提取失败

Question

嗨，我正在尝试解析复杂的 csv。以下是 csv 示例：

start/stop,read/write,address,data
 Start,,,
, Write,,
,,22,
,,,0
 Start repeat,,,
, Read,,
,,22,
,,, E8
,,,22
 Stop,,,

我希望从上面的 csv 示例中获得如下所示的输出：

{
    'Start': {
        'Write': {
            'Address': ['22'],
            'Data': ['0']
        },
        'Read': {
            'Address': ['22'],
            'Data': ['E8', '22']
        }
    },
    'Start repeat': {
        'Read': {
            'Address': ['22'],
            'Data': []
        }
    }
}

逻辑尝试：

import pandas as pd
import csv

def extract_start_stop_lines(input_csv_path, output_txt_path):
    """Dump the four bus columns of a CSV file, row by row, to a text file.

    Each CSV row becomes one line of the form
    ``Start/Stop: ..., Operation: ..., Address: ..., Data: ...`` where an
    empty cell is rendered as ``None`` and every other cell is stripped of
    surrounding whitespace. The same lines are echoed to stdout.

    Args:
        input_csv_path: Path of the CSV file to read. It must contain the
            columns ``start/stop``, ``read/write``, ``address`` and ``data``.
        output_txt_path: Path of the UTF-8 text file to (over)write.

    Raises:
        KeyError: If one of the four expected columns is missing.
    """
    # dtype=str keeps every cell exactly as written in the file. With dtype
    # inference a numeric-only column that also has blanks becomes float64,
    # so an address of "22" would be reported as "22.0".
    df = pd.read_csv(input_csv_path, dtype=str)

    # Extract the desired columns
    selected_columns = ['start/stop', 'read/write', 'address', 'data']
    selected_data = df[selected_columns]

    # Display header information
    header = selected_data.columns.tolist()
    print("Header:", header)

    def _clean(cell):
        # Empty CSV cells are read as NaN; report them as None.
        return str(cell).strip() if pd.notna(cell) else None

    with open(output_txt_path, 'w', newline='', encoding='utf-8') as outputfile:
        # Write header information to the output text file
        outputfile.write(f"Header: {', '.join(header)}\n")

        # name=None yields plain tuples, so the column names (which are not
        # valid Python identifiers) never need to be turned into attributes.
        for raw_row in selected_data.itertuples(index=False, name=None):
            start_stop_value, operation, address, data = map(_clean, raw_row)
            line = (
                f"Start/Stop: {start_stop_value}, Operation: {operation}, "
                f"Address: {address}, Data: {data}"
            )

            # Display the extracted information and persist the same line.
            print(line)
            outputfile.write(line + "\n")

# Example usage: read the CSV at input_csv_path and write the per-row text
# report to output_txt_path.
input_csv_path, output_txt_path = 'C:\\Test\\test.csv', 'start_stop_lines.txt'
extract_start_stop_lines(
    input_csv_path=input_csv_path,
    output_txt_path=output_txt_path,
)

运行上述代码后，会生成一个新的文本文件，以下是该文件中的示例行：

Header: start/stop, read/write, address, data
Start/Stop: Start, Operation: None, Address: None, Data: None
Start/Stop: None, Operation: Write, Address: None, Data: None
Start/Stop: None, Operation: None, Address: 22, Data: None
Start/Stop: None, Operation: None, Address: None, Data: 0
Start/Stop: Start repeat, Operation: None, Address: None, Data: None
Start/Stop: None, Operation: Read, Address: None, Data: None
Start/Stop: None, Operation: None, Address: 22, Data: None
Start/Stop: None, Operation: None, Address: None, Data: E8
Start/Stop: None, Operation: None, Address: None, Data: 22
Start/Stop: Stop, Operation: None, Address: None, Data: None
Start/Stop: Start, Operation: None, Address: None, Data: None
Start/Stop: None, Operation: Write, Address: None, Data: None
Start/Stop: None, Operation: None, Address: 22, Data: None
Start/Stop: None, Operation: None, Address: None, Data: 2
Start/Stop: None, Operation: None, Address: None, Data: FF
Start/Stop: None, Operation: None, Address: None, Data: FF
Start/Stop: Stop, Operation: None, Address: None, Data: None

总的来说，我想实现的是：每当出现“Start”时，找到它对应的操作（“Write”或“Read”）、该操作的地址，以及随后的数据值。

请帮忙。

Answer 1

给定以下 input.csv 文件：

col1,col2,col3,col4
 Start,,,
, Write,,
,,22,
,,,0
 Start repeat,,,
, Read,,
,,22,
,,, E8
,,,22
 Stop,,,

您可以获得以下结果

{
    "start": {
        "write": {
            "address": [
                "22"
            ],
            "data": [
                "0"
            ]
        }
    },
    "start repeat": {
        "read": {
            "address": [
                "22"
            ],
            "data": [
                "e8",
                "22"
            ]
        }
    }
}

使用以下代码

import json

def parse_transactions(lines):
    """Group the data rows of the four-column CSV into a nested dict.

    A row whose first cell contains ``start`` opens a new section keyed by
    that cell. Inside a section, a non-empty second cell opens an operation
    (for example ``read`` or ``write``); non-empty third and fourth cells are
    appended to the current operation's ``address`` and ``data`` lists.
    All cells are stripped and lower-cased before use.

    Args:
        lines: Iterable of CSV text lines, header line already removed.

    Returns:
        ``{section: {operation: {'address': [...], 'data': [...]}}}``.
        A section or operation name that occurs again replaces the earlier
        entry of the same name.
    """
    import csv

    result = {}
    section = None    # operations of the section currently being filled
    operation = None  # address/data lists of the operation being filled

    for row in csv.reader(lines):
        cells = [cell.strip().lower() for cell in row]
        # Blank or short lines are padded rather than allowed to raise
        # IndexError, so they can neither crash nor truncate a section.
        cells += [''] * (4 - len(cells))
        marker, op_name, address, data = cells[:4]

        if 'start' in marker:
            # Only the marker of a start row is used; its other cells are
            # not attributed to any operation.
            section = result[marker] = {}
            operation = None
            continue

        if section is None:
            # Rows before the first start marker belong to no section.
            continue

        if op_name:
            operation = section[op_name] = {'address': [], 'data': []}

        if operation is None:
            # Address/data cells seen before any operation have nothing to
            # attach to; skip them instead of failing with a KeyError.
            continue

        if address:
            operation['address'].append(address)
        if data:
            operation['data'].append(data)

    return result


if __name__ == "__main__":
    with open('input.csv', 'r') as file:
        csv_rows = [line.strip() for line in file]

    # The first line is the column header, not data.
    csv_rows_without_header = csv_rows[1:]

    mydict = parse_transactions(csv_rows_without_header)

    pretty_json = json.dumps(mydict, indent=4)
    print(pretty_json)

Python数据提取失败

问题描述投票：0回答：1

1个回答

最新问题

Python数据提取失败

问题描述 投票：0回答：1

1个回答

最新问题

问题描述投票：0回答：1