用 C 处理管道同步和进程通信以读取和处理 CSV 数据

Question

我无法使用非标准库或 C 功能进行字符串操作，例如不属于 ANSI C 标准的操作系统特定库。可以使用管道等多处理解决方案。
该程序必须可以在类 UNIX 机器上执行，例如 Linux。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <time.h>
#include <sys/types.h>
#include <sys/wait.h> 
#include <unistd.h>
#include <ctype.h>
#include <wctype.h>

// Structure to represent an object with string, frequency, occurrence, and coordinates
typedef struct {
    char String[30];
    float frequency;
    int occurrence;
    int x;
    int y;
} object;

// Structure for a matrix containing a list of objects and its dimensions
typedef struct {
    object** list;
    int* x;
    int y;
} matrix;

// Structure to represent an alphabet node
struct Alphabet {
    struct Alphabet* subAlphabet[31];
    object* word;
};

// Define Dictionary as a pointer to Alphabet struct
typedef struct Alphabet Dictionary;

// Declaration of the recursive deallocation function
Dictionary* deallocation(Dictionary* dict);

// Function to get the ASCII index of a character
int asciiIndex(char character) {
    // Maps characters A-Z, a-z, !, ?, ., and ' to ASCII values
    if ((character >= 'A' && character <= 'Z')) { 
        return (int)(character - 'A');
    } else if ((character >= 'a' && character <= 'z')) { 
        return (int)(character - 'a');
    } else if (character == '!') {
        return 27;
    } else if (character == '?') {
        return 28;
    } else if (character == '.') {
        return 29;
    } else if (character == '\'') {
        return 30;
    }
    return '\0';
}

// Function to compare two strings
bool compareStrings(char* string1, char* string2) {
    // Checks if the two strings are equal, considering case insensitivity
    if ((int)*string1 == (int)*string2 || ((int)*string1 + 32) == (int)*string2 || ((int)*string1 - 32) == (int)*string2) {
        // Strings are equal or differ only by case
    } else {
        // Strings are different
        return false;
    }

    // If both strings have reached the end, they are equal
    if ((*string1 || *string2) == '\0') {
        return true;
    } else {
        // Otherwise, compare the next characters recursively
        compareStrings((string1 + 1), (string2 + 1));
    }
}

// Function for operational search of a string in the matrix
int operationalSearch(char string[], int index, Dictionary* dict, int y, matrix* m) {
    // Initialize the current dictionary
    Dictionary* currentDict = dict;
    int ASCII;

    // If the end of the string is reached, compare strings or add the new word
    if (string[index] == '\0') {
        printf("%s = %s\n", currentDict->word->String, string);
 
        if (compareStrings(currentDict->word->String, string) == 1) {
            return currentDict->word->y;
        } else {
            // Add the new word to the dictionary
            strcpy(currentDict->word->String, string);
            currentDict->word->y = y;
            printf("stored: %s\n", currentDict->word->String);
            return -1;
        }
    }

    // The word is not present in the matrix, so add it to the dictionary
    ASCII = asciiIndex(string[index]);
    if (currentDict->subAlphabet[ASCII] == NULL) {
        currentDict->subAlphabet[ASCII] = (Dictionary*)calloc(1, sizeof(Dictionary));
        if (currentDict->subAlphabet[ASCII] == NULL) {
            exit(1);
        }
        currentDict->subAlphabet[ASCII]->word = (object*)malloc(sizeof(object));
        if (currentDict->subAlphabet[ASCII]->word == NULL) {
            exit(1);
        }
        // Initialize the allocated memory to zero
        memset(currentDict->subAlphabet[ASCII]->word, 0, sizeof(object));
    }
    currentDict = currentDict->subAlphabet[ASCII];
    return operationalSearch(string, index + 1, currentDict, y, m);
}

// Function to print the string and recursively call itself
void printString(char String[30], matrix m, Dictionary* dict, int y, int generatedWords, int generatedWordCount, FILE* outputFile, int pipe2[], char selectedWord[30]) {
    if (generatedWordCount == generatedWords) { // base case
        fprintf(outputFile, "\n");
        return;
    }

    printf("%s\n", String);
    fprintf(outputFile, "%s", String);
    if (m.list[y][1].frequency == 1.0) {
        y = operationalSearch(m.list[y][1].String, 0, dict, y, &m);
    } else {
        int numElementsRow = m.x[y];
        // Iterate through words and select one based on probability
        srand(clock()); // use clock ticks as seed for random generator
        float randomNum = (float)rand() / RAND_MAX;
        for (int i = 1; i < numElementsRow; ++i) {

            if (randomNum < m.list[y][i].frequency) {
                printf("%s repeated\n", m.list[y][i].String);
                y = operationalSearch(m.list[y][i].String, 0, dict, y, &m);
                break;
            } else {
                // Subtract the probability of the current word
                randomNum -= m.list[y][i].frequency;
            }
        }
    }
    printf("middle: %s\n", m.list[y][0].String);
    fprintf(outputFile, " ");
    return printString(m.list[y][0].String, m, dict, y, generatedWords, generatedWordCount + 1, outputFile, pipe2, selectedWord);
}

// Function to read CSV file and write words to the pipe
void readCSV(FILE* file, int pipe1[]) {
    char buffer[31]; // Buffer to read words
    char character;
    int bytesRead;

    while (1) {
        bytesRead = fscanf(file, "%30[^,\n]", buffer);
        if (bytesRead == 1) {
            write(pipe1[1], buffer, sizeof(buffer));
        } else {
            break;
        }
        printf("%s|", buffer);
        character = fgetc(file); // Read the next character

        if (character == '\n') {
            strcpy(buffer, "\n"); // Add a newline to the buffer
            printf("%s", buffer);
            write(pipe1[1], buffer, sizeof(buffer));
        } else if (character == EOF) {
            break;
        }
    }

    close(pipe1[1]);
}

// Function to create the matrix and populate it from the pipe data
void createMatrix(matrix* m, int pipe1[], int pipe2[], Dictionary* dict) {
    char string1[31];
    int* capitalIndexes = NULL;
    int capitals = 0;
    int columnIndex = 0;
    int rowIndex = 0;

    capitalIndexes = malloc(sizeof(int));
    if (capitalIndexes == NULL) {
        perror("Memory allocation error");
        exit(EXIT_FAILURE);
    }

    while (read(pipe1[0], string1, sizeof(string1)) > 0) {
        printf("%c", string1[0]);
        if (isalpha(string1[0]) || iswalpha(string1[0])) {
            if (m->x[m->y - 1] - 1 != 0) { // if the previous string stored in the matrix is the key
                m->x[m->y - 1]++;
                m->list[m->y - 1] = realloc(m->list[m->y - 1], m->x[m->y - 1] * sizeof(object));
            }

            strcpy(m->list[m->y - 1][m->x[m->y - 1] - 1].String, string1);

        } else if (isdigit(string1[0])) {
            m->list[m->y - 1] = realloc(m->list[m->y - 1], m->x[m->y - 1] * sizeof(object));
        } else if (string1[0] == '\n') {
            m->y++;
            m->list = realloc(m->list, m->y * sizeof(object*));
            m->x = realloc(m->x, m->y * sizeof(int));
            m->list[m->y - 1] = NULL;
            m->x[m->y - 1] = 0;
        }
    }

    printf("c\n\n\n");
    // Print the matrix
    for (int i = 0; i < m->y - 1; i++) {
        for (int j = 0; j < m->x[i]; j++) {
            if (j == 0) {
                printf("%s ", m->list[i][j].String);
            } else {
                printf(",%s,%.4f ", m->list[i][j].String, m->list[i][j].frequency); // Print the frequency value of the element
            }
        }
        printf("\n");
    }

    close(pipe1[0]);
    close(pipe2[1]);
}

// Main function
int main() {
    Dictionary* dict = (Dictionary*)calloc(1, sizeof(Dictionary));
    dict->word = (object*)malloc(sizeof(object));

    FILE *file = fopen("controprova.csv", "r");
    if (file == NULL) {
        fprintf(stderr, "Error opening file.\n");
        return 1;
    }
    FILE *outputFile = fopen("prova1.txt", "w");

    char selectedWord[30] = "";
    int generatedWords = 30;

    int pipe1[2], pipe2[2];

    if (pipe(pipe1) == -1 || pipe(pipe2) == -1) {
        exit(EXIT_FAILURE);
    }

    // Initialize the matrix structure
    matrix* m = (matrix*)malloc(sizeof(matrix));
    m->list = NULL;
    m->x = NULL;
    m->y = 0;

    pid_t pid1, pid2, pid3;

    pid1 = fork();
    if (pid1 == -1) {
        perror("Fork error");
        return 1;
    } else if (pid1 == 0) {
        printf("Process 1: %d\n\n", getpid());
        close(pipe1[0]);
        readCSV(file, pipe1);
        exit(0);
    }

    pid2 = fork();
    if (pid2 == -1) {
        printf("Fork error");
        return 1;
    } else if (pid2 == 0) {
        printf("\n\nProcess 2: %d\n\n", getpid());
        close(pipe1[1]); // Close pipe1[1] in the child process
        close(pipe2[0]);
        createMatrix(m, pipe1, pipe2, dict);
        exit(0);
    }

    pid3 = fork();
    if (pid3 == -1) {
        perror("Fork error");
        return 1;
    } else if (pid3 == 0) {
        printf("\n\nProcess 3: %d\n", getpid());
        close(pipe2[1]);
        // ... (code for process 3) ...
        exit(0);
    }

    free(m->list);
    free(m->x);
    free(m);

    // Deallocate the dictionary (words and structure)
    for (int i = 0; i < 30; i++) {
        dict->subAlphabet[i] = deallocation(dict->subAlphabet[i]);
    }
    free(dict->word);
    free(dict);

    return 0;
}

// Recursive deallocation function
Dictionary* deallocation(Dictionary* dict) {
    if (dict == NULL) {
        return NULL;
    }

    for (int i = 0; i < 30; i++) {
        dict->subAlphabet[i] = deallocation(dict->subAlphabet[i]);
    }
    free(dict->word);
    free(dict);
    return NULL;
}

我是 C 编程初学者，我正在开发一个项目，该项目涉及读取 CSV 文件、处理其内容，然后生成一些输出。该项目涉及三个过程：

第一个进程读取 CSV 文件并将单词写入管道。
第二个进程从管道中读取，构造一个矩阵结构，并存储单词及其频率。
第三个过程（尚未实现）将使用矩阵根据处理后的数据生成输出。

我面临着这些进程同步的问题，特别是确保管道不会过早关闭，这会导致进程之间的通信问题。我需要帮助了解如何正确管理管道读/写操作和进程同步以避免这些问题。此外，我需要确保正确填充矩阵结构并正确处理内存分配。

我尝试实现三个过程来处理从 CSV 文件读取、处理和输出数据。这是我所做的：

读取 CSV（过程 1）：此过程读取 CSV 文件，分割单词，并将它们写入管道。
构建矩阵（过程 2）：此过程从管道中读取单词，构建矩阵结构，并存储单词及其频率。
生成输出（流程3）：此流程尚未实现。它应该使用矩阵根据处理后的数据生成并打印输出。

预期结果：我希望第二个进程能够正确地从管道中读取并构造矩阵。具体来说，我希望矩阵打印在屏幕上，使用嵌套的

for

循环显示每个单词及其相应的频率。

实际结果：矩阵构建不正确。这些进程面临同步问题，可能是由于管道过早关闭或读/写操作处理不正确造成的。这会导致读取和处理的数据不完整或不正确，从而导致打印不正确或空的矩阵。

我需要帮助确保流程之间的正确同步以及管道操作的正确处理，以实现预期结果。

输入 CSV：

.,What,1.0000
What,do,1.0000
do,the,1.0000
the,forecasts,1.0000
forecasts,of,0.6667,weather,0.3333
of,time,1.0000
time,?,0.3333,of,0.3333,uncertain,0.3333
?,Forecasts,1.0000
of,today,0.5000,tomorrow,0.5000
today,time,1.0000
uncertain,!,1.0000
!,Forecasts,1.0000
tomorrow,?,1.0000

Answer 1

有一个问题在

readCSV()

：

char buffer[31];
…
bytesRead = fscanf(file, "%30[^,\n]", buffer);
if (bytesRead == 1)
{
    write(pipe1[1], buffer, sizeof(buffer));

你不知道你读了31个字节，但你写了31个字节，这可能会包含一些垃圾。您需要计算读取了多少字节，并写入了多少数据。但您还需要确定如何在管道上分隔单词 - 接收进程如何知道消息之间的边界？您可以使用 TLV（类型、长度、值）编码或其他一些技术。

数据看起来有点像某种马尔可夫链输入，“给定单词 1 和单词 2，单词 3 跟随的概率为 P3，单词 4 跟随的概率为 P4”。您如何知道何时到达了一行信息的末尾？如果您设计 TLV 编码，则可以对不同类型的数据使用不同的类型代码。

用 C 处理管道同步和进程通信以读取和处理 CSV 数据

问题描述投票：0回答：1

输入 CSV：

1个回答

最新问题

用 C 处理管道同步和进程通信以读取和处理 CSV 数据

问题描述 投票：0回答：1

输入 CSV：

1个回答

最新问题

问题描述投票：0回答：1