我不能使用非标准的库或 C 特性来进行字符串操作,例如不属于 ANSI C 标准、特定于操作系统的库。可以使用管道等多进程方案。
该程序必须可以在类 UNIX 机器上执行,例如 Linux。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <ctype.h>
#include <wctype.h>
// A word record: used both as a cell of the matrix rows and as the payload
// attached to each dictionary (trie) node.
typedef struct {
// NUL-terminated word text; the array holds at most 29 characters plus the terminator.
char String[30];
// Probability compared against a random number in printString when choosing the next word.
float frequency;
// Not read or written by any code visible in this file.
int occurrence;
// Not read or written by any code visible in this file.
int x;
// Row index; operationalSearch stores it in dictionary entries and returns it on a match.
int y;
} object;
// Jagged matrix of word records: one row per key word.
// printString treats element 0 of a row as the key and elements 1.. as the
// candidate successors, each carrying its own frequency.
typedef struct {
// Array of rows; list[r] points to the objects of row r.
object** list;
// Per-row element count: x[r] is the number of objects in list[r] (key included).
int* x;
// Row count used when growing `list` and `x` in createMatrix.
int y;
} matrix;
// One node of the dictionary trie that operationalSearch walks character by character.
struct Alphabet {
// Child nodes, indexed by asciiIndex(): 0-25 for letters (case-folded),
// 27-30 for '!', '?', '.', '\''. Index 26 is never produced by asciiIndex.
struct Alphabet* subAlphabet[31];
// Word record stored at this node (heap-allocated; released in deallocation()).
object* word;
};
// Dictionary is an alias for struct Alphabet itself (not a pointer type);
// the trie is handled through Dictionary* values.
typedef struct Alphabet Dictionary;
// Forward declaration of the recursive deallocation function (defined after main).
// It frees the given node together with its word and child nodes and returns NULL,
// so callers can write `p = deallocation(p);`. A NULL argument is accepted.
Dictionary* deallocation(Dictionary* dict);
// Maps a character to its child-slot index inside a dictionary node.
//   'a'-'z' and 'A'-'Z' -> 0..25 (case is folded)
//   '!' -> 27, '?' -> 28, '.' -> 29, '\'' -> 30
// Every other character yields 0, i.e. it shares the slot of 'a'/'A'.
// Slot 26 is never returned.
int asciiIndex(char character) {
    if (character >= 'a' && character <= 'z') {
        return (int)(character - 'a');
    }
    if (character >= 'A' && character <= 'Z') {
        return (int)(character - 'A');
    }

    switch (character) {
    case '!':
        return 27;
    case '?':
        return 28;
    case '.':
        return 29;
    case '\'':
        return 30;
    default:
        // Unrecognised characters fall back to slot 0.
        return 0;
    }
}
// Case-insensitive equality test for two NUL-terminated strings.
//
// Returns true when both strings have the same length and every pair of
// characters is equal after folding letters to lower case; false otherwise.
// Two NULL pointers compare equal; a NULL and a non-NULL pointer do not.
//
// Compared with the previous recursive version this:
//  - always returns a value (the recursive branch used to fall off the end
//    of the function, which is undefined behaviour when the result is used);
//  - folds case with tolower() instead of +/-32 arithmetic, which also
//    "matched" unrelated pairs such as '@'/'`' or ' '/'\0' and could then
//    read past the end of the shorter string.
bool compareStrings(char* string1, char* string2) {
    if (string1 == NULL || string2 == NULL) {
        return string1 == string2;
    }

    // <ctype.h> functions require values representable as unsigned char.
    const unsigned char* left = (const unsigned char*)string1;
    const unsigned char* right = (const unsigned char*)string2;

    while (*left != '\0' && tolower(*left) == tolower(*right)) {
        ++left;
        ++right;
    }

    // Equal only if both strings ended at the same position.
    return *left == '\0' && *right == '\0';
}
// Looks `string` up in the dictionary trie rooted at `dict`, creating any
// missing nodes along the way.
//
// Parameters:
//   string - NUL-terminated word to look up.
//   index  - position in `string` from which to start walking (callers pass 0).
//   dict   - trie node to start from.
//   y      - row index to remember for the word if it is not stored yet.
//   m      - unused; kept so existing callers keep compiling.
//
// Returns the row index stored with the word when the node reached already
// holds the same word (case-insensitive, see compareStrings). Otherwise the
// word and `y` are stored in that node and -1 is returned. -1 is also
// returned for NULL/negative arguments.
//
// The process is terminated with exit(1) if memory cannot be allocated.
int operationalSearch(char string[], int index, Dictionary* dict, int y, matrix* m) {
    Dictionary* node = dict;

    (void)m;

    if (string == NULL || node == NULL || index < 0) {
        return -1;
    }

    // Walk (and extend) the trie one character at a time.
    for (; string[index] != '\0'; ++index) {
        int slot = asciiIndex(string[index]);

        if (node->subAlphabet[slot] == NULL) {
            // calloc leaves the new node and its word record zero-initialised.
            Dictionary* child = calloc(1, sizeof *child);
            if (child == NULL) {
                perror("Memory allocation error");
                exit(1);
            }
            child->word = calloc(1, sizeof *child->word);
            if (child->word == NULL) {
                perror("Memory allocation error");
                exit(1);
            }
            node->subAlphabet[slot] = child;
        }
        node = node->subAlphabet[slot];
    }

    // A node handed in by the caller may not have a word record yet.
    if (node->word == NULL) {
        node->word = calloc(1, sizeof *node->word);
        if (node->word == NULL) {
            perror("Memory allocation error");
            exit(1);
        }
    }

    printf("%s = %s\n", node->word->String, string);
    if (compareStrings(node->word->String, string)) {
        return node->word->y;
    }

    // Not stored yet (or a different word hashed to the same path): remember it.
    // snprintf truncates instead of overflowing the fixed-size field.
    snprintf(node->word->String, sizeof node->word->String, "%s", string);
    node->word->y = y;
    printf("stored: %s\n", node->word->String);
    return -1;
}
// Emits a chain of words starting with `String`, choosing each following word
// from the matrix row `y` according to the stored frequencies.
//
// Parameters:
//   String             - first word to emit.
//   m                  - matrix; row r holds the key in element 0 and the
//                        candidate successors in elements 1..m.x[r]-1.
//   dict               - dictionary passed to operationalSearch to resolve the
//                        chosen successor to its row index.
//   y                  - row of the current word.
//   generatedWords     - total number of words to emit.
//   generatedWordCount - number of words already emitted.
//   outputFile         - stream receiving the words separated by single spaces
//                        and terminated by a newline.
//   pipe2, selectedWord - unused; kept so existing callers keep compiling.
//
// Differences from the previous recursive version:
//  - iterates instead of recursing (the old `return printString(...)` returned
//    an expression from a void function);
//  - seeds the random generator once instead of on every word, since
//    re-seeding from clock() each call tends to repeat the same value;
//  - stops the chain (still terminating the line) when the current row is out
//    of range, has no successor, or operationalSearch reports -1, instead of
//    indexing the matrix out of bounds;
//  - terminates when generatedWordCount already exceeds generatedWords.
void printString(char String[30], matrix m, Dictionary* dict, int y, int generatedWords, int generatedWordCount, FILE* outputFile, int pipe2[], char selectedWord[30]) {
    static bool seeded = false;
    const char* current = String;

    (void)pipe2;
    (void)selectedWord;

    if (outputFile == NULL || current == NULL) {
        return;
    }

    if (!seeded) {
        srand((unsigned)time(NULL));
        seeded = true;
    }

    for (int produced = generatedWordCount; produced < generatedWords; ++produced) {
        printf("%s\n", current);
        fprintf(outputFile, "%s", current);

        // The current row must exist and contain at least one successor.
        if (m.list == NULL || m.x == NULL || y < 0 || y >= m.y || m.x[y] < 2) {
            break;
        }

        object* row = m.list[y];
        int rowLength = m.x[y];
        int next = y;

        if (row[1].frequency == 1.0) {
            // Single certain successor: no random draw needed.
            next = operationalSearch(row[1].String, 0, dict, y, &m);
        } else {
            // Roulette-wheel selection: walk the successors, subtracting each
            // probability until the random number falls inside one of them.
            float randomNum = (float)rand() / RAND_MAX;
            for (int i = 1; i < rowLength; ++i) {
                if (randomNum < row[i].frequency) {
                    printf("%s repeated\n", row[i].String);
                    next = operationalSearch(row[i].String, 0, dict, y, &m);
                    break;
                }
                randomNum -= row[i].frequency;
            }
        }

        // operationalSearch returns -1 for a word it has just stored.
        if (next < 0 || next >= m.y || m.x[next] < 1) {
            break;
        }

        y = next;
        current = m.list[y][0].String;
        printf("middle: %s\n", current);
        fprintf(outputFile, " ");
    }

    fprintf(outputFile, "\n");
}
// Writes one fixed-size record to `fd`.
// Returns true when all `size` bytes were written, false otherwise.
static bool writeRecord(int fd, const char* record, size_t size) {
    ssize_t written = write(fd, record, size);
    if (written < 0) {
        perror("write");
        return false;
    }
    if ((size_t)written != size) {
        fprintf(stderr, "write: short write to pipe\n");
        return false;
    }
    return true;
}

// Reads comma/newline separated fields from `file` and sends each one through
// the write end of `pipe1` as a fixed-size, zero-padded 31-byte record.
// The end of every CSV line is sent as a record containing "\n".
// The write end of the pipe is closed before returning so the reader sees EOF.
//
// Fields longer than 30 characters are split by the "%30" width limit.
// Reading stops at end of file, at an empty field, or when a write fails.
//
// Fixes over the previous version:
//  - the record buffer is zeroed before each use, so no uninitialised or
//    stale bytes are sent after the terminator;
//  - fgetc()'s result is kept in an int so the EOF comparison is reliable;
//  - the result of write() is checked.
void readCSV(FILE* file, int pipe1[]) {
    char buffer[31]; // One record: up to 30 characters plus the terminator
    bool ok = true;

    while (ok) {
        memset(buffer, 0, sizeof buffer);
        if (fscanf(file, "%30[^,\n]", buffer) != 1) {
            break;
        }
        if (!writeRecord(pipe1[1], buffer, sizeof buffer)) {
            break;
        }
        printf("%s|", buffer);

        int character = fgetc(file); // Delimiter that ended the field
        if (character == '\n') {
            memset(buffer, 0, sizeof buffer);
            buffer[0] = '\n'; // End-of-row marker record
            printf("%s", buffer);
            ok = writeRecord(pipe1[1], buffer, sizeof buffer);
        } else if (character == EOF) {
            break;
        }
    }
    close(pipe1[1]);
}
// Function to create the matrix and populate it from the pipe data
void createMatrix(matrix* m, int pipe1[], int pipe2[], Dictionary* dict) {
char string1[31];
int* capitalIndexes = NULL;
int capitals = 0;
int columnIndex = 0;
int rowIndex = 0;
capitalIndexes = malloc(sizeof(int));
if (capitalIndexes == NULL) {
perror("Memory allocation error");
exit(EXIT_FAILURE);
}
while (read(pipe1[0], string1, sizeof(string1)) > 0) {
printf("%c", string1[0]);
if (isalpha(string1[0]) || iswalpha(string1[0])) {
if (m->x[m->y - 1] - 1 != 0) { // if the previous string stored in the matrix is the key
m->x[m->y - 1]++;
m->list[m->y - 1] = realloc(m->list[m->y - 1], m->x[m->y - 1] * sizeof(object));
}
strcpy(m->list[m->y - 1][m->x[m->y - 1] - 1].String, string1);
} else if (isdigit(string1[0])) {
m->list[m->y - 1] = realloc(m->list[m->y - 1], m->x[m->y - 1] * sizeof(object));
} else if (string1[0] == '\n') {
m->y++;
m->list = realloc(m->list, m->y * sizeof(object*));
m->x = realloc(m->x, m->y * sizeof(int));
m->list[m->y - 1] = NULL;
m->x[m->y - 1] = 0;
}
}
printf("c\n\n\n");
// Print the matrix
for (int i = 0; i < m->y - 1; i++) {
for (int j = 0; j < m->x[i]; j++) {
if (j == 0) {
printf("%s ", m->list[i][j].String);
} else {
printf(",%s,%.4f ", m->list[i][j].String, m->list[i][j].frequency); // Print the frequency value of the element
}
}
printf("\n");
}
close(pipe1[0]);
close(pipe2[1]);
}
// Entry point: opens the input/output files, creates two pipes and forks three
// children:
//   process 1 - reads the CSV file and writes its fields to pipe1;
//   process 2 - reads pipe1 and builds/prints the matrix;
//   process 3 - placeholder for the generator.
//
// Pipe handling: a reader only sees end-of-file once EVERY copy of the write
// end is closed, so each process closes all pipe ends it does not use and the
// parent closes all four before waiting for its children.
//
// Note that each child works on its own copy of `m` and `dict`; anything
// process 2 builds is not visible to the parent or to process 3 unless it is
// sent through pipe2.
//
// Returns 0 on success, 1 if setup failed or a child did not exit cleanly.
int main() {
    int exitCode = 0;
    int pipe1[2] = {-1, -1};
    int pipe2[2] = {-1, -1};
    pid_t pid1 = -1, pid2 = -1, pid3 = -1;
    FILE* file = NULL;
    FILE* outputFile = NULL;
    char selectedWord[30] = "";
    int generatedWords = 30;
    Dictionary* dict = calloc(1, sizeof *dict);
    matrix* m = calloc(1, sizeof *m); // list = NULL, x = NULL, y = 0

    // Reserved for the process 3 implementation.
    (void)selectedWord;
    (void)generatedWords;

    if (dict == NULL || m == NULL) {
        perror("Memory allocation error");
        exitCode = 1;
        goto cleanup;
    }
    dict->word = calloc(1, sizeof *dict->word);
    if (dict->word == NULL) {
        perror("Memory allocation error");
        exitCode = 1;
        goto cleanup;
    }

    file = fopen("controprova.csv", "r");
    if (file == NULL) {
        fprintf(stderr, "Error opening file.\n");
        exitCode = 1;
        goto cleanup;
    }
    outputFile = fopen("prova1.txt", "w");
    if (outputFile == NULL) {
        fprintf(stderr, "Error opening file.\n");
        exitCode = 1;
        goto cleanup;
    }

    if (pipe(pipe1) == -1) {
        perror("pipe");
        exitCode = 1;
        goto cleanup;
    }
    if (pipe(pipe2) == -1) {
        perror("pipe");
        exitCode = 1;
        goto closePipes;
    }

    // Avoid children inheriting (and re-emitting) buffered output.
    fflush(stdout);

    pid1 = fork();
    if (pid1 == -1) {
        perror("Fork error");
        exitCode = 1;
        goto closePipes;
    } else if (pid1 == 0) {
        printf("Process 1: %d\n\n", getpid());
        close(pipe1[0]);
        close(pipe2[0]);
        close(pipe2[1]);
        readCSV(file, pipe1); // closes pipe1[1] when done
        exit(0);
    }

    pid2 = fork();
    if (pid2 == -1) {
        perror("Fork error");
        exitCode = 1;
        goto closePipes;
    } else if (pid2 == 0) {
        printf("\n\nProcess 2: %d\n\n", getpid());
        close(pipe1[1]); // Otherwise this process would never see EOF on pipe1
        close(pipe2[0]);
        createMatrix(m, pipe1, pipe2, dict); // closes pipe1[0] and pipe2[1]
        exit(0);
    }

    pid3 = fork();
    if (pid3 == -1) {
        perror("Fork error");
        exitCode = 1;
        goto closePipes;
    } else if (pid3 == 0) {
        printf("\n\nProcess 3: %d\n", getpid());
        close(pipe1[0]);
        close(pipe1[1]);
        close(pipe2[1]);
        // ... (code for process 3) ...
        close(pipe2[0]);
        exit(0);
    }

closePipes:
    // The parent uses no pipe end itself; keeping them open would block the readers.
    if (pipe1[0] != -1) close(pipe1[0]);
    if (pipe1[1] != -1) close(pipe1[1]);
    if (pipe2[0] != -1) close(pipe2[0]);
    if (pipe2[1] != -1) close(pipe2[1]);

    {
        // Wait for every child that was started so none is left orphaned.
        pid_t children[3];
        children[0] = pid1;
        children[1] = pid2;
        children[2] = pid3;
        for (size_t i = 0; i < sizeof children / sizeof children[0]; i++) {
            int status = 0;
            if (children[i] <= 0) {
                continue;
            }
            if (waitpid(children[i], &status, 0) == -1) {
                perror("waitpid");
                exitCode = 1;
            } else if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
                exitCode = 1;
            }
        }
    }

cleanup:
    if (file != NULL) {
        fclose(file);
    }
    if (outputFile != NULL && fclose(outputFile) != 0) {
        perror("fclose");
        exitCode = 1;
    }
    if (m != NULL) {
        for (int i = 0; i < m->y; i++) {
            free(m->list[i]);
        }
        free(m->list);
        free(m->x);
        free(m);
    }
    // Deallocate the dictionary (words and structure), covering every child slot
    if (dict != NULL) {
        for (size_t i = 0; i < sizeof dict->subAlphabet / sizeof dict->subAlphabet[0]; i++) {
            dict->subAlphabet[i] = deallocation(dict->subAlphabet[i]);
        }
        free(dict->word);
        free(dict);
    }
    return exitCode;
}
// Recursively frees a dictionary node: all of its child subtrees, its word
// record and the node itself.
//
// Accepts NULL (no-op). Always returns NULL so callers can clear their pointer
// with `p = deallocation(p);`.
//
// The loop bound is derived from the array itself: the previous hard-coded 30
// skipped the last slot (index 30, the apostrophe branch) and leaked it.
Dictionary* deallocation(Dictionary* dict) {
    if (dict == NULL) {
        return NULL;
    }

    size_t slots = sizeof dict->subAlphabet / sizeof dict->subAlphabet[0];
    for (size_t i = 0; i < slots; i++) {
        dict->subAlphabet[i] = deallocation(dict->subAlphabet[i]);
    }
    free(dict->word);
    free(dict);
    return NULL;
}
我是 C 编程初学者,我正在开发一个项目,该项目涉及读取 CSV 文件、处理其内容,然后生成一些输出。该项目涉及三个进程:
第一个进程读取 CSV 文件并将单词写入管道。
第二个进程从管道中读取,构造一个矩阵结构,并存储单词及其频率。
第三个进程(尚未实现)将使用矩阵根据处理后的数据生成输出。
我面临着这些进程同步的问题,特别是确保管道不会过早关闭,这会导致进程之间的通信问题。我需要帮助了解如何正确管理管道读/写操作和进程同步以避免这些问题。此外,我需要确保正确填充矩阵结构并正确处理内存分配。
我尝试实现三个进程来处理从 CSV 文件读取、处理和输出数据。这是我所做的:
读取 CSV(进程 1):此进程读取 CSV 文件,分割单词,并将它们写入管道。
构建矩阵(进程 2):此进程从管道中读取单词,构建矩阵结构,并存储单词及其频率。
生成输出(进程 3):此进程尚未实现。它应该使用矩阵根据处理后的数据生成并打印输出。
预期结果:我希望第二个进程能够正确地从管道中读取并构造矩阵。具体来说,我希望矩阵打印在屏幕上,使用嵌套的 `for` 循环显示每个单词及其相应的频率。
实际结果:矩阵构建不正确。这些进程面临同步问题,可能是由于管道过早关闭或读/写操作处理不正确造成的。这会导致读取和处理的数据不完整或不正确,从而导致打印不正确或空的矩阵。
我需要帮助确保进程之间的正确同步以及管道操作的正确处理,以实现预期结果。
.,What,1.0000
What,do,1.0000
do,the,1.0000
the,forecasts,1.0000
forecasts,of,0.6667,weather,0.3333
of,time,1.0000
time,?,0.3333,of,0.3333,uncertain,0.3333
?,Forecasts,1.0000
of,today,0.5000,tomorrow,0.5000
today,time,1.0000
uncertain,!,1.0000
!,Forecasts,1.0000
tomorrow,?,1.0000
`readCSV()` 中有一个问题:
char buffer[31];
…
bytesRead = fscanf(file, "%30[^,\n]", buffer);
if (bytesRead == 1)
{
write(pipe1[1], buffer, sizeof(buffer));
你并不知道自己是否真的读到了 31 个字节,却总是写出 31 个字节,其中很可能包含一些垃圾数据。您需要算出实际读取了多少字节,并且只写入相应数量的数据。但您还需要确定如何在管道上分隔单词——接收进程如何知道消息之间的边界?您可以使用 TLV(类型、长度、值)编码或其他一些技术。
数据看起来有点像某种马尔可夫链输入,“给定单词 1 和单词 2,单词 3 跟随的概率为 P3,单词 4 跟随的概率为 P4”。 您如何知道何时到达了一行信息的末尾? 如果您设计 TLV 编码,则可以对不同类型的数据使用不同的类型代码。