Linux 中使用多进程统计一个文件中的字母数量

问题描述 投票:0回答:1

主程序充当父进程,根据参数指定的进程数使用

fork()
创建子进程。父进程在子进程之间平均分配整个文件大小,为每个子进程分配文件的特定范围以搜索字母字符并计算它们的出现次数。划分基于以下规则:

A.例如,如果文件大小为1000字节,进程数为2,则主进程创建2个子进程。每个子进程将被分配一个文件偏移范围:分别为0-499和500-999。 (文件偏移量从 0 开始。)

B.如果文件大小为 1000 字节,进程数为 3,则分配给每个子进程的文件偏移量范围将为 0–332、333–665 和 666–999。除法的任何余数都包含在最后一个范围内。

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <ctype.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <string.h>

/*
 * Count the bytes in the offset range start_offset..end_offset for which
 * isalpha() reports true, issuing one read() call per byte.
 *
 * NOTE(review): as posted this function does not compile -- `fd` is never
 * declared here and `filename` is never opened.
 */
int count_alphabet(const char *filename, off_t start_offset, off_t end_offset) {
    int count = 0;
    char c;
    /* The result of lseek() is not checked. */
    lseek(fd, start_offset, SEEK_SET);
    
    /* `<=` makes end_offset itself part of the range (inclusive upper bound). */
    for (off_t i = start_offset; i <= end_offset; i++) {
        /* A read() that does not return 1 is skipped silently; `c` reaches
           isalpha() as a plain char. */
        if (read(fd, &c, 1) == 1 && isalpha(c)) {
            count++;
        }
    }
    
    return count;
}

/*
 * Parent process: takes <filename> and <number_of_children>, measures the
 * file, forks one child per range, and sums the per-child counts that the
 * children send back as text lines through a single pipe.
 *
 * NOTE(review): as posted this function does not compile (`file_length` and
 * `temp` are not declared); the notes below flag the spots in question.
 */
int main(int argc, char *argv[]) {
    if (argc != 3) {
        fprintf(stderr, "Usage: %s <filename> <number_of_children>\n", argv[0]);
        exit(1);
    }

    const char *filename = argv[1];
    int num_children = atoi(argv[2]);
    if (num_children <= 0) {
        fprintf(stderr, "Number of children must be a positive integer.\n");
        exit(1);
    }

    int fd = open(filename, O_RDONLY);
    if (fd < 0) {
        perror("Failed to open file");
        exit(1);
    }

    /* Seeking to the end yields the file size; the descriptor is closed right after. */
    off_t file_size = lseek(fd, 0, SEEK_END);
    close(fd);
    /* NOTE(review): `file_length` is not declared; the size was stored in `file_size`. */
    if (file_length <= 0) {
        fprintf(stderr, "File is empty or unreadable.\n");
        exit(1);
    }
    
    /* Per-child span and the remainder; both are stored in int although file_size is off_t. */
    int offset_range = file_size / num_children;
    int extra_range = file_size % num_children;

    /* One pipe shared by all children for reporting their counts. */
    int pipefd[2];
    if (pipe(pipefd) == -1) {
        perror("Pipe failed");
        exit(1);
    }

    for (int i = 0; i < num_children; ++i) {
        pid_t pid = fork();
        if (pid < 0) {
            perror("Fork failed");
            exit(1);
        } else if (pid == 0) {
            /* Child: end_offset is start + span, and count_alphabet() treats
               end_offset as inclusive, so neighbouring ranges share one byte. */
            off_t start_offset = i * offset_range;
            off_t end_offset = start_offset + offset_range;
            if (i == num_children - 1) {
                /* The last child also takes the remainder of the division. */
                end_offset += extra_range;
            }

            int count = count_alphabet(filename, start_offset, end_offset);
            /* %ld is used for the off_t arguments; the message has no trailing newline. */
            fprintf(stderr, "Process[%d] has found %d alphabet letters in (%ld ~ %ld).", 
                    getpid(), count, start_offset, end_offset);
                                        
            char child_buf[1000000];
            /* NOTE(review): `temp` is not declared; the buffer being filled is `child_buf`. */
            int written_count = snprintf(child_buf, sizeof(temp), "%d\n", count); 
            write(pipefd[1], child_buf, written_count);
            /* There is no exit/return here, so the child falls through to the
               next loop iteration and calls fork() itself. */
        }
    }

    int total_count = 0;
    char parent_buf[1000000];
    int read_count;
    
    /* Read text lines from the pipe until read() returns 0 or an error.
       NOTE(review): pipefd[1] is never closed in the code shown, so end-of-file
       presumably never arrives and this loop may block -- confirm. */
    while ((read_count = read(pipefd[0], parent_buf, sizeof(parent_buf) - 1)) > 0) {
        parent_buf[read_count] = '\0'; 
        /* Each newline-separated token is parsed as one child's count. */
        char *line = strtok(parent_buf, "\n");
        while (line != NULL) {
            total_count += atoi(line);
            line = strtok(NULL, "\n");
        }
    }

    /* Reap every child; exit statuses are discarded. */
    while (waitpid(-1, NULL, 0) > 0);

    /* NOTE(review): %c is given `filename`, which is a const char *. */
    fprintf(stderr, "Process[%d] has found %d alphabet letters in %c",
            getpid(), total_count, filename);
    return 0;
}

我按照上面的方法写了代码,但是计数逻辑好像不正确。但是,我不确定如何解决它。请帮助我!

c linux concurrency count multiprocessing
1个回答
0
投票

您的代码中存在多个问题:

在函数

count_alphabet

  • 返回类型应该是
    off_t
  • 您没有打开源文件,
    fd
    未定义。
  • 变量
    count
    应具有类型
    off_t
    以允许非常大的文件
  • c
    应定义为
    unsigned char
    ,以便
    isalpha(c)
    在
    char
    为有符号类型的架构上,对非 ASCII 字节也具有已定义的行为。
  • 应报告未能读取预期字节数。
  • 应测试并报告在文件内查找失败的情况

在函数

main

  • 在会自动转换文件内容的旧系统上,文件应以二进制模式打开。
  • 应接受并正确处理空文件
  • offset_range、
    extra_range 和
    count
    应具有类型
    off_t
  • count、
    start_offset 和
    end_offset
    在输出时应转换为
    (long long)
    ,以便在
    off_t
    未定义为类型
    long
    的系统上实现可移植性。
  • 写入管道应以二进制形式执行,以避免读取循环中部分读取时出现转换问题。
  • 写入管道后应退出子进程。
  • 以二进制方式从管道读取,一次读取一个
    int
    会简化读取循环并使其更加可靠。此循环应等待子进程退出,检查并处理非零退出状态,并且仅在子进程成功退出时才读取其计数。
  • 最后一个
    fprintf
    应使用
    %s
    作为
    filename
    参数,使用
    %lld
    输出计数,并将计数转换为 (long long)。

这是修改后的版本:

#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

#include <fcntl.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

/*
 * Open `filename` for reading only and return whatever open() returns
 * (a file descriptor, or a negative value on failure).
 *
 * O_BINARY is added to the flags only on platforms whose <fcntl.h> defines
 * it; elsewhere the file is opened with O_RDONLY alone.
 */
static int open_as_read_binary(const char *filename) {
    int open_flags = O_RDONLY;
#ifdef O_BINARY
    open_flags |= O_BINARY;
#endif
    return open(filename, open_flags);
}

/*
 * Count the alphabetic bytes of `filename` in the half-open offset range
 * [start_offset, end_offset).
 *
 * The file is opened privately, so the file position is not shared with any
 * other process. Bytes are classified with isalpha() on unsigned char
 * values, so bytes >= 0x80 are safe to pass.
 *
 * Returns the number of bytes in the range for which isalpha() is true.
 * An empty or inverted range yields 0 (the file is still opened and the
 * seek is still performed).
 *
 * On any failure (open, seek, read error, or end of file before
 * end_offset) a message is printed to stderr and the process exits with
 * status 1.
 */
off_t count_alphabet(const char *filename, off_t start_offset, off_t end_offset) {
    /* Read in large chunks instead of one read() system call per byte. */
    enum { CHUNK_SIZE = 65536 };
    unsigned char chunk[CHUNK_SIZE];

    int fd = open_as_read_binary(filename);
    if (fd < 0) {
        perror("Failed to open file");
        exit(1);
    }
    if (lseek(fd, start_offset, SEEK_SET) < 0) {
        perror("Failed to seek into file");
        exit(1);
    }

    off_t count = 0;
    off_t remaining = end_offset > start_offset ? end_offset - start_offset : 0;
    while (remaining > 0) {
        size_t want = remaining < (off_t)sizeof chunk ? (size_t)remaining : sizeof chunk;
        ssize_t nread = read(fd, chunk, want);
        if (nread < 0) {
            /* An interrupted read transferred nothing; just retry it. */
            if (errno == EINTR) {
                continue;
            }
            perror("Error reading from file");
            exit(1);
        }
        if (nread == 0) {
            /* End of file reached before the requested range was consumed. */
            fprintf(stderr, "failed to read %lld bytes from %s\n",
                    (long long)(end_offset - start_offset), filename);
            exit(1);
        }
        /* read() may return fewer bytes than requested; count what arrived. */
        for (ssize_t k = 0; k < nread; k++) {
            if (isalpha(chunk[k])) {
                count++;
            }
        }
        remaining -= nread;
    }
    close(fd);
    return count;
}

/*
 * Read exactly one off_t count from the read end of the pipe.
 * Retries when interrupted by a signal; on a read error, end of file, or a
 * partial read it prints a message to stderr and exits with status 1.
 */
static off_t read_child_count(int pipe_read_fd) {
    for (;;) {
        off_t count;
        ssize_t read_count = read(pipe_read_fd, &count, sizeof count);
        if (read_count == (ssize_t)sizeof count) {
            return count;
        }
        if (read_count < 0) {
            if (errno == EINTR) {
                continue;
            }
            perror("error reading from the pipe");
            exit(1);
        }
        if (read_count == 0) {
            fprintf(stderr, "cannot read from the pipe\n");
        } else {
            fprintf(stderr, "partial read from the pipe\n");
        }
        exit(1);
    }
}

/*
 * Usage: <program> <filename> <number_of_children>
 *
 * Splits the file into <number_of_children> contiguous byte ranges (the
 * remainder of the division goes to the last range), forks one child per
 * range, and lets each child count the alphabetic bytes of its range with
 * count_alphabet(). Every child reports its range and count on stderr and
 * sends the count to the parent as one binary off_t through a shared pipe.
 * The parent reaps the children, adds up the counts of those that exited
 * with status 0, and prints the total on stderr.
 *
 * Returns 0 when every child succeeded, 1 when at least one child failed
 * or was terminated abnormally (the printed total is then incomplete).
 * Usage and setup errors exit with status 1.
 */
int main(int argc, char *argv[]) {
    if (argc != 3) {
        fprintf(stderr, "Usage: %s <filename> <number_of_children>\n", argv[0]);
        exit(1);
    }

    const char *filename = argv[1];

    /* strtol instead of atoi: rejects trailing garbage and out-of-range values. */
    errno = 0;
    char *end = NULL;
    long requested = strtol(argv[2], &end, 10);
    if (end == argv[2] || *end != '\0' || errno == ERANGE ||
        requested <= 0 || requested > INT_MAX) {
        fprintf(stderr, "Number of children must be a strictly positive integer.\n");
        exit(1);
    }
    int num_children = (int)requested;

    int fd = open_as_read_binary(filename);
    if (fd < 0) {
        perror("Failed to open file");
        exit(1);
    }

    /* Seeking to the end yields the file size; an empty file is accepted. */
    off_t file_size = lseek(fd, 0, SEEK_END);
    if (file_size < 0) {
        fprintf(stderr, "File is unseekable.\n");
        exit(1);
    }
    close(fd);

    off_t offset_range = file_size / num_children;
    off_t extra_range = file_size % num_children;

    int pipefd[2];
    if (pipe(pipefd) == -1) {
        perror("Pipe failed");
        exit(1);
    }

    for (int i = 0; i < num_children; ++i) {
        pid_t pid = fork();
        if (pid < 0) {
            perror("Fork failed");
            exit(1);
        }
        if (pid == 0) {
            /* Child: it only writes to the pipe. */
            close(pipefd[0]);

            /* Half-open range [start_offset, end_offset); last child takes the remainder. */
            off_t start_offset = (off_t)i * offset_range;
            off_t end_offset = start_offset + offset_range;
            if (i == num_children - 1) {
                end_offset += extra_range;
            }
            off_t count = count_alphabet(filename, start_offset, end_offset);
            fprintf(stderr, "Process[%d] has found %lld alphabet letters in (%lld ~ %lld).\n",
                    (int)getpid(), (long long)count, (long long)start_offset, (long long)end_offset);

            /* Send the count as one binary off_t; retry only when interrupted. */
            for (;;) {
                ssize_t write_count = write(pipefd[1], &count, sizeof count);
                if (write_count == (ssize_t)sizeof count) {
                    return 0; /* the child must stop here, not continue the fork loop */
                }
                if (write_count == -1) {
                    if (errno == EINTR) {
                        continue;
                    }
                    perror("error writing to the pipe");
                    exit(1);
                }
                fprintf(stderr, "cannot write to the pipe\n");
                exit(1);
            }
        }
    }

    /* The parent never writes; closing its write end lets read() report
       end of file once every child is gone. */
    close(pipefd[1]);

    off_t total_count = 0;
    int failed_children = 0;
    for (;;) {
        int status;
        pid_t pid = waitpid(-1, &status, 0);
        if (pid < 0) {
            /* A signal interrupting waitpid must not end the reaping loop. */
            if (errno == EINTR) {
                continue;
            }
            break; /* no children left to wait for */
        }
        if (!WIFEXITED(status)) {
            /* Previously ignored silently: the child did not exit normally. */
            fprintf(stderr, "child process [%d] terminated abnormally\n", (int)pid);
            failed_children++;
            continue;
        }
        int exit_status = WEXITSTATUS(status);
        if (exit_status != 0) {
            fprintf(stderr, "child process [%d] returned non zero status %d\n",
                    (int)pid, exit_status);
            failed_children++;
            continue;
        }
        /* A child exits with status 0 only after writing its full count. */
        total_count += read_child_count(pipefd[0]);
    }
    close(pipefd[0]);

    fprintf(stderr, "Process[%d] has found %lld alphabet letters in %s\n",
            (int)getpid(), (long long)total_count, filename);
    return failed_children == 0 ? 0 : 1;
}

请注意,即使将二进制块中的计数写入管道也不能保证原子操作:字节在线程之间交错的可能性很小但非零。每次写入操作都应锁定用于写入管道的文件句柄。

© www.soinside.com 2019 - 2024. All rights reserved.