主程序充当父进程,根据参数指定的进程数使用 fork() 创建子进程。父进程在子进程之间平均分配整个文件大小,为每个子进程分配文件的特定范围,由子进程在该范围内搜索字母字符并统计其出现次数。划分基于以下规则:
A.例如,如果文件大小为1000字节,进程数为2,则主进程创建2个子进程。每个子进程将被分配一个文件偏移范围:分别为0-499和500-999。 (文件偏移量从 0 开始。)
B.如果文件大小为 1000 字节,进程数为 3,则分配给每个子进程的文件偏移量范围将为 0–332、333–665 和 666–999。除法的任何余数都包含在最后一个范围内。
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <ctype.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <string.h>
/*
 * Counts the bytes for which isalpha() is true while stepping through the
 * file offsets start_offset..end_offset (both bounds included by the loop).
 *
 * filename     - path of the file to scan; not used by the body as written.
 * start_offset - offset passed to lseek() before reading.
 * end_offset   - last offset visited by the loop (inclusive).
 *
 * Returns the number of alphabetic bytes counted, as an int.
 */
int count_alphabet(const char *filename, off_t start_offset, off_t end_offset) {
int count = 0;
char c;
// NOTE(review): fd is not declared in this function and filename is never opened here.
// The return value of lseek() is not checked.
lseek(fd, start_offset, SEEK_SET);
// The bound is inclusive (i <= end_offset), so the byte at end_offset itself is read.
for (off_t i = start_offset; i <= end_offset; i++) {
// A read() that does not return 1 (end of file or error) is skipped silently.
// NOTE(review): c is a plain char; where char is signed, a byte >= 0x80 reaches isalpha() as a negative value.
if (read(fd, &c, 1) == 1 && isalpha(c)) {
count++;
}
}
return count;
}
/*
 * Entry point: expects <filename> and <number_of_children> on the command
 * line, forks that many children, gives each one a range of file offsets to
 * scan with count_alphabet(), and sums the counts the children send back
 * through a shared pipe as newline-terminated decimal text.
 */
int main(int argc, char *argv[]) {
if (argc != 3) {
fprintf(stderr, "Usage: %s <filename> <number_of_children>\n", argv[0]);
exit(1);
}
const char *filename = argv[1];
int num_children = atoi(argv[2]);
if (num_children <= 0) {
fprintf(stderr, "Number of children must be a positive integer.\n");
exit(1);
}
// The file is opened only to learn its size: lseek() to SEEK_END returns the end offset.
int fd = open(filename, O_RDONLY);
if (fd < 0) {
perror("Failed to open file");
exit(1);
}
off_t file_size = lseek(fd, 0, SEEK_END);
close(fd);
// NOTE(review): file_length is not declared in this function; the size was stored in file_size above.
if (file_length <= 0) {
fprintf(stderr, "File is empty or unreadable.\n");
exit(1);
}
// Bytes per child, and the remainder that is added to the last child's range.
// NOTE(review): both are int although file_size is an off_t.
int offset_range = file_size / num_children;
int extra_range = file_size % num_children;
// A single pipe is shared by all children to report their counts to the parent.
int pipefd[2];
if (pipe(pipefd) == -1) {
perror("Pipe failed");
exit(1);
}
for (int i = 0; i < num_children; ++i) {
pid_t pid = fork();
if (pid < 0) {
perror("Fork failed");
exit(1);
} else if (pid == 0) {
// Child i: compute its own range from the loop index.
off_t start_offset = i * offset_range;
// NOTE(review): end_offset equals the next child's start_offset, and count_alphabet() loops
// with i <= end_offset, so the byte at end_offset is visited by two children (and the last
// child is asked for one offset past the end of the file).
off_t end_offset = start_offset + offset_range;
if (i == num_children - 1) {
end_offset += extra_range;
}
int count = count_alphabet(filename, start_offset, end_offset);
// NOTE(review): %ld is used for the off_t arguments, and the message has no trailing newline.
fprintf(stderr, "Process[%d] has found %d alphabet letters in (%ld ~ %ld).",
getpid(), count, start_offset, end_offset);
char child_buf[1000000];
// NOTE(review): temp is not declared in this function; the buffer being filled is child_buf.
int written_count = snprintf(child_buf, sizeof(temp), "%d\n", count);
write(pipefd[1], child_buf, written_count);
// NOTE(review): this branch has no exit() or return, so the child continues with the next
// loop iteration (forking again) and later runs the parent code below.
}
}
int total_count = 0;
char parent_buf[1000000];
int read_count;
// Read text from the pipe until read() returns 0 or a negative value, adding up every
// newline-separated number found in each chunk.
// NOTE(review): pipefd[1] is never closed in this process; presumably read() cannot report
// end-of-file while a write end is still open, so this loop may never finish - confirm.
while ((read_count = read(pipefd[0], parent_buf, sizeof(parent_buf) - 1)) > 0) {
parent_buf[read_count] = '\0';
char *line = strtok(parent_buf, "\n");
while (line != NULL) {
total_count += atoi(line);
line = strtok(NULL, "\n");
}
}
// Reap every child; exit statuses are discarded (status pointer is NULL).
while (waitpid(-1, NULL, 0) > 0);
// NOTE(review): %c is given filename, which is a const char *.
fprintf(stderr, "Process[%d] has found %d alphabet letters in %c",
getpid(), total_count, filename);
return 0;
}
我按照上面的方法写了代码,但是计数逻辑好像不正确。但是,我不确定如何解决它。请帮助我!
您的代码中存在多个问题:
在函数 count_alphabet 中:
- fd 未定义。
- count 应具有类型 off_t,以支持非常大的文件。
- c 应定义为 unsigned char,这样在 char 为有符号类型的架构上,isalpha(c) 对非 ASCII 字节也具有已定义的行为。
在函数 main 中:
- offset_range、extra_range 和 count 应具有类型 off_t。
- 打印 count、start_offset、end_offset 时应将它们转换为 (long long),以便移植到 off_t 不是 long 类型的系统。
- 把计数以二进制形式(而不是文本)写入管道,会简化读取循环并使其更加可靠。该循环应等待子进程退出,检测并处理非正常退出的状态,并且仅在子进程成功退出时才读取计数。
- 最后的 fprintf 应使用 %s 输出 filename 参数,使用 %lld 输出计数,并将计数转换为 (long long)。
这是修改后的版本:
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
/*
 * Opens filename for reading, adding O_BINARY to the flags on platforms
 * whose <fcntl.h> defines it.
 *
 * Returns whatever open() returns: a file descriptor, or a negative value
 * on failure (errno is left as set by open()).
 */
static int open_as_read_binary(const char *filename) {
#ifdef O_BINARY
    return open(filename, O_RDONLY | O_BINARY);
#else
    return open(filename, O_RDONLY);
#endif
}

/*
 * Counts the bytes of filename in the half-open offset range
 * [start_offset, end_offset) for which isalpha() is true.
 *
 * filename     - path of the file to scan; it is opened and closed here.
 * start_offset - offset of the first byte to examine.
 * end_offset   - offset one past the last byte to examine; when it is not
 *                greater than start_offset nothing is read and 0 is returned.
 *
 * Returns the number of alphabetic bytes found.
 *
 * Any failure (open, seek, read error, or end of file before end_offset is
 * reached) prints a diagnostic on stderr and terminates the process with
 * exit status 1.
 */
off_t count_alphabet(const char *filename, off_t start_offset, off_t end_offset) {
    /* Size of one read() request; reading in blocks avoids one system call per byte. */
    enum { READ_CHUNK = 65536 };

    int fd = open_as_read_binary(filename);
    if (fd < 0) {
        perror("Failed to open file");
        exit(1);
    }
    if (lseek(fd, start_offset, SEEK_SET) < 0) {
        perror("Failed to seek into file");
        exit(1);
    }

    unsigned char buf[READ_CHUNK];
    off_t count = 0;
    off_t remaining = end_offset > start_offset ? end_offset - start_offset : 0;

    while (remaining > 0) {
        /* Never ask for bytes beyond end_offset: they belong to another range. */
        size_t want = remaining < (off_t)sizeof buf ? (size_t)remaining : sizeof buf;
        ssize_t nread = read(fd, buf, want);

        if (nread < 0) {
            if (errno == EINTR) {
                continue; /* interrupted before any data was transferred: retry */
            }
            perror("Error reading from file");
            exit(1);
        }
        if (nread == 0) {
            /* End of file before the requested range was fully read. */
            fprintf(stderr, "failed to read %lld bytes from %s\n",
                    (long long)(end_offset - start_offset), filename);
            exit(1);
        }

        /* buf holds unsigned char, so every value passed to isalpha() is valid. */
        for (ssize_t k = 0; k < nread; k++) {
            if (isalpha(buf[k])) {
                count++;
            }
        }
        remaining -= nread;
    }

    close(fd);
    return count;
}
/*
 * Entry point.
 *
 * Usage: <program> <filename> <number_of_children>
 *
 * The file size is divided evenly between the children; the remainder of
 * the division is added to the last child's range. Each child counts the
 * alphabetic bytes of its range with count_alphabet(), reports the result
 * on stderr, and sends the count (one off_t, in binary) to the parent
 * through a shared pipe. The parent adds up the counts, reaps the children
 * and prints the total on stderr.
 *
 * Returns 0 when every child delivered its count and exited with status 0,
 * and 1 otherwise. Invalid arguments and system call failures terminate the
 * process with exit status 1.
 */
int main(int argc, char *argv[]) {
    if (argc != 3) {
        fprintf(stderr, "Usage: %s <filename> <number_of_children>\n", argv[0]);
        exit(1);
    }
    const char *filename = argv[1];

    /* strtol rather than atoi: rejects trailing garbage and out-of-range values. */
    errno = 0;
    char *end = NULL;
    long requested = strtol(argv[2], &end, 10);
    if (end == argv[2] || *end != '\0' || errno == ERANGE ||
        requested <= 0 || requested > INT_MAX) {
        fprintf(stderr, "Number of children must be a strictly positive integer.\n");
        exit(1);
    }
    int num_children = (int)requested;

    /* The file is opened here only to learn its size. */
    int fd = open_as_read_binary(filename);
    if (fd < 0) {
        perror("Failed to open file");
        exit(1);
    }
    off_t file_size = lseek(fd, 0, SEEK_END);
    if (file_size < 0) {
        fprintf(stderr, "File is unseekable.\n");
        exit(1);
    }
    close(fd);

    off_t offset_range = file_size / num_children;
    off_t extra_range = file_size % num_children;

    int pipefd[2];
    if (pipe(pipefd) == -1) {
        perror("Pipe failed");
        exit(1);
    }

    for (int i = 0; i < num_children; ++i) {
        pid_t pid = fork();
        if (pid < 0) {
            perror("Fork failed");
            exit(1);
        }
        if (pid == 0) {
            /* Child: it only writes to the pipe. */
            close(pipefd[0]);

            off_t start_offset = i * offset_range;
            off_t end_offset = start_offset + offset_range; /* one past the last byte */
            if (i == num_children - 1) {
                end_offset += extra_range;
            }
            off_t count = count_alphabet(filename, start_offset, end_offset);

            /*
             * The range is printed with an inclusive last offset (for example
             * 0 ~ 499), hence end_offset - 1. An empty range therefore shows a
             * last offset smaller than the first one.
             */
            fprintf(stderr, "Process[%d] has found %lld alphabet letters in (%lld ~ %lld).\n",
                    (int)getpid(), (long long)count,
                    (long long)start_offset, (long long)(end_offset - 1));

            /* Send the count to the parent, retrying if a signal interrupts the write. */
            for (;;) {
                ssize_t write_count = write(pipefd[1], &count, sizeof count);
                if (write_count == (ssize_t)sizeof count) {
                    return 0;
                }
                if (write_count == -1) {
                    if (errno == EINTR) {
                        continue;
                    }
                    perror("error writing to the pipe");
                    exit(1);
                }
                fprintf(stderr, "cannot write to the pipe\n");
                exit(1);
            }
        }
    }

    /*
     * Parent: close its own write end so that read() reports end-of-file once
     * every child has exited. The counts are drained BEFORE the children are
     * reaped, so no child can stay blocked on a full pipe while the parent
     * sits in waitpid().
     */
    close(pipefd[1]);

    off_t total_count = 0;
    int counts_received = 0;
    for (;;) {
        off_t count;
        ssize_t read_count = read(pipefd[0], &count, sizeof count);
        if (read_count == (ssize_t)sizeof count) {
            total_count += count;
            counts_received++;
            continue;
        }
        if (read_count == 0) {
            break; /* all write ends are closed: no more counts will arrive */
        }
        if (read_count < 0) {
            if (errno == EINTR) {
                continue;
            }
            perror("error reading from the pipe");
            exit(1);
        }
        fprintf(stderr, "partial read from the pipe\n");
        exit(1);
    }
    close(pipefd[0]);

    /* Reap every child and note the ones that did not finish normally. */
    int failed_children = 0;
    for (;;) {
        int status;
        pid_t pid = waitpid(-1, &status, 0);
        if (pid < 0) {
            if (errno == EINTR) {
                continue;
            }
            break; /* no children left to wait for */
        }
        if (WIFEXITED(status)) {
            int exit_status = WEXITSTATUS(status);
            if (exit_status != 0) {
                fprintf(stderr, "child process [%d] returned non zero status %d\n",
                        (int)pid, exit_status);
                failed_children++;
            }
        } else if (WIFSIGNALED(status)) {
            fprintf(stderr, "child process [%d] was terminated by signal %d\n",
                    (int)pid, WTERMSIG(status));
            failed_children++;
        }
    }

    int incomplete = failed_children != 0 || counts_received != num_children;
    if (incomplete) {
        fprintf(stderr, "only %d of %d children reported a count; the total is incomplete\n",
                counts_received, num_children);
    }
    fprintf(stderr, "Process[%d] has found %lld alphabet letters in %s\n",
            (int)getpid(), (long long)total_count, filename);
    return incomplete ? 1 : 0;
}
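请注意:多个子进程同时向同一个管道写入时,只有单次写入的字节数不超过 PIPE_BUF,POSIX 才保证该次写入是原子的,不会与其他进程写入的数据交错。这里每个子进程只以一次 write 写入 sizeof(off_t) 个字节的二进制计数,满足这一条件;如果改为写入超过 PIPE_BUF 的数据块,数据就可能交错,此时每次写入操作都应先锁定用于写入管道的文件句柄。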