我有一个读取和解析文件的函数,但是 valid_lines 计数器没有增加,尽管它下面的 printf 按预期工作。我已经尝试注释掉函数的不同部分,看来
if (sscanf...
块可能导致了这个问题。
/*
 * Reads data_files/<filename> line by line, tries to parse every line as
 * "<max><TAB><min>" and reports, through the output pointers, the number of
 * parsed lines, the smallest min, the largest max and the mean of the
 * per-line midpoints (max + min) / 2.
 *
 * The outputs are only written at the very end; if the file cannot be opened
 * the function returns without touching them.
 *
 * NOTE: this is the version from the question. It is kept verbatim and still
 * contains the buffer overflow flagged at the sscanf() call below.
 */
void read_file(char *filename, int *city_num_lines, double *city_min, double *city_max, double *city_avg) {
// files are in data_files/ directory
char path[LINE_MAX_LENGTH] = "data_files/";
// NOTE(review): nothing checks that filename fits into the rest of path.
strcat(path, filename);
FILE *file = fopen(path, "r");
// check if file exists
if (file == NULL) {
printf("Error: Could not open file %s\n", filename);
return;
}
char buffer[LINE_MAX_LENGTH];
int valid_lines = 0;
// BIG_NUM / -BIG_NUM act as starting sentinels for the running min / max.
double min = BIG_NUM;
double max = -BIG_NUM;
double sum = 0;
double avg = 0;
// read line by line
while (fgets(buffer, LINE_MAX_LENGTH, file) != NULL) {
// skip first line
// (in fact this skips every line whose first character is 'm')
if (buffer[0] == 'm') {
continue;
}
// separate by tab (\t)
double cur_max, cur_min;
char delim;
// BUG: %[\t] stores the matched TAB character(s) followed by a terminating
// NUL, but delim has room for a single char. The extra byte(s) land in
// neighbouring memory (undefined behavior); per the discussion in this
// document that is what keeps resetting valid_lines.
if (sscanf(buffer, "%lf%[\t]%lf", &cur_max, &delim, &cur_min) != 3) {
// printf("skipped line %s", buffer);
continue;
}
// increment line count
valid_lines++;
printf("num_lines incremented\n");
// update min and max
if (cur_max > max) {
max = cur_max;
printf("max: %lf\n", max);
}
if (cur_min < min) {
min = cur_min;
printf("min: %lf\n", min);
}
// accumulate the midpoint of this line's max and min
sum += (cur_max + cur_min) / 2;
// printf("%lf - %lf\n", min, max);
}
printf("num_lines: %d\n", valid_lines);
// calculate average
// NOTE(review): valid_lines is 0 when no line parsed, so this divides by zero.
avg = sum / valid_lines;
// update city values
*city_num_lines = valid_lines;
*city_min = min;
*city_max = max;
*city_avg = avg;
fclose(file); // close file
}
这里是控制台输出(最后几行,因为它永远持续下去)
...
num_lines incremented
num_lines incremented
num_lines incremented
num_lines incremented
num_lines incremented
num_lines incremented
num_lines incremented
num_lines incremented
num_lines incremented
num_lines incremented
num_lines incremented
num_lines incremented
num_lines incremented
num_lines incremented
num_lines: 1
有人知道为什么会这样吗?
更新: 问题的根本原因是 sscanf 中的
"%lf%[\t]%lf"
,因为我只给它 1 个字符空间但是它以 NUL 字符结尾,导致它影响内存的另一部分,在这种情况下是 valid_lines
变量.
%[\t]
转换将写入 TAB 字符,加上一个终止 NUL 字符,但你只给了它一个字符的空间。
OP 的代码会导致缓冲区溢出,因为
char delim
对于 "\t"
的 string来说太小了。
请勿在没有
width的情况下使用
"%[...]"
或"%s"
。
// separate by tab (\t)
double cur_max, cur_min;
// char delim;
// Two bytes: one for the TAB matched by %1[\t] and one for the terminating NUL.
char delim[2];
//if (sscanf(buffer, "%lf%[\t]%lf", &cur_max, &delim, &cur_min) != 3) {
// The width 1 caps what %[ may store, so the write always fits in delim.
if (sscanf(buffer, "%lf%1[\t]%lf", &cur_max, delim, &cur_min) != 3) {
// printf("skipped line %s", buffer);
continue;
}
上述代码允许数字文本之间出现 1 个或多个
"\t"
,因为 "%1[\t]"
需要 1 个且仅 1 个 '\t'
,而以下 "%lf"
会先跳过 0 个或多个前导空白字符(包括制表符),然后再读取数字。
如果代码需要使用
scanf()
,并且要求数字之间有且仅有 1 个制表符,可以考虑使用 "%n"
来记录扫描的偏移量。使用 "%n%*1[\t] %n"
时,只要分隔符不是恰好 1 个制表符,就会有 n1 + 1 != n2,该行即被判定为无效
。
// n1: offset right after the first number.
// n2: offset after the single TAB and any further whitespace that follows it.
int n1, n2;
// %*1[\t] matches exactly one TAB without storing it and %n does not count as
// a conversion, so a fully parsed line yields 2. n2 is only read when the
// return value is 2, i.e. after both %n directives have been processed.
if (sscanf(buffer, "%lf%n%*1[\t] %n%lf", &cur_max, &n1, &n2, &cur_min) != 2 ||
n1 + 1 != n2) {
printf("skipped line <%s>\n", buffer);
// NOTE(review): inside the read loop a `continue;` presumably belongs here - confirm.
}
或者,使用
strtod()
、strchr()
、strtok()
、strcspn()
等进行解析
检查文件名的大小否则
strcat()
可能会导致缓冲区溢出。使用 snprintf()
可能更容易,然后检查生成的 path
是否太长(定义为 LINE_MAX_LENGTH -1 或更长)。
fgets()
遇到超长的行时会分多次读取,每次只返回其中的一部分。严格的解析器应当检测出这种情况。
delim
只有 1 个字节,但你至少需要 2 个字节。此外,在读取字符串时,始终要指定最大字段宽度。我建议像下面这样用 '*' 丢弃分隔符(只匹配、不存储):
sscanf(buffer, "%lf%*[\t]%lf", &cur_max, &cur_min) != 2
scanf()
将忽略前导空格,因此您将永远无法按照您所说的要求严格解析您的输入。
在计算
avg = sum / valid_lines
之前,你可能需要先排除 valid_lines == 0 的情况。
#include <ctype.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#define BIG_NUM 42
#define LINE_MAX_LENGTH 100

/*
 * Parses one record of the exact form "<max>\t<min>\n".
 *
 * Returns 1 and stores both numbers on success, 0 otherwise (the outputs are
 * then left untouched). Leading whitespace in either field, any separator
 * other than a single TAB, and a missing trailing newline are all rejected.
 */
static int parse_record(const char *line, double *max_out, double *min_out)
{
    const char *field = line;
    char *end;

    /* strtod() silently skips leading whitespace, so reject it up front.
     * The cast keeps isspace() defined for negative char values. */
    if (isspace((unsigned char)*field)) {
        return 0;
    }
    double first = strtod(field, &end);
    /* end == field means nothing was converted; strtod() never sets end to
     * NULL. The number must be followed directly by one TAB. */
    if (end == field || *end != '\t') {
        return 0;
    }

    field = end + 1;
    /* A second TAB or any other whitespace here means more than one separator. */
    if (isspace((unsigned char)*field)) {
        return 0;
    }
    double second = strtod(field, &end);
    /* The record must end right after the second number with a newline. */
    if (end == field || *end != '\n') {
        return 0;
    }

    *max_out = first;
    *min_out = second;
    return 1;
}

/*
 * Reads data_files/<filename> and summarises its "<max>\t<min>" records.
 *
 * filename        name of the file inside the data_files/ directory
 * city_num_lines  out: number of records that parsed successfully
 * city_min        out: smallest <min> seen, or BIG_NUM if there was no record
 * city_max        out: largest <max> seen, or -BIG_NUM if there was no record
 * city_avg        out: mean of the per-record midpoints (max + min) / 2,
 *                 or 0 if there was no record
 *
 * Lines starting with 'm' are treated as a header and skipped silently.
 * Every other line that does not parse is reported on stderr and skipped.
 * If the path is too long or the file cannot be opened, a message is printed
 * and the outputs keep the "no record" values listed above.
 */
void read_file(char *filename, int *city_num_lines, double *city_min, double *city_max, double *city_avg) {
    /* Set the outputs before any early return so callers never read garbage. */
    *city_num_lines = 0;
    *city_min = BIG_NUM;
    *city_max = -BIG_NUM;
    *city_avg = 0;

    char path[LINE_MAX_LENGTH];
    /* snprintf() returns the length the full string would have had, which
     * detects truncation without rejecting a path that fits exactly. */
    int path_len = snprintf(path, sizeof path, "data_files/%s", filename);
    if (path_len < 0 || (size_t)path_len >= sizeof path) {
        printf("path name too long\n");
        return;
    }

    FILE *file = fopen(path, "r");
    if (!file) {
        printf("Error: Could not open file %s\n", filename);
        return;
    }

    char buffer[LINE_MAX_LENGTH];
    double midpoint_sum = 0;
    /* Set while the remaining chunks of an over-long line are being discarded. */
    int skipping_tail = 0;

    while (fgets(buffer, sizeof buffer, file) != NULL) {
        /* fgets() hands back a line longer than the buffer in several chunks;
         * only the chunk that starts a line may be parsed as a record. */
        int complete = strchr(buffer, '\n') != NULL;
        if (skipping_tail) {
            skipping_tail = !complete;
            continue;
        }
        skipping_tail = !complete;

        /* header line */
        if (buffer[0] == 'm') {
            continue;
        }

        double max;
        double min;
        if (!parse_record(buffer, &max, &min)) {
            fprintf(stderr, "skipped line %s", buffer);
            continue;
        }

        (*city_num_lines)++;
        /* Seed min/max from the first record so values beyond +/-BIG_NUM are
         * not clamped by the sentinels. */
        if (*city_num_lines == 1 || max > *city_max) {
            *city_max = max;
        }
        if (*city_num_lines == 1 || min < *city_min) {
            *city_min = min;
        }
        midpoint_sum += (max + min) / 2;
    }

    /* Guard against dividing by zero when no record was valid. */
    if (*city_num_lines > 0) {
        *city_avg = midpoint_sum / *city_num_lines;
    }
    fclose(file);
}
/*
 * Demo driver: summarises data_files/1.txt and prints the result.
 */
int main(void) {
    /* Start from defined values: read_file() can return early (for example
     * when the file cannot be opened), and printing unwritten locals would
     * read indeterminate values. */
    int city_num_lines = 0;
    double city_min = 0;
    double city_max = 0;
    double city_avg = 0;

    read_file("1.txt", &city_num_lines, &city_min, &city_max, &city_avg);
    printf("city_num_lines: %d, city_min: %f, city_max: %f, city_avg: %f\n", city_num_lines, city_min, city_max, city_avg);
    return 0;
}
使用示例输入文件 1.txt,只有最后一行有效:
m
x
1 1
2 2
3 3
4 4
5 5
示例会话返回:
skipped line x
skipped line 1 1
skipped line 2 2
skipped line 3 3
skipped line 4 4
city_num_lines: 1, city_min: 5.000000, city_max: 5.000000, city_avg: 5.000000