我想从命令行读取文件名并检查其格式是否正确,例如:(1) 总长度,(2) 预期的扩展名,(3) '_' 的位置,以及其他各字段的取值。
顺序应如下:
$check.exe input_file L2A30000_0102051303042026_0001.dat
程序应检查输出文件名(L2A30000_0102051303042026_0001.dat)是否符合预期的格式(不比较具体的值,只检查各部分的类型和长度)。
/*
 * Check whether a string consists solely of decimal digits.
 *
 * str: NUL-terminated string to inspect; must not be NULL.
 * Returns 1 if every character is a digit (an empty string also yields 1,
 * because no character fails the check), 0 otherwise.
 */
int isNumeric(const char *str) {
    while (*str) {
        /* <ctype.h> functions require an argument representable as
           unsigned char (or EOF); plain char may be negative, so convert. */
        if (!isdigit((unsigned char)*str)) {
            return 0; // Not a digit
        }
        str++;
    }
    return 1; // All characters are digits
}
// Entry point: expects exactly two arguments (input file, output file) and
// tries to validate the layout of the output file name with one sscanf call.
// Returns 1 on a usage error or when the check fails, 0 otherwise.
int main(int argc, char *argv[]) {
// NOTE(review): the bare word on the line after the next one is the wrapped
// tail of that comment; it is outside the comment and will not compile as-is.
// Check if the correct number of command line arguments is
provided
if (argc != 3) {
printf("Usage: %s inputfile outputfile\n", argv[0]);
return 1;
}
// Extract the output file name from the command line arguments
const char *outputFileName = argv[2];
// Define the expected format
// (single characters plus character buffers that receive the scanned fields)
char asciiChar1, numChar1, asciiChar2, numChar2, numChar3[5],
underscore1, numChar4[17], underscore2, numChar5[5],
numChar6[4], extension[4];
// NOTE(review): the format string below holds 12 conversions but only 11
// arguments follow it, which is undefined behavior. In addition:
//  - "%1[0-9]" stores at most one digit into numChar5;
//  - a literal '_' is required between the two "%3[0-9]" conversions;
//  - "%4[.dat]" is a scanset (any of '.', 'd', 'a', 't'), not the literal
//    text ".dat", and may store 4 characters plus the terminating NUL,
//    one byte more than extension[4] can hold.
int result = sscanf(outputFileName,
"%c%c%c%c%4[0-9]%c%16[0-9]%c%1[0-9]%3[0-9]_%3[0-9]%4[.dat]",
&asciiChar1, &numChar1, &asciiChar2,
&numChar2, numChar3, &underscore1, numChar4, &underscore2,
numChar5, numChar6, extension);
// Debugging print statement
// NOTE(review): none of these variables is initialized, so any field that
// sscanf did not assign is read here with an indeterminate value.
printf("Debug: sscanf result: %d\n", result);
printf("Debug: asciiChar1: %c\n", asciiChar1);
printf("Debug: numChar1: %c\n", numChar1);
printf("Debug: asciiChar2: %c\n", asciiChar2);
printf("Debug: numChar2: %c\n", numChar2);
printf("Debug: numChar3: %s\n", numChar3);
printf("Debug: underscore1: %c\n", underscore1);
printf("Debug: numChar4: %s\n", numChar4);
printf("Debug: underscore2: %c\n", underscore2);
printf("Debug: numChar5: %s\n", numChar5);
printf("Debug: numChar6: %s\n", numChar6);
printf("Debug: extension: %s\n", extension);
// Check if the extracted values match the expected format
// NOTE(review): underscore1 and underscore2 are never compared against '_'.
// The last two tests can never both be false: a string equal to ".dat" has
// length 4, so either strlen(extension) != 3 or strcmp(...) != 0 holds and
// the error branch is always taken.
if (result != 12 || !isalpha(asciiChar1) || !isdigit(numChar1) ||
!isalpha(asciiChar2) || !isdigit(numChar2) ||
strlen(numChar3) != 4 || !isNumeric(numChar3) ||
strlen(numChar4) != 16 || !isNumeric(numChar4) ||
strlen(numChar5) != 4 || !isNumeric(numChar5) ||
strlen(numChar6) != 3 || !isNumeric(numChar6) ||
strlen(extension) != 3 || strcmp(extension, ".dat") != 0) {
printf("Error: Output file format is incorrect.\n");
return 1;
}
// If all checks pass, the output file format is correct
printf("Output file format is correct.\n");
return 0;
}
命令行输入:
.\check.exe inputfile L2A30000_0102051303042026_0001.dat
这是我得到的输出:
Debug: sscanf result: 9
...
Debug: numChar5: 0001
Debug: extension:
Error: Output file format is incorrect.
这是我期待的输出:
Debug: extension:.dat
只有这一部分不起作用,其他部分都正常。我想检查文件名中的扩展名(extension)是否为 .dat;如果不是,程序应打印错误消息并退出。
我建议您把格式字符串拆成多行(在源码中加入一些空白排版),让每个转换说明与对应的参数逐一对齐:
// The sscanf call laid out one group per line so that each conversion can be
// paired with the argument it consumes: 12 conversions against 11 arguments.
int result = sscanf(outputFileName,
"%c%c"  // conversions 1-2  -> asciiChar1, numChar1
"%c%c"  // conversions 3-4  -> asciiChar2, numChar2
"%4[0-9]"  // conversion 5 -> numChar3 (up to 4 digits)
"%c"  // conversion 6 -> underscore1
"%16[0-9]"  // conversion 7 -> numChar4 (up to 16 digits)
"%c" // conversion 8 -> underscore2
"%1[0-9]"  // conversion 9 -> numChar5, but reads at most ONE digit
"%3[0-9]_%3[0-9]%4[.dat]",  // 10 -> numChar6; literal '_'; 11 -> extension; 12 -> no argument left
&asciiChar1, &numChar1,
&asciiChar2, &numChar2,
numChar3,
&underscore1,
numChar4,
&underscore2,
numChar5,
numChar6,
extension  // 11th and last argument; "%4[.dat]" has nothing to store into
);
这样排列之后可以看出:到第二个下划线为止,格式与参数都是对应的。接下来的 "%1[0-9]" 只期望 1 位数字,却存入 char numChar5[5],与该变量的大小不匹配。随后的 "%3[0-9]" 再读取 3 位数字,存入 char numChar6[4],这没有问题。然后格式串要求第三个下划线,而输入中并不存在。之后还有 3 位数字("%3[0-9]"),却没有为它准备对应的变量。最后的 "%4[.dat]" 最多存入 4 个字符再加上结尾的 '\0',会使 char extension[4] 发生缓冲区溢出。总共有 12 个转换说明,却只有 11 个参数,这是未定义行为。
您可以把固定不变的字符(两个下划线和 ".dat")直接硬编码到格式字符串中,从而简化代码:
#include <ctype.h>
#include <stdio.h>
#include <string.h>
/*
 * Return 1 if str consists only of decimal digits, 0 otherwise.
 * The empty string yields 1. str must be a non-NULL, NUL-terminated string.
 */
int isNumeric(const char *str) {
    /* Advance past every digit. The cast is required because <ctype.h>
       functions are only defined for unsigned char values (or EOF), while
       plain char may be negative. */
    while (isdigit((unsigned char)*str)) {
        str++;
    }
    /* All-digit input leaves the cursor on the terminating NUL. */
    return *str == '\0';
}
/*
 * Validate the output file name passed as the second command-line argument.
 *
 * Expected layout: letter, digit, letter, digit, 4 digits, '_', 16 digits,
 * '_', 4 digits, then ".dat" and nothing else
 * (e.g. L2A30000_0102051303042026_0001.dat).
 *
 * Returns 0 when the name matches, 1 on a usage error or a malformed name.
 */
int main(int argc, char *argv[]) {
    if (argc != 3) {
        printf("Usage: %s inputfile outputfile\n", argv[0]);
        return 1;
    }
    const char *outputFileName = argv[2];

    /* Everything is initialised so the debug output below stays well defined
       even when sscanf stops before assigning a field. */
    char asciiChar1 = '?', numChar1 = '?', asciiChar2 = '?', numChar2 = '?';
    char numChar3[5] = "";
    char numChar4[17] = "";
    char numChar5[5] = "";
    /* Set by %n only if the whole format, including ".dat", was matched. */
    int consumed = -1;

    int result = sscanf(outputFileName,
        "%c%c"
        "%c%c"
        "%4[0-9]"
        "_"
        "%16[0-9]"
        "_"
        "%4[0-9]"
        ".dat%n",
        &asciiChar1, &numChar1,
        &asciiChar2, &numChar2,
        numChar3,
        numChar4,
        numChar5,
        &consumed
    );
    printf("Debug: sscanf result: %d\n", result);
    printf("Debug: asciiChar1: %c\n", asciiChar1);
    printf("Debug: numChar1: %c\n", numChar1);
    printf("Debug: asciiChar2: %c\n", asciiChar2);
    printf("Debug: numChar2: %c\n", numChar2);
    printf("Debug: numChar3: %s\n", numChar3);
    printf("Debug: numChar4: %s\n", numChar4);
    printf("Debug: numChar5: %s\n", numChar5);

    /* The return value only counts assigning conversions, so it is 7 whether
       or not the trailing ".dat" matched. %n is reached only after ".dat"
       matched, and the character at that offset must be the terminator so
       that trailing junk is rejected as well. */
    if (result != 7 || consumed < 0 || outputFileName[consumed] != '\0' ||
        !isalpha((unsigned char)asciiChar1) || !isdigit((unsigned char)numChar1) ||
        !isalpha((unsigned char)asciiChar2) || !isdigit((unsigned char)numChar2) ||
        strlen(numChar3) != 4 || !isNumeric(numChar3) ||
        strlen(numChar4) != 16 || !isNumeric(numChar4) ||
        strlen(numChar5) != 4 || !isNumeric(numChar5)
    ) {
        printf("Error: Output file format is incorrect.\n");
        return 1;
    }
    printf("Output file format is correct.\n");
    return 0;
}
示例运行:
./a.out input_file L2A30000_0102051303042026_0001.dat
Debug: sscanf result: 7
Debug: asciiChar1: L
Debug: numChar1: 2
Debug: asciiChar2: A
Debug: numChar2: 3
Debug: numChar3: 0000
Debug: numChar4: 0102051303042026
Debug: numChar5: 0001
Output file format is correct.
另一种方法是不使用 sscanf,而是直接逐段解析文件名(is_valid_filename());同样的逻辑也可以用一个小型解释器来实现(is_valid_filename2()):
#include <ctype.h>
#include <stdio.h>
#include <string.h>
/*
 * Consume one alphabetic character.
 *
 * s: current position in the string, or NULL if an earlier step failed.
 * Returns the position just past the letter, or NULL when s is NULL or the
 * character at s is not alphabetic (this includes the terminating NUL).
 */
const char *alpha(const char *s) {
    if (!s) {
        return NULL;
    }
    /* <ctype.h> functions need an unsigned char value; plain char may be
       negative. */
    if (!isalpha((unsigned char)*s)) {
        return NULL;
    }
    return s + 1;
}
/*
 * Consume exactly n decimal digits.
 *
 * s: current position in the string, or NULL if an earlier step failed.
 * n: number of digits required.
 * Returns the position just past the n digits, or NULL when s is NULL or any
 * of the next n characters is not a digit. The scan never runs past the end
 * of the string, because the terminating NUL is not a digit.
 */
const char *digits(const char *s, size_t n) {
    if (!s) {
        return NULL;
    }
    for (size_t i = 0; i < n; i++) {
        /* <ctype.h> functions need an unsigned char value; plain char may
           be negative. */
        if (!isdigit((unsigned char)s[i])) {
            return NULL;
        }
    }
    return s + n;
}
/*
 * Consume the literal text s2 at the current position.
 *
 * s:  current position in the string, or NULL if an earlier step failed.
 * s2: NUL-terminated text that must appear at s.
 * Returns the position just past the matched text, or NULL when s is NULL or
 * the text at s does not start with s2.
 */
const char *str(const char *s, const char *s2) {
    if (s == NULL) {
        return NULL;
    }
    const size_t len = strlen(s2);
    /* strncmp stops at the end of s, so a short s simply fails to match. */
    return strncmp(s, s2, len) == 0 ? s + len : NULL;
}
/*
 * Return 1 if s has the layout
 *   letter, 1 digit, letter, 5 digits, "_", 16 digits, "_", 4 digits, ".dat"
 * with nothing following it, and 0 otherwise.
 *
 * alpha(), digits() and str() each return NULL on a mismatch and pass a NULL
 * input straight through, so the calls can be chained without checking the
 * intermediate results.
 */
int is_valid_filename(const char *s) {
    const char *after_head = digits(alpha(digits(alpha(s), 1)), 5);
    const char *after_mid = digits(str(after_head, "_"), 16);
    const char *after_tail = digits(str(after_mid, "_"), 4);
    const char *end = str(after_tail, ".dat");

    /* Valid only if every step matched and the whole string was consumed. */
    return end != NULL && *end == '\0';
}
/*
 * Table-driven variant of the file name check: a list of steps (one letter,
 * n digits, or a literal string) is applied to s in order.
 *
 * Returns 1 if every step matches and the string ends right after the last
 * step, 0 otherwise.
 */
int is_valid_filename2(const char *s) {
    enum step_kind { ALPHA, DIGITS, STR };
    struct step {
        enum step_kind type;
        union {
            int n;          /* digit count, used by DIGITS steps */
            const char *s;  /* literal text, used by STR steps */
        };
    };
    static const struct step format[] = {
        { ALPHA },
        { DIGITS, .n = 1 },
        { ALPHA },
        { DIGITS, .n = 5 },
        { STR, .s = "_" },
        { DIGITS, .n = 16 },
        { STR, .s = "_" },
        { DIGITS, .n = 4 },
        { STR, .s = ".dat" },
    };
    const struct step *const end = format + sizeof format / sizeof *format;

    /* Stop as soon as one step fails (s becomes NULL). */
    for (const struct step *step = format; s != NULL && step != end; step++) {
        if (step->type == ALPHA) {
            s = alpha(s);
        } else if (step->type == DIGITS) {
            s = digits(s, step->n);
        } else if (step->type == STR) {
            s = str(s, step->s);
        }
    }
    return s != NULL && *s == '\0';
}
/*
 * Run both validators on the second command-line argument and print
 * "valid" or "invalid" for each, one result per line.
 * Returns 1 when the argument count is wrong, 0 otherwise.
 */
int main(int argc, char *argv[]) {
    static const char *const verdict[] = { "invalid", "valid" };

    if (argc != 3) {
        printf("Usage: %s inputfile outputfile\n", argv[0]);
        return 1;
    }

    const char *name = argv[2];
    /* Both validators return 0 or 1, which indexes the verdict table. */
    const int first = is_valid_filename(name);
    const int second = is_valid_filename2(name);
    printf("%s\n", verdict[first]);
    printf("%s\n", verdict[second]);
    return 0;
}