为什么 sscanf 无法正确提取此字符串格式中的所有数字?

问题描述 投票:0回答:1

我正在解析 GNSS 数据,我使用的模块 QuecTel BG96 有 UART 串行接口。当尝试提取与消息相关的数据时,除了时间之外,大部分数据都可以使用

sscanf
提取。

这是一个示例消息,与下面的代码中使用的相同:

"\r\n+QGPSLOC: 010842.0,32.04415,5.31028,1.7,55.0,2,150.36,0.0,0.0,
060224,03\r\n\r\nOK\r\n"

为了完整起见,这里是字段的解释,如手册中所示:

+QGPSLOC:<UTC>,<latitude>,<longitude>,<hdop>,<altit ude>,<fix>,<cog>,<spkm>,<spkn>,<date>,<nsat> OK  

  • UTC 是世界标准时间。 格式:hhmmss.sss(引自GPGGA句子)。

问题在于第一组数字,表示时间为 hhmmss.sss,提取后始终为 0

输出

30960206-010842.00,       32.04415,         5.31028,      1.7, 
55.0, 0002,       150.0060,         0.0000, 00000003

代码

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

const char* utc_datetime_format =
    "%02u%02u%02u-%02u%02u%02u.%03u";
const char* print_format =
    "%18.18s, %14.5f, %15.5f, %8.1f, %10.1f, %04u, %14.4f, "
    "%14.4f, %08u\n";

typedef struct
{
    uint64_t year : 12;
    uint64_t month : 4;
    uint64_t day : 8;
    uint64_t hour : 8;
    uint64_t minutes : 8;
    uint64_t seconds : 8;
    uint64_t msec : 16;
} DateAndTime_Compressed;

typedef struct
{
    float latitude;
    float longitude;
} GNSS_Coordinates;

typedef struct
{
    GNSS_Coordinates       coordinates;
    float                  horizontal_precision;
    float                  altitude;
    float                  course_over_ground;
    float                  speed_over_ground_mps;  // m/s
    DateAndTime_Compressed utc_time;
    uint8_t                fix_type;
    uint8_t                num_satellites;
} GNSS_Data;

int GNSS_ParseData(char* buf, GNSS_Data* data)
{
    const char* format =
        "%s %2" PRIu16 "%2" PRIu16 "%2" PRIu16 ".%3" PRIu16
        ",%f,%f,%f,%f,%" PRIu8 ",%3" PRIu16 ".%2" PRIu16
        ",%f,%f,%2" PRIu8 "%2" PRIu8 "%2" PRIu8 ",%" PRIu32
        "%s";
    char front_padding[128] = {0}, back_padding[128] = {0};

    uint16_t year, month, day, hour, minute, second,
        millisecond, cog_deg, cog_min;
    float tmp;

    // For some reason cannot extract hhmmss.sss from
    // message directly
    int ret = sscanf(
        buf, format, front_padding, &hour, &minute, &second,
        &millisecond, &(data->coordinates.latitude),
        &(data->coordinates.longitude),
        &data->horizontal_precision, &data->altitude,
        &data->fix_type, &cog_deg, &cog_min,
        &data->speed_over_ground_mps, &tmp, &day, &month,
        &year, &data->num_satellites, back_padding);
    if (ret != 19)
    {
        // Handle Error (but no error occurs here)
        return -1;
    }
    data->utc_time = (DateAndTime_Compressed){
        .year    = year,
        .month   = month,
        .day     = day,
        .hour    = hour,
        .minutes = minute,
        .seconds = second,
        .msec    = millisecond};

    data->speed_over_ground_mps *=
        (1000.0f / 3600.0f);  // kph to mps
    data->course_over_ground =
        (float)cog_deg + (((float)cog_min) / 60.0f) /
                             100.0f;  // ddd.mm to ddd.dd

    return 0;
}

int main(int argc, char** argv)
{
    char msg[] =
        "\r\n+QGPSLOC: "
        "010842.0,32.04415,5.31028,1.7,55.0,2,150.36,0.0,0."
        "0,060224,03\r\n\r\nOK\r\n";

    GNSS_Data gnss_data = {0};

    int ret = GNSS_ParseData(
        msg,
        &gnss_data);  // ret = 0 but data.utc_time wrong
                      // (dates are correct, times wrong)
    if (ret != 0)
    {
        printf("Failed to parse data\n");
        exit(-1);
    }
    char utc_date_string[64] = {0};
    snprintf(
        utc_date_string, 32, utc_datetime_format,
        gnss_data.utc_time.year, gnss_data.utc_time.month,
        gnss_data.utc_time.day, gnss_data.utc_time.hour,
        gnss_data.utc_time.minutes,
        gnss_data.utc_time.seconds,
        gnss_data.utc_time.msec);

    printf(
        print_format, utc_date_string,
        gnss_data.coordinates.latitude,
        gnss_data.coordinates.longitude,
        gnss_data.horizontal_precision, gnss_data.altitude,
        gnss_data.fix_type, gnss_data.course_over_ground,
        gnss_data.speed_over_ground_mps,
        gnss_data.num_satellites);

    return 0;
}
c scanf
1个回答
0
投票

sscanf()
的格式字符串中有两种类型的错误:

  1. 您使用
    PRN...
    而不是
    SCN...
    。前者适用于
    printf()
    家族,而您需要使用后者。请参阅下文了解差异。
  2. 格式说明符使用的宽度与变量的宽度不匹配。

所以正确的格式是:

    const char* format =
        "%s %2" SCNu16 "%2" SCNu16 "%2" SCNu16 ".%3" SCNu16
        ",%f,%f,%f,%f,%" SCNu8 ",%3" SCNu16 ".%2" SCNu16
        ",%f,%f,%2" SCNu16 "%2" SCNu16 "%2" SCNu16 ",%" SCNu8
        "%s";

注意:请始终使用编译器的最大警告标志集。


现在,为什么某些时间变量会得到零?

为此,我们假设您在当前系统上使用标准编译器,因此我在 Windows 10 上使用 GCC 进行调查。

这个小测试程序显示了格式常量之间的差异:

#include <inttypes.h>
#include <stdio.h>

int main(void) {
  printf("PRIu16 = \"%s\"\n", PRIu16);
  printf("SCNu16 = \"%s\"\n", SCNu16);
  uint16_t canary1 = 0x1122, value, canary2 = 0x5566;
  // 0x3344 = 13124
  printf("sscanf() = %d\n", sscanf("13124", "%" PRIu16, &value));
  printf("canary1 @%p = %04X\n", (void*)&canary1, canary1);
  printf("value   @%p = %04X\n", (void*)&value, value);
  printf("canary2 @%p = %04X\n", (void*)&canary2, canary2);
  return 0;
}

使用一组通用警告标志进行编译已经告诉我们出现了问题:

> gcc -Wall -pedantic formats.c -o formats.exe
formats.c: In function 'main':
formats.c:9:45: warning: format '%u' expects argument of type 'unsigned int *', but argument 3 has type 'uint16_t *' {aka 'short unsigned int *'} [-Wformat=]
   printf("sscanf() = %d\n", sscanf("13124", "%" PRIu16, &value));
                                             ^~~         ~~~~~~
In file included from formats.c:1:
C:/Program Files/mingw-w64/x86_64-8.1.0-posix-seh-rt_v6-rev0/mingw64/x86_64-w64-mingw32/include/inttypes.h:92:17: note: format string is defined here
 #define PRIu16 "u"
formats.c:9:45: warning: format '%u' expects argument of type 'unsigned int *', but argument 3 has type 'uint16_t *' {aka 'short unsigned int *'} [-Wformat=]
   printf("sscanf() = %d\n", sscanf("13124", "%" PRIu16, &value));
                                             ^~~         ~~~~~~
In file included from formats.c:1:
C:/Program Files/mingw-w64/x86_64-8.1.0-posix-seh-rt_v6-rev0/mingw64/x86_64-w64-mingw32/include/inttypes.h:92:17: note: format string is defined here
 #define PRIu16 "u"

结果再现了您的观察结果,

canary1
归零:

> formats.exe
PRIu16 = "u"
SCNu16 = "hu"
sscanf() = 1
canary1 @000000000061FE1E = 0000
value   @000000000061FE1C = 3344
canary2 @000000000061FE1A = 5566

我们看到

PRIu16
实际上是
"u"
sscanf()
用于写入
unsigned int
。在我们的系统上,它通常与
uint32_t
具有相同的宽度。所以
sscanf()
高兴地写入了4个字节,覆盖了
canary1

哪个变量被覆盖取决于变量在内存中的顺序以及值的存储顺序。标准没有定义这些顺序/顺序,所以我在两侧使用了两个金丝雀。

这解释了您观察到的零。


但是当

PRIu16
unsigned int
相关联时,为什么
SCNu16
unsigned short int
相关联?

秘密在于可变参数函数上整数参数的“整数提升”。 printf() 就是这样一个可变参数函数。

如果您为 

uint16_t

指定

printf()
值,它将转换为
int
。由于
int
通常足以保存
uint16_t
的所有潜在值,因此转换结果不是
unsigned int
。但是,在
uint16_t
的值范围内,两种类型的位模式是相同的。
所以

printf()

需要
"u"才能正确打印这些值。
另一方面,

scanf()

不得写入超出所提供变量的空间。所以它

需要
知道宽度,从而导致"hu"
    

© www.soinside.com 2019 - 2024. All rights reserved.