如何在 C 中将 csv 文件读入结构体

问题描述 投票:0回答:1

我正在尝试读取以下格式的 CSV 文件:

imdb_id,title,plot,type,rated,year,released_at,added_at,runtime,genre,director,writer,actors,language,country,awards,imdb_rating,imdb_votes

0,tt0147800,10 Things I Hate About You,"A pretty,popular teenager can't go out on a date until her ill-tempered older sister does.",movie,PG-13,1999,31 Mar 1999,"November 12, 2019",97 min,"Comedy, Drama, Romance",Gil Junger,"Karen McCullah, Kirsten Smith","Heath Ledger, Julia Stiles, Joseph Gordon-Levitt, Larisa Oleynik","English, French",USA,2 wins & 13 nominations.,7.3,"283,945"
2,tt0115433,101 Dalmatians,"An evil high-fashion designer plots to steal Dalmatian puppies in order to make an extravagant fur coat, but instead creates an extravagant mess.",movie,G,1996,27 Nov 1996,"November 12, 2019",103 min,"Adventure, Comedy, Crime, Family",Stephen Herek,"Dodie Smith (novel), John Hughes (screenplay)","Glenn Close, Jeff Daniels, Joely Richardson,Joan Plowright","English, Spanish","USA, UK",Nominated for 1 Golden Globe. Another 3 wins &9 nominations.,5.7,"97,785"

并将每一列放入以下结构中:

/*
 * One CSV record; each member is meant to hold one column of a data row.
 * NOTE(review): MAX is not defined anywhere in this snippet. The fscanf
 * format used with this struct reads at most 2047 characters into every
 * MAX-sized member, so MAX is presumably 2048 - TODO confirm.
 */
typedef struct{

    char imdb_id[12];
    char title[50];
    char plot[MAX];         /* quoted in the sample rows, contains commas */
    char type[15];
    char rated[10];
    int year;
    char released_at[50];
    char added_at[MAX];     /* quoted in the sample rows, contains commas */
    char runtime[50];
    char genre[MAX];
    char director[50];
    char writer[MAX];
    char actors[MAX];
    char language[MAX];
    char country[20];       /* unquoted in the first sample row ("USA"), quoted in the second ("USA, UK") */
    char awards[50];
    float imdb_rating;
    char imdb_votes[MAX];   /* kept as text: the sample values contain a thousands separator */


} order;

我尝试了以下代码,但第二行的 country(国家/地区)字段没有被正确读取和显示。

// Open the CSV for reading. `file`, `or` and `line` are defined outside this snippet.
FILE *f=fopen(file,"r");

    if ( f == NULL)
    {
        printf("Empty\n");
        return 1;
    }


    // NOTE(review): feof() only reports end-of-file after a read has already
    // failed, so the loop condition cannot stop a failing fscanf from running.
    while (!feof(f))
    {
        // A single fscanf call is expected to match one whole CSV row.
        // The format hard-codes which columns are quoted (\"%2047[^\"]\") and
        // which are not (%N[^,]). An unquoted conversion such as %19[^,] stops
        // at the first comma, so it cannot take a quoted value that contains a
        // comma (e.g. "USA, UK" in the country column of the second row).
        // %11s stops at whitespace or after 11 characters, not at a comma.
        // NOTE(review): `read` (the number of converted fields) is never
        // checked in the visible code, so a partial match goes unnoticed.
        int read = fscanf(f, "%11s,%49[^,],\"%2047[^\"]\",%14[^,],%9[^,],%i,%49[^,],\"%2047[^\"]\",%49[^,],\"%2047[^\"]\",%49[^,],\"%2047[^\"]\",\"%2047[^\"]\",\"%2047[^\"]\",%19[^,],%49[^,],%f,\"%2047[^\"]\"\n",
                           or[line].imdb_id,
                           or[line].title,
                           or[line].plot,
                           or[line].type,
                           or[line].rated,
                           & or[line].year,
                           or[line].released_at,
                           or[line].added_at,
                           or[line].runtime,
                           or[line].genre,
                           or[line].director,
                           or[line].writer,
                           or[line].actors,
                           or[line].language,
                           or[line].country,
                           or[line].awards,
                           & or[line].imdb_rating,
                           or[line].imdb_votes);

        // Advance to the next array slot regardless of how many fields matched.
        line++;
    }

    fclose(f);

我认为问题在于这个语法 -> %19[^,]。

但我不知道该怎么做,因为在第一个 CSV 行中,country 字段只是不带引号的 USA,而第二行中它是带引号且包含逗号的 "USA, UK"。

arrays c csv scanf
1个回答
0
投票

我建议你读一行并解析它。虽然您可以使用

sscanf()
来解析该行,但您至少必须对文本字段做条件处理(字段可能带引号,也可能不带引号)。如果您还想支持带引号字段中双引号的转义,则必须改用其他方法(例如下面所示的更通用的解析器):

#define _POSIX_C_SOURCE 200112L
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX 256

/*
 * One parsed CSV row. Text columns are stored in fixed-size buffers; the
 * sizes here are repeated as literal arguments to text_field() in main(),
 * so the two places must be kept in sync.
 */
typedef struct{
    char imdb_id[12];
    char title[50];
    char plot[MAX];
    char type[15];
    char rated[10];
    int year;
    char released_at[50];
    char added_at[MAX];
    char runtime[50];
    char genre[MAX];
    char director[50];
    char writer[MAX];
    char actors[MAX];
    char language[MAX];
    char country[20];
    char awards[50];        /* too small for the second sample row: its value is cut to 49 characters */
    float imdb_rating;
    char imdb_votes[MAX];   /* kept as text: the sample values contain a thousands separator */
} order;

/*
 * Consume one separator character.
 *
 * l:   current parse position; NULL (an earlier step failed) is passed through.
 * sep: the character that must appear at *l.
 *
 * Returns the position just after the separator, or NULL when l is NULL or
 * the character at l is not the separator.
 */
const char *sep(const char *l, char sep) {
    if(l != NULL && *l == sep) {
        return l + 1;
    }
    return NULL;
}

/*
 * Parse a floating-point field at the start of l.
 *
 * l: current parse position; NULL (an earlier step failed) is passed
 *    through so that calls can be chained without intermediate checks.
 * f: receives the converted value. It is written as soon as strtof()
 *    returns, i.e. also when the value is then rejected.
 *
 * Returns a pointer to the first character after the number, or NULL if l
 * is NULL, no number is present, or the value is out of range for float.
 */
const char *float_field(const char *l, float *f) {
    if(!l) return NULL;
    char *endptr;
    *f = strtof(l, &endptr);
    if(l == endptr) {
        fprintf(stderr, "float not found %.32s\n", l);
        return NULL;
    }
    // strtof() reports overflow as +HUGE_VALF or -HUGE_VALF depending on the
    // sign of the input, so both values have to be rejected.
    if(*f == HUGE_VALF || *f == -HUGE_VALF) {
        fprintf(stderr, "float out of range %.32s\n", l);
        return NULL;
    }
    return endptr;
}

/*
 * Parse a base-10 integer field at the start of l.
 *
 * l: current parse position; NULL (an earlier step failed) is passed
 *    through so that calls can be chained without intermediate checks.
 * i: receives the converted value; left untouched on failure.
 *
 * Returns a pointer to the first character after the number, or NULL if l
 * is NULL, no digits are present, or the value does not fit in an int.
 */
const char *int_field(const char *l, int *i) {
    if(!l) return NULL;
    char *endptr;
    // strtol() signals overflow only through errno; clear it first so a stale
    // ERANGE from an earlier call is not misread.
    errno = 0;
    long tmp = strtol(l, &endptr, 10);
    if(l == endptr) {
        fprintf(stderr, "int not found %.32s\n", l);
        return NULL;
    }
    // The errno test covers platforms where long and int have the same width,
    // on which the INT_MIN/INT_MAX comparison alone can never fail.
    if(errno == ERANGE || tmp < INT_MIN || tmp > INT_MAX) {
        fprintf(stderr, "int out of range %.32s\n", l);
        return NULL;
    }
    *i = (int)tmp;
    return endptr;
}

/*
 * Copy one text field starting at l into field.
 *
 * A field that starts with a double quote runs to the next double quote
 * (escaped quotes inside a quoted field are not supported). Any other field
 * runs to the next comma, or to the end of the line / string when it is the
 * last field.
 *
 * l:     current parse position; NULL (an earlier step failed) is passed
 *        through so that calls can be chained without intermediate checks.
 * n:     size of field in bytes, must be > 0.
 * field: destination buffer; always NUL-terminated when l is not NULL.
 *        Values longer than n - 1 bytes are truncated and a message is
 *        written to stderr.
 *
 * Returns the position just after the field (after the closing quote for a
 * quoted field), or NULL if l is NULL or a closing quote is missing.
 */
const char *text_field(const char *l, size_t n, char field[n]) {
    if(!l) return NULL;
    assert(n > 0);
    // Never leave the caller's buffer uninitialized, even for empty input.
    field[0] = '\0';
    if(!*l) return l;
    const int quoted = *l == '"';
    const char *end;
    if(quoted) {
        l++;
        end = strchr(l, '"');
        if(!end) {
            fprintf(stderr, "end quote missing %.32s\n", l);
            return NULL;
        }
    } else {
        // Stop at the line terminator too, so that an unquoted last field
        // neither swallows the '\n' nor returns a pointer past the NUL.
        end = l + strcspn(l, ",\n");
    }
    size_t len = (size_t)(end - l);
    if(len > n - 1) {
        len = n - 1;
        // "%.*s" takes its precision as an int, not a size_t
        fprintf(stderr, "truncating %.*s\n", (int)len, l);
    }
    memcpy(field, l, len);
    field[len] = '\0';
    return end + quoted;
}

/*
 * Read "input.csv" line by line, parse each data row into an `order` and
 * print its members.
 *
 * Returns 0 on success, 1 if the file cannot be opened or a row fails to
 * parse (parsing stops at the first bad row).
 */
int main() {
    const char *path = "input.csv";
    FILE *f = fopen(path, "r");
    if(!f) {
        perror(path);
        return 1;
    }
    // getline() allocates and grows this buffer; it is reused for every line
    // and released once at `out`.
    // NOTE(review): getline() is POSIX.1-2008; glibc documents that it is only
    // declared when _POSIX_C_SOURCE >= 200809L, so the feature-test macro
    // defined at the top of this file may need to be raised.
    char *lineptr = NULL;
    size_t n = 0;
    int rv = 0;
    for(size_t i = 0;; i++) {
        // No second `rv` is declared in here: a loop-local one would shadow
        // the exit status above and a parse failure would still exit with 0.
        if(getline(&lineptr, &n, f) == -1)
            break;
        if(i < 2) continue; // ignore header (the first two lines read are skipped)

        // Zero-initialize so that no member is ever printed uninitialized.
        order o = {0};
        // Each helper returns NULL on failure and passes NULL through, so the
        // whole chain is checked once at the end.
        const char *next = lineptr;
        next = int_field(next, &(int) {0}); // ignore
        next = sep(next, ',');
        next = text_field(next, 12, o.imdb_id);
        next = sep(next, ',');
        next = text_field(next, 50, o.title);
        next = sep(next, ',');
        next = text_field(next, MAX, o.plot);
        next = sep(next, ',');
        next = text_field(next, 15, o.type);
        next = sep(next, ',');
        next = text_field(next, 10, o.rated);
        next = sep(next, ',');
        next = int_field(next, &o.year);
        next = sep(next, ',');
        next = text_field(next, 50, o.released_at);
        next = sep(next, ',');
        next = text_field(next, MAX, o.added_at);
        next = sep(next, ',');
        next = text_field(next, 50, o.runtime);
        next = sep(next, ',');
        next = text_field(next, MAX, o.genre);
        next = sep(next, ',');
        next = text_field(next, 50, o.director);
        next = sep(next, ',');
        next = text_field(next, MAX, o.writer);
        next = sep(next, ',');
        next = text_field(next, MAX, o.actors);
        next = sep(next, ',');
        next = text_field(next, MAX, o.language);
        next = sep(next, ',');
        next = text_field(next, 20, o.country);
        next = sep(next, ',');
        next = text_field(next, 50, o.awards);
        next = sep(next, ',');
        next = float_field(next, &o.imdb_rating);
        next = sep(next, ',');
        next = text_field(next, MAX, o.imdb_votes);
        // The last line of a file may lack a trailing newline; only require
        // the '\n' when something follows the last field.
        if(next && *next)
            next = sep(next, '\n');
        if(!next) {
            fprintf(stderr, "failed to parse line %.32s\n", lineptr);
            rv = 1;
            goto out;
        }
        printf(
            "imdb_id: %s\n"
            "title: %s\n"
            "plot: %s\n"
            "type: %s\n"
            "rated: %s\n"
            "year: %d\n"
            "released_at: %s\n"
            "added_at: %s\n"
            "runtime: %s\n"
            "genre: %s\n"
            "director: %s\n"
            "writer: %s\n"
            "actors: %s\n"
            "language: %s\n"
            "country: %s\n"
            "awards: %s\n"
            "imdb_rating: %f\n"
            "imdb_votes: %s\n"
            "\n",
            o.imdb_id,
            o.title,
            o.plot,
            o.type,
            o.rated,
            o.year,
            o.released_at,
            o.added_at,
            o.runtime,
            o.genre,
            o.director,
            o.writer,
            o.actors,
            o.language,
            o.country,
            o.awards,
            o.imdb_rating,
            o.imdb_votes
        );
    }
out:
    free(lineptr);
    fclose(f);
    return rv;
}

和示例运行:

imdb_id: tt0147800
title: 10 Things I Hate About You
plot: A pretty,popular teenager can't go out on a date until her ill-tempered older sister does.
type: movie
rated: PG-13
year: 1999
released_at: 31 Mar 1999
added_at: November 12, 2019
runtime: 97 min
genre: Comedy, Drama, Romance
director: Gil Junger
writer: Karen McCullah, Kirsten Smith
actors: Heath Ledger, Julia Stiles, Joseph Gordon-Levitt, Larisa Oleynik
language: English, French
country: USA
awards: 2 wins & 13 nominations.
imdb_rating: 7.300000
imdb_votes: 283,945

imdb_id: tt0115433
title: 101 Dalmatians
plot: An evil high-fashion designer plots to steal Dalmatian puppies in order to make an extravagant fur coat, but instead creates an extravagant mess.
type: movie
rated: G
year: 1996
released_at: 27 Nov 1996
added_at: November 12, 2019
runtime: 103 min
genre: Adventure, Comedy, Crime, Family
director: Stephen Herek
writer: Dodie Smith (novel), John Hughes (screenplay)
actors: Glenn Close, Jeff Daniels, Joely Richardson,Joan Plowright
language: English, Spanish
country: USA, UK
awards: Nominated for 1 Golden Globe. Another 3 wins &9 n
imdb_rating: 5.700000
imdb_votes: 97,785

stderr 会告诉您 awards(奖项)字段被截断了,因为该字段的缓冲区只有 50 字节:

truncating Nominated for 1 Golden Globe. Another 3 wins &9 n
© www.soinside.com 2019 - 2024. All rights reserved.