Struct C/C++ - 读取 WAV 标头 32 位浮点 - 字节偏移错误

问题描述 投票:0回答:2

我使用一个结构体来读取带有 58 字节 wav 标头的音频格式 3(=IEEE FLOAT 32 位)的 Wav 文件。问题:字节 38 以“事实”块开头。这似乎没问题。 DwFactSize 的偏移量应该为 42。问题就开始了!偏移量是44!首先我认为问题出在填充位上。所以我尝试了 uints、unsigned chars 和 attribute((packed))。这不会改变任何事情。

这是wav头的定义:

0 - 3 'RIFF'/'RIFX' 小/大端

4 - 7 wRiffLength 文件长度减去 8 字节 riff 标头

8 - 11 '波浪'

12 - 15 'fmt'

16 - 19 wFmtSize 格式块的长度减去 8 字节标头

20 - 21 wFormatTag 识别 PCM、ULAW 等

22 - 23 个频道

每通道每秒 24 - 27 个 dwSamplesPerSecond 样本

28 - 31 dwAvgBytesPerSec 对于压缩格式来说并不简单

32 - 33 wBlockAlign 基本块大小

34 - 35 wBitsPerSample 对于压缩格式而言并非易事

(直到字节 35,就像通常的 44 字节标头一样)

36 - 37 wExtSize = 0 格式扩展的长度

38 - 41“事实”

42 - 45 dwFactSize = 4 事实块的长度减去 8 字节 标题

46 - 49 dwSamplesWritten 实际写出的样本数

50 - 53“数据”

54 - 57 dwDataLength 数据块的长度减去 8 字节标头


输出:

...

直到这里...正确...

事实[4] 38

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

DwFactSize 44

dwSamplesWritten 48

数据[4] 52

dw数据长度56

快速破解显示偏移问题:

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <cstring>
#include <iostream>
#include <cstddef>

typedef struct  WAV_HEADER
{
unsigned char       RIFF[4];        /* RIFF Header      */ //Magic header
unsigned long       ChunkSize;      /* RIFF Chunk Size  */
unsigned char       WAVE[4];        /* WAVE Header      */
unsigned char       fmt[4];         /* FMT header       */
unsigned long       Subchunk1Size;  /* Size of the fmt chunk: 16=PCM, 18=IEEE Float, 40=Extensible                     */
unsigned short      AudioFormat;    /* Audio format 1=PCM, 3=IEEE Float, 6=mulaw,7=alaw, 257=IBM Mu-Law, 258=IBM A-Law, 259=ADPCM, 65534=Extensible */
unsigned short      NumOfChan;      /* Number of channels 1=Mono 2=Stereo                   */
unsigned long       SamplesPerSec;  /* Sampling Frequency in Hz                             */
unsigned long       bytesPerSec;    /* bytes per second */
unsigned short      blockAlign;     /* 2=16-bit mono, 4=16-bit stereo , 6=24-bit stereo,8=32-bit stereo*/
unsigned short      bitsPerSample;  /* Number of bits per sample, inkl. hier=36 bytes  */
unsigned short      wExtSize;       /*  2 byte, wExtSize = 0  the length of the format extension   */
unsigned char       Fact[4];        /* "fact"  string   */
/*Problem here! -> +2bytes*/
unsigned long       DwFactSize;     /* Sampled data length,  inkl. hier bei 44 bytes */
unsigned long       dwSamplesWritten;
unsigned char       Data[4];        /* leerer string falls extensible -> Beginn nicht nach 44 sondern 68!  */
unsigned long       dwDataLength;   /* raw DataLength */
}__attribute__((packed)) wav_hdr;


int getFileSize(FILE *inFile);

int main(int argc,char *argv[])
{
char *file;
file = argv[1];
wav_hdr wavHeader;
FILE *wavFile;
int headerSize = sizeof(wav_hdr),filelength = 0;
wavFile = fopen(file,"r");
if(wavFile == NULL)
{
    printf("\nCan not open wave file. Usage: program [file] \n");
    exit(EXIT_FAILURE);
}

fread(&wavHeader,headerSize,1,wavFile);
filelength = getFileSize(wavFile);
fclose(wavFile);


std::cout << "\nRIFF " << offsetof(WAV_HEADER, RIFF) <<  std::endl;
std::cout << "\nChunkSize " << offsetof(WAV_HEADER, ChunkSize) <<  std::endl;
std::cout << "\nWAVE[4] " << offsetof(WAV_HEADER, WAVE) <<  std::endl;
std::cout << "\nfmt[4] " << offsetof(WAV_HEADER, fmt) <<  std::endl;
std::cout << "\nSubchunk1Size " << offsetof(WAV_HEADER, Subchunk1Size) <<  std::endl;
std::cout << "\nAudioFormat " << offsetof(WAV_HEADER, AudioFormat) <<  std::endl;
std::cout << "\nNumOfChan " << offsetof(WAV_HEADER, NumOfChan) <<  std::endl;
std::cout << "\nSamplesPerSec " << offsetof(WAV_HEADER, SamplesPerSec) <<  std::endl;
std::cout << "\nbytesPerSec " << offsetof(WAV_HEADER, bytesPerSec) <<  std::endl;
std::cout << "\nblockAlign " << offsetof(WAV_HEADER, blockAlign) <<  std::endl;
std::cout << "\nbitsPerSample " << offsetof(WAV_HEADER, bitsPerSample) <<  std::endl;
std::cout << "\nwExtSize (2) " << offsetof(WAV_HEADER, wExtSize) <<  std::endl;
std::cout << "\nFact[4] " << offsetof(WAV_HEADER, Fact) <<  std::endl;

std::cout << "\nDwFactSize " << offsetof(WAV_HEADER, DwFactSize) <<  std::endl;
std::cout << "\ndwSamplesWritten " << offsetof(WAV_HEADER, dwSamplesWritten) <<  std::endl;
std::cout << "\nData[4] " << offsetof(WAV_HEADER, Data) <<  std::endl;
std::cout << "\ndwDataLength " << offsetof(WAV_HEADER, dwDataLength) <<  std::endl;


return 0;
}

int getFileSize(FILE *inFile)
{
int fileSize = 0;
fseek(inFile,0,SEEK_END);
fileSize=ftell(inFile);
fseek(inFile,0,SEEK_SET);
return fileSize;
}

要生成这样一个 32 位浮点型、88200 Wav 文件: sox 输入16_44100.wav -b 32 -e 浮点输出32F_88200.wav 速率-s -a -v -L 88200

c++ wav
2个回答
1
投票

显然,

Fact[]
DwFactSize
之间有 2 个字节的填充。

我怀疑这是因为你在那里使用了

typedef
。只需将其写为常规 C++ 定义即可:
struct __attribute__((packed)) WAV_HEADER { ...


0
投票

尝试WAVE文件格式的结构,它有RIFF header,fmt sub-chunk和data sub-chunk....

typedef struct WAV_FORMAT_T{
    // RIFF header
    char chunkID[4]; // Contains the letters "RIFF"
    int chunkSize;   // This is the size of the entire file in bytes minus 8 bytes for the two fields not included in this count
    char format[4];  // Contains the letters "WAVE"

    // fmt sub-chunk
    char subchunk1[4];   // Contains the letters "fmt "
    int subchunk1Size;   // 16 for PCM
    short audioFormat;   // PCM = 1 
    short numChannels;   // Mono = 1, Stereo = 2
    int sampleRate;      // 8000, 16000, 44100, etc.
    int byteRate;        //  sampleRate * numChannels * Bytes per sample
    short blockAlign;    // numChannels * Bytes per sample
    short bitsPerSample; // The number of bits (1 byte = 8 bits) for one sample including all channels

    // data sub-chunk
    char subChunk2[4]; // Contains the letters "data"
    int subChunk2Size; // Num of Samples * numChannels * Bytes per sample 
    char bytes[];      // The actual sound data (temporarily use 1 bytes)
} WAV_FORMAT;

假设音频波形文件以 25 fps(每帧 40 毫秒)和立体声通道录制。此外,采样率为 16000 Hz,每个样本的位数为 16 位(2 字节)。然后,通过C程序解析波形头格式

#include < stdio.h >
#include < stdlib.h >
#include < string.h >

#define NUM_FRAME 16000
#define SAMPLE_PER_FRAME 640 // 16000/25 = 640, 25 fps
#define NUM_CHANEL 2
#define BYTE_PER_SAMPLE 2
#define WAVE_SIZE_PER_FRAME  SAMPLE_PER_FRAME*BYTE_PER_SAMPLE*BYTE_PER_SAMPLE

// Signal parameter structure
typedef struct SIGNAL_PARA_T {
    int sampleSize;
} SIGNAL_PARA;

// Default Value for signal parameter
SIGNAL_PARA signal_para = { SAMPLE_PER_FRAME };

int main(int argc, char **argv) {
    FILE *wav_list = NULL;
    FILE *fp = NULL;
    int frame_num = 0;
    WAV_FORMAT wav_chunk;

    // Static Memory
    char wav_test_case[200];
    char wav_per_frame[WAVE_SIZE_PER_FRAME];

    // Open Wav file
    wav_list = fopen(("wav_test_case.txt"), "rb");

    // Protection on Reading file
    if (wav_list == NULL) {
       printf("Error opening file");
       return (-1);
    }

    // Read Wav file - char * fgets(char* str, int Max num of char, FILE* stream), one test case
    if (fgets(wav_test_case, sizeof(wav_test_case), wav_list) != NULL) {

    fp = fopen(wav_test_case, "rb");

    // Protection on Reading file
    if (wav_list == NULL) {
       printf("Can't opening wav file");
       return (-1);
    }

    // Parsing WAV FORMAT
    fread(&wav_chunk, 1, sizeof(WAV_FORMAT), fp);

    // Read fmt sub-chunk
    printf("fmt sub-chunk: %.3s \n", wav_chunk.subchunk1);

    // Read data sub-chunk
    printf("data sub-chunk: %.4s \n", wav_chunk.subChunk2);

    //Print the Format of Wav
    printf("numChannels = %d \n", wav_chunk.numChannels);
    printf("sampleRate = %d \n", wav_chunk.sampleRate);
    printf("byteRate = %d \n", wav_chunk.byteRate);
    printf("bitsPerSample = %d \n", wav_chunk.bitsPerSample);
    printf("sample_alignment (numChannels * bitsPerSample) = %d \n",
    wav_chunk.blockAlign);
    printf("audio_format = %s \n",
    wav_chunk.audioFormat ? "PCM" : "IEEE Float");

    /***** Frame Process *****/
    for (frame_num = 0; frame_num < NUM_FRAME; frame_num++) {
        // Read samples
        result = fread(wav_per_frame, wav_chunk.blockAlign, signal_para.sampleSize, fp); 
        printf("Frame = %d \n", frame_num);

        /***************************/
        /* Doing Signal Process per Frame */
        /***************************/

    } /***** End of Frame Process *****/
    fclose(wav_list);
    return 0;
}

输出:

fmt sub-chunk: fmt 
data sub-chunk: data 
numChannels = 1 
sampleRate = 16000 
byteRate = 32000 
bitsPerSample = 16 
sample_alignment (numChannels * bitsPerSample) = 2 
audio_format = PCM
Frame = 0
...
© www.soinside.com 2019 - 2024. All rights reserved.