我使用一个结构体来读取带有 58 字节 wav 标头的音频格式 3(=IEEE FLOAT 32 位)的 Wav 文件。问题:字节 38 以“事实”块开头。这似乎没问题。 DwFactSize 的偏移量应该为 42。问题就开始了!偏移量是44!首先我认为问题出在填充位上。所以我尝试了 uints、unsigned chars 和 attribute((packed))。这不会改变任何事情。
这是wav头的定义:
0 - 3 'RIFF'/'RIFX' 小/大端
4 - 7 wRiffLength 文件长度减去 8 字节 riff 标头
8 - 11 '波浪'
12 - 15 'fmt'
16 - 19 wFmtSize 格式块的长度减去 8 字节标头
20 - 21 wFormatTag 识别 PCM、ULAW 等
22 - 23 个频道
每通道每秒 24 - 27 个 dwSamplesPerSecond 样本
28 - 31 dwAvgBytesPerSec 对于压缩格式来说并不简单
32 - 33 wBlockAlign 基本块大小
34 - 35 wBitsPerSample 对于压缩格式而言并非易事
(直到字节 35,就像通常的 44 字节标头一样)
36 - 37 wExtSize = 0 格式扩展的长度
38 - 41“事实”
42 - 45 dwFactSize = 4 事实块的长度减去 8 字节 标题
46 - 49 dwSamplesWritten 实际写出的样本数
50 - 53“数据”
54 - 57 dwDataLength 数据块的长度减去 8 字节标头
...
直到这里...正确...
事实[4] 38
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
DwFactSize 44
dwSamplesWritten 48
数据[4] 52
dw数据长度56
快速破解显示偏移问题:
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <cstring>
#include <iostream>
#include <cstddef>
typedef struct WAV_HEADER
{
unsigned char RIFF[4]; /* RIFF Header */ //Magic header
unsigned long ChunkSize; /* RIFF Chunk Size */
unsigned char WAVE[4]; /* WAVE Header */
unsigned char fmt[4]; /* FMT header */
unsigned long Subchunk1Size; /* Size of the fmt chunk: 16=PCM, 18=IEEE Float, 40=Extensible */
unsigned short AudioFormat; /* Audio format 1=PCM, 3=IEEE Float, 6=mulaw,7=alaw, 257=IBM Mu-Law, 258=IBM A-Law, 259=ADPCM, 65534=Extensible */
unsigned short NumOfChan; /* Number of channels 1=Mono 2=Stereo */
unsigned long SamplesPerSec; /* Sampling Frequency in Hz */
unsigned long bytesPerSec; /* bytes per second */
unsigned short blockAlign; /* 2=16-bit mono, 4=16-bit stereo , 6=24-bit stereo,8=32-bit stereo*/
unsigned short bitsPerSample; /* Number of bits per sample, inkl. hier=36 bytes */
unsigned short wExtSize; /* 2 byte, wExtSize = 0 the length of the format extension */
unsigned char Fact[4]; /* "fact" string */
/*Problem here! -> +2bytes*/
unsigned long DwFactSize; /* Sampled data length, inkl. hier bei 44 bytes */
unsigned long dwSamplesWritten;
unsigned char Data[4]; /* leerer string falls extensible -> Beginn nicht nach 44 sondern 68! */
unsigned long dwDataLength; /* raw DataLength */
}__attribute__((packed)) wav_hdr;
int getFileSize(FILE *inFile);
int main(int argc,char *argv[])
{
char *file;
file = argv[1];
wav_hdr wavHeader;
FILE *wavFile;
int headerSize = sizeof(wav_hdr),filelength = 0;
wavFile = fopen(file,"r");
if(wavFile == NULL)
{
printf("\nCan not open wave file. Usage: program [file] \n");
exit(EXIT_FAILURE);
}
fread(&wavHeader,headerSize,1,wavFile);
filelength = getFileSize(wavFile);
fclose(wavFile);
std::cout << "\nRIFF " << offsetof(WAV_HEADER, RIFF) << std::endl;
std::cout << "\nChunkSize " << offsetof(WAV_HEADER, ChunkSize) << std::endl;
std::cout << "\nWAVE[4] " << offsetof(WAV_HEADER, WAVE) << std::endl;
std::cout << "\nfmt[4] " << offsetof(WAV_HEADER, fmt) << std::endl;
std::cout << "\nSubchunk1Size " << offsetof(WAV_HEADER, Subchunk1Size) << std::endl;
std::cout << "\nAudioFormat " << offsetof(WAV_HEADER, AudioFormat) << std::endl;
std::cout << "\nNumOfChan " << offsetof(WAV_HEADER, NumOfChan) << std::endl;
std::cout << "\nSamplesPerSec " << offsetof(WAV_HEADER, SamplesPerSec) << std::endl;
std::cout << "\nbytesPerSec " << offsetof(WAV_HEADER, bytesPerSec) << std::endl;
std::cout << "\nblockAlign " << offsetof(WAV_HEADER, blockAlign) << std::endl;
std::cout << "\nbitsPerSample " << offsetof(WAV_HEADER, bitsPerSample) << std::endl;
std::cout << "\nwExtSize (2) " << offsetof(WAV_HEADER, wExtSize) << std::endl;
std::cout << "\nFact[4] " << offsetof(WAV_HEADER, Fact) << std::endl;
std::cout << "\nDwFactSize " << offsetof(WAV_HEADER, DwFactSize) << std::endl;
std::cout << "\ndwSamplesWritten " << offsetof(WAV_HEADER, dwSamplesWritten) << std::endl;
std::cout << "\nData[4] " << offsetof(WAV_HEADER, Data) << std::endl;
std::cout << "\ndwDataLength " << offsetof(WAV_HEADER, dwDataLength) << std::endl;
return 0;
}
int getFileSize(FILE *inFile)
{
int fileSize = 0;
fseek(inFile,0,SEEK_END);
fileSize=ftell(inFile);
fseek(inFile,0,SEEK_SET);
return fileSize;
}
要生成这样一个 32 位浮点型、88200 Wav 文件: sox 输入16_44100.wav -b 32 -e 浮点输出32F_88200.wav 速率-s -a -v -L 88200
显然,
Fact[]
和 DwFactSize
之间有 2 个字节的填充。
我怀疑这是因为你在那里使用了
typedef
。只需将其写为常规 C++ 定义即可:struct __attribute__((packed)) WAV_HEADER { ...
尝试WAVE文件格式的结构,它有RIFF header,fmt sub-chunk和data sub-chunk....
typedef struct WAV_FORMAT_T{
// RIFF header
char chunkID[4]; // Contains the letters "RIFF"
int chunkSize; // This is the size of the entire file in bytes minus 8 bytes for the two fields not included in this count
char format[4]; // Contains the letters "WAVE"
// fmt sub-chunk
char subchunk1[4]; // Contains the letters "fmt "
int subchunk1Size; // 16 for PCM
short audioFormat; // PCM = 1
short numChannels; // Mono = 1, Stereo = 2
int sampleRate; // 8000, 16000, 44100, etc.
int byteRate; // sampleRate * numChannels * Bytes per sample
short blockAlign; // numChannels * Bytes per sample
short bitsPerSample; // The number of bits (1 byte = 8 bits) for one sample including all channels
// data sub-chunk
char subChunk2[4]; // Contains the letters "data"
int subChunk2Size; // Num of Samples * numChannels * Bytes per sample
char bytes[]; // The actual sound data (temporarily use 1 bytes)
} WAV_FORMAT;
假设音频波形文件以 25 fps(每帧 40 毫秒)和立体声通道录制。此外,采样率为 16000 Hz,每个样本的位数为 16 位(2 字节)。然后,通过C程序解析波形头格式
#include < stdio.h >
#include < stdlib.h >
#include < string.h >
#define NUM_FRAME 16000
#define SAMPLE_PER_FRAME 640 // 16000/25 = 640, 25 fps
#define NUM_CHANEL 2
#define BYTE_PER_SAMPLE 2
#define WAVE_SIZE_PER_FRAME SAMPLE_PER_FRAME*BYTE_PER_SAMPLE*BYTE_PER_SAMPLE
// Signal parameter structure
typedef struct SIGNAL_PARA_T {
int sampleSize;
} SIGNAL_PARA;
// Default Value for signal parameter
SIGNAL_PARA signal_para = { SAMPLE_PER_FRAME };
int main(int argc, char **argv) {
FILE *wav_list = NULL;
FILE *fp = NULL;
int frame_num = 0;
WAV_FORMAT wav_chunk;
// Static Memory
char wav_test_case[200];
char wav_per_frame[WAVE_SIZE_PER_FRAME];
// Open Wav file
wav_list = fopen(("wav_test_case.txt"), "rb");
// Protection on Reading file
if (wav_list == NULL) {
printf("Error opening file");
return (-1);
}
// Read Wav file - char * fgets(char* str, int Max num of char, FILE* stream), one test case
if (fgets(wav_test_case, sizeof(wav_test_case), wav_list) != NULL) {
fp = fopen(wav_test_case, "rb");
// Protection on Reading file
if (wav_list == NULL) {
printf("Can't opening wav file");
return (-1);
}
// Parsing WAV FORMAT
fread(&wav_chunk, 1, sizeof(WAV_FORMAT), fp);
// Read fmt sub-chunk
printf("fmt sub-chunk: %.3s \n", wav_chunk.subchunk1);
// Read data sub-chunk
printf("data sub-chunk: %.4s \n", wav_chunk.subChunk2);
//Print the Format of Wav
printf("numChannels = %d \n", wav_chunk.numChannels);
printf("sampleRate = %d \n", wav_chunk.sampleRate);
printf("byteRate = %d \n", wav_chunk.byteRate);
printf("bitsPerSample = %d \n", wav_chunk.bitsPerSample);
printf("sample_alignment (numChannels * bitsPerSample) = %d \n",
wav_chunk.blockAlign);
printf("audio_format = %s \n",
wav_chunk.audioFormat ? "PCM" : "IEEE Float");
/***** Frame Process *****/
for (frame_num = 0; frame_num < NUM_FRAME; frame_num++) {
// Read samples
result = fread(wav_per_frame, wav_chunk.blockAlign, signal_para.sampleSize, fp);
printf("Frame = %d \n", frame_num);
/***************************/
/* Doing Signal Process per Frame */
/***************************/
} /***** End of Frame Process *****/
fclose(wav_list);
return 0;
}
输出:
fmt sub-chunk: fmt
data sub-chunk: data
numChannels = 1
sampleRate = 16000
byteRate = 32000
bitsPerSample = 16
sample_alignment (numChannels * bitsPerSample) = 2
audio_format = PCM
Frame = 0
...