我尝试使用 WASAPI 进行环回录音,但与被捕获的原始音频相比,录制出来的 WAV 文件音质明显更差,听起来差别很大,而我一直没能找到解决办法。
// Record.h
#include <windows.h>
#include <mmdeviceapi.h>
#include <audioclient.h>
#include <mmreg.h>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
// Number of REFERENCE_TIME units per second (one unit is 100 nanoseconds).
#define REFTIMES_PER_SEC 10000000
// Number of REFERENCE_TIME units per millisecond.
#define REFTIMES_PER_MILLISEC 10000
// Jumps to the enclosing function's `Exit` label when `hres` is a failure HRESULT.
// Expands to a complete `if` statement; call sites in this file invoke it
// without a trailing semicolon.
#define EXIT_ON_ERROR(hres) \
if (FAILED(hres)) { goto Exit; }
// Releases the COM interface pointer `punk` if it is non-null, then resets it
// to NULL so that a second release is a no-op.
#define SAFE_RELEASE(punk) \
if ((punk) != NULL) \
{ (punk)->Release(); (punk) = NULL; }
// On-disk layout of a canonical RIFF/WAVE header: the RIFF descriptor, a
// 16-byte "fmt " chunk and the "data" chunk header. The struct is written to
// the file verbatim, so its size must stay at exactly 44 bytes.
//
// Every field is zero-initialized so that a header written before the stream
// format is known never contains indeterminate bytes.
struct WAVHeader {
    char chunkID[4] = {};        // File tag, normally "RIFF"
    uint32_t chunkSize = 0;      // File size minus the 8 bytes of chunkID + chunkSize
    char format[4] = {};         // File format, "WAVE"
    char subchunk1ID[4] = {};    // Format chunk tag, "fmt "
    uint32_t subchunk1Size = 0;  // Size of the format chunk payload
    uint16_t audioFormat = 0;    // Sample encoding tag (1 = integer PCM, 3 = IEEE float)
    uint16_t numChannels = 0;    // Channel count (1 = mono, 2 = stereo)
    uint32_t sampleRate = 0;     // Sample frames per second
    uint32_t byteRate = 0;       // Bytes per second
    uint16_t blockAlign = 0;     // Bytes per sample frame (all channels)
    uint16_t bitsPerSample = 0;  // Bits per sample
    char subchunk2ID[4] = {};    // Data chunk tag, "data"
    uint32_t subchunk2Size = 0;  // Size of the audio payload in bytes
};
static_assert(sizeof(WAVHeader) == 44,
              "WAVHeader is written to disk verbatim and must be exactly 44 bytes");
// 音频数据结构
struct WAVData {
struct WAVHeader header; // 音频头部数据
uint8_t* sample; // 音频数据
};
class WAVWriter {
public:
WAVWriter(const char* filename, WAVEFORMATEX* pwfx)
: file(filename, std::ios::binary), totalDataSize(0) {
if (!file.is_open()) {
std::cerr << "Failed to open file for writing\n";
return;
}
// 初始化 WAV 头
memcpy(wavData.header.chunkID, "RIFF", 4);
memcpy(wavData.header.format, "WAVE", 4);
memcpy(wavData.header.subchunk1ID, "fmt ", 4);
wavData.header.subchunk1Size = 16;
wavData.header.audioFormat = 1;
memcpy(wavData.header.subchunk2ID, "data", 4);
// 暂时写入空白头,稍后更新
file.write(reinterpret_cast<const char*>(&wavData.header), sizeof(wavData.header));
}
void updateHeader(WAVEFORMATEX* pwfx) {
wavData.header.numChannels = pwfx->nChannels;
wavData.header.sampleRate = pwfx->nSamplesPerSec;
wavData.header.bitsPerSample = pwfx->wBitsPerSample;
wavData.header.byteRate = pwfx->nSamplesPerSec * pwfx->nChannels * pwfx->wBitsPerSample / 8;
wavData.header.blockAlign = pwfx->nChannels * pwfx->wBitsPerSample / 8;
}
void WriteData(BYTE* pData, UINT32 dataSize) {
if (!file.is_open()) return;
file.write(reinterpret_cast<const char*>(pData), dataSize);
totalDataSize += dataSize;
}
void Finalize() {
if (!file.is_open()) return;
// 填写 WAV 头的最终大小信息
wavData.header.chunkSize = 36 + totalDataSize; // 36 是 WAV 头部的大小减去文件头的前8字节
wavData.header.subchunk2Size=totalDataSize;
wavData.sample = new uint8_t[totalDataSize];
// 回到文件开始位置,写入更新后的头
file.seekp(0, std::ios::beg);
file.write(reinterpret_cast<const char*>(&wavData.header), sizeof(wavData.header));
std::cout << std::string(wavData.header.chunkID, 4) << " chunkID\n";
std::cout << wavData.header.chunkSize << " chunkSize\n";
std::cout << std::string(wavData.header.format, 4) << " format\n";
std::cout << std::string(wavData.header.subchunk1ID, 4) << " subchunk1ID\n";
std::cout << wavData.header.subchunk1Size << " subchunk1Size\n";
std::cout << wavData.header.audioFormat << " audioFormat\n";
std::cout << wavData.header.numChannels << " numChannels\n";
std::cout << wavData.header.sampleRate << " sampleRate\n";
std::cout << wavData.header.byteRate << " byteRate\n";
std::cout << wavData.header.blockAlign << " blockAlign\n";
std::cout << wavData.header.bitsPerSample << " bitsPerSample\n";
std::cout << std::string(wavData.header.subchunk2ID, 4) << " subchunk2ID\n";
std::cout << wavData.header.subchunk2Size << " subchunk2Size\n";
file.close();
}
private:
std::ofstream file;
WAVData wavData;
uint32_t totalDataSize;
};
class MyAudioSink {
public:
MyAudioSink(const char* outputFile) : wavWriter(outputFile, nullptr) {}
void SetFormat(WAVEFORMATEX* pwfx) {
std::cout << "Audio format set: " << pwfx->nChannels << " channels, "
<< pwfx->nSamplesPerSec << " Hz\n";
wavWriter = WAVWriter("output.wav", pwfx); // 初始化 WAV 文件写入
}
void updateHeader(WAVEFORMATEX* pwfx) {
wavWriter.updateHeader(pwfx);
}
HRESULT CopyData(BYTE* pData, UINT32 numFramesAvailable, WAVEFORMATEX* pwfx, BOOL* bDone) {
std::cout << "Received " << numFramesAvailable << " frames\n";
if (numFramesAvailable > 0) {
if (pData != NULL) {
// 计算数据的字节数
UINT32 dataSize = numFramesAvailable * pwfx->nBlockAlign; // 根据音频格式动态计算
wavWriter.WriteData(pData, dataSize);
}
else {
// pData 为 NULL,处理静音情况
std::cout << "Received NULL pData, writing silence.\n";
UINT32 dataSize = numFramesAvailable * pwfx->nBlockAlign; // 计算需要写入的字节数
BYTE* silenceData = new BYTE[dataSize]; // 动态分配内存用于静音数据
memset(silenceData, 0, dataSize); // 用零填充静音数据
wavWriter.WriteData(silenceData, dataSize); // 写入静音数据
delete[] silenceData; // 释放内存
}
}
else {
std::cout << "No frames available to process.\n";
}
return S_OK;
}
void Finalize() {
wavWriter.Finalize(); // 录制完成后更新文件头
}
private:
WAVWriter wavWriter;
};
#include <windows.h>
#include <mmdeviceapi.h>
#include <audioclient.h>
#include <iostream>
#include <fstream>
#include "Record.h"
// Class and interface identifiers that RecordAudioStream passes to
// CoCreateInstance, IMMDevice::Activate and IAudioClient::GetService.
const CLSID CLSID_MMDeviceEnumerator = __uuidof(MMDeviceEnumerator);
const IID IID_IMMDeviceEnumerator = __uuidof(IMMDeviceEnumerator);
const IID IID_IAudioClient = __uuidof(IAudioClient);
const IID IID_IAudioCaptureClient = __uuidof(IAudioCaptureClient);
// Records what the default render endpoint is playing (WASAPI loopback) into
// `pMySink` until the Enter key is pressed.
//
// The sink receives the mix format through SetFormat()/updateHeader() and each
// captured packet through CopyData(). Silent packets are passed with a null
// data pointer.
//
// Returns E_POINTER if `pMySink` is null, otherwise the HRESULT of the last
// WASAPI call made: the first failure, or the result of stopping the stream.
HRESULT RecordAudioStream(MyAudioSink* pMySink) {
    HRESULT hr;
    REFERENCE_TIME hnsRequestedDuration = REFTIMES_PER_SEC;
    REFERENCE_TIME hnsActualDuration;
    UINT32 bufferFrameCount;
    UINT32 numFramesAvailable;
    IMMDeviceEnumerator* pEnumerator = NULL;
    IMMDevice* pDevice = NULL;
    IAudioClient* pAudioClient = NULL;
    IAudioCaptureClient* pCaptureClient = NULL;
    WAVEFORMATEX* pwfx = NULL;
    UINT32 packetLength = 0;
    BOOL bDone = FALSE;
    BOOL bStarted = FALSE;  // TRUE while the stream is running and needs Stop()
    BYTE* pData;
    DWORD flags;

    // Nothing has been acquired yet, so a plain return is safe here.
    if (pMySink == NULL) {
        return E_POINTER;
    }

    hr = CoCreateInstance(CLSID_MMDeviceEnumerator, NULL, CLSCTX_ALL, IID_IMMDeviceEnumerator, (void**)&pEnumerator);
    EXIT_ON_ERROR(hr)
    // Use the default render endpoint; loopback captures what it plays.
    hr = pEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, &pDevice);
    // hr = pEnumerator->GetDefaultAudioEndpoint(eCapture, eConsole, &pDevice);
    EXIT_ON_ERROR(hr)
    std::cout << pDevice;
    hr = pDevice->Activate(IID_IAudioClient, CLSCTX_ALL, NULL, (void**)&pAudioClient);
    EXIT_ON_ERROR(hr)
    hr = pAudioClient->GetMixFormat(&pwfx);
    EXIT_ON_ERROR(hr)
    // Shared mode with the loopback flag, using the endpoint's mix format.
    hr = pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_LOOPBACK, hnsRequestedDuration, 0, pwfx, NULL);
    //hr = pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED, 0, hnsRequestedDuration, 0, pwfx, NULL);
    EXIT_ON_ERROR(hr)
    hr = pAudioClient->GetBufferSize(&bufferFrameCount);
    EXIT_ON_ERROR(hr)
    hr = pAudioClient->GetService(IID_IAudioCaptureClient, (void**)&pCaptureClient);
    EXIT_ON_ERROR(hr)
    pMySink->SetFormat(pwfx);
    // Duration of the allocated buffer in REFERENCE_TIME units.
    hnsActualDuration = static_cast<REFERENCE_TIME>(
        static_cast<double>(REFTIMES_PER_SEC) * bufferFrameCount / pwfx->nSamplesPerSec);
    hr = pAudioClient->Start();  // Start recording.
    EXIT_ON_ERROR(hr)
    bStarted = TRUE;
    pMySink->updateHeader(pwfx);

    // Discard any Enter press recorded before this point (for example the one
    // that launched the program) so it cannot end the recording immediately.
    (void)GetAsyncKeyState(VK_RETURN);

    while (!bDone) {
        // Sleep for half the buffer duration between polls.
        Sleep(static_cast<DWORD>(hnsActualDuration / REFTIMES_PER_MILLISEC / 2));
        hr = pCaptureClient->GetNextPacketSize(&packetLength);
        EXIT_ON_ERROR(hr)
        // Drain every packet that is currently queued.
        while (packetLength != 0) {
            hr = pCaptureClient->GetBuffer(&pData, &numFramesAvailable, &flags, NULL, NULL);
            EXIT_ON_ERROR(hr)
            if (flags & AUDCLNT_BUFFERFLAGS_SILENT) {
                pData = NULL;  // Tell CopyData to write silence.
            }
            hr = pMySink->CopyData(pData, numFramesAvailable, pwfx, &bDone);
            EXIT_ON_ERROR(hr)
            hr = pCaptureClient->ReleaseBuffer(numFramesAvailable);
            EXIT_ON_ERROR(hr)
            hr = pCaptureClient->GetNextPacketSize(&packetLength);
            EXIT_ON_ERROR(hr)
        }
        if (GetAsyncKeyState(VK_RETURN)) {
            bDone = TRUE;  // Enter pressed: stop recording.
        }
    }
    hr = pAudioClient->Stop();  // Stop recording.
    bStarted = FALSE;
    EXIT_ON_ERROR(hr)
Exit:
    // An error inside the capture loop lands here with the stream still
    // running; stop it without overwriting the failure code held in hr.
    if (bStarted && pAudioClient != NULL) {
        pAudioClient->Stop();
    }
    CoTaskMemFree(pwfx);
    SAFE_RELEASE(pEnumerator)
    SAFE_RELEASE(pDevice)
    SAFE_RELEASE(pAudioClient)
    SAFE_RELEASE(pCaptureClient)
    return hr;
}
int main() {
CoInitialize(NULL);
MyAudioSink audioSink("output.wav");
HRESULT hr = RecordAudioStream(&audioSink);
audioSink.Finalize();
CoUninitialize();
return hr == S_OK ? 0 : 1;
}
我尝试使用 WASAPI 进行环回录音,希望录制出的 WAV 音频能保持与原始音源相同的质量。然而,生成的音频质量显著下降,清晰度和保真度都有明显差异,这出乎我的意料。
您录制的音频数据本身其实没有问题,只是 WAV 文件头写错了。我刚用您的代码测试过:将 output.wav 重命名为 output.raw,然后用 Audacity(或任何允许加载原始音频文件并手动指定解释方式的音频编辑软件)打开。就我的情况而言,按 32 位浮点、立体声、48 kHz 解释后,得到的是一个干净的音频文件。
我没有进一步研究 WAV 文件头具体错在哪里。可能是在需要 WAVEFORMATEXTENSIBLE 的地方写入了 WAVEFORMATEX(或者相反),也可能只是某些字段填写有误。