我们有一个用 C++ 编写的 Windows 应用程序,用于执行音频录制。
录制过程是通过检测操作系统级别的麦克风活动来启动的,从而创建两个单独的原始文件 - 每个音频通道一个。
一个通道捕获麦克风输入,另一个通道记录扬声器输出,形成双通道录音。
录音停止后,这两个文件将合并为一个立体声文件,其中一个通道专用于计算机上的本地用户,另一个通道专用于来自远程扬声器的传入音频。
目前,该应用程序可以有效运行,但缺乏音频预处理和回声消除功能。
此缺陷导致麦克风从扬声器拾取音频,并将其添加到麦克风通道。
此问题不影响扬声器通道。显然,使用耳机时不会出现此问题,因为在这种情况下麦克风听不到扬声器发出的任何声音。
与我们的应用程序不同,Microsoft Teams、Zoom、Skype、Viber 和 WhatsApp for Windows 等标准通信平台采用的机制可防止麦克风捕获扬声器的音频输出,从而避免远程参与者可能听到自己的声音回响的反馈循环.
我需要您帮助将此功能添加到我们现有的应用程序中,使麦克风能够隔离和消除扬声器中的噪音和声音,确保它仅在电话会议期间记录本地用户的音频。
我附上了麦克风录音的代码。我需要在下面代码中标注的位置(见 TODO 注释)添加专业的回声消除处理:
// TODO:将回声消除算法应用于 pData
bool AudioCapture::CaptureMicrophoneAudio(const wchar_t* outputFilePath)
{
// Activate audio client
IAudioClient* pAudioClient = nullptr;
HRESULT hr = pDevice->Activate(__uuidof(IAudioClient), CLSCTX_ALL, NULL, (void**)&pAudioClient);
if (FAILED(hr)) {
logger->Log("Error activating audio client: ");
pDevice->Release();
//CoUninitialize();
return false;
}
// Get mix format
WAVEFORMATEX* pWaveFormat = nullptr;
hr = pAudioClient->GetMixFormat(&pWaveFormat);
if (FAILED(hr)) {
logger->Log("Error getting mix format: ");
pAudioClient->Release();
pDevice->Release();
//CoUninitialize();
return false;
}
logger->Log("WAVEFORMATEX: nChannels - " + std::to_string(pWaveFormat->nChannels));
logger->Log("WAVEFORMATEX: nSamplesPerSec - " + std::to_string(pWaveFormat->nSamplesPerSec));
logger->Log("WAVEFORMATEX: wBitsPerSample - " + std::to_string(pWaveFormat->wBitsPerSample));
logger->Log("WAVEFORMATEX: wFormatTag - " + std::to_string(pWaveFormat->wFormatTag));
logger->Log("WAVEFORMATEX: cbSize - " + std::to_string(pWaveFormat->cbSize));
logger->Log("WAVEFORMATEX: nAvgBytesPerSec - " + std::to_string(pWaveFormat->nAvgBytesPerSec));
logger->Log("WAVEFORMATEX: nBlockAlign - " + std::to_string(pWaveFormat->nBlockAlign));
nMicChannels = pWaveFormat->nChannels;
nMicSamplesPerSec = pWaveFormat->nSamplesPerSec;
wMicBitsPerSample = pWaveFormat->wBitsPerSample;
nMicAvgBytesPerSec = pWaveFormat->nAvgBytesPerSec;
// Initialize audio client with the mix format
hr = pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED, 0, 0, 0, pWaveFormat, NULL);
if (FAILED(hr)) {
logger->Log("Error initializing audio client: ");
CoTaskMemFree(pWaveFormat);
pAudioClient->Release();
pDevice->Release();
//CoUninitialize();
return false;
}
// Get capture client
IAudioCaptureClient* pCaptureClient = nullptr;
hr = pAudioClient->GetService(__uuidof(IAudioCaptureClient), (void**)&pCaptureClient);
if (FAILED(hr)) {
logger->Log("Error getting capture client: ");
CoTaskMemFree(pWaveFormat);
pAudioClient->Release();
pDevice->Release();
// CoUninitialize();
return false;
}
// Open binary file for writing
std::string tempFilePath = Utils::GetTempFilename("Audio_mic_capture", "raw");
logger->Log(tempFilePath);
std::ofstream outFile(tempFilePath, std::ios::binary);
if (!outFile.is_open()) {
logger->Log("Error opening binary file for writing");
CoTaskMemFree(pWaveFormat);
pCaptureClient->Release();
pAudioClient->Release();
pDevice->Release();
// CoUninitialize();
return false;
}
// Start capturing
hr = pAudioClient->Start();
if (FAILED(hr))
{
logger->Log("Error starting audio client: ");
CoTaskMemFree(pWaveFormat);
pCaptureClient->Release();
pAudioClient->Release();
pDevice->Release();
// CoUninitialize();
return false;
}
// Main loop for capturing and writing to file
while (!exitFlag)
{
// Capture audio data
// Read audio data from pCaptureClient
BYTE* pData;
UINT32 numFramesAvailable;
DWORD flags;
hr = pCaptureClient->GetBuffer(&pData, &numFramesAvailable, &flags, NULL, NULL);
if (FAILED(hr))
{
logger->Log("Error getting audio buffer: ");
break;
}
//
// TODO: Apply echo cancellation algorithm to pData
//
// Write audio data to file
int count = numFramesAvailable * pWaveFormat->nBlockAlign;
outFile.write(reinterpret_cast<const char*>(pData), count);
// Release the buffer
hr = pCaptureClient->ReleaseBuffer(numFramesAvailable);
if (FAILED(hr))
{
logger->Log("Error releasing audio buffer: ");
break;
}
}
logger->Log("Capturing Completed");
// Close binary file
outFile.close();
// Stop capturing
pAudioClient->Stop();
// Clean up resources
CoTaskMemFree(pWaveFormat);
pCaptureClient->Release();
pAudioClient->Release();
pDevice->Release();
// CoUninitialize();
// convert to mp3 and delete temp file
std::string tempFilePathMp3 = Utils::GetTempFilename("Audio_capture_mic", "mp3");
tempMicFilePathWMp3 = std::wstring(tempFilePathMp3.begin(), tempFilePathMp3.end());
std::wstring tempFilePathW(tempFilePath.begin(), tempFilePath.end());
if (!convertToMp3(tempFilePathW, tempMicFilePathWMp3, nMicChannels, nMicSamplesPerSec, wMicBitsPerSample, nMicAvgBytesPerSec))
{
logger->Log("Failed to convert to MP3");
}
else
{
logger->Log("Saved to MP3 data");
}
// Remove temp file
std::remove(tempFilePath.c_str());
return true;
}
非常感谢:)
期待您的专业意见,如有任何相关参考资料也将不胜感激。
尝试使用具有 AEC 功能的虚拟麦克风/扬声器。例如,您可以使用 SoliCall Pro (https://solicall.com/solicall-pro/)。它生成一个新的虚拟麦克风和扬声器。不要忘记打开其 AEC 算法。在您的应用程序中开始使用这些新的麦克风/扬声器。