// Audio Preprocessing Implementation
// Open an audio context and wrap the microphone capture as a source node.
const audioContext = new AudioContext();
const mediaStream = await navigator.mediaDevices.getUserMedia({
  audio: true,
});
const source = audioContext.createMediaStreamSource(mediaStream);

// Gain stage; starts at 5 and is adjusted later from the measured input level.
const gainNode = audioContext.createGain();
gainNode.gain.value = 5;

// Level meter: the analyser is fed straight from the source, i.e. it sees
// the signal before the gain stage is applied.
const analyser = Object.assign(audioContext.createAnalyser(), { fftSize: 512 });
source.connect(analyser);
// Dynamic gain adjustment based on input levels
/**
 * Measures the current input level and moves the microphone gain towards a
 * value suited to it, then re-schedules itself for the next animation frame.
 *
 * Reads `analyser`, `gainNode` and `audioContext` from the enclosing scope.
 * The level is the mean of the analyser's byte frequency data (0-255 per bin).
 * Quieter input raises the gain in larger steps (each tier has its own
 * ceiling); input above 45 lowers it, never below 4. Levels from 35 to 45
 * leave the gain where it is.
 *
 * The new gain is scheduled with `setTargetAtTime` instead of being written
 * to `gain.value` directly, so each change is approached smoothly rather
 * than applied as an abrupt step.
 *
 * NOTE(review): requestAnimationFrame is usually paused in background tabs,
 * so the gain stops adapting while the page is hidden - confirm this is
 * acceptable for the recognition use case.
 *
 * @returns {void}
 */
function adjustMicGain() {
  const buffer = new Uint8Array(analyser.frequencyBinCount);
  analyser.getByteFrequencyData(buffer);
  const avgVolume = buffer.reduce((sum, bin) => sum + bin, 0) / buffer.length;

  const currentGain = gainNode.gain.value;
  let targetGain = currentGain;
  if (avgVolume < 5) {
    targetGain = Math.min(currentGain + 1.0, 15);
  } else if (avgVolume < 15) {
    targetGain = Math.min(currentGain + 0.8, 12);
  } else if (avgVolume < 25) {
    targetGain = Math.min(currentGain + 0.5, 10);
  } else if (avgVolume < 35) {
    targetGain = Math.min(currentGain + 0.3, 8);
  } else if (avgVolume > 45) {
    targetGain = Math.max(currentGain - 0.2, 4);
  }

  // Time constant of 0.1 s: the gain eases towards the target instead of
  // jumping to it.
  gainNode.gain.setTargetAtTime(targetGain, audioContext.currentTime, 0.1);
  requestAnimationFrame(adjustMicGain);
}
// Start the gain-adjustment loop.
adjustMicGain();

// Bandpass filter for noise reduction: centre frequency 2500, Q 1.5.
const biquadFilter = audioContext.createBiquadFilter();
biquadFilter.type = "bandpass";
for (const [param, value] of [
  [biquadFilter.frequency, 2500],
  [biquadFilter.Q, 1.5],
]) {
  param.setValueAtTime(value, audioContext.currentTime);
}

// Processing chain: source -> gain -> bandpass -> stream destination.
const destination = audioContext.createMediaStreamDestination();
source.connect(gainNode);
gainNode.connect(biquadFilter);
biquadFilter.connect(destination);

// Hand the processed stream to the Speech SDK as its audio input.
const audioConfig = SpeechSDK.AudioConfig.fromStreamInput(destination.stream);
问题
我的 Web Audio API 实现是否正确处理了这些问题?在处理以下情况时,通常还有哪些用于提高语音识别质量的技术/解决方案:麦克风输入音量过低、背景噪音、被跳过/遗漏的音频内容?在语音识别应用中,是否有比 Web Audio API 更好的替代方案?
环境
语音识别:Azure Speech SDK;音频输入:麦克风和扬声器音频
给定代码中的动态增益调整逻辑经过了深思熟虑,但增益的变化可能仍然过于突然,并且可能在低音量条件下放大噪声。
与其按固定步长直接调整增益,不如考虑使用指数渐变(exponential ramp)来进行更平滑的调整。
gainNode.gain.setTargetAtTime(targetGain, audioContext.currentTime, 0.1);
MediaRecorder
// Open an audio context and wrap the microphone capture as a source node.
const audioContext = new AudioContext();
const mediaStream = await navigator.mediaDevices.getUserMedia({
  audio: true,
});
const source = audioContext.createMediaStreamSource(mediaStream);

// Gain stage; starts at 5 and is retargeted later by adjustMicGain().
const gainNode = audioContext.createGain();
gainNode.gain.value = 5;

// High-pass at 100 to drop content below the filter frequency.
const highpassFilter = Object.assign(audioContext.createBiquadFilter(), {
  type: "highpass",
});
highpassFilter.frequency.setValueAtTime(100, audioContext.currentTime);

// Bandpass centred on 1000 with Q 0.9.
const bandpassFilter = Object.assign(audioContext.createBiquadFilter(), {
  type: "bandpass",
});
bandpassFilter.frequency.setValueAtTime(1000, audioContext.currentTime);
bandpassFilter.Q.setValueAtTime(0.9, audioContext.currentTime);

// Analyser used by adjustMicGain() to measure the signal level.
const analyser = Object.assign(audioContext.createAnalyser(), { fftSize: 256 });
/**
 * Measures the current signal level and retargets the gain, then
 * re-schedules itself for the next animation frame.
 *
 * Reads `analyser`, `gainNode` and `audioContext` from the enclosing scope.
 *
 * Byte time-domain samples are unsigned, with silence at the midpoint 128,
 * so their plain mean stays near 128 whatever the loudness and can never
 * cross the 50 / 200 thresholds. The level is therefore computed as the mean
 * absolute deviation from 128, rescaled to 0-255 so the existing thresholds
 * keep operating on a byte scale.
 *
 * NOTE(review): the 50 / 200 thresholds were never reachable before this
 * fix, so they have not been tuned against real input - confirm by ear.
 *
 * @returns {void}
 */
function adjustMicGain() {
  const buffer = new Uint8Array(analyser.frequencyBinCount);
  analyser.getByteTimeDomainData(buffer);

  // Distance from the 128 midpoint is the instantaneous amplitude (0..128);
  // the 255 / 128 factor stretches the mean back onto 0..255.
  const deviationSum = buffer.reduce(
    (sum, sample) => sum + Math.abs(sample - 128),
    0,
  );
  const avgVolume = (deviationSum / buffer.length) * (255 / 128);

  const currentGain = gainNode.gain.value;
  let targetGain = currentGain;
  if (avgVolume < 50) {
    targetGain = Math.min(currentGain + 0.5, 10);
  } else if (avgVolume > 200) {
    targetGain = Math.max(currentGain - 0.5, 2);
  }

  // Ease towards the target (time constant 0.1 s) instead of stepping.
  gainNode.gain.setTargetAtTime(targetGain, audioContext.currentTime, 0.1);
  requestAnimationFrame(adjustMicGain);
}
// Start the gain-adjustment loop.
adjustMicGain();

// Processing chain, one hop per statement:
// source -> high-pass -> bandpass -> gain -> analyser -> stream destination.
// The analyser sits after the gain stage, so the level it reports is the
// post-gain signal.
const destination = audioContext.createMediaStreamDestination();
source.connect(highpassFilter);
highpassFilter.connect(bandpassFilter);
bandpassFilter.connect(gainNode);
gainNode.connect(analyser);
analyser.connect(destination);

// Hand the processed stream to the Speech SDK as its audio input.
const audioConfig = SpeechSDK.AudioConfig.fromStreamInput(destination.stream);
[Wed Feb 19 2025 15:48:30 GMT+0530 (India Standard Time)] "GET /" 200 "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0"
[Wed Feb 19 2025 15:48:30 GMT+0530 (India Standard Time)] "GET /favicon.ico" 200 "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0"
[Wed Feb 19 2025 15:48:30 GMT+0530 (India Standard Time)] "GET /index.js" 200 "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0"
[Wed Feb 19 2025 15:49:27 GMT+0530 (India Standard Time)] "GET /" 200 "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0"
[Wed Feb 19 2025 15:49:27 GMT+0530 (India Standard Time)] "GET /favicon.ico" 200 "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0"
[Wed Feb 19 2025 15:49:27 GMT+0530 (India Standard Time)] "GET /index.js" 200 "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0"