我正在使用 NextJS 开发一个项目,我需要通过语言检测来实现连续的语音到文本。虽然我已成功为单一语言设置了语音转文本,但我仍在努力让自动语言检测发挥作用。文档似乎有限,我似乎无法弄清楚我做错了什么。
按照官方文档,应该这样实现(参见 Azure 语音 SDK 的语言识别文档):
// Build the candidate-language config first, then construct the recognizer
// through the FromConfig factory so the auto-detect settings are applied.
var autoDetectSourceLanguageConfig = SpeechSDK.AutoDetectSourceLanguageConfig.fromLanguages(["en-US", "de-DE"]);
var speechRecognizer = SpeechSDK.SpeechRecognizer.FromConfig(speechConfig, autoDetectSourceLanguageConfig, audioConfig);
这是我的组件的一部分:
useEffect(() => {
  // Fetch an auth token, then build a speech recognizer that auto-detects
  // the spoken language among the candidate locales below.
  const fetchTokenAndSetupRecognizer = async () => {
    const tokenObj = await getTokenOrRefresh();
    if (tokenObj.authToken && tokenObj.region) {
      speechConfig.current = SpeechConfig.fromAuthorizationToken(
        tokenObj.authToken,
        tokenObj.region
      );
      // Candidate locales for automatic source-language detection.
      const autoDetectConfig = AutoDetectSourceLanguageConfig.fromLanguages([
        "en-US",
        "de-DE",
      ]);
      // FIX: the original created a microphone AudioConfig twice (before and
      // after the auto-detect config); the first instance was discarded.
      audioConfig.current = AudioConfig.fromDefaultMicrophoneInput();
      // FromConfig is required to wire the auto-detect config into the
      // recognizer; the plain constructor has no parameter for it.
      recognizer.current = SpeechRecognizer.FromConfig(
        speechConfig.current,
        autoDetectConfig,
        audioConfig.current
      );
      recognizer.current.recognized = (s, e) =>
        processRecognizedTranscript(e);
      recognizer.current.canceled = (s, e) => handleCanceled(e);
    }
    // Keep the UI disabled until a recognizer actually exists.
    setIsDisabled(!recognizer.current);
  };
  fetchTokenAndSetupRecognizer();
  // Cleanup: release the recognizer when the component unmounts.
  return () => {
    recognizer.current?.close();
  };
}, []);
我搜索了这里、文档和存储库,但 React/JavaScript 的示例和信息有限
我运行了您的代码,确认了在使用 Azure 语音 SDK 实现语音转文本的自动语言检测时存在的问题:自动检测配置没有被正确传入识别器。
要启用自动语言检测,您应该使用如下代码:
// FIX: the original snippet had syntax errors (`Const`, stray spaces after
// `sdk.`) and passed the auto-detect config as a third constructor argument,
// which the SDK constructor does not accept. The supported way to attach the
// auto-detect config is the static factory SpeechRecognizer.FromConfig:
const autoDetectConfig = sdk.AutoDetectSourceLanguageConfig.fromLanguages(["en-US", "de-DE", "zh-CN"]);
const recognizer = sdk.SpeechRecognizer.FromConfig(config, autoDetectConfig, audioConfig);
下面的完整示例使用 Azure 语音 SDK 分别从麦克风和音频文件中识别语音,代码参考了 Microsoft 官方文档(MS Docs)和 GitHub 上的官方示例。
// Azure Speech SDK sample: speech recognition with automatic language detection.
const sdk = require('microsoft-cognitiveservices-speech-sdk');
const fs = require('fs');
// Load credentials from a .env file into process.env.
require('dotenv').config();
// NOTE(review): the two env-var names use inconsistent casing
// (AZURE_SPEECH_KEY vs AZURE_SpeechRegion) — confirm they match the .env file.
const subscriptionKey =process.env.AZURE_SPEECH_KEY;
const serviceRegion = process.env.AZURE_SpeechRegion;
/**
 * Lightweight wrapper exposing the detected source language and its
 * confidence from a recognition result via read-only getters.
 */
class AutoDetectSourceLanguageResult {
  /**
   * @param {string} language - detected BCP-47 locale, e.g. "en-US"
   * @param {*} confidence - detection confidence reported by the service
   */
  constructor(language, confidence) {
    Object.assign(this, {
      privLanguage: language,
      privLanguageDetectionConfidence: confidence,
    });
  }

  /** Builds a wrapper from a recognition result object. */
  static fromResult(result) {
    const { language, languageDetectionConfidence } = result;
    return new AutoDetectSourceLanguageResult(language, languageDetectionConfidence);
  }

  get language() {
    return this.privLanguage;
  }

  get languageDetectionConfidence() {
    return this.privLanguageDetectionConfidence;
  }
}
/**
 * Recognizes a single utterance from the default microphone, auto-detecting
 * whether the speech is en-US or zh-CN, and logs the text plus the
 * detected language and confidence.
 */
async function recognitionWithMicrophone() {
    const audioConfig = sdk.AudioConfig.fromDefaultMicrophoneInput();
    const config = sdk.SpeechConfig.fromSubscription(subscriptionKey, serviceRegion);
    const autoDetectConfig = sdk.AutoDetectSourceLanguageConfig.fromLanguages(["en-US", "zh-CN"]);
    // FIX: the original passed autoDetectConfig as a third constructor
    // argument, which the SDK constructor ignores; FromConfig is the
    // supported way to attach the auto-detect configuration.
    const recognizer = sdk.SpeechRecognizer.FromConfig(config, autoDetectConfig, audioConfig);
    recognizer.recognizeOnceAsync(result => {
        if (result.reason === sdk.ResultReason.RecognizedSpeech) {
            const languageResult = AutoDetectSourceLanguageResult.fromResult(result);
            // FIX: the original read `languageDetectionResult`, an undefined
            // identifier, throwing a ReferenceError on every successful match.
            const detectedLanguage = languageResult.language;
            console.log(`RECOGNIZED: Text=${result.text}`);
            console.log(`DETECTED: Language=${detectedLanguage}; (Confidence: ${languageResult.languageDetectionConfidence})`);
        } else if (result.reason === sdk.ResultReason.NoMatch) {
            console.log("NOMATCH: Speech could not be recognized.");
        } else if (result.reason === sdk.ResultReason.Canceled) {
            const cancellation = sdk.CancellationDetails.fromResult(result);
            console.log(`CANCELED: Reason=${cancellation.reason}`);
            if (cancellation.reason === sdk.CancellationReason.Error) {
                console.log(`CANCELED: ErrorCode=${cancellation.errorCode}`);
                console.log(`CANCELED: ErrorDetails=${cancellation.errorDetails}`);
            }
        }
    });
}
/**
 * Continuously recognizes speech from a multi-language WAV file
 * (en-US / zh-CN), logging interim and final results together with the
 * detected language and confidence. Stops on cancellation or session end.
 */
async function multiLingualRecognitionWithAudioFile() {
    const audioFilePath = "console_en-us_zh-cn.wav";
    console.log(`Attempting to access audio file at: ${audioFilePath}`);
    if (!fs.existsSync(audioFilePath)) {
        console.error(`Error: Audio file '${audioFilePath}' not found.`);
        return;
    }
    try {
        const audioData = fs.readFileSync(audioFilePath);
        const audioConfig = sdk.AudioConfig.fromWavFileInput(audioData);
        const config = sdk.SpeechConfig.fromSubscription(subscriptionKey, serviceRegion);
        const autoDetectConfig = sdk.AutoDetectSourceLanguageConfig.fromLanguages(["en-US", "zh-CN"]);
        // FIX: the original built the recognizer without autoDetectConfig
        // (it was created but never used), so language detection never ran.
        // FromConfig wires the auto-detect configuration into the recognizer.
        const recognizer = sdk.SpeechRecognizer.FromConfig(config, autoDetectConfig, audioConfig);
        // Interim hypotheses while audio is still being processed.
        recognizer.recognizing = (s, e) => {
            if (e.result.reason === sdk.ResultReason.RecognizingSpeech) {
                const languageResult = AutoDetectSourceLanguageResult.fromResult(e.result);
                console.log(`RECOGNIZING: Text=${e.result.text}`);
                console.log(`DETECTED: Language=${languageResult.language} (Confidence: ${languageResult.languageDetectionConfidence})`);
            }
        };
        // Final result for each recognized phrase.
        recognizer.recognized = (s, e) => {
            if (e.result.reason === sdk.ResultReason.RecognizedSpeech) {
                const languageResult = AutoDetectSourceLanguageResult.fromResult(e.result);
                console.log(`RECOGNIZED: Text=${e.result.text}`);
                console.log(`DETECTED: Language=${languageResult.language} (Confidence: ${languageResult.languageDetectionConfidence})`);
            } else if (e.result.reason === sdk.ResultReason.NoMatch) {
                console.log("NOMATCH: Speech could not be recognized.");
            }
        };
        recognizer.canceled = (s, e) => {
            console.log(`CANCELED: Reason=${e.reason}`);
            if (e.reason === sdk.CancellationReason.Error) {
                console.log(`CANCELED: ErrorCode=${e.errorCode}`);
                console.log(`CANCELED: ErrorDetails=${e.errorDetails}`);
            }
            recognizer.stopContinuousRecognitionAsync();
        };
        recognizer.sessionStarted = (s, e) => {
            console.log("\n Session started event.");
        };
        recognizer.sessionStopped = (s, e) => {
            console.log("\n Session stopped event.");
            recognizer.stopContinuousRecognitionAsync();
        };
        await recognizer.startContinuousRecognitionAsync();
    } catch (error) {
        console.error("Error while initializing speech recognizer:", error);
    }
}
/**
 * Entry point: runs the microphone and audio-file samples concurrently
 * and logs any error that escapes either of them.
 */
async function main() {
    console.log("Starting Speech Recognition Samples...");
    try {
        const samples = [
            recognitionWithMicrophone(),
            multiLingualRecognitionWithAudioFile(),
        ];
        await Promise.all(samples);
    } catch (err) {
        console.error("Error occurred:", err);
    }
}
// Kick off the samples; the catch guards against any rejection that
// escapes main() itself (its internal try/catch already logs most errors).
main().catch(err => {
console.error("Error occurred:", err);
});
输出: