Azure Speech-to-Text - Automatic language detection not working

Problem description

I'm working on a project in NextJS where I need to implement continuous speech-to-text with language detection. I've successfully set up speech-to-text for a single language, but I'm struggling to get automatic language detection working. The documentation seems limited, and I can't figure out what I'm doing wrong.

According to the documentation, it should be implemented like this (source):

var autoDetectSourceLanguageConfig = SpeechSDK.AutoDetectSourceLanguageConfig.fromLanguages(["en-US", "de-DE"]);
var speechRecognizer = SpeechSDK.SpeechRecognizer.FromConfig(speechConfig, autoDetectSourceLanguageConfig, audioConfig);

Here is part of my component:

    useEffect(() => {
      const fetchTokenAndSetupRecognizer = async () => {
        const tokenObj = await getTokenOrRefresh();
        if (tokenObj.authToken && tokenObj.region) {
          audioConfig.current = AudioConfig.fromDefaultMicrophoneInput();

          const autoDetectLanguages = [
            "en-US",
            "de-DE"
          ];
          speechConfig.current = SpeechConfig.fromAuthorizationToken(
            tokenObj.authToken,
            tokenObj.region
          );
          const autoDetectConfig =
            AutoDetectSourceLanguageConfig.fromLanguages(autoDetectLanguages);

          recognizer.current = SpeechRecognizer.FromConfig(
            speechConfig.current,
            autoDetectConfig,
            audioConfig.current
          );
          recognizer.current.recognized = (s, e) =>
            processRecognizedTranscript(e);
          recognizer.current.canceled = (s, e) => handleCanceled(e);
        }
        setIsDisabled(!recognizer.current);
      };
      fetchTokenAndSetupRecognizer();
      return () => {
        recognizer.current?.close();
      };
    }, []);

I've searched here, the documentation, and the repository, but examples and information for React/JavaScript are limited.

javascript azure azure-cognitive-services speech-to-text azure-speech
1 Answer

I tried your code and also ran into problems implementing automatic language detection in Azure Speech-to-Text with the Azure Speech SDK.

To enable language identification, you should use code like this:

const autoDetectConfig = sdk.AutoDetectSourceLanguageConfig.fromLanguages(["en-US", "de-DE", "zh-CN"]);
const recognizer = sdk.SpeechRecognizer.FromConfig(config, autoDetectConfig, audioConfig);
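
Since you mentioned continuous speech-to-text: by default the service identifies the language only once, at the start of the audio. Recent SDK versions also support a continuous language-identification mode that re-evaluates the language during the session. A minimal sketch, assuming your SDK version exposes PropertyId.SpeechServiceConnection_LanguageIdMode (set it before creating the recognizer):

// Sketch: switch language identification from at-start to continuous mode.
config.setProperty(sdk.PropertyId.SpeechServiceConnection_LanguageIdMode, "Continuous");
const autoDetectConfig = sdk.AutoDetectSourceLanguageConfig.fromLanguages(["en-US", "de-DE"]);
const recognizer = sdk.SpeechRecognizer.FromConfig(config, autoDetectConfig, audioConfig);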

The code below uses the Azure Speech SDK to recognize speech from the microphone and from an audio file; it is taken from MSDOC and git:

const sdk = require('microsoft-cognitiveservices-speech-sdk');
const fs = require('fs');
require('dotenv').config();

const subscriptionKey = process.env.AZURE_SPEECH_KEY;
const serviceRegion = process.env.AZURE_SPEECH_REGION;

class AutoDetectSourceLanguageResult {
    constructor(language, confidence) {
        this.privLanguage = language;
        this.privLanguageDetectionConfidence = confidence;
    }

    static fromResult(result) {
        return new AutoDetectSourceLanguageResult(result.language, result.languageDetectionConfidence);
    }

    get language() {
        return this.privLanguage;
    }

    get languageDetectionConfidence() {
        return this.privLanguageDetectionConfidence;
    }
}
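
// Note: the SDK already exports its own AutoDetectSourceLanguageResult with a
// static fromResult() helper, so this local class is only needed if you want to
// shape the result yourself. With the built-in type the equivalent call would be:
//   const languageResult = sdk.AutoDetectSourceLanguageResult.fromResult(result);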

async function recognitionWithMicrophone() {
    const audioConfig = sdk.AudioConfig.fromDefaultMicrophoneInput();
    const config = sdk.SpeechConfig.fromSubscription(subscriptionKey, serviceRegion);
    const autoDetectConfig = sdk.AutoDetectSourceLanguageConfig.fromLanguages(["en-US", "zh-CN"]);
    // FromConfig attaches the auto-detect config; the plain constructor would ignore a third argument.
    const recognizer = sdk.SpeechRecognizer.FromConfig(config, autoDetectConfig, audioConfig);

    recognizer.recognizeOnceAsync(result => {
        if (result.reason === sdk.ResultReason.RecognizedSpeech) {
            const languageResult = AutoDetectSourceLanguageResult.fromResult(result);
            const detectedLanguage = languageResult.language;
            console.log(`RECOGNIZED: Text=${result.text}`);

            console.log(`DETECTED: Language=${detectedLanguage}; (Confidence: ${languageResult.languageDetectionConfidence})`);
        } else if (result.reason === sdk.ResultReason.NoMatch) {
            console.log("NOMATCH: Speech could not be recognized.");
        } else if (result.reason === sdk.ResultReason.Canceled) {
            const cancellation = sdk.CancellationDetails.fromResult(result);
            console.log(`CANCELED: Reason=${cancellation.reason}`);
            if (cancellation.reason === sdk.CancellationReason.Error) {
                console.log(`CANCELED: ErrorCode=${cancellation.errorCode}`);
                console.log(`CANCELED: ErrorDetails=${cancellation.errorDetails}`);
            }
        }
    });
}
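
// Note: AudioConfig.fromDefaultMicrophoneInput() relies on the browser's
// getUserMedia API; under plain Node.js the microphone path above will not
// capture audio, so use the audio-file variant below instead.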

async function multiLingualRecognitionWithAudioFile() {
    const audioFilePath = "console_en-us_zh-cn.wav";

    console.log(`Attempting to access audio file at: ${audioFilePath}`);

    if (!fs.existsSync(audioFilePath)) {
        console.error(`Error: Audio file '${audioFilePath}' not found.`);
        return;
    }

    try {
        const audioData = fs.readFileSync(audioFilePath);
        const audioConfig = sdk.AudioConfig.fromWavFileInput(audioData);
        const config = sdk.SpeechConfig.fromSubscription(subscriptionKey, serviceRegion);

        const autoDetectConfig = sdk.AutoDetectSourceLanguageConfig.fromLanguages(["en-US", "zh-CN"]);
        // Pass the auto-detect config via FromConfig so language identification is actually enabled.
        const recognizer = sdk.SpeechRecognizer.FromConfig(config, autoDetectConfig, audioConfig);

        recognizer.recognizing = (s, e) => {
            if (e.result.reason === sdk.ResultReason.RecognizingSpeech) {
                const languageResult = AutoDetectSourceLanguageResult.fromResult(e.result);
                console.log(`RECOGNIZING: Text=${e.result.text}`);
                console.log(`DETECTED: Language=${languageResult.language} (Confidence: ${languageResult.languageDetectionConfidence})`);
            }
        };

        recognizer.recognized = (s, e) => {
            if (e.result.reason === sdk.ResultReason.RecognizedSpeech) {
                const languageResult = AutoDetectSourceLanguageResult.fromResult(e.result);
                console.log(`RECOGNIZED: Text=${e.result.text}`);
                console.log(`DETECTED: Language=${languageResult.language} (Confidence: ${languageResult.languageDetectionConfidence})`);
            } else if (e.result.reason === sdk.ResultReason.NoMatch) {
                console.log("NOMATCH: Speech could not be recognized.");
            }
        };

        recognizer.canceled = (s, e) => {
            console.log(`CANCELED: Reason=${e.reason}`);
            if (e.reason === sdk.CancellationReason.Error) {
                console.log(`CANCELED: ErrorCode=${e.errorCode}`);
                console.log(`CANCELED: ErrorDetails=${e.errorDetails}`);
            }
            recognizer.stopContinuousRecognitionAsync();
        };

        recognizer.sessionStarted = (s, e) => {
            console.log("\n    Session started event.");
        };

        recognizer.sessionStopped = (s, e) => {
            console.log("\n    Session stopped event.");
            recognizer.stopContinuousRecognitionAsync();
        };

        // startContinuousRecognitionAsync reports completion via callbacks rather than a Promise.
        recognizer.startContinuousRecognitionAsync(
            () => console.log("Continuous recognition started."),
            err => console.error("Failed to start continuous recognition:", err)
        );
    } catch (error) {
        console.error("Error while initializing speech recognizer:", error);
    }
}

async function main() {
    console.log("Starting Speech Recognition Samples...");
    try {
        await Promise.all([
            recognitionWithMicrophone(),
            multiLingualRecognitionWithAudioFile()
        ]);
    } catch (err) {
        console.error("Error occurred:", err);
    }
}

main().catch(err => {
    console.error("Error occurred:", err);
});
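
To run this sample locally you would install the two packages it imports and supply the key and region through a .env file; the variable names below are simply the ones the code above reads:

npm install microsoft-cognitiveservices-speech-sdk dotenv

# .env
AZURE_SPEECH_KEY=<your-key>
AZURE_SPEECH_REGION=<your-region>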

Output:

[screenshot of the console output showing the recognized text and detected language]
