如何为 Node.js 的 MS Azure 语音转文本服务启用词级置信度

问题描述 投票:0回答:1

根据此文档,可以在 Azure STT 服务的 JSON 输出中获取每个单词的置信度。问题是我找不到如何使用 Node.js 库来做到这一点(也不确定是否可行)。

现有代码
目前我的 STT 类中有以下配置:

// Describe the PCM audio that will be pushed into the stream:
// sample rate from the current config, 16 bits per sample, one (mono) channel.
const BITS_PER_SAMPLE = 16;
const CHANNEL_COUNT = 1;
const audioFormat = speechSdk.AudioStreamFormat.getWaveFormatPCM(
    currentConfig.sampleRateHertz,
    BITS_PER_SAMPLE,
    CHANNEL_COUNT
);

// Push stream that serves as the recognizer's audio input
this.audioStream = speechSdk.AudioInputStream.createPushStream(audioFormat);

// Speech settings: credentials, recognition language and detailed output
this.speechConfig = speechSdk.SpeechConfig.fromSubscription(
    this.subscriptionKey,
    this.region
);
this.speechConfig.speechRecognitionLanguage = currentConfig.language;
// MORE CONFIG
this.speechConfig.outputFormat = speechSdk.OutputFormat.Detailed;

// Wrap the push stream in an audio config the recognizer can consume
this.audioConfig = speechSdk.AudioConfig.fromStreamInput(this.audioStream);

// Build the recognizer from the speech settings and the audio input
this.recognizer = new speechSdk.SpeechRecognizer(
    this.speechConfig,
    this.audioConfig
);

然后我可以通过以下方式实时识别音频:

// Handler for the recognizer's `recognizing` event.
// Builds a { text, score } entry from the event's result and appends it to
// `this.results`, dropping the oldest entry once `this.maxResults` is reached.
this.recognizer.recognizing = (s, e) => {
    // The event args carry a single `result` object (there is no `results` property).
    console.debug("AzureProvider: JSON Result: ", e.result);

    if (e.result.reason === speechSdk.ResultReason.NoMatch) {
        console.warn("AzureProvider: No match found.");
        return;
    }

    // NOTE(review): confirm the SDK result actually exposes `confidence`;
    // when it is absent the score falls back to 100.
    const { text, confidence } = e.result;
    const result = {
        text,
        // Fall back to 100 only when no usable number is reported, so that a
        // genuine confidence of 0 is kept as 0 instead of being turned into 100.
        score: Number.isFinite(confidence) ? Math.round(confidence * 100) : 100,
    };

    // Keep the buffer bounded: evict the oldest entry before adding a new one.
    if (this.results.length === this.maxResults) {
        this.results.shift();
    }

    this.results.push(result);
};

// Handler for the recognizer's `recognized` event.
// Builds a { text, score } entry from the post-processed transcript, emits it
// as a 'result' event and appends it to `this.results`, dropping the oldest
// entry once `this.maxResults` is reached.
this.recognizer.recognized = (s, e) => {
    // The event args carry a single `result` object (there is no `results` property).
    console.debug("AzureProvider: JSON Result: ", e.result);

    // Nothing to report when the service found no match
    if (e.result.reason === speechSdk.ResultReason.NoMatch) {
        console.warn("AzureProvider: No final match found.");
        return;
    }

    // NOTE(review): confirm the SDK result actually exposes `confidence`;
    // when it is absent the score falls back to 100.
    const { confidence } = e.result;
    const result = {
        text: this.postProcessTranscript(e.result.text),
        // Fall back to 100 only when no usable number is reported, so that a
        // genuine confidence of 0 is kept as 0 instead of being turned into 100.
        score: Number.isFinite(confidence) ? Math.round(confidence * 100) : 100,
    };

    // Print to the console and notify listeners
    console.debug("AzureProvider: Final recognized result: ", result);
    this.emit('result', result);

    // Keep the buffer bounded: evict the oldest entry before adding a new one.
    if (this.results.length === this.maxResults) {
        this.results.shift();
    }

    this.results.push(result);
};

尝试实现词级置信度
为了启用单词级别的置信度,我尝试在上面的配置中(即 `// MORE CONFIG` 注释所在的位置)添加以下设置:

// Attempt 1: set the word-level confidence, word-level timestamp and
// N-best list properties on the speech config, addressing them by string name.
this.speechConfig.setProperty("SpeechServiceResponse_RequestWordLevelConfidence", "true");
this.speechConfig.setProperty("SpeechServiceResponse_RequestWordLevelTimestamps", "true");
this.speechConfig.setProperty("SpeechServiceResponse_NBestList", "5");

我还尝试添加以下内容:

// Attempt 2: pass "wordLevelConfidence" with the UriQueryParameter service property channel.
this.speechConfig.setProperty("wordLevelConfidence", "true", speechSdk.ServicePropertyChannel.UriQueryParameter)

这些都不起作用,我很茫然……

node.js azure speech-recognition speech-to-text azure-speech
1个回答
0
投票

通过对您的代码进行一些更改,我已成功获取了单词级置信度。我按如下方式配置了 `setProperty`:

// Request per-word confidence and per-word timestamps in the service response
// by switching both properties on, in this order.
const wordLevelProperties = [
    speechSdk.PropertyId.SpeechServiceResponse_RequestWordLevelConfidence,
    speechSdk.PropertyId.SpeechServiceResponse_RequestWordLevelTimestamps,
];
for (const propertyId of wordLevelProperties) {
    speechConfig.setProperty(propertyId, "true");
}
  • 我使用 `fs.readFileSync` 加载 `.wav` 文件,并通过 `AudioConfig.fromWavFileInput` 将其传递给识别器。
// Read the whole WAV file into memory, then hand the buffer to the SDK as the audio input.
const wavBuffer = fs.readFileSync(audioFile);
const audioConfig = speechSdk.AudioConfig.fromWavFileInput(wavBuffer);

app.js:

const speechSdk = require("microsoft-cognitiveservices-speech-sdk");
const fs = require("fs");

// Placeholders: the Speech resource key, its region, and the WAV file to transcribe.
const subscriptionKey = '<speechKey>';
const region = '<speeckRegion>';
const audioFile = 'audio/SPkam.wav';

// Speech settings: credentials, recognition language and detailed output.
const speechConfig = speechSdk.SpeechConfig.fromSubscription(subscriptionKey, region);
speechConfig.speechRecognitionLanguage = "en-US";
speechConfig.outputFormat = speechSdk.OutputFormat.Detailed;

// Request per-word confidence and per-word timestamps in the service response.
const wordLevelProperties = [
    speechSdk.PropertyId.SpeechServiceResponse_RequestWordLevelConfidence,
    speechSdk.PropertyId.SpeechServiceResponse_RequestWordLevelTimestamps,
];
for (const propertyId of wordLevelProperties) {
    speechConfig.setProperty(propertyId, "true");
}

// Stop with a failure exit code when the input file is missing.
const audioFileExists = fs.existsSync(audioFile);
if (!audioFileExists) {
    console.error("Audio file not found:", audioFile);
    process.exit(1);
}

// Load the WAV file into memory and build the recognizer on top of it.
const wavBuffer = fs.readFileSync(audioFile);
const audioConfig = speechSdk.AudioConfig.fromWavFileInput(wavBuffer);
const recognizer = new speechSdk.SpeechRecognizer(speechConfig, audioConfig);
console.log("Starting speech recognition...");
// No-op handler: this sample does nothing with `recognizing` events.
recognizer.recognizing = (s, e) => {
};

/**
 * Handler for the recognizer's `recognized` event.
 *
 * For a RecognizedSpeech result it prints the recognized text, the parsed
 * detailed JSON response, and - for every entry of the response's `NBest`
 * list - each word with its own confidence followed by the overall
 * confidence of that alternative. For a NoMatch result it prints a warning.
 *
 * @param {object} s - Event sender (unused).
 * @param {object} e - Event args; `e.result` holds the recognition result.
 */
recognizer.recognized = (s, e) => {
    const { reason, text } = e.result;

    if (reason === speechSdk.ResultReason.NoMatch) {
        console.warn("No match found.");
        return;
    }
    if (reason !== speechSdk.ResultReason.RecognizedSpeech) {
        return;
    }

    console.log("Recognized:", text);

    let parsedJson;
    try {
        // Read the raw service response through the public `json` accessor
        // instead of reaching into the private `privJson` field.
        parsedJson = JSON.parse(e.result.json);
    } catch (error) {
        console.error("Failed to parse JSON:", error);
        return;
    }

    console.log("Corrected JSON:", parsedJson);
    console.log("Transcription:", parsedJson.DisplayText);

    // Tolerate a response without an NBest list instead of throwing.
    for (const bestResult of parsedJson.NBest ?? []) {
        const words = bestResult.Words ?? [];
        console.log("Word-level confidence:");
        if (words.length > 0) {
            words.forEach((word, index) => {
                console.log(`  Word ${index + 1}: "${word.Word}"`);
                // Per-word score: use the word's own Confidence, not the
                // confidence of the whole alternative.
                console.log(`    Confidence: ${word.Confidence}`);
            });
        } else {
            console.log("  No words found in this result.");
        }
        console.log(`  Overall confidence for this transcription: ${bestResult.Confidence}`);
    }
};

// When recognition is canceled: report the error details, close the
// recognizer and exit with a failure code.
recognizer.canceled = (_sender, event) => {
    const { errorDetails } = event;
    console.error(`Recognition canceled: ${errorDetails}`);
    recognizer.close();
    process.exit(1);
};

// When the session stops: log it, close the recognizer and exit successfully.
recognizer.sessionStopped = function onSessionStopped(_sender, _event) {
    console.log("Session stopped.");
    recognizer.close();
    process.exit(0);
};
// Start continuous recognition; results are delivered to the handlers
// assigned on `recognizer`. No success callback is needed, but a failure to
// start is reported instead of being silently ignored.
recognizer.startContinuousRecognitionAsync(
    undefined,
    (error) => {
        console.error(`Failed to start recognition: ${error}`);
        recognizer.close();
        process.exit(1);
    }
);

输出:

我成功获得了以下输出中的单词级别置信度。

enter image description here enter image description here enter image description here

© www.soinside.com 2019 - 2024. All rights reserved.