我使用WebSocket接收浏览器捕获的音频流数据。
如果我使用默认英语,一切看起来都很好,但是当我尝试切换语言时,它几乎无法识别任何单词。
相关代码如下
...
// Shared speech config: key/region come from the environment; the empty-string
// fallbacks mean a missing variable fails at connect time, not here.
const speechConfig = SpeechConfig.fromSubscription(
process.env.AZURE_SPEECH_SERVICE_KEY || '',
process.env.AZURE_SPEECH_SERVICE_REGION || '',
)
// Recognition language for all connections (service default is en-US).
speechConfig.speechRecognitionLanguage = 'zh-CN'
console.log(speechConfig.speechRecognitionLanguage) // log zh-CN
// One recognizer per WebSocket connection; browser audio is fed into the push stream.
// NOTE(review): PushAudioInputStream.create() with no explicit format assumes
// 16 kHz, 16-bit, mono PCM. If the browser sends anything else (e.g. WebM/Opus
// from MediaRecorder, or 44.1/48 kHz PCM), the service will mostly return
// NoMatch — confirm the captured format and pass a matching AudioStreamFormat.
wss.on('connection', (ws) => {
const audioPushStream = PushAudioInputStream.create()
const audioConfig = AudioConfig.fromStreamInput(audioPushStream)
const speechRecognizer = new SpeechRecognizer(speechConfig, audioConfig)
// Intermediate (partial) hypotheses arrive via the `recognizing` callback.
speechRecognizer.recognizing = (s, e) => {
if (e.result.reason == ResultReason.RecognizingSpeech) {
logger.info(`RECOGNIZING: Text=${e.result.text}`)
} else if (e.result.reason === ResultReason.NoMatch) {
// NOTE(review): errorDetails is undefined for NoMatch results (matches the
// logged output); NoMatchDetails.fromResult(e.result) exposes the actual
// no-match reason — TODO confirm against the SDK version in use.
logger.info(
'Recognizing NOMATCH: Speech could not be recognized: ' +
e.result.errorDetails
)
} else {
throw new Error('Unexpected result reason')
}
}
...
speechRecognizer.startContinuousRecognitionAsync()
}
info: NOMATCH: Speech could not be recognized: undefined
info: NOMATCH: Speech could not be recognized: undefined
info: NOMATCH: Speech could not be recognized: undefined
info: NOMATCH: Speech could not be recognized: undefined
感谢您联系我们并报告此问题。
我使用了下面的代码,它对我来说效果很好。
请注意,我运行的是 cognitive-services-speech-sdk 仓库 master 分支下 samples/csharp/dotnet-windows/console 处的 C# 示例解决方案。
请注意将 SpeechRecognitionLanguage 设置为 zh-CN 的那一行(默认值为 en-US)。
/// <summary>
/// Performs continuous speech recognition over a compressed audio file pushed
/// into a <c>PushAudioStream</c>, with the recognition language set to zh-CN.
/// Completes when the session stops or recognition is canceled.
/// </summary>
/// <exception cref="FileNotFoundException">Thrown if the input audio file does not exist.</exception>
public static async Task SpeechRecognitionWithCompressedInputPushStreamAudio()
{
    // <recognitionWithCompressedInputPushStreamAudio>
    // Creates an instance of a speech config with specified subscription key and service region.
    // Replace with your own subscription key and service region (e.g., "westus").
    var config = SpeechConfig.FromSubscription("*****", "eastus2");
    // Recognition language must match the spoken audio; the service default is en-US.
    config.SpeechRecognitionLanguage = "zh-CN";

    // RunContinuationsAsynchronously prevents awaiters from running inline on the
    // SDK's event-callback thread when TrySetResult fires.
    var stopRecognition = new TaskCompletionSource<int>(TaskCreationOptions.RunContinuationsAsynchronously);

    // ANY lets the service auto-detect the compressed container format.
    // NOTE: compressed-format input requires GStreamer to be installed alongside the SDK.
    using (var pushStream = AudioInputStream.CreatePushStream(AudioStreamFormat.GetCompressedFormat(AudioStreamContainerFormat.ANY)))
    {
        using (var audioInput = AudioConfig.FromStreamInput(pushStream))
        {
            // Creates a speech recognizer using audio stream input.
            using (var recognizer = new SpeechRecognizer(config, audioInput))
            {
                // Subscribes to events.
                // Partial (in-progress) hypotheses.
                recognizer.Recognizing += (s, e) =>
                {
                    Console.WriteLine($"RECOGNIZING: Text={e.Result.Text}");
                };

                // Final results per utterance.
                recognizer.Recognized += (s, e) =>
                {
                    if (e.Result.Reason == ResultReason.RecognizedSpeech)
                    {
                        Console.WriteLine($"RECOGNIZED: Text={e.Result.Text}");
                    }
                    else if (e.Result.Reason == ResultReason.NoMatch)
                    {
                        Console.WriteLine($"NOMATCH: Speech could not be recognized.");
                    }
                };

                // Cancellation (including errors) ends the wait below.
                recognizer.Canceled += (s, e) =>
                {
                    Console.WriteLine($"CANCELED: Reason={e.Reason}");
                    if (e.Reason == CancellationReason.Error)
                    {
                        Console.WriteLine($"CANCELED: ErrorCode={e.ErrorCode}");
                        Console.WriteLine($"CANCELED: ErrorDetails={e.ErrorDetails}");
                        Console.WriteLine($"CANCELED: Did you update the subscription info?");
                    }
                    stopRecognition.TrySetResult(0);
                };

                recognizer.SessionStarted += (s, e) =>
                {
                    Console.WriteLine("\nSession started event.");
                };

                recognizer.SessionStopped += (s, e) =>
                {
                    Console.WriteLine("\nSession stopped event.");
                    Console.WriteLine("\nStop recognition.");
                    stopRecognition.TrySetResult(0);
                };

                // Starts continuous recognition. Uses StopContinuousRecognitionAsync() to stop recognition.
                await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

                // FIX: the original declared a BinaryAudioStreamReader but assigned a bare
                // BinaryReader, which does not compile. Wrap the BinaryReader in the sample's
                // BinaryAudioStreamReader helper, as the official sample does.
                using (var reader = new BinaryAudioStreamReader(new BinaryReader(File.OpenRead(@"1.wav"))))
                {
                    byte[] buffer = new byte[1000];
                    while (true)
                    {
                        var readSamples = reader.Read(buffer, (uint)buffer.Length);
                        if (readSamples == 0)
                        {
                            break; // end of file
                        }
                        pushStream.Write(buffer, readSamples);
                    }
                }
                // Closing the push stream signals end-of-audio so the service can
                // finalize the last utterance and stop the session.
                pushStream.Close();

                // Waits for completion.
                // Use Task.WaitAny to keep the task rooted.
                Task.WaitAny(new[] { stopRecognition.Task });

                // Stops recognition.
                await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
            }
        }
    }
    // </recognitionWithCompressedInputPushStreamAudio>
}
希望这有帮助。