我想要什么: 我想在 dotnet 6 中将音频从客户端流式传输到服务器。然后阅读并转录它。之后将文本发送回客户端。
到目前为止我得到了什么:
服务器代码:
public class AudioRecordingHub : Hub
{
private SpeechRecognizer recognizer;
private MemoryStream audioStream;
private bool isTranscribing = false;
public async Task SendAudio(string base64Audio)
{
byte[] audioBytes = Convert.FromBase64String(base64Audio);
await audioStream.WriteAsync(audioBytes);
if (!isTranscribing)
{
isTranscribing = true;
await StartTranscription();
}
}
public async Task StopTranscription()
{
if (recognizer != null)
{
await recognizer.StopContinuousRecognitionAsync();
isTranscribing = false;
}
}
private async Task StartTranscription()
{
var speechConfig = SpeechConfig.FromSubscription("<key>", "<region>");
speechConfig.SetProperty("SpeechServiceResponse_OutputFormatOption", "Simple");
var callback = new CustomPullAudioInputStreamCallback(audioStream);
var inputStream = AudioInputStream.CreatePullStream(callback);
var audioConfig = AudioConfig.FromStreamInput(inputStream);
recognizer = new SpeechRecognizer(speechConfig, audioConfig);
recognizer.Recognized += (s, e) =>
{
if (e.Result.Reason == ResultReason.RecognizedSpeech)
{
// Send transcribed text to the client
Clients.Caller.SendAsync("ReceiveText", e.Result.Text);
}
};
await recognizer.StartContinuousRecognitionAsync();
}
}
public class CustomPullAudioInputStreamCallback : PullAudioInputStreamCallback
{
private readonly MemoryStream _audioStream;
private BinaryReader _binaryReader;
public CustomPullAudioInputStreamCallback(MemoryStream audioStream)
{
_audioStream = audioStream;
_binaryReader = new BinaryReader(_audioStream);
}
public override int Read(byte[] dataBuffer, uint size)
{
return _binaryReader.Read(dataBuffer, 0, (int)size);
}
public override void Close()
{
_binaryReader.Dispose();
}
}
客户代码:
recordRTC = RecordRTC(stream, {
type: 'audio',
mimeType: 'audio/webm',
timeSlice: 1000, // Send audio chunks every 1000 ms (1 second)
ondataavailable: async (blob) => {
if (connection && connection.state === signalR.HubConnectionState.Connected) {
const base64Audio = await blobToBase64(blob);
await connection.invoke('SendAudio', base64Audio);
}
}
},
});
const blobToBase64 = (blob) => new Promise((resolve, reject) => {
const reader = new FileReader();
reader.onerror = reject;
reader.onload = () => {
resolve(reader.result.split(',')[1]);
};
reader.readAsDataURL(blob);
});
我的程序.cs
.AddSignalR(options =>
{
options.EnableDetailedErrors = true;
options.MaximumReceiveMessageSize = 1024 * 1024 * 10; // Allow messages up to 10 MB in size
});
app
.UseEndpoints(endpoints =>
{
endpoints.MapControllerRoute(
name: "default",
pattern: "{controller=Home}/{action=Index}/{id?}");
endpoints.MapHub<AudioRecordingHub>("/audiorecordinghub");
});
来自 chrome 的日志消息:
在此之后我得到一个错误:
现在的问题是每次调用 SendAudio 时,所有变量都会被重置,并且 SpeechRecognizer Recognized 事件永远不会触发。
private SpeechRecognizer recognizer;
private MemoryStream audioStream = new();
private bool isTranscribing = false;
我尝试过的:
关于我的问题,你还有什么想知道的吗?
感谢大家的帮助!