How do I pass a stream from NAudio to MS SpeechRecognition?


I am trying to use the SetInputToAudioStream method with the speech recognition engine.

But it does not work. I have also searched several posts and tried every approach I could find, but it still does not work.

https://stackoverflow.com/a/6203533/1336662

To get the SpeechRecognitionEngine working, I have to use the SpeechStreamer class described in Sean's response.

Here is my code; it would be great if someone could help me.

using NAudio.Wave;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Speech.Recognition;
using System.Speech.AudioFormat;
using System.Text;
using System.Threading.Tasks;
using System.Windows;
using System.Windows.Controls;
using System.Windows.Data;
using System.Windows.Documents;
using System.Windows.Input;
using System.Windows.Media;
using System.Windows.Media.Imaging;
using System.Windows.Navigation;
using System.Windows.Shapes;
using System.Threading;

namespace WpfAppNAudio
{
    /// <summary>
    /// Interaction logic for MainWindow.xaml
    /// </summary>
    public partial class MainWindow : Window
    {
        public MainWindow()
        {
            InitializeComponent();
            console.AppendText("Click to start recording");
        }
        public WaveIn waveSource = null;
        public WaveFileWriter waveFile = null;
        private SpeechRecognitionEngine _recognizer = null;
        SpeechStreamer stream = null;
        private bool _recognizing;

        void RecognizeSpeechAndWriteToConsole()
        {
            _recognizer = new SpeechRecognitionEngine();
            try
            {
                // Create and load a grammar.  
                Grammar dictation = new DictationGrammar();
                dictation.Name = "Dictation Grammar";

                _recognizer.LoadGrammar(dictation);


                _recognizer.SpeechRecognized += _recognizer_SpeechRecognized; // if speech is recognized, call the specified method
                _recognizer.SpeechRecognitionRejected += _recognizer_SpeechRecognitionRejected; // if recognized speech is rejected, call the specified method
                _recognizer.SpeechDetected += _recognizer_SpeechDetected;
                _recognizer.RecognizeCompleted += _recognizer_RecognizeCompleted;


            }
            catch (Exception ex)
            {
                // Surface setup failures instead of silently swallowing them
                console.AppendText("recognizer setup failed: " + ex.Message);
            }
        }

        private void _recognizer_RecognizeCompleted(object sender, RecognizeCompletedEventArgs e)
        {
        }

        private void _recognizer_SpeechDetected(object sender, SpeechDetectedEventArgs e)
        {
        }

        private void _recognizer_SpeechRecognitionRejected(object sender, SpeechRecognitionRejectedEventArgs e)
        {
            console.AppendText("speech rejected");
        }

        private void _recognizer_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
        {
            console.AppendText("speech recognized" + e.Result.Text);
        }

        private void StartBtn_Click()
        {

            waveSource = new WaveIn();
            waveSource.WaveFormat = new WaveFormat(22050, 8, 1);

            waveSource.DataAvailable += new EventHandler<WaveInEventArgs>(waveSource_DataAvailable);
            waveSource.RecordingStopped += new EventHandler<StoppedEventArgs>(waveSource_RecordingStopped);

            waveFile = new WaveFileWriter(@"C:\Temp\Test0001.wav", waveSource.WaveFormat);

            console.AppendText("Starting recording");
            RecognizeSpeechAndWriteToConsole();
            waveSource.StartRecording();

        }

        void StopBtn_Click(object sender, EventArgs e)
        {
            waveSource.StopRecording();
        }

        void waveSource_DataAvailable(object sender, WaveInEventArgs e)
        {
            if (waveFile != null)
            {
                stream = new SpeechStreamer(e.Buffer.Length);               
                stream.Write(e.Buffer, 0, e.BytesRecorded);
                waveFile.Write(e.Buffer, 0, e.BytesRecorded);
                waveFile.Flush();
                if (!_recognizing)
                {
                    _recognizing = true;
                    _recognizer.SetInputToAudioStream(stream, new System.Speech.AudioFormat.SpeechAudioFormatInfo(22050, System.Speech.AudioFormat.AudioBitsPerSample.Eight, System.Speech.AudioFormat.AudioChannel.Mono));
                    var s = _recognizer.RecognizerInfo.SupportedAudioFormats;
                    _recognizer.RecognizeAsync(RecognizeMode.Multiple);
                }
            }
        }

        void waveSource_RecordingStopped(object sender, StoppedEventArgs e)
        {
            if (waveSource != null)
            {
                waveSource.Dispose();
                waveSource = null;
            }

            if (waveFile != null)
            {
                waveFile.Dispose();
                waveFile = null;
            }

        }

        private void Button_Click(object sender, RoutedEventArgs e)
        {
            StartBtn_Click();
        }

        private void Button_Click_1(object sender, RoutedEventArgs e)
        {
            waveSource.StopRecording();
        }
    }

    class SpeechStreamer : Stream
    {
        private AutoResetEvent _writeEvent;
        private List<byte> _buffer;
        private int _buffersize;
        private int _readposition;
        private int _writeposition;
        private bool _reset;

        public SpeechStreamer(int bufferSize)
        {
            _writeEvent = new AutoResetEvent(false);
            _buffersize = bufferSize;
            _buffer = new List<byte>(_buffersize);
            for (int i = 0; i < _buffersize; i++)
                _buffer.Add(new byte());
            _readposition = 0;
            _writeposition = 0;
        }

        public override bool CanRead
        {
            get { return true; }
        }

        public override bool CanSeek
        {
            get { return false; }
        }

        public override bool CanWrite
        {
            get { return true; }
        }

        public override long Length
        {
            get { return -1L; }
        }

        public override long Position
        {
            get { return 0L; }
            set { }
        }

        public override long Seek(long offset, SeekOrigin origin)
        {
            return 0L;
        }

        public override void SetLength(long value)
        {

        }

        public override int Read(byte[] buffer, int offset, int count)
        {
            int i = 0;
            while (i < count && _writeEvent != null)
            {
                if (!_reset && _readposition >= _writeposition)
                {
                    _writeEvent.WaitOne(100, true);
                    continue;
                }
                buffer[i] = _buffer[_readposition + offset];
                _readposition++;
                if (_readposition == _buffersize)
                {
                    _readposition = 0;
                    _reset = false;
                }
                i++;
            }

            return count;
        }

        public override void Write(byte[] buffer, int offset, int count)
        {
            for (int i = offset; i < offset + count; i++)
            {
                _buffer[_writeposition] = buffer[i];
                _writeposition++;
                if (_writeposition == _buffersize)
                {
                    _writeposition = 0;
                    _reset = true;
                }
            }
            _writeEvent.Set();

        }

        public override void Close()
        {
            _writeEvent.Close();
            _writeEvent = null;
            base.Close();
        }

        public override void Flush()
        {

        }
    }
}
c# .net speech-recognition naudio sapi
1 Answer
            stream = new SpeechStreamer(e.Buffer.Length);               
            stream.Write(e.Buffer, 0, e.BytesRecorded);

Is this really the only place the stream gets written to, with nowhere else in the application writing to it? If it only writes a few bytes at initialization, it will not keep feeding audio into the recognizer.
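That is exactly what happens in the posted code: waveSource_DataAvailable creates a brand-new SpeechStreamer on every callback, yet SetInputToAudioStream is only ever called with the first one, so the recognizer stops receiving audio after the first buffer. Below is a minimal, untested sketch of one way to restructure it, reusing the SpeechStreamer class from the question (the 100000-byte buffer size is an arbitrary illustrative value, not a recommendation):

        // Create ONE long-lived stream and recognizer before recording starts,
        // hand the stream to the recognizer once, then keep writing every
        // captured buffer into that same stream.
        private void StartBtn_Click()
        {
            waveSource = new WaveIn();
            waveSource.WaveFormat = new WaveFormat(22050, 8, 1);

            // One stream for the whole session; 100000 is an arbitrary size.
            stream = new SpeechStreamer(100000);

            _recognizer = new SpeechRecognitionEngine();
            _recognizer.LoadGrammar(new DictationGrammar());
            _recognizer.SpeechRecognized += _recognizer_SpeechRecognized;

            // Wire the recognizer to the stream exactly once.
            _recognizer.SetInputToAudioStream(stream,
                new SpeechAudioFormatInfo(22050, AudioBitsPerSample.Eight, AudioChannel.Mono));
            _recognizer.RecognizeAsync(RecognizeMode.Multiple);

            waveSource.DataAvailable += waveSource_DataAvailable;
            waveSource.StartRecording();
        }

        void waveSource_DataAvailable(object sender, WaveInEventArgs e)
        {
            // Every captured block goes into the SAME stream, so audio keeps
            // flowing into the recognizer instead of stopping after one buffer.
            stream.Write(e.Buffer, 0, e.BytesRecorded);
        }

The WaveFileWriter can still be fed from the same DataAvailable handler if a WAV copy of the capture is also needed.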
