我目前正在尝试编写一个 Python 脚本,使用连续识别来识别来自麦克风的语音。我使用了 Azure 语音服务文档中的示例代码(https://learn.microsoft.com/en-us/azure/ai-services/speech-service/how-to-recognize-speech?pivots=programming-language-python)。但是,我的程序永远不会退出 while 循环。如何在不输入命令的情况下停止识别?是否可以通过语音方式(例如长时间停顿或说出关键字)来停止连续语音识别?我正在尝试构建一个语音机器人。我的理解是否正确:为了与语音机器人交互,用户要么每次说话不超过 15 秒(使用单次识别),要么在每次说完话后都操作一下设备(使用连续识别)?谢谢!
代码:
import time
from dotenv import dotenv_values
import azure.cognitiveservices.speech as speechsdk
def recognised_speech(evt):
    """Print the text of a recognition result, prefixed with ``You: ``.

    Used as the handler connected to ``speech_recognizer.recognized``.

    Args:
        evt: Event object exposing the recognized text as ``evt.result.text``.
    """
    print(f"You: {evt.result.text}")
def cont_speech_to_text():
    """Run continuous recognition on the module-level ``speech_recognizer``.

    Connects the event handlers, starts continuous recognition and then
    polls every 0.5 s until ``stop_cb`` has run.  ``stop_cb`` is connected
    only to the ``session_stopped`` and ``canceled`` signals, and nothing in
    this function asks the recognizer to stop, so the loop ends only when
    one of those two events is raised.
    """
    done_talking = False

    def stop_cb(evt):
        """Report the event, flag the polling loop to exit and stop recognition."""
        nonlocal done_talking
        print('You: {}'.format(evt))
        done_talking = True
        speech_recognizer.stop_continuous_recognition()

    # Optional: also show intermediate hypotheses while the user is speaking.
    #speech_recognizer.recognizing.connect(lambda evt: print('RECOGNIZING: {}'.format(evt)))
    speech_recognizer.recognized.connect(recognised_speech)
    speech_recognizer.session_started.connect(lambda evt: print('SESSION STARTED: {}'.format(evt)))
    speech_recognizer.session_stopped.connect(lambda evt: print('SESSION STOPPED {}'.format(evt)))
    speech_recognizer.canceled.connect(lambda evt: print('CANCELED {}'.format(evt)))
    speech_recognizer.session_stopped.connect(stop_cb)
    speech_recognizer.canceled.connect(stop_cb)

    speech_recognizer.start_continuous_recognition()
    # Recognition results arrive through the callbacks above; just wait here.
    while not done_talking:
        time.sleep(.5)
# Azure region of the Speech resource.
SPEECH_REGION = "westeurope"

# The subscription key is read from the dotenv-style file "<keypath>.key".
keypath = "..."
speechkey = dotenv_values(f"{keypath}.key")

speech_config = speechsdk.SpeechConfig(
    subscription=speechkey["KEY"],
    region=SPEECH_REGION,
)
speech_config.speech_recognition_language = "en-US"
speech_config.speech_synthesis_voice_name = "en-US-AvaMultilingualNeural"

# Recognize from the default microphone.
audio_config = speechsdk.audio.AudioConfig(use_default_microphone=True)
speech_recognizer = speechsdk.SpeechRecognizer(
    speech_config=speech_config,
    audio_config=audio_config,
)

cont_speech_to_text()
'''
是的,您可以通过语音命令或长时间停顿来退出循环并停止识别。
def cont_speech_to_text(stop_phrase="stop listening"):
    """Run continuous recognition until the user says the stop phrase.

    Connects the event handlers on the module-level ``speech_recognizer``,
    starts continuous recognition and polls every 0.5 s until ``stop_cb``
    has run (it is connected to ``session_stopped`` and ``canceled``).

    Args:
        stop_phrase: Phrase that ends recognition when it appears anywhere
            in a recognized utterance (case-insensitive).  An empty phrase
            disables the voice command.
    """
    done_talking = False
    # Normalise once; the comparison below is case-insensitive.
    phrase = stop_phrase.lower()

    def recognised_speech(evt):
        """Print the recognized text and stop recognition on the stop phrase."""
        print(f"You: {evt.result.text}")
        if phrase and phrase in evt.result.text.lower():
            # NOTE(review): this is a blocking call made from inside a
            # recognizer callback, as in the original snippet -- if it is
            # ever seen to hang, set a flag here and issue the stop from the
            # polling loop instead; confirm against the SDK documentation.
            speech_recognizer.stop_continuous_recognition()

    def stop_cb(evt):
        """Report the event, stop recognition and let the polling loop exit."""
        nonlocal done_talking
        print('CLOSING {}'.format(evt))
        speech_recognizer.stop_continuous_recognition()
        done_talking = True

    # Optional: also show intermediate hypotheses while the user is speaking.
    #speech_recognizer.recognizing.connect(lambda evt: print('RECOGNIZING: {}'.format(evt)))
    speech_recognizer.recognized.connect(recognised_speech)
    speech_recognizer.session_started.connect(lambda evt: print('SESSION STARTED: {}'.format(evt)))
    speech_recognizer.session_stopped.connect(lambda evt: print('SESSION STOPPED {}'.format(evt)))
    speech_recognizer.canceled.connect(lambda evt: print('CANCELED {}'.format(evt)))
    speech_recognizer.session_stopped.connect(stop_cb)
    speech_recognizer.canceled.connect(stop_cb)

    speech_recognizer.start_continuous_recognition()
    # Recognition results arrive through the callbacks above; just wait here.
    while not done_talking:
        time.sleep(.5)
# Azure region of the Speech resource.
SPEECH_REGION = "eastus"
# "xxxxxxx" is a placeholder: substitute your own Speech resource key.
speech_config = speechsdk.SpeechConfig(subscription="xxxxxxx", region=SPEECH_REGION)
speech_config.speech_recognition_language = "en-US"
speech_config.speech_synthesis_voice_name = 'en-US-AvaMultilingualNeural'
# Recognize from the default microphone.
audio_config = speechsdk.audio.AudioConfig(use_default_microphone=True)
speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)

# Start listening; without this call the snippet only defines the function
# and produces none of the output shown below.
cont_speech_to_text()
输出:
SESSION STARTED: SessionEventArgs(session_id=ee53dddc86fc4142ab6a3dddf03c5b7e)
You: Stop listening.
SESSION STOPPED SessionEventArgs(session_id=ee53dddc86fc4142ab6a3dddf03c5b7e)
CLOSING SessionEventArgs(session_id=ee53dddc86fc4142ab6a3dddf03c5b7e)
在这里,我使用 "stop listening" 作为停止连续语音识别的语音命令,但您可以换成任何您想要的短语。
代码
def cont_speech_to_text(pause_threshold=10):
    """Run continuous recognition until no speech is recognized for a while.

    Connects the event handlers on the module-level ``speech_recognizer``,
    starts continuous recognition and polls every 0.5 s.  When more than
    ``pause_threshold`` seconds have passed since the last recognized text,
    recognition is stopped once; the loop then waits for ``stop_cb``
    (connected to ``session_stopped`` and ``canceled``) to finish.

    Args:
        pause_threshold: Maximum silence, in seconds, tolerated after the
            last recognized text before recognition is stopped.
    """
    done_talking = False
    stop_requested = False
    # Monotonic clock: an elapsed-time check must not be affected by
    # wall-clock adjustments.
    last_recognized_time = time.monotonic()

    def recognised_speech(evt):
        """Print the recognized text and restart the pause timer."""
        nonlocal last_recognized_time
        print(f"You: {evt.result.text}")
        # Only non-empty text counts as speech; a result with empty text
        # (presumably a no-match/silence result -- confirm against the SDK
        # docs) must not postpone the pause timeout.
        if evt.result.text:
            last_recognized_time = time.monotonic()
        # if "stop listening" in evt.result.text.lower():
        #     speech_recognizer.stop_continuous_recognition()

    def stop_cb(evt):
        """Report the event, stop recognition and let the polling loop exit."""
        nonlocal done_talking
        print('CLOSING {}'.format(evt))
        speech_recognizer.stop_continuous_recognition()
        done_talking = True

    # Optional: also show intermediate hypotheses while the user is speaking.
    #speech_recognizer.recognizing.connect(lambda evt: print('RECOGNIZING: {}'.format(evt)))
    speech_recognizer.recognized.connect(recognised_speech)
    speech_recognizer.session_started.connect(lambda evt: print('SESSION STARTED: {}'.format(evt)))
    speech_recognizer.session_stopped.connect(lambda evt: print('SESSION STOPPED {}'.format(evt)))
    speech_recognizer.canceled.connect(lambda evt: print('CANCELED {}'.format(evt)))
    speech_recognizer.session_stopped.connect(stop_cb)
    speech_recognizer.canceled.connect(stop_cb)

    speech_recognizer.start_continuous_recognition()
    while not done_talking:
        paused_too_long = time.monotonic() - last_recognized_time > pause_threshold
        # Issue the stop only once: the loop keeps polling until stop_cb has
        # run, and must not print/stop again on every iteration meanwhile.
        if paused_too_long and not stop_requested:
            stop_requested = True
            print("Stopping due to long pause...")
            speech_recognizer.stop_continuous_recognition()
        time.sleep(.5)
# Azure region of the Speech resource.
SPEECH_REGION = "eastus"
# "xxcxxxx" is a placeholder: substitute your own Speech resource key.
speech_config = speechsdk.SpeechConfig(subscription="xxcxxxx", region=SPEECH_REGION)
speech_config.speech_recognition_language = "en-US"
speech_config.speech_synthesis_voice_name = 'en-US-AvaMultilingualNeural'
# Recognize from the default microphone.
audio_config = speechsdk.audio.AudioConfig(use_default_microphone=True)
speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)

# Start listening; without this call the snippet only defines the function
# and produces none of the output shown below.
cont_speech_to_text()
输出:
SESSION STARTED: SessionEventArgs(session_id=ef6dc34bd7bd4e0b81ae9a6d9f4363c1)
You: Hi.
Stopping due to long pause...
SESSION STOPPED SessionEventArgs(session_id=ef6dc34bd7bd4e0b81ae9a6d9f4363c1)
CLOSING SessionEventArgs(session_id=ef6dc34bd7bd4e0b81ae9a6d9f4363c1)
在这里,我记录每次识别到语音的时间,并在 while 循环中检查距上次识别的时间差;暂停阈值为 10 秒。
或者,您也可以同时使用语音命令和暂停超时来停止连续语音识别:只需在上面给出的代码中,取消 recognised_speech 函数里 if 条件的注释即可。