总而言之,当 translator.recognize_once_async().get() 运行时,它会阻塞我的代码;但是,无论我对着麦克风说多少话,它都接收不到任何语音输入。
我已经在实验练习的代码中对此进行了测试,那段代码本身不会返回任何错误,并且能够检测到音频输入。但是当与 tkinter 一起运行时,它不接受任何语音输入,并且会一直等满整整 15 秒。
我尝试过使用线程和队列,认为这种方法可以让我在后台运行语音输入检测。然而还是不行。
我已经检查了我的麦克风输入访问权限,并根据 azure 提供的练习实验室对其进行了测试
以下是我用来测试的代码
from dotenv import load_dotenv
import os
import time as timer
import azure.cognitiveservices.speech as speech_sdk
import tkinter as tk
from tkinter import ttk
# NOTE: the dropdown boxes work fine in the author's own testing; to keep this
# example simple the pipeline is preset to English input and Chinese output.

# Read SPEECH_KEY / SPEECH_REGION from the environment (after loading .env).
load_dotenv()
ai_key = os.environ.get('SPEECH_KEY')
ai_region = os.environ.get('SPEECH_REGION')

# Config handed to the SpeechSynthesizer that speaks the translated text.
speech_config = speech_sdk.SpeechConfig(subscription=ai_key, region=ai_region)

# Config handed to the TranslationRecognizer; every language the app can
# translate into is registered as a target up front.
translation_config = speech_sdk.translation.SpeechTranslationConfig(
    subscription=ai_key,
    region=ai_region,
)
for _target_code in ('en', 'zh-Hans', 'ta', 'yue', 'ms'):
    translation_config.add_target_language(_target_code)
def reset_application():
    """Put the window back into its start-up state.

    Both language pickers are emptied and offered the full language list
    again, the status line shows the initial prompt, and the run button is
    disabled until a new pair of languages has been chosen.
    """
    for picker in (input_lang_combobox, output_lang_combobox):
        # Drop the current selection and restore every language as an option.
        picker.set('')
        picker['values'] = languages

    status_label.configure(
        text="Status: Waiting for both input and output language to be selected"
    )
    run_button.configure(state=tk.DISABLED)
def update_languages(*args):
    """Keep the two language pickers mutually exclusive and refresh the status.

    Bound to ``<<ComboboxSelected>>`` on both comboboxes; the positional
    arguments Tk passes in are ignored.

    Each picker's option list is narrowed so that it cannot offer the
    language currently chosen in the other picker. When both pickers hold the
    same language, the output selection is cleared and the input selection is
    kept. The run button is enabled only while both pickers hold a value.
    """
    # Get the currently selected languages
    input_language = input_lang_combobox.get()
    output_language = output_lang_combobox.get()

    # Update output language options based on input selection
    if input_language:
        output_lang_combobox['values'] = [
            lang for lang in languages if lang != input_language
        ]
        if output_language == input_language:
            output_lang_combobox.set('')
            # Keep the local copy in sync with the widget. Otherwise the
            # checks below would act on the value that was just cleared:
            # the input picker would be emptied too and the app would report
            # "Ready" with nothing selected.
            output_language = ''
    else:
        output_lang_combobox['values'] = languages

    # Update input language options based on output selection
    if output_language:
        input_lang_combobox['values'] = [
            lang for lang in languages if lang != output_language
        ]
    else:
        input_lang_combobox['values'] = languages

    # Check if both languages are selected to update the status label and button state
    if input_language and output_language:
        style.configure('Custom.TButton',
                        foreground='orange',  # Text color
                        background='white')   # Background color
        status_label.config(text=f"Status: Ready to translate from {input_language} to {output_language}\n "
                                 f"Click button to begin translation service", style='Custom.TButton')
        run_button.config(state=tk.NORMAL)  # Enable the button
    else:
        status_label.config(text="Status: Waiting for both input and output language to be selected")
        run_button.config(state=tk.DISABLED)  # Keep the button disabled
def start_translation():
    """Run one listen -> translate -> speak cycle (preset: en-US in, zh-Hans out).

    Command handler of the "Begin Translation" button. The run button is
    disabled, then the individual steps are chained through ``root.after`` so
    the status label can be updated between them. Any step that fails opens
    an error pop-up and calls ``reset_application()``.

    NOTE: ``recognize_once_async().get()`` and ``speak_text_async().get()``
    are called from Tk callbacks, so the window does not respond while they
    are waiting for the Speech service.
    """
    run_button.config(state=tk.DISABLED)
    # configure input language
    translation_config.speech_recognition_language = 'en-US'
    status_label.config(text="Status: Please speak into the microphone now")

    def show_error_and_reset(message):
        """Open the error pop-up showing *message*, then reset the main window."""
        new_window2 = tk.Toplevel(root)
        new_window2.title("Error Pop-Up")
        translated_text_label = tk.Label(new_window2, text=message, font=("Arial", 12,))
        translated_text_label.pack()
        reset_application()

    def translate(targetLanguage):
        """Schedule the whole pipeline for *targetLanguage* (a target code such as 'zh-Hans')."""

        def recognition(targetLanguage):
            """Listen once on the default microphone and dispatch on the result."""
            audio_config = speech_sdk.AudioConfig(use_default_microphone=True)
            translator = speech_sdk.translation.TranslationRecognizer(translation_config, audio_config=audio_config)
            # Blocks this Tk callback until the SDK returns a result.
            result = translator.recognize_once_async().get()

            if result.reason == speech_sdk.ResultReason.TranslatedSpeech:
                # Debug output. Only read the translations on success: for
                # NoMatch / Canceled results the lookup used to raise a
                # KeyError, which hid the real failure and skipped the
                # error handling below.
                print(result.text)
                print(result.translations.get(targetLanguage))
                root.after(1000, translate_process, targetLanguage, result)
            elif result.reason == speech_sdk.ResultReason.NoMatch:
                show_error_and_reset(
                    f"Status: An error occurred {result.no_match_details}, event 'matching speech with input' failed. Restarting Process\nClose the pop-up to proceed"
                )
            elif result.reason == speech_sdk.ResultReason.Canceled:
                cancellation_details = result.cancellation_details
                if cancellation_details.reason == speech_sdk.CancellationReason.Error:
                    # The reason alone ("Error") says nothing about the cause
                    # (bad key, wrong region, no network, ...); log the details.
                    print(cancellation_details.error_details)
                show_error_and_reset(
                    f"Status: An error occurred {cancellation_details.reason}, event 'input speech' failed. Restarting Process\nClose the pop-up to proceed"
                )

        def translate_process(targetLanguage, result):
            """Pick the translation for *targetLanguage* out of *result*."""
            status_label.config(text="Status: Valid Input Speech Detected. Translating In Process")
            try:
                translation = result.translations[targetLanguage]
            except KeyError as e:
                show_error_and_reset(
                    f"Status: An error occurred: {e}, event 'Translation' failed. Restarting Process\nClose the pop-up to proceed"
                )
                # Stop here: there is no translation to hand to the next step.
                return
            root.after(1000, complete_translation, targetLanguage, translation)

        def complete_translation(targetLanguage, translation):
            """Select the synthesis voice for *targetLanguage* and schedule playback."""
            status_label.config(text="Status: Translation Complete")
            # Synthesize translation
            voices = {
                "en": "en-SG-WayneNeural",
                "yue": "yue-CN-YunSongNeural",
                "ta": "ta-SG-AnbuNeural",
                "zh-Hans": "zh-CN-YunxiNeural",
                "ms": "ms-MY-OsmanNeural"
            }
            speech_config.speech_synthesis_voice_name = voices.get(targetLanguage)
            speech_synthesizer = speech_sdk.SpeechSynthesizer(speech_config)
            root.after(1000, synthesize_output, translation, speech_synthesizer)

        def synthesize_output(translation, speech_synthesizer):
            """Speak *translation*; reset the window if synthesis did not complete."""
            status_label.config(text="Status: Producing Output")
            speak = speech_synthesizer.speak_text_async(translation).get()
            if speak.reason != speech_sdk.ResultReason.SynthesizingAudioCompleted:
                print(speak.reason)
                reset_application()

        # Start the translation after a short delay to allow the label to update
        root.after(500, recognition, targetLanguage)

    # Configure output language + begin translation
    translate(targetLanguage='zh-Hans')
# Display names offered by both language pickers.
languages = ["English", "Chinese", "Cantonese", "Malay", "Tamil"]

# Font family shared by the title, the picker captions and the buttons.
_UI_FONT_FAMILY = "LG Smart UI Bold"

# ---- Main window ----------------------------------------------------------
root = tk.Tk()
root.geometry("1200x400")  # Adjust the size as needed

# ---- Row 0: application title --------------------------------------------
app_title_label = ttk.Label(
    root,
    text="Translation App",
    font=(_UI_FONT_FAMILY, 56, "bold"),
    foreground='orange',
)
app_title_label.grid(row=0, column=0, columnspan=2, padx=10, pady=5)

# ---- Row 1: language pickers ---------------------------------------------
dropdown_frame = ttk.Frame(root)
dropdown_frame.grid(row=1, column=0, padx=10, pady=10)

# Look shared by the two picker captions.
_caption_options = {
    "foreground": "white",
    "background": "orange",
    "font": (_UI_FONT_FAMILY, 24, "bold"),
}

# Widgets are packed left-to-right in creation order:
# input caption, input picker, output caption, output picker.
input_lang_label = ttk.Label(dropdown_frame, text="Select Input Language", **_caption_options)
input_lang_label.pack(side=tk.LEFT)

input_lang_combobox = ttk.Combobox(dropdown_frame, values=languages)
input_lang_combobox.pack(side=tk.LEFT, padx=10)
input_lang_combobox.bind('<<ComboboxSelected>>', update_languages)

output_lang_label = ttk.Label(dropdown_frame, text="Select Output Language", **_caption_options)
output_lang_label.pack(side=tk.LEFT)

output_lang_combobox = ttk.Combobox(dropdown_frame, values=languages)
output_lang_combobox.pack(side=tk.LEFT)
output_lang_combobox.bind('<<ComboboxSelected>>', update_languages)

# ---- Row 2: action buttons -----------------------------------------------
button_frame = ttk.Frame(root)
button_frame.grid(row=2, column=0, pady=30)

# Style used by both buttons (update_languages reconfigures it later).
style = ttk.Style()
style.configure(
    'Custom.TButton',
    foreground='orange',
    background='white',
    font=(_UI_FONT_FAMILY, 12, "bold"),
    padding=10,
)

# The run button starts disabled; update_languages enables it.
run_button = ttk.Button(
    button_frame,
    text="Begin Translation",
    style='Custom.TButton',
    command=start_translation,
    state=tk.DISABLED,
)
run_button.pack(side=tk.LEFT, padx=10)

reset_button = ttk.Button(
    button_frame,
    text="Reset",
    style='Custom.TButton',
    command=reset_application,
)
reset_button.pack(side=tk.LEFT, padx=10)

# ---- Row 3: status bar ---------------------------------------------------
status_bar = ttk.Frame(root, relief=tk.SUNKEN)
status_bar.grid(row=3, column=0, sticky=tk.W + tk.E, padx=20, pady=50)

status_label = ttk.Label(
    status_bar,
    text="Status: Waiting for both input and output language to be selected",
    anchor=tk.W,
)
status_label.pack(fill=tk.X)

# Hand control to Tk; returns when the window is closed.
root.mainloop()
我尝试了以下示例代码,使用 tkinter 在对着麦克风说话时将语音转换为文本。
代码:
import tkinter as tk
from tkinter import ttk
import azure.cognitiveservices.speech as speechsdk
class AudioToTextTranslator:
    """Tk window that runs one-shot Azure speech-to-text on the default microphone.

    The class reads the module-level ``languages`` mapping (display name ->
    locale code), which must exist before an instance is created.
    """

    def __init__(self, root):
        """Build the language pickers and the run / reset buttons inside *root*."""
        self.root = root
        self.root.title("Speech to Text Translator")

        # Row 0: the two language pickers, packed left to right.
        dropdown_frame = ttk.Frame(root)
        dropdown_frame.grid(row=0, column=0, padx=10, pady=5)

        input_lang_label = ttk.Label(dropdown_frame, text="Select Input Language", foreground="white", background="orange", font=("LG Smart UI Bold", 24, "bold"))
        input_lang_label.pack(side=tk.LEFT)

        self.input_lang_combobox = ttk.Combobox(dropdown_frame, values=list(languages.keys()))
        self.input_lang_combobox.pack(side=tk.LEFT, padx=10)
        self.input_lang_combobox.bind('<<ComboboxSelected>>', self.update_languages)

        output_lang_label = ttk.Label(dropdown_frame, text="Select Output Language", foreground="white", background="orange", font=("LG Smart UI Bold", 24, "bold"))
        output_lang_label.pack(side=tk.LEFT)

        self.output_lang_combobox = ttk.Combobox(dropdown_frame, values=list(languages.keys()))
        self.output_lang_combobox.pack(side=tk.LEFT)
        self.output_lang_combobox.bind('<<ComboboxSelected>>', self.update_languages)

        # Row 1: action buttons. The run button starts disabled.
        button_frame = ttk.Frame(root)
        button_frame.grid(row=1, column=0, pady=30)

        style = ttk.Style()
        style.configure('Custom.TButton', foreground='orange', background='white', font=("LG Smart UI Bold", 12, "bold"), padding=10)

        self.run_button = ttk.Button(button_frame, text="Begin Translation", style='Custom.TButton', command=self.start_translation, state=tk.DISABLED)
        self.run_button.pack(side=tk.LEFT, padx=10)

        self.reset_button = ttk.Button(button_frame, text="Reset", style='Custom.TButton', command=self.reset_application)
        self.reset_button.pack(side=tk.LEFT, padx=10)

    def update_languages(self, event):
        """Enable the run button once the output picker holds a known language.

        Bound to ``<<ComboboxSelected>>`` on both pickers; *event* is unused.
        ``start_translation`` only consumes the output picker's value, so
        that is the selection the button state depends on. Enabling the
        button on an input-only selection used to lead to a ``KeyError``.
        """
        output_is_valid = self.output_lang_combobox.get() in languages
        self.run_button.config(state=tk.NORMAL if output_is_valid else tk.DISABLED)

    def start_translation(self):
        """Recognise one utterance from the microphone and print the outcome.

        The locale code belonging to the output picker's selection is used as
        the recognition language. ``recognize_once()`` is called directly from
        the button callback and returns the result synchronously.
        """
        # The pickers are editable, so the value may be empty or free text.
        output_language_code = languages.get(self.output_lang_combobox.get())
        if output_language_code is None:
            print("Please select a language from the output list first")
            return

        speech_config = speechsdk.SpeechConfig(subscription="<speech_key>", region="<speech_region>")
        speech_config.speech_recognition_language = output_language_code
        audio_config = speechsdk.audio.AudioConfig(use_default_microphone=True)
        recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)

        print("Listening for audio...")
        result = recognizer.recognize_once()

        if result.reason == speechsdk.ResultReason.RecognizedSpeech:
            print(f"Recognized: {result.text}")
        elif result.reason == speechsdk.ResultReason.NoMatch:
            print("No speech could be recognized")
        elif result.reason == speechsdk.ResultReason.Canceled:
            cancellation_details = result.cancellation_details
            print(f"Speech Recognition canceled: {cancellation_details.reason}")
            if cancellation_details.reason == speechsdk.CancellationReason.Error:
                print(f"Error details: {cancellation_details.error_details}")

    def reset_application(self):
        """Select the first language in both pickers and disable the run button."""
        self.input_lang_combobox.current(0)
        self.output_lang_combobox.current(0)
        self.run_button.config(state=tk.DISABLED)
if __name__ == "__main__":
    # Display name shown in the pickers -> locale code given to the Speech SDK.
    # AudioToTextTranslator reads this module-level mapping.
    languages = {
        "English": "en-US",
        "Chinese (Simplified)": "zh-CN",
        "Tamil": "ta-IN",
        "Malay": "ms-MY",
    }

    # Build the window and run the Tk event loop until it is closed.
    root = tk.Tk()
    app = AudioToTextTranslator(root)
    root.mainloop()
输出:
它运行成功,并将识别到的语音转换为文本输出,如下所示。
C:\Users\xxxxxxx\Documents\xxxxxxx>python kam.py
Listening for audio...
Recognized: Hi Kamali. How are you?
Listening for audio...
Recognized: 嗨。
Listening for audio...
Recognized: வெல்கம்.