所以我想创建一个将语音转换为文本以及文本转换为语音的应用程序。我的文本转语音功能正常工作,但我无法弄清楚语音转文本功能。
我的main.py代码-
import pyaudio
import speech_recognition as sr
import pyttsx3
from playsound import playsound
import kivy
from kivy.app import App
from kivy.uix.gridlayout import GridLayout
from kivy.uix.screenmanager import ScreenManager,Screen
from kivy.properties import ObjectProperty
from kivy.uix.floatlayout import FloatLayout
from kivy.uix.widget import Widget
from kivy.graphics import Rectangle
from kivy.graphics import Color
from kivy.uix.image import Image
from kivy.lang import Builder
from kivy.core.text import LabelBase
class MainWindow(Screen):
LabelBase.register(name='Roboto-Medium',
fn_regular='Roboto-Medium.ttf')
def exit(self):
quit()
class speechtotext(Screen):
r = sr.Recognizer()
LabelBase.register(name='Roboto-Medium',
fn_regular='Roboto-Medium.ttf')
def exit(self):
quit()
def change(self):
self.ids.final_text.text = "Recognising"
with sr.Microphone as self.source:
self.audio_data = self.r.listen(self.source)
self.data = self.r.recognize_google(self.audio_data)
def speechtotextfas(self):
self.ids.final_text.text = self.data
kv = Builder.load_file("audibuddy.kv")
class Audibuddy(App):
def build(self):
return kv
if __name__ == "__main__":
Audibuddy().run()
和我的 .kv 文件 -
WindowManager:
MainWindow:
speechtotext:
texttospeech:
<Mainwindow>:
name:"mainwindow"
BoxLayout:
Button:
font_size:25
colour:1,1,1,1
BoxLayout:
canvas:
Color:
rgba:0.16862745098,0.16862745098,0.16862745098,1
Rectangle:
pos:self.pos
size:self.size
BoxLayout:
pos_hint:{'top':1.456}
size_hint:2,0.57
canvas:
Color:
rgba:0.09803921568,0.13725490196,0.17647058823,1
Rectangle:
pos:self.pos
size:self.size
FloatLayout:
BoxLayout:
orientation:'horizontal'
padding:5,0,0,0
Label:
text:"audibuddy"
font_name:"Roboto-Medium"
font_size:35
pos_hint:{"top":1.45,"left":1}
Widget:
Widget:
FloatLayout:
BoxLayout:
padding:50,150,50,50
Button:
id:speechtotext_btn
text:"Speech to Text"
size_hint:0.5,0.15
pos_hint:{"top":1,"centre_x":0.5}
on_release:
app.root.current = "second_window"
root.manager.transition.direction = "left"
BoxLayout:
padding:50,150,50,50
Button:
id:texttospeech_btn
text:"Text to Speech"
size_hint:0.5,0.15
pos_hint:{"top":0.75,"centre_x":0.5}
on_release:
app.root.current = "third_window"
root.manager.transition.direction = "left"
BoxLayout:
padding:100,150,100,50
Button:
id:exitapp_btn
text:"Exit"
size_hint:0.1,0.15
pos_hint:{"top":0.5,"centre_x":0.5}
on_release:
app.root.exit()
Button:
background_normal:"homw button.webp"
background_down:"homw button.webp"
size_hint:0.2,0.1
pos_hint:{"top":1,"left":1}
on_release:
app.root.current = "mainwindow"
root.manager.transition.direction = "right"
<speechtotext>:
name:"second_window"
BoxLayout:
Button:
font_size:25
colour:1,1,1,1
BoxLayout:
canvas:
Color:
rgba:0.16862745098,0.16862745098,0.16862745098,1
Rectangle:
pos:self.pos
size:self.size
BoxLayout:
pos_hint:{'top':1.456}
size_hint:2,0.57
canvas:
Color:
rgba:0.09803921568,0.13725490196,0.17647058823,1
Rectangle:
pos:self.pos
size:self.size
FloatLayout:
BoxLayout:
orientation:'horizontal'
padding:5,0,0,0
Label:
text:"audibuddy"
font_name:"Roboto-Medium"
font_size:35
pos_hint:{"top":1.45,"left":1}
Widget:
Widget:
Button:
background_normal:"homw button.webp"
background_down:"homw button.webp"
size_hint:0.2,0.1
pos_hint:{"top":1,"left":1}
on_release:
app.root.current = "mainwindow"
root.manager.transition.direction = "right"
Label:
id:final_text
text:"Say something"
font_name : "Roboto-medium"
Button:
id:convert_btn
pos_hint:{"center_x":0.5,"top":0.3}
size_hint:0.3,0.2
background_normal:"mic.png"
background_down:"mic_down.png"
border:5,5,5,5
on_press:
root.change()
on_release:
root.speechtotextfas()
它给了我这个错误-
with sr.Microphone as self.source:
AttributeError: __enter__
任何人都可以解释为什么会发生这种情况。我对 kivy 很陌生,我自己无法弄清楚。 预先感谢:)
在
SpeechRecognition
的代码示例中,您的行似乎是:
with sr.Microphone as self.source:
实际上应该是:
with sr.Microphone() as self.source:
我正在构建一个使用语音输入的绘图程序,所以我一直在使用 kivy 和语音识别。这是我的代码,它被分解为 py 文件。我希望这有帮助。 我在试图弄清楚如何使语音到文本不阻塞时发现了这一点。因此,如果您或其他人弄清楚了这一点,那么了解一下就好了。我刚刚开始寻找,也许我会在接下来的几分钟内找到解决方案。 (我希望)我想我可以使用多个线程,但 kivy 或语音识别中可能有一些钩子可以让我更轻松地做到这一点。
#Input_Loop.py
import sys
# https://www.techwithtim.net/tutorials/kivy-tutorial/simple-drawing-app/
from kivy.config import Config # disable multy touch simulation
Config.set('input', 'mouse', 'mouse,multitouch_on_demand')
import kivy
from kivy.app import App
from kivy.uix.widget import Widget
from kivy.uix.label import Label
from kivy.graphics import Rectangle
from kivy.graphics import Color
from kivy.graphics import Line
from kivy.graphics import Ellipse
from kivy.core.window import Window
import Free_Google_ASR
import speech_recognition as sr
print("User Current Version:-", sys.version)
class Touch(Widget):
#point_log = ()
def __init__(self, **kwargs):
super(Touch, self).__init__(**kwargs)
self.asr = Free_Google_ASR.Free_Google_ASR()
#with self.canvas:
#Color(1, 0, 0, 1, mode='rgba')
#self.rect = Rectangle(pos=(0, 0), size=(50, 50))
self.mode = 'draw'
self._keyboard = Window.request_keyboard(
self._keyboard_closed, self, 'text')
if self._keyboard.widget:
# If it exists, this widget is a VKeyboard object which you can use
# to change the keyboard layout.
pass
self._keyboard.bind(on_key_down=self._on_keyboard_down)
self._keyboard.bind(on_key_up=self._on_keyboard_up)
self.lines = []
def on_touch_down(self, touch):
self.point_log = ()
print("Mouse down", touch)
def on_touch_move(self, touch):
with self.canvas:
self.point_log += touch.pos
if len(self.point_log) >= 4:
Color(0, 1, 0, 1, mode='rgba')
self.line = Line(points=self.point_log)
print("Mouse move", touch)
def on_touch_up(self, touch):
print("Mouse up", touch)
def _keyboard_closed(self):
print('My keyboard have been closed!')
self._keyboard.unbind(on_key_down=self._on_keyboard_down)
self._keyboard.unbind(on_key_up=self._on_keyboard_up)
self._keyboard = None
def _on_keyboard_down(self, keyboard, keycode, text, modifiers):
print('The key', keycode, 'have been pressed')
if text == 'f':
r = self.asr.get_command()
print(r)
print(' - text is %r' % text)
print(' - modifiers are %r' % modifiers)
self.mode = keycode
# Keycode is composed of an integer + a string
# If we hit escape, release the keyboard
if keycode[1] == 'escape':
keyboard.release()
# Return True to accept the key. Otherwise, it will be used by
# the system.
return True
def _on_keyboard_up(self, key, scancode, *args):
print("key up", scancode[1])
#print('The key', key, 'have been released')
#print(' - scancode is %r' % scancode)
#print(' - codepoint are %r' % codepoint)
self.mode = 'draw'
return True
class MyApp_2(App):
def build(self):
return Touch()
class MyApp(App):
def build(self):
return Label(text="Hello World")
if __name__ == '__main__':
MyApp_2().run()
#Free_Google_ASR.py
import speech_recognition as sr
class Free_Google_ASR:
def __init__(self):
self.recognizer = sr.Recognizer()
self.microphone = sr.Microphone()
# check that recognizer and microphone arguments are appropriate type
if not isinstance(self.recognizer, sr.Recognizer):
raise TypeError("`recognizer` must be `Recognizer` instance")
if not isinstance(self.microphone, sr.Microphone):
raise TypeError("`microphone` must be `Microphone` instance")
# set up the response object
self.response = {
"success": True,
"error": None,
"transcription": None
}
with self.microphone as source:
self.recognizer.adjust_for_ambient_noise(source)
print("ready!")
def get_command(self):
# adjust the recognizer sensitivity to ambient noise and record audio
# from the microphone
with self.microphone as source:
print("started listening")
self.audio = self.recognizer.listen(source)
print("got something")
# try recognizing the speech in the recording
# if a RequestError or UnknownValueError exception is caught,
# update the response object accordingly
try:
self.response["transcription"] = self.recognizer.recognize_google(self.audio)
except sr.RequestError:
# API was unreachable or unresponsive
self.response["success"] = False
self.response["error"] = "API unavailable"
except sr.UnknownValueError:
# speech was unintelligible
self.response["error"] = "Unable to recognize speech"
return self.response
def test_loop(self):
while(True):
i = 0
while(True):
guess = self.get_command()
if guess["transcription"]:
break
if not guess["success"]:
break
print(i, ": I didn't catch that. What did you say?\n")
i+=1
# if there was an error, stop the game
if guess["error"]:
print("ERROR: {}".format(guess["error"]))
break
# show the user the transcription
print("You said: {}".format(guess["transcription"]))
是否有语音识别的实现(我只需要非常基本的功能来识别何时静音 X 秒) Kivy 可以在 iOS 和 Android 设备上运行吗?