My experimental code is below (compileSdk 34, minSdk 33). As long as Text-to-Speech and Speech Recognition are each considered on their own, it works well:
package com.example.speechandspeak;

import android.Manifest;
import android.content.Intent;
import android.content.pm.PackageManager;
import android.os.Bundle;
import android.speech.RecognitionListener;
import android.speech.RecognizerIntent;
import android.speech.SpeechRecognizer;
import android.speech.tts.TextToSpeech;
import android.util.Log;
import android.widget.Toast;

import androidx.appcompat.app.AppCompatActivity;
import androidx.core.app.ActivityCompat;
import androidx.core.content.ContextCompat;

import java.util.ArrayList;
import java.util.Locale;

public class MainActivity extends AppCompatActivity implements RecognitionListener {
    private TextToSpeech tts;
    private SpeechRecognizer speechRecognizer;

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);

        // Initialize TextToSpeech
        tts = new TextToSpeech(this, new TextToSpeech.OnInitListener() {
            @Override
            public void onInit(int status) {
                if (status == TextToSpeech.SUCCESS) {
                    int ttsLang = tts.setLanguage(Locale.US);
                    if (ttsLang == TextToSpeech.LANG_MISSING_DATA || ttsLang == TextToSpeech.LANG_NOT_SUPPORTED) {
                        Toast.makeText(MainActivity.this, "Language is not supported!", Toast.LENGTH_SHORT).show();
                    } else {
                        speakText();
                    }
                } else {
                    Toast.makeText(MainActivity.this, "TTS Initialization failed!", Toast.LENGTH_SHORT).show();
                }
            }
        });

        // Check if microphone permission is granted; request it if not
        if (ContextCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO) != PackageManager.PERMISSION_GRANTED) {
            ActivityCompat.requestPermissions(this, new String[]{Manifest.permission.RECORD_AUDIO}, 1);
        }

        // Initialize SpeechRecognizer
        speechRecognizer = SpeechRecognizer.createSpeechRecognizer(this);
        speechRecognizer.setRecognitionListener(this);
    }

    private void speakText() {
        Log.d(this.getLocalClassName(), "Entered: " + Thread.currentThread().getStackTrace()[2].getMethodName());
        String message = getString(R.string.tts_message);
        tts.speak(message, TextToSpeech.QUEUE_FLUSH, null, null);

        // Start speech recognition; speak() is asynchronous, so listening
        // deliberately begins while the TTS is still playing
        Intent speechRecognizerIntent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
        speechRecognizer.startListening(speechRecognizerIntent);
    }

    @Override
    public void onReadyForSpeech(Bundle params) {
        Log.d(this.getLocalClassName(), "Entered: " + Thread.currentThread().getStackTrace()[2].getMethodName());
    }

    @Override
    public void onBeginningOfSpeech() {
        Log.d(this.getLocalClassName(), "Entered: " + Thread.currentThread().getStackTrace()[2].getMethodName());
    }

    @Override
    public void onRmsChanged(float rmsdB) {
        //Log.d(this.getLocalClassName(), "Entered: " + Thread.currentThread().getStackTrace()[2].getMethodName());
    }

    @Override
    public void onBufferReceived(byte[] bytes) {
        Log.d(this.getLocalClassName(), "Entered: " + Thread.currentThread().getStackTrace()[2].getMethodName());
    }

    @Override
    public void onEndOfSpeech() {
        Log.d(this.getLocalClassName(), "Entered: " + Thread.currentThread().getStackTrace()[2].getMethodName());
    }

    @Override
    public void onError(int i) {
        Log.d(this.getLocalClassName(), "Entered: " + Thread.currentThread().getStackTrace()[2].getMethodName() + "(" + i + ")");
    }

    @Override
    public void onEvent(int eventType, Bundle params) {
        Log.d(this.getLocalClassName(), "Entered: " + Thread.currentThread().getStackTrace()[2].getMethodName());
    }

    @Override
    public void onPartialResults(Bundle partialResults) {
        Log.d(this.getLocalClassName(), "Entered: " + Thread.currentThread().getStackTrace()[2].getMethodName());
    }

    @Override
    public void onResults(Bundle results) {
        Log.d(this.getLocalClassName(), "Entered: " + Thread.currentThread().getStackTrace()[2].getMethodName());
        ArrayList<String> matches = results.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);
        if (matches != null && !matches.isEmpty()) {
            Log.v(this.getLocalClassName(), "Matches: " + matches);
            String text = matches.get(0).toLowerCase(); // Convert to lowercase for case-insensitive comparison
            if (text.equals("stop")) {
                if (tts.isSpeaking()) {
                    tts.stop();
                    speechRecognizer.stopListening(); // Stop listening for further commands
                    Toast.makeText(MainActivity.this, "Stopped!", Toast.LENGTH_SHORT).show();
                }
            }
        }
    }
    @Override
    protected void onDestroy() {
        super.onDestroy();
        if (tts != null) {
            tts.stop();
            tts.shutdown();
        }
        if (speechRecognizer != null) {
            speechRecognizer.destroy(); // release the recognizer's service connection
        }
    }
}
That is, the sample sentence ("This is a demo of Speech Recognition and Text-to-Speech. Say stop! to quit.") plays back clearly, and the speech recognizer starts up and works correctly, as the Logcat debug messages show:
12:40:09.125 MainActivity D Entered: speakText
12:40:09.297 D Entered: onReadyForSpeech
12:40:09.761 D Entered: onBeginningOfSpeech
12:40:13.365 D Entered: onEndOfSpeech
12:40:13.394 D Entered: onResults
12:40:13.396 V Matches: [demonstration of speech recognition and text to speech say stop to quit]
12:40:14.524 ProfileInstaller D Installing profile for com.example.speechandspeak
However, because the SpeechRecognizer deliberately starts listening while the TTS is still playing (that is the intended test case), onResults() only recognizes the TTS output ("demonstration of speech recognition and text to speech say stop to quit") and ignores everything I say during that time.

I want it to focus on my voice and ignore the TTS output. How can I achieve this?

I know Android 13+ smartphones are capable of this: when I say "Hey Google" while TTS is playing out loud, my voice is recognized, the TTS output pauses, and Google Assistant starts listening. How does it do that?
The SpeechRecognizer API has no switch to simply turn this on. You need to capture the input audio yourself and pass it to the SpeechRecognizer via RecognizerIntent.EXTRA_AUDIO_SOURCE.
The input audio stream needs to be configured in a way that supports the AcousticEchoCanceler effect; https://stackoverflow.com/a/38021196 has more details. A sketch of this approach follows at the end of this answer.

Since you asked about Google Assistant: such applications most likely do not use the SpeechRecognizer API at all, because they are trying to detect one specific phrase, and a more specialized model is more effective for that.
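For illustration, here is a minimal, untested sketch of that approach. It assumes minSdk 33 (the EXTRA_AUDIO_SOURCE extras were added in Android 13) and a device whose audio pipeline actually implements echo cancellation. The method name startEchoCancelledRecognition and the 16 kHz mono PCM format are my own choices, not requirements of the API; besides the imports already in your activity it needs android.media.AudioFormat, android.media.AudioRecord, android.media.MediaRecorder, android.media.audiofx.AcousticEchoCanceler, android.os.ParcelFileDescriptor, java.io.IOException, and java.io.OutputStream. It would replace the plain startListening() call in speakText():

    // Hypothetical helper, not part of the original code.
    // RECORD_AUDIO must already be granted before this is called.
    private void startEchoCancelledRecognition() {
        final int sampleRate = 16000; // illustrative choice
        final int channelConfig = AudioFormat.CHANNEL_IN_MONO;
        final int encoding = AudioFormat.ENCODING_PCM_16BIT;
        final int bufferSize = AudioRecord.getMinBufferSize(sampleRate, channelConfig, encoding);

        // VOICE_COMMUNICATION requests the platform's VoIP pre-processing
        // (echo cancellation / noise suppression) where the hardware supports it.
        final AudioRecord recorder = new AudioRecord(
                MediaRecorder.AudioSource.VOICE_COMMUNICATION,
                sampleRate, channelConfig, encoding, bufferSize);

        // Additionally attach the AEC effect to this record session if it is exposed.
        if (AcousticEchoCanceler.isAvailable()) {
            AcousticEchoCanceler aec = AcousticEchoCanceler.create(recorder.getAudioSessionId());
            if (aec != null) aec.setEnabled(true);
        }

        try {
            // The recognizer reads from pipe[0]; we write echo-cancelled mic data into pipe[1].
            final ParcelFileDescriptor[] pipe = ParcelFileDescriptor.createPipe();

            Intent intent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
            intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, RecognizerIntent.LANGUAGE_MODEL_FREE_FORM);
            intent.putExtra(RecognizerIntent.EXTRA_AUDIO_SOURCE, pipe[0]);
            intent.putExtra(RecognizerIntent.EXTRA_AUDIO_SOURCE_ENCODING, encoding);
            intent.putExtra(RecognizerIntent.EXTRA_AUDIO_SOURCE_SAMPLING_RATE, sampleRate);
            intent.putExtra(RecognizerIntent.EXTRA_AUDIO_SOURCE_CHANNEL_COUNT, 1);
            speechRecognizer.startListening(intent);

            // Pump the microphone stream into the pipe on a background thread.
            // (Stopping the recorder when recognition ends is omitted for brevity.)
            new Thread(() -> {
                byte[] buffer = new byte[bufferSize];
                recorder.startRecording();
                try (OutputStream out = new ParcelFileDescriptor.AutoCloseOutputStream(pipe[1])) {
                    while (recorder.getRecordingState() == AudioRecord.RECORDSTATE_RECORDING) {
                        int n = recorder.read(buffer, 0, buffer.length);
                        if (n > 0) out.write(buffer, 0, n);
                    }
                } catch (IOException ignored) {
                } finally {
                    recorder.stop();
                    recorder.release();
                }
            }).start();
        } catch (IOException e) {
            Log.e(getLocalClassName(), "Could not create audio pipe", e);
        }
    }

Whether this fully suppresses the TTS audio depends on the device: the AEC implementation lives in the audio HAL, so results vary between phones.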