有没有办法使用某种(离线)语音识别来统计音频文件中的单词数?实现这种功能的最佳方法是什么?另外,有没有办法让它连续工作,而不必在说话者每次停顿后重新启动识别?
Android 带有内置的语音转文本功能,您可以通过它为您的应用程序提供语音输入。
这里有一段简短的示例代码,介绍如何使用内置 API 识别语音并将其转换为文本。之后,您可以将识别出的文本作为 `String` 取出,并像统计普通 `String` 中的单词那样在代码中统计单词数,例如使用 `.split(" ")` 或其他常用方法。
/**
 * Shows the speech input dialog by launching an
 * {@link RecognizerIntent#ACTION_RECOGNIZE_SPEECH} activity for a result.
 *
 * <p>The request asks for free-form recognition in the device's default locale and
 * uses {@code R.string.speech_prompt} as the prompt. The result is delivered to
 * {@code onActivityResult} under {@code REQ_CODE_SPEECH_INPUT}. If no activity can
 * handle the intent, a short toast with {@code R.string.speech_not_supported} is shown.
 */
private void promptSpeechInput() {
    Intent intent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
    intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL,
            RecognizerIntent.LANGUAGE_MODEL_FREE_FORM);
    // EXTRA_LANGUAGE is documented as an IETF language tag *string* (e.g. "en-US").
    // Passing the Locale object itself selects the Serializable overload of putExtra,
    // so the value is not stored as a string extra.
    // NOTE(review): Locale#toLanguageTag needs API 21+ — confirm the project's minSdk.
    intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE,
            Locale.getDefault().toLanguageTag());
    intent.putExtra(RecognizerIntent.EXTRA_PROMPT,
            getString(R.string.speech_prompt));
    try {
        startActivityForResult(intent, REQ_CODE_SPEECH_INPUT);
    } catch (ActivityNotFoundException notFound) {
        // No speech recognition activity is installed on this device.
        Toast.makeText(getApplicationContext(),
                getString(R.string.speech_not_supported),
                Toast.LENGTH_SHORT).show();
    }
}
/**
 * Receives the speech input result and shows the first recognition candidate in
 * {@code txtSpeechInput}.
 *
 * <p>Only results for {@code REQ_CODE_SPEECH_INPUT} with {@code RESULT_OK} and a
 * non-null intent are handled; anything else is ignored after delegating to the
 * superclass.
 *
 * @param requestCode the request code the activity was started with
 * @param resultCode  the result code returned by the launched activity
 * @param data        the result intent; may be {@code null}
 */
@Override
protected void onActivityResult(int requestCode, int resultCode, Intent data) {
    super.onActivityResult(requestCode, resultCode, data);
    if (requestCode != REQ_CODE_SPEECH_INPUT) {
        return;
    }
    if (resultCode != RESULT_OK || data == null) {
        return;
    }
    ArrayList<String> result =
            data.getStringArrayListExtra(RecognizerIntent.EXTRA_RESULTS);
    // The extra can be absent or the list empty; get(0) would throw in either case.
    if (result == null || result.isEmpty()) {
        return;
    }
    txtSpeechInput.setText(result.get(0));
}
(完整的原始教程在这里:https://www.androidhive.info/2014/07/android-speech-to-text-tutorial/)
以下是我的问题的答案,供可能需要的人使用:
/**
 * Activity that listens for speech with {@link SpeechRecognizer}, restarts listening
 * after every result or error, and keeps a running total of the recognised words.
 *
 * <p>The recogniser is (re)created in {@link #onResume()}, stopped in
 * {@link #onPause()} and destroyed in {@link #onStop()}. Listening is only started
 * while the activity is resumed and the {@code RECORD_AUDIO} permission is granted.
 */
public class MainActivity extends AppCompatActivity implements
        RecognitionListener {
    private static final int PERMISSIONS_REQUEST_RECORD_AUDIO = 1;
    private static final String LOG_TAG = "VoiceRecognitionActivity";

    private TextView returnedText;
    private TextView returnedError;
    private ProgressBar progressBar;
    /** Shows the running word total. */
    private TextView brojtextview;
    private SpeechRecognizer speech = null;
    private Intent recognizerIntent;
    /** Running total of words recognised across all results. */
    private int ukupanbroj;
    /** True between {@link #onResume()} and {@link #onPause()}. */
    private boolean resumed;

    /**
     * Destroys any existing recogniser and creates a fresh one. Registers this
     * activity as its listener when recognition is available; otherwise finishes
     * the activity.
     */
    private void resetSpeechRecognizer() {
        if (speech != null) {
            speech.destroy();
        }
        speech = SpeechRecognizer.createSpeechRecognizer(this);
        boolean available = SpeechRecognizer.isRecognitionAvailable(this);
        Log.i(LOG_TAG, "isRecognitionAvailable: " + available);
        if (available) {
            speech.setRecognitionListener(this);
        } else {
            finish();
        }
    }

    /** Builds the recognition intent: free-form model, language "sr-RS", up to 3 results. */
    private void setRecogniserIntent() {
        recognizerIntent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
        recognizerIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE,
                "sr-RS");
        recognizerIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL,
                RecognizerIntent.LANGUAGE_MODEL_FREE_FORM);
        recognizerIntent.putExtra(RecognizerIntent.EXTRA_MAX_RESULTS, 3);
    }

    /** Returns whether the {@code RECORD_AUDIO} permission is currently granted. */
    private boolean hasRecordAudioPermission() {
        return ContextCompat.checkSelfPermission(getApplicationContext(),
                Manifest.permission.RECORD_AUDIO) == PackageManager.PERMISSION_GRANTED;
    }

    /**
     * Starts a listening session if that is currently possible: the activity must be
     * resumed and not finishing, a recogniser must exist, and the microphone
     * permission must be granted. Builds the recognition intent on demand so that
     * {@code startListening} is never handed a null intent.
     */
    private void startListeningIfPossible() {
        if (!resumed || isFinishing() || speech == null) {
            return;
        }
        if (!hasRecordAudioPermission()) {
            return;
        }
        if (recognizerIntent == null) {
            setRecogniserIntent();
        }
        speech.startListening(recognizerIntent);
    }

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);

        // UI initialisation
        returnedText = findViewById(R.id.textView1);
        returnedError = findViewById(R.id.errorView1);
        progressBar = findViewById(R.id.progressBar1);
        brojtextview = findViewById(R.id.brojtextview);
        progressBar.setVisibility(View.VISIBLE);
        progressBar.setIndeterminate(true);

        // Build the intent up front so it exists no matter which callback starts
        // listening first (previously it stayed null while permission was pending).
        setRecogniserIntent();

        // The recogniser itself is created and started in onResume(), which always
        // follows onCreate(); creating it here too would only be destroyed again.
        if (!hasRecordAudioPermission()) {
            ActivityCompat.requestPermissions(this,
                    new String[]{Manifest.permission.RECORD_AUDIO},
                    PERMISSIONS_REQUEST_RECORD_AUDIO);
        }
    }

    @Override
    public void onRequestPermissionsResult(int requestCode,
            @NonNull String[] permissions, @NonNull int[] grantResults) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults);
        if (requestCode != PERMISSIONS_REQUEST_RECORD_AUDIO) {
            return;
        }
        if (grantResults.length > 0 && grantResults[0] == PackageManager.PERMISSION_GRANTED) {
            // No-op if the activity is not resumed yet; onResume() starts listening then.
            startListeningIfPossible();
        } else {
            Toast.makeText(MainActivity.this, "Permission Denied!", Toast
                    .LENGTH_SHORT).show();
            finish();
        }
    }

    @Override
    public void onResume() {
        Log.i(LOG_TAG, "resume");
        super.onResume();
        resumed = true;
        resetSpeechRecognizer();
        startListeningIfPossible();
    }

    @Override
    protected void onPause() {
        Log.i(LOG_TAG, "pause");
        super.onPause();
        resumed = false;
        if (speech != null) {
            speech.stopListening();
        }
    }

    @Override
    protected void onStop() {
        Log.i(LOG_TAG, "stop");
        super.onStop();
        if (speech != null) {
            speech.destroy();
            // Drop the reference so later callbacks cannot touch a destroyed recogniser.
            speech = null;
        }
    }

    @Override
    public void onBeginningOfSpeech() {
        Log.i(LOG_TAG, "onBeginningOfSpeech");
        progressBar.setIndeterminate(false);
        progressBar.setMax(10);
    }

    @Override
    public void onBufferReceived(byte[] buffer) {
        Log.i(LOG_TAG, "onBufferReceived: " + buffer);
    }

    @Override
    public void onEndOfSpeech() {
        Log.i(LOG_TAG, "onEndOfSpeech");
        progressBar.setIndeterminate(true);
        if (speech != null) {
            speech.stopListening();
        }
    }

    /**
     * Shows the top recognition candidate, adds its whitespace-separated word count
     * to the running total, and starts the next listening session.
     */
    @Override
    public void onResults(Bundle results) {
        Log.i(LOG_TAG, "onResults");
        ArrayList<String> matches = results == null
                ? null
                : results.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);
        // The bundle may carry no candidates; skip counting rather than crash.
        if (matches != null && !matches.isEmpty()) {
            String text = matches.get(0);
            returnedText.setText(text);
            String trimmed = text.trim();
            int words = trimmed.isEmpty() ? 0 : trimmed.split("\\s+").length;
            ukupanbroj += words;
            brojtextview.setText(String.valueOf(ukupanbroj));
        }
        startListeningIfPossible();
    }

    /**
     * Displays the error and, while the activity is resumed, recreates the
     * recogniser and tries to listen again.
     */
    @Override
    public void onError(int errorCode) {
        String errorMessage = getErrorText(errorCode);
        Log.i(LOG_TAG, "FAILED " + errorMessage);
        returnedError.setText(errorMessage);
        if (!resumed) {
            // Do not recreate the recogniser after the activity has been paused.
            return;
        }
        // Reset the voice recogniser. The restart is skipped when the microphone
        // permission is missing, which avoids an endless error/restart loop.
        resetSpeechRecognizer();
        startListeningIfPossible();
    }

    @Override
    public void onEvent(int arg0, Bundle arg1) {
        Log.i(LOG_TAG, "onEvent");
    }

    @Override
    public void onPartialResults(Bundle arg0) {
        Log.i(LOG_TAG, "onPartialResults");
    }

    @Override
    public void onReadyForSpeech(Bundle arg0) {
        Log.i(LOG_TAG, "onReadyForSpeech");
    }

    @Override
    public void onRmsChanged(float rmsdB) {
        progressBar.setProgress((int) rmsdB);
    }

    /**
     * Maps a {@link SpeechRecognizer} error code to a short English message.
     *
     * @param errorCode one of the {@code SpeechRecognizer.ERROR_*} constants
     * @return the matching message, or a generic one for unrecognised codes
     */
    public String getErrorText(int errorCode) {
        switch (errorCode) {
            case SpeechRecognizer.ERROR_AUDIO:
                return "Audio recording error";
            case SpeechRecognizer.ERROR_CLIENT:
                return "Client side error";
            case SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS:
                return "Insufficient permissions";
            case SpeechRecognizer.ERROR_NETWORK:
                return "Network error";
            case SpeechRecognizer.ERROR_NETWORK_TIMEOUT:
                return "Network timeout";
            case SpeechRecognizer.ERROR_NO_MATCH:
                return "No match";
            case SpeechRecognizer.ERROR_RECOGNIZER_BUSY:
                return "RecognitionService busy";
            case SpeechRecognizer.ERROR_SERVER:
                return "error from server";
            case SpeechRecognizer.ERROR_SPEECH_TIMEOUT:
                return "No speech input";
            default:
                return "Didn't understand, please try again.";
        }
    }
}
不!还没有用于字数统计的应用程序。