Hi, I am trying to build a speech-to-text demo app using Expo, React Native, and microsoft-cognitiveservices-speech-sdk. If I use the Expo Audio library to store an audio file in .wav format on the device, can I pass that file to microsoft-cognitiveservices-speech-sdk and get the text output without using a separate backend service?
With the code below, I get a slice error when trying to obtain the audioConfig value.
LOG  [TypeError: Cannot read property 'slice' of undefined]
I would also like to know whether there is an alternative speech-to-text package that is compatible with Expo and React Native.
```js
import { StyleSheet, Text, TextInput, View, ScrollView, FlatList, KeyboardAvoidingView, Platform, TouchableOpacity, Button } from 'react-native'
import { Audio } from 'expo-av';
import 'react-native-get-random-values';
import { v4 as uuidv4 } from 'uuid';
import { SafeAreaView } from 'react-native-safe-area-context';
import { AudioConfig, SpeechConfig, SpeechRecognizer, ResultReason, CancellationDetails } from 'microsoft-cognitiveservices-speech-sdk'
import React, { useEffect, useState } from 'react'
const speechConfig = SpeechConfig.fromSubscription(process.env.AZURE_SPEECH_KEY, "eastus") // fromSubscription expects the region name (e.g. "eastus"), not the endpoint URL
speechConfig.speechRecognitionLanguage = "en-US";
const Page = () => {
const [recording, setRecording] = useState();
const [permissionResponse, requestPermission] = Audio.usePermissions();
async function startRecording() {
try {
if (permissionResponse?.status !== 'granted') {
console.log('Requesting permission..');
await requestPermission();
}
await Audio.setAudioModeAsync({
allowsRecordingIOS: true,
playsInSilentModeIOS: true,
});
console.log('Starting recording..');
const recordingOptions = {
// android settings
android: {
extension: '.wav',
outputFormat: Audio.RECORDING_OPTION_ANDROID_OUTPUT_FORMAT_DEFAULT,
audioEncoder: Audio.RECORDING_OPTION_ANDROID_AUDIO_ENCODER_DEFAULT,
sampleRate: 44100,
numberOfChannels: 2,
bitRate: 128000,
},
// ios settings
ios: {
extension: '.wav',
audioQuality: Audio.RECORDING_OPTION_IOS_AUDIO_QUALITY_HIGH,
sampleRate: 44100,
numberOfChannels: 2,
bitRate: 128000,
linearPCMBitDepth: 16,
linearPCMIsBigEndian: false,
linearPCMIsFloat: false,
},
};
console.log("Start Recordinf")
const { recording } = await Audio.Recording.createAsync(recordingOptions);
setRecording(recording);
console.log('Recording started');
} catch (err) {
console.error('Failed to start recording', err);
}
}
async function getTranscription() {
// let audioData = AudioConfig.fromAudioFileOutput(recording.getURI())
// const audioData = await FileSystem.readAsStringAsync(recording.getURI(), { encoding: FileSystem.EncodingType.Base64 });
// console.log("Audio Data")
// console.log(audioData)
const audioConfig = AudioConfig.fromWavFileInput(recording.getURI());
console.log("Audio Config")
console.log(audioConfig)
const speechConfig = SpeechConfig.fromSubscription(process.env.AZURE_SPEECH_KEY, 'eastus'); // key and region name, not the endpoint URL
console.log("Speech Config")
console.log(speechConfig)
const recognizer = new SpeechRecognizer(speechConfig, audioConfig);
// recognizer.recognizeOnceAsync(result => {
// switch (result.reason) {
// case ResultReason.RecognizedSpeech:
// console.log(`RECOGNIZED: Text=${result.text}`);
// break;
// case ResultReason.NoMatch:
// console.log("NOMATCH: Speech could not be recognized.");
// break;
// case ResultReason.Canceled:
// const cancellation = CancellationDetails.fromResult(result);
// console.log(`CANCELED: Reason=${cancellation.reason}`);
// if (cancellation.reason == CancellationReason.Error) {
// console.log(`CANCELED: ErrorCode=${cancellation.ErrorCode}`);
// console.log(`CANCELED: ErrorDetails=${cancellation.errorDetails}`);
// console.log("CANCELED: Did you set the speech resource key and region values?");
// }
// break;
// }
// speechRecognizer.close();
// });
}
async function stopRecording() {
console.log('Stopping recording..');
setRecording(undefined);
await recording.stopAndUnloadAsync();
await Audio.setAudioModeAsync(
{
allowsRecordingIOS: false,
}
);
const uri = recording.getURI();
console.log('Recording stopped and stored at', uri);
try {
await getTranscription()
} catch (error) {
console.log(error)
}
}
const [messages, setMessages] = useState([
'Lorem ipsum dolor sit amet consectetur adipisicing elit. Ipsum optio incidunt fuga fugiat. Accusantium quidem, commodi amet consequuntur quaerat, quo tempore provident ea aliquid ullam illo numquam magnam laudantium in?',])
return (
<SafeAreaView style={{ flex: 1 }}>
<View style={{ flex: 0 }}>
<Text>Header</Text>
</View>
<View style={{ flex: 1 }}>
<FlatList
data={messages}
renderItem={({ item }) => <View className='p-3'>
<Text>{item}</Text>
</View>}
keyExtractor={(item, index) => index.toString()}
/>
</View>
<View style={{ flex: 0 }}>
<Text>Hello</Text>
<View className='flex-row'>
<TextInput className='flex-1' style={{ borderWidth: 1 }} />
<Button
title={recording ? 'Stop Recording' : 'Start Recording'}
onPress={recording ? stopRecording : startRecording}
/>
<TouchableOpacity>
<Text>Send</Text>
</TouchableOpacity>
</View>
</View>
</SafeAreaView>
)
}
export default Page
```
The [TypeError: Cannot read property 'slice' of undefined] error occurs because the recording object is not fully initialized, or is still undefined, at the moment AudioConfig.fromWavFileInput(recording.getURI()) is called to create the audioConfig. Here is how I implemented the Azure speech-to-text service in an Expo React Native app; please check the code below.
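For instance, a minimal guard along those lines (my own sketch, not part of the full example under "Code:" below) might look like this:

```js
// Sketch only: make sure the recording has been stopped and a file URI exists
// before building the AudioConfig, so fromWavFileInput never receives undefined.
async function safeTranscribe(recording) {
  if (!recording) {
    console.warn('No recording available to transcribe');
    return;
  }
  const uri = recording.getURI();
  if (!uri) {
    console.warn('Recording has not produced a file URI yet');
    return;
  }
  await getTranscription(uri); // getTranscription is defined in the code below
}
```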
Code:
```js
import React, { useState } from 'react';
import * as FileSystem from 'expo-file-system';
import { StyleSheet, Text, View, Button } from 'react-native';
import { Audio } from 'expo-av';
import { SpeechConfig, AudioConfig, SpeechRecognizer, ResultReason, CancellationReason, CancellationDetails } from 'microsoft-cognitiveservices-speech-sdk';
const speechConfig = SpeechConfig.fromSubscription("key", "locatio/region");
speechConfig.speechRecognitionLanguage = "en-US";
export default function App() {
const [recording, setRecording] = useState(null);
const [transcription, setTranscription] = useState('');
async function startRecording() {
try {
const { status } = await Audio.requestPermissionsAsync();
if (status !== 'granted') {
console.error('Permission to access audio was denied');
return;
}
const recording = new Audio.Recording();
await recording.prepareToRecordAsync(Audio.RECORDING_OPTIONS_PRESET_HIGH_QUALITY);
await recording.startAsync();
setRecording(recording);
console.log('Recording started');
} catch (error) {
console.error('Failed to start recording', error);
}
}
async function stopRecording() {
try {
await recording.stopAndUnloadAsync();
const uri = recording.getURI();
setRecording(null);
console.log('Recording stopped and stored at', uri);
await getTranscription(uri);
} catch (error) {
console.error('Failed to stop recording', error);
}
}
async function getTranscription(uri) {
try {
console.log('Transcription started for URI:', uri);
const audioConfig = AudioConfig.fromWavFileInput(uri);
const recognizer = new SpeechRecognizer(speechConfig, audioConfig);
recognizer.recognizeOnceAsync(result => {
switch (result.reason) {
case ResultReason.RecognizedSpeech:
setTranscription(result.text);
console.log(`RECOGNIZED: Text=${result.text}`);
break;
case ResultReason.NoMatch:
console.log("NOMATCH: Speech could not be recognized.");
break;
case ResultReason.Canceled:
const cancellation = CancellationDetails.fromResult(result);
console.log(`CANCELED: Reason=${cancellation.reason}`);
if (cancellation.reason === CancellationReason.Error) {
console.error(`CANCELED: ErrorCode=${cancellation.ErrorCode}`);
console.error(`CANCELED: ErrorDetails=${cancellation.errorDetails}`);
}
break;
default:
console.error('Unexpected recognition result:', result.reason);
break;
}
});
} catch (error) {
console.error('Failed to get transcription', error);
}
}
return (
<View style={styles.container}>
<Text>Speech to Text Demo</Text>
<View style={styles.buttonsContainer}>
<Button title={recording ? 'Stop Recording' : 'Start Recording'} onPress={recording ? stopRecording : startRecording} />
</View>
{transcription !== '' && (
<View style={styles.transcriptionContainer}>
<Text>Transcription:</Text>
<Text>{transcription}</Text>
</View>
)}
</View>
);
}
const styles = StyleSheet.create({
container: {
flex: 1,
justifyContent: 'center',
alignItems: 'center',
padding: 20,
},
buttonsContainer: {
marginVertical: 20,
},
transcriptionContainer: {
marginTop: 20,
padding: 10,
borderWidth: 1,
borderColor: 'lightgray',
},
});
```
Output:
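One additional note (my own assumption, not something shown in the output above): in the JavaScript Speech SDK, AudioConfig.fromWavFileInput takes the WAV file contents (a File or a Buffer) rather than a file URI, so in an Expo app one approach is to read the recording from disk first and wrap the bytes in a Buffer. The sketch below assumes the 'buffer' polyfill package is installed, and AZURE_SPEECH_KEY plus 'eastus' are placeholders for your own key and region.

```js
// A minimal sketch: read the recorded .wav with expo-file-system and hand the
// raw bytes to the SDK, since fromWavFileInput takes a File/Buffer, not a URI.
// Assumes the 'buffer' polyfill package is installed for React Native.
import { Buffer } from 'buffer';
import * as FileSystem from 'expo-file-system';
import {
  AudioConfig,
  SpeechConfig,
  SpeechRecognizer,
  ResultReason,
} from 'microsoft-cognitiveservices-speech-sdk';

async function transcribeWavFile(uri) {
  // Read the file as Base64 and convert it to raw WAV bytes.
  const base64 = await FileSystem.readAsStringAsync(uri, {
    encoding: FileSystem.EncodingType.Base64,
  });
  const wavBuffer = Buffer.from(base64, 'base64');

  // Placeholder key/region - replace with your own subscription values.
  const speechConfig = SpeechConfig.fromSubscription(process.env.AZURE_SPEECH_KEY, 'eastus');
  speechConfig.speechRecognitionLanguage = 'en-US';

  // Pass the WAV bytes (not the URI string) to the recognizer.
  const audioConfig = AudioConfig.fromWavFileInput(wavBuffer);
  const recognizer = new SpeechRecognizer(speechConfig, audioConfig);

  return new Promise((resolve, reject) => {
    recognizer.recognizeOnceAsync(
      result => {
        recognizer.close();
        if (result.reason === ResultReason.RecognizedSpeech) {
          resolve(result.text);
        } else {
          reject(new Error(`Recognition failed, reason=${result.reason}`));
        }
      },
      error => {
        recognizer.close();
        reject(error);
      }
    );
  });
}
```

In stopRecording, one could then call something like `setTranscription(await transcribeWavFile(uri))` in place of `getTranscription(uri)`.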