Can I implement the Azure speech-to-text service in an Expo React Native app without a backend?


Hi, I'm trying to build a speech-to-text demo app with Expo, microsoft-cognitiveservices-speech-sdk, and React Native. If I record audio and store it on the device as a .wav file using the expo-av Audio library, can I pass that file to microsoft-cognitiveservices-speech-sdk and get a text transcription back, without running a separate backend service?

With the code below, I get a slice error when the audioConfig value is created:

LOG [TypeError: Cannot read property 'slice' of undefined]

I'd also like to know whether there is an alternative speech-to-text package that is compatible with Expo and React Native.

```
import { StyleSheet, Text, TextInput, View, ScrollView, FlatList, KeyboardAvoidingView, Platform, TouchableOpacity, Button } from 'react-native'
import { Audio } from 'expo-av';
import 'react-native-get-random-values';
import { v4 as uuidv4 } from 'uuid';
import { SafeAreaView } from 'react-native-safe-area-context';
import { AudioConfig, SpeechConfig, SpeechRecognizer, ResultReason, CancellationDetails } from 'microsoft-cognitiveservices-speech-sdk'

import React, { useEffect, useState } from 'react'


const speechConfig = SpeechConfig.fromSubscription(process.env.AZURE_SPEECH_KEY, "https://eastus.api.cognitive.microsoft.com/")
speechConfig.speechRecognitionLanguage = "en-US";


const Page = () => {
  const [recording, setRecording] = useState();
  const [permissionResponse, requestPermission] = Audio.usePermissions();

  async function startRecording() {
    try {
      if (permissionResponse.status !== 'granted') {
        console.log('Requesting permission..');
        await requestPermission();
      }
      await Audio.setAudioModeAsync({
        allowsRecordingIOS: true,
        playsInSilentModeIOS: true,
      });

      console.log('Starting recording..');
      const recordingOptions = {
        // android settings
        android: {
          extension: '.wav',
          outputFormat: Audio.RECORDING_OPTION_ANDROID_OUTPUT_FORMAT_DEFAULT,
          audioEncoder: Audio.RECORDING_OPTION_ANDROID_AUDIO_ENCODER_DEFAULT,
          sampleRate: 44100,
          numberOfChannels: 2,
          bitRate: 128000,
        },
        // ios settings
        ios: {
          extension: '.wav',
          audioQuality: Audio.RECORDING_OPTION_IOS_AUDIO_QUALITY_HIGH,
          sampleRate: 44100,
          numberOfChannels: 2,
          bitRate: 128000,
          linearPCMBitDepth: 16,
          linearPCMIsBigEndian: false,
          linearPCMIsFloat: false,
        },
      };

      console.log("Start Recordinf")
      const { recording } = await Audio.Recording.createAsync(recordingOptions);


      setRecording(recording);
      console.log('Recording started');
    } catch (err) {
      console.error('Failed to start recording', err);
    }
  }

  async function getTranscription() {
    // let audioData = AudioConfig.fromAudioFileOutput(recording.getURI())

    // const audioData = await FileSystem.readAsStringAsync(recording.getURI(), { encoding: FileSystem.EncodingType.Base64 });
    // console.log("Audio Data")
    // console.log(audioData)
    const audioConfig = AudioConfig.fromWavFileInput(recording.getURI());
    console.log("Audio Config")
    console.log(audioConfig)
    const speechConfig = SpeechConfig.fromSubscription('0c7e63fcc41d4bbf894db4d642ad9003', 'https://eastus.api.cognitive.microsoft.com/');
    console.log("Speech Config")
    console.log(speechConfig)
    const recognizer = new SpeechRecognizer(speechConfig, audioConfig);


    // recognizer.recognizeOnceAsync(result => {
    //   switch (result.reason) {
    //     case ResultReason.RecognizedSpeech:
    //       console.log(`RECOGNIZED: Text=${result.text}`);
    //       break;
    //     case ResultReason.NoMatch:
    //       console.log("NOMATCH: Speech could not be recognized.");
    //       break;
    //     case ResultReason.Canceled:
    //       const cancellation = CancellationDetails.fromResult(result);
    //       console.log(`CANCELED: Reason=${cancellation.reason}`);

    //       if (cancellation.reason == CancellationReason.Error) {
    //         console.log(`CANCELED: ErrorCode=${cancellation.ErrorCode}`);
    //         console.log(`CANCELED: ErrorDetails=${cancellation.errorDetails}`);
    //         console.log("CANCELED: Did you set the speech resource key and region values?");
    //       }
    //       break;
    //   }
    //   speechRecognizer.close();
    // });
  }


  async function stopRecording() {
    console.log('Stopping recording..');
    setRecording(undefined);
    await recording.stopAndUnloadAsync();
    await Audio.setAudioModeAsync(
      {
        allowsRecordingIOS: false,
      }
    );
    const uri = recording.getURI();
    console.log('Recording stopped and stored at', uri);

    try {
      await getTranscription()
    } catch (error) {
      console.log(error)
    }

  }



  const [messages, setMessages] = useState([
    'Lorem ipsum dolor sit amet consectetur adipisicing elit. Ipsum optio incidunt fuga fugiat. Accusantium quidem, commodi amet consequuntur quaerat, quo tempore provident ea aliquid ullam illo numquam magnam laudantium in?',])

  return (
    <SafeAreaView style={{ flex: 1 }}>
      <View style={{ flex: 0 }}>
        <Text>Header</Text>
      </View>

      <View style={{ flex: 1 }}>
        <FlatList
          data={messages}
          renderItem={({ item }) => <View className='p-3'>
            <Text>{item}</Text>
          </View>}
          keyExtractor={(item, index) => index.toString()}
        />
      </View>

      <View style={{ flex: 0 }}>
        <Text>Hello</Text>

        <View className='flex-row'>
          <TextInput className='flex-1' style={{ borderWidth: 1 }} />

          <Button
            title={recording ? 'Stop Recording' : 'Start Recording'}
            onPress={recording ? stopRecording : startRecording}
          />

          <TouchableOpacity>
            <Text>Send</Text>
          </TouchableOpacity>
        </View>
      </View>
    </SafeAreaView>
  )
}

export default Page
```

azure react-native expo speech-recognition azure-cognitive-services
1 Answer

[TypeError: Cannot read property 'slice' of undefined]

  • This happens because the `recording` object is not fully initialized, or is still undefined, at the point where you call `AudioConfig.fromWavFileInput(recording.getURI())` to create the `audioConfig`.
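As a quick sanity check, you can guard against that case before touching the SDK at all. A minimal sketch, assuming the same `recording` state variable as in the component above (the early returns are hypothetical additions, not part of the original code):

```
async function getTranscription() {
  // Bail out early if no recording was ever created or it was already cleared;
  // otherwise recording.getURI() is called on undefined and throws.
  if (!recording) {
    console.warn('No recording available - start and stop a recording first.');
    return;
  }

  const uri = recording.getURI();
  if (!uri) {
    console.warn('Recording has no URI - it may not have been stopped yet.');
    return;
  }

  console.log('Recording stored at', uri);
  // ...continue building the AudioConfig / SpeechRecognizer as before
}
```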

Here is how I implemented the Azure speech-to-text service in an Expo React Native app. Please check the code below.

Code:

```
import React, { useState } from 'react';
import * as FileSystem from 'expo-file-system';
import { StyleSheet, Text, View, Button } from 'react-native';
import { Audio } from 'expo-av';
import { SpeechConfig, AudioConfig, SpeechRecognizer, ResultReason, CancellationReason, CancellationDetails } from 'microsoft-cognitiveservices-speech-sdk';

const speechConfig = SpeechConfig.fromSubscription("key", "location/region");
speechConfig.speechRecognitionLanguage = "en-US";

export default function App() {
  const [recording, setRecording] = useState(null);
  const [transcription, setTranscription] = useState('');

  async function startRecording() {
    try {
      const { status } = await Audio.requestPermissionsAsync();
      if (status !== 'granted') {
        console.error('Permission to access audio was denied');
        return;
      }

      const recording = new Audio.Recording();
      await recording.prepareToRecordAsync(Audio.RECORDING_OPTIONS_PRESET_HIGH_QUALITY);
      await recording.startAsync();
      setRecording(recording);
      console.log('Recording started');
    } catch (error) {
      console.error('Failed to start recording', error);
    }
  }

  async function stopRecording() {
    try {
      await recording.stopAndUnloadAsync();
      const uri = recording.getURI();
      setRecording(null);
      console.log('Recording stopped and stored at', uri);

      await getTranscription(uri);
    } catch (error) {
      console.error('Failed to stop recording', error);
    }
  }

  async function getTranscription(uri) {
    try {
      console.log('Transcription started for URI:', uri);
      const audioConfig = AudioConfig.fromWavFileInput(uri);
      const recognizer = new SpeechRecognizer(speechConfig, audioConfig);
  
      recognizer.recognizeOnceAsync(result => {
        switch (result.reason) {
          case ResultReason.RecognizedSpeech:
            setTranscription(result.text);
            console.log(`RECOGNIZED: Text=${result.text}`);
            break;
          case ResultReason.NoMatch:
            console.log("NOMATCH: Speech could not be recognized.");
            break;
          case ResultReason.Canceled:
            const cancellation = CancellationDetails.fromResult(result);
            console.log(`CANCELED: Reason=${cancellation.reason}`);
            if (cancellation.reason === CancellationReason.Error) {
              console.error(`CANCELED: ErrorCode=${cancellation.ErrorCode}`);
              console.error(`CANCELED: ErrorDetails=${cancellation.errorDetails}`);
            }
            break;
          default:
            console.error('Unexpected recognition result:', result.reason);
            break;
        }
      });
    } catch (error) {
      console.error('Failed to get transcription', error);
    }
  }  

  return (
    <View style={styles.container}>
      <Text>Speech to Text Demo</Text>
      <View style={styles.buttonsContainer}>
        <Button title={recording ? 'Stop Recording' : 'Start Recording'} onPress={recording ? stopRecording : startRecording} />
      </View>
      {transcription !== '' && (
        <View style={styles.transcriptionContainer}>
          <Text>Transcription:</Text>
          <Text>{transcription}</Text>
        </View>
      )}
    </View>
  );
}

const styles = StyleSheet.create({
  container: {
    flex: 1,
    justifyContent: 'center',
    alignItems: 'center',
    padding: 20,
  },
  buttonsContainer: {
    marginVertical: 20,
  },
  transcriptionContainer: {
    marginTop: 20,
    padding: 10,
    borderWidth: 1,
    borderColor: 'lightgray',
  },
});
```
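One caveat worth flagging: per the SDK's typings, `AudioConfig.fromWavFileInput` takes the WAV *contents* (a `File` or `Buffer`), not a path, so if passing the URI string still triggers the `slice` error in your environment, a common workaround is to read the file into a buffer first. A hedged sketch, assuming the `buffer` polyfill is installed (React Native has no global `Buffer`):

```
import * as FileSystem from 'expo-file-system';
import { Buffer } from 'buffer'; // npm install buffer
import { AudioConfig } from 'microsoft-cognitiveservices-speech-sdk';

// Read the recorded .wav file from the device and hand its raw bytes to the SDK.
async function audioConfigFromUri(uri) {
  // expo-file-system returns the file contents as a base64 string.
  const base64 = await FileSystem.readAsStringAsync(uri, {
    encoding: FileSystem.EncodingType.Base64,
  });
  // Decode the base64 payload into a Buffer, which fromWavFileInput accepts.
  return AudioConfig.fromWavFileInput(Buffer.from(base64, 'base64'));
}
```

Separately, note that `SpeechConfig.fromSubscription` expects a region identifier such as `"eastus"` as its second argument; if you want to pass a full endpoint URL instead, `SpeechConfig.fromEndpoint` is the method intended for that.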


Output: (screenshot)
