语音转文本一次仅打印一个句子

问题描述 投票:0回答:1

我正在构建一个 Swift 应用程序,允许用户对着手机说话并将转录内容保存到文本视图中。当我说出一个句子并完成转录后,文本字段会被清除以打印下一个句子,问题就出在这里。我希望它持续追加转录内容,而不删除前面的句子。

希望获得有关此问题的一些帮助或见解!!

// Last string received from the recognizer; used to skip duplicate callbacks.
private var lastRecognizedText = ""
// Accumulated transcript of all finalized sentences (the text that must persist).
private var fullTranscriptionText = ""
// Guards against overlapping start/stop of the audio/recognition session.
private var isListening = false

/// Starts a live speech-recognition session: configures the audio session,
/// taps the microphone input, and streams partial/final transcriptions into
/// `textView` while accumulating finalized sentences in `fullTranscriptionText`.
private func startListening() {
    // Ignore re-entrant start requests while a session is already active.
    if isListening {
        print("Already listening, start request ignored.")
        return
    }
    stopListening()  // Stop any previous sessions

    isListening = true
    listeningAnimation?.startAnimating()
    processedNotesAnimation?.startAnimating()
    listeningLabel.isHidden = false
    processedNotesLabel.isHidden = false
    producedNotesView.isHidden = false
    configureHeroModifiers()
    styleTextView()
    setupAudioSession()

    recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
    guard let recognitionRequest = recognitionRequest else {
        print("Failed to create recognition request.")
        // Roll back the flag set above; otherwise every later start
        // attempt would be rejected by the isListening guard.
        isListening = false
        return
    }
    recognitionRequest.shouldReportPartialResults = true

    let inputNode = audioEngine.inputNode
    var currentSentenceText = ""  // Partial text for the sentence in progress

    // [weak self]: this escaping closure is retained by the stored task;
    // a strong capture would keep the view controller alive for its duration.
    recognitionTask = speechRecognizer?.recognitionTask(with: recognitionRequest) { [weak self] result, error in
        guard let self = self else { return }

        if let result = result {
            let newText = result.bestTranscription.formattedString

            if newText != self.lastRecognizedText {
                if result.isFinal {
                    // Append the finalized sentence so earlier sentences are preserved.
                    self.fullTranscriptionText += newText + ". "
                    self.textView.text = self.fullTranscriptionText

                    // Print the final accumulated transcription to the console
                    print("Final Transcription: \(self.fullTranscriptionText)")

                    // Reset the in-progress sentence after finalizing.
                    currentSentenceText = ""
                } else {
                    // Live-update: accumulated text plus the current partial sentence.
                    currentSentenceText = newText
                    self.textView.text = self.fullTranscriptionText + currentSentenceText
                }

                self.lastRecognizedText = newText

                // Auto-scroll to the latest additions. NSRange is UTF-16 based,
                // so use the NSString length (not String.count), and skip when
                // empty to avoid an out-of-range location of -1.
                let utf16Length = (self.textView.text as NSString).length
                if utf16Length > 0 {
                    self.textView.scrollRangeToVisible(NSRange(location: utf16Length - 1, length: 1))
                }
            }
        }

        if let error = error {
            print("Speech recognition error: \(error.localizedDescription)")
            self.handleError(error)
            self.stopListening()  // Stop on error
        }
    }

    // Forward microphone buffers to the recognition request.
    let recordingFormat = inputNode.outputFormat(forBus: 0)
    inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { [weak self] buffer, _ in
        self?.recognitionRequest?.append(buffer)
    }

    audioEngine.prepare()
    do {
        try audioEngine.start()
        print("Audio engine started.")
    } catch {
        print("Audio Engine Error: \(error.localizedDescription)")
        // No audio can be captured: tear the half-open session back down.
        stopListening()
    }

    textView.isScrollEnabled = true
    textView.isEditable = false
}

/// Stops the current listening session when the user taps the stop button.
@IBAction func stopRecordingTapped(_ sender: Any) {
    stopListening()
}

/// Tears down the audio engine and recognition session started by `startListening()`.
/// Safe to call repeatedly: exits immediately when no session is active.
private func stopListening() {
    guard isListening else { return }
    print("Stopping listening session.")
    isListening = false

    audioEngine.stop()
    audioEngine.inputNode.removeTap(onBus: 0)
    // Signal end-of-audio so the recognizer can deliver its final result;
    // without endAudio() the request is released while still expecting input.
    recognitionRequest?.endAudio()
    recognitionTask?.finish()
    recognitionTask = nil
    recognitionRequest = nil
}

/// Configures the shared audio session for speech capture
/// (record-only, measurement mode, ducking other audio, Bluetooth allowed).
private func setupAudioSession() {
    let session = AVAudioSession.sharedInstance()
    do {
        try session.setCategory(.record, mode: .measurement, options: [.duckOthers, .allowBluetooth])
        try session.setActive(true, options: .notifyOthersOnDeactivation)
        print("Audio session successfully configured.")
    } catch {
        print("Audio session setup error: \(error.localizedDescription)")
    }
}




/// Requests speech-recognition and microphone permissions, logging each outcome.
private func requestSpeechRecognitionPermission() {
    SFSpeechRecognizer.requestAuthorization { authStatus in
        // Hop to the main queue: the authorization callback may arrive
        // on an arbitrary background queue.
        OperationQueue.main.addOperation {
            switch authStatus {
            case .authorized:
                print("Speech recognition authorized")
            case .denied, .restricted:
                print("Speech recognition permission denied")
            case .notDetermined:
                // Not an outright denial — the user simply hasn't been asked yet.
                print("Speech recognition permission not determined")
            @unknown default:
                break
            }
        }
    }

    AVAudioSession.sharedInstance().requestRecordPermission { granted in
        print(granted ? "Microphone permission granted" : "Microphone permission denied")
    }
}

/// Clears all state left over from a previous session so a new one starts fresh.
private func resetForNewSession() {
    // Drop any text captured during the last session.
    recognizedText = ""
    // Wipe the persisted transcript as well.
    clearTranscriptFile()
}

/// Empties the on-disk transcript by atomically overwriting it with an empty string.
private func clearTranscriptFile() {
    let emptyContents = ""
    do {
        try emptyContents.write(to: transcriptFilePath, atomically: true, encoding: .utf8)
        print("Transcript file cleared for new session")
    } catch {
        print("Error clearing transcript file: \(error)")
    }
}




/// Activates the audio session and starts an AVAudioRecorder whose output is
/// discarded to /dev/null — presumably used only for level metering
/// (`isMeteringEnabled = true`); verify against callers.
func setupAudioRecorder() {
    let session = AVAudioSession.sharedInstance()
    do {
        // Allow simultaneous playback and recording, routed to the speaker.
        try session.setCategory(.playAndRecord, mode: .default, options: .defaultToSpeaker)
        try session.setActive(true)

        // Lossless, mono, 44.1 kHz.
        let recorderSettings: [String: Any] = [
            AVFormatIDKey: Int(kAudioFormatAppleLossless),
            AVSampleRateKey: 44100.0,
            AVNumberOfChannelsKey: 1,
            AVEncoderAudioQualityKey: AVAudioQuality.high.rawValue
        ]

        audioRecorder = try AVAudioRecorder(url: URL(fileURLWithPath: "/dev/null"), settings: recorderSettings)
        audioRecorder.isMeteringEnabled = true
        audioRecorder.record()
    } catch {
        print("Failed to set up audio recorder: \(error)")
    }
}
swift speech-recognition sfspeechrecognizer
1个回答
0
投票

您忘记将每个 `newText` 追加到 `fullTranscriptionText`,而是在 `result` 闭包每次被调用时将其覆盖:

recognitionTask = speechRecognizer?.recognitionTask(with: recognitionRequest) { result, error in
            
            if let result = result {
                let newText = result.bestTranscription.formattedString
                
                if newText != self.lastRecognizedText {
                    if result.isFinal {
                        // Append finalized text to fullTranscriptionText
                        self.fullTranscriptionText += newText + ". "
//                        self.textView.text = self.fullTranscriptionText
                        
                        // Print the final accumulated transcription to the console
                        print("Final Transcription: \(self.fullTranscriptionText)")
                        
                        // Reset currentSentenceText after finalizing
//                        currentSentenceText = ""
                    } else {
                        // Update intermediate sentence text for real-time correction
//                        currentSentenceText = newText
                        
                        self.fullTranscriptionText += newText // <-- Add new to full text

                        // Display full transcription with the current intermediate text
//                        self.textView.text = self.fullTranscriptionText + currentSentenceText
                    }

                    self.lastRecognizedText = newText
                    
                    // Auto-scroll to show the latest additions
//                    let bottom = NSMakeRange(self.textView.text.count - 1, 1)
//                    self.textView.scrollRangeToVisible(bottom)
                }
            }

            if let error = error {
                print("Speech recognition error: \(error.localizedDescription)")
                self.speechError = error.localizedDescription
//                self.handleError(error)
                self.stopListening()  // Stop on error
            }
© www.soinside.com 2019 - 2024. All rights reserved.