我已经尝试了几天使用 Node.JS 迁移到 Google STT V2。在 v1 中一切都很完美。我创建了一个识别器并使用 https://github.com/GoogleCloudPlatform/nodejs-docs-samples/blob/main/speech/transcribeStreaming.v2.js
创建了一个脚本我的观点是转录来自 Twilio 电话的音频,我使用 Twilio 的 websockets 连接到 WSS 并流式传输音频数据,并将其传递给 Google StreamingRecognition。我的代码如下所示:
const speech = require('@google-cloud/speech').v2;
const fs = require('fs');
const client = new speech.SpeechClient({
keyFilename: './googlecreds.json',
apiEndpoint: 'eu-speech.googleapis.com'
});
const recognizerName = "projects/12345678910/locations/eu/recognizers/name";
const recognitionConfig = {
audoDecodingConfig: {},
};
const streamingConfig = {
config: recognitionConfig,
};
const configRequest = {
recognizer: recognizerName,
streamingConfig: streamingConfig,
};
const express = require('express');
const bodyParser = require('body-parser');
const app = express();
app.use(bodyParser.urlencoded({ extended: true }));
// Load your key and certificate
const privateKey = fs.readFileSync('location', 'utf8');
const certificate = fs.readFileSync('location', 'utf8');
const ca = fs.readFileSync('location', 'utf8');
const credentials = {
key: privateKey,
cert: certificate,
ca: ca
};
//wss
const WebSocket = require('ws');
const https = require('https');
const server = https.createServer(credentials, app);
const wss = new WebSocket.Server({
server: server,
path: '/stream',
});
wss.on("connection", async function connection(ws) {
let recognizeStream = null;
ws.on("message", function incoming(message) {
const msg = JSON.parse(message);
switch (msg.event) {
case "start":
recognizeStream = client
._streamingRecognize()
.on('data', response => {
const {results} = response;
console.log(results[0].alternatives[0].transcript);
})
.on('error', err => {
console.error(err.message);
})
recognizeStream.write(configRequest);
break;
case "media":
// Write the raw media data to the recognize stream
recognizeStream.write({audio: msg.media.payload});
break;
case "stop":
// Stop the recognize stream
recognizeStream.end();
break;
}
});
});
app.post('/voice', (req, res) => {
twiml = `
<Response>
<Say>talk now</Say>
<Connect>
<Stream url="wss://my.domain.com/stream"/>
</Connect>
<Pause length="60"/>
</Response>
`
res.type('text/xml');
res.send(twiml);
});
const port = process.env.PORT || 8080;
server.listen(port, '0.0.0.0', () => {
console.log(`Server running on port ${port}`);
});
流已连接,配置写入没有错误。我可以在我的“媒体”案例中记录从 Twilio 收到的 msg.media.payload,但是将其写入 recognizeStream 没有任何作用,我没有得到任何答案。我不知道该怎么办了。
该实现存在一些问题,可能导致其无法正常工作。以下是您应该检查并可能修改的一些事项:
代码:
const recognitionConfig = { audioConfig: {},};
const streamingConfig = { config: recognitionConfig,};
const configRequest = {
streamingConfig: streamingConfig, // Removed recognizer key
};
wss.on("connection", async function connection(ws) {
let recognizeStream = null;
ws.on("message", function incoming(message) {
const msg = JSON.parse(message);
switch (msg.event) {
case "start":
recognizeStream = client
.streamingRecognize() // Removed the underscore
.on('data', response => {
const { results } = response;
console.log(results[0].alternatives[0].transcript);
})
.on('error', err => {
console.error(err.message);
});
recognizeStream.write(configRequest);
break;
case "media":
// Write the raw media data to the recognize stream
recognizeStream.write({ audio: { content: msg.media.payload } });
break;
case "stop":
// Stop the recognize stream
recognizeStream.end();
break;
}
});
ws.on("close", function () {
if (recognizeStream) {
recognizeStream.end();
}
});
});