我有一个连接到 OpenAI 的 Whisper API 的端点:
import { NextResponse } from 'next/server';
import OpenAI from 'openai';
// Module-level OpenAI client, created once and reused by every request;
// the API key is read from the OPENAI_API_KEY environment variable.
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
/**
 * Route handler for POST requests that upload an audio file for transcription.
 *
 * Reads the `file` field from the request's form data, sends it to the
 * OpenAI `whisper-1` transcription model, and responds with the resulting text.
 *
 * @param {Request} request - Incoming request whose body is form data with a `file` field.
 * @returns {Promise<Response>} JSON `{ transcription }` on success; JSON `{ error }`
 *   with status 400 when no usable file was uploaded, or status 500 when reading
 *   the form data or the transcription call throws.
 */
export async function POST(request) {
  try {
    // Read the file data from the request
    const formData = await request.formData();
    const file = formData.get('file');

    // FormData.get() yields a string for plain text fields, so a truthiness
    // check alone would let a non-file value reach the transcription call.
    if (!file || typeof file === 'string') {
      return NextResponse.json({ error: 'No file uploaded' }, { status: 400 });
    }

    // A zero-byte upload cannot contain audio; reject it before calling the API.
    if (file.size === 0) {
      return NextResponse.json({ error: 'Uploaded file is empty' }, { status: 400 });
    }

    console.log('File type: ', file.type);

    // Send the file to OpenAI for transcription
    const transcription = await openai.audio.transcriptions.create({
      file,
      model: 'whisper-1',
    });
    console.log('Transcription: ', transcription);

    return NextResponse.json({ transcription: transcription.text });
  } catch (error) {
    // Log the full upstream error server-side; the client only gets a generic message.
    console.error('Error in transcription:', error);
    return NextResponse.json({ error: 'Error in transcription' }, { status: 500 });
  }
}
但是,我遇到以下日志:
File type: audio/mp4
Error in transcription: tF [Error]: 400 Invalid file format. Supported formats: ['flac', 'm4a', 'mp3', 'mp4', 'mpeg', 'mpga', 'oga', 'ogg', 'wav', 'webm']
at tx.generate (/var/task/.next/server/chunks/854.js:15:105829)
at p6.makeStatusError (/var/task/.next/server/chunks/854.js:15:97598)
at p6.makeRequest (/var/task/.next/server/chunks/854.js:15:98521)
at process.processTicksAndRejections (node:internal/process/task_queues:95:5)
at async u (/var/task/.next/server/app/api/speech-to-text/route.js:1:1282)
at async /var/task/node_modules/next/dist/compiled/next-server/app-route.runtime.prod.js:6:36258
at async eR.execute (/var/task/node_modules/next/dist/compiled/next-server/app-route.runtime.prod.js:6:26874)
at async eR.handle (/var/task/node_modules/next/dist/compiled/next-server/app-route.runtime.prod.js:6:37512)
at async es (/var/task/node_modules/next/dist/compiled/next-server/server.runtime.prod.js:16:24953)
at async en.responseCache.get.routeKind (/var/task/node_modules/next/dist/compiled/next-server/server.runtime.prod.js:17:1026) {
status: 400,
headers: {
'access-control-expose-headers': 'X-Request-ID',
'alt-svc': 'h3=":443"; ma=86400',
'cf-cache-status': 'DYNAMIC',
'cf-ray': '8bd627db99516fec-IAD',
connection: 'keep-alive',
'content-length': '226',
'content-type': 'application/json',
date: 'Tue, 03 Sep 2024 13:41:09 GMT',
'openai-organization': 'user-so9bfaz9pguckwzpiqbc13lg',
'openai-processing-ms': '50',
'openai-version': '2020-10-01',
server: 'cloudflare',
'set-cookie': '__cf_bm=1_kcVzTUqflazicy.zU.C3Fdsx5XutZCDtTWQq8vBII-1725370869-1.0.1.1-5xTxFbxqFzjMcKdxNVKCgsBUcauYlDs6yGK4dlWOtGmEUKq10yf2oJhSCHBpZUC8K__uFjX3ftFETra.456M2w; path=/; expires=Tue, 03-Sep-24 14:11:09 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None, _cfuvid=ZiOsCYq_0H0WkkpBtGvLP8srrOQ90fnV_8.g3x6FRoc-1725370869213-0.0.1.1-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None',
'strict-transport-security': 'max-age=15552000; includeSubDomains; preload',
'x-content-type-options': 'nosniff',
'x-ratelimit-limit-requests': '50',
'x-ratelimit-remaining-requests': '49',
'x-ratelimit-reset-requests': '1.2s',
'x-request-id': 'req_1f4352d0b89a49d63a0c0c9936ab5b2a'
},
request_id: 'req_1f4352d0b89a49d63a0c0c9936ab5b2a',
error: {
message: "Invalid file format. Supported formats: ['flac', 'm4a', 'mp3', 'mp4', 'mpeg', 'mpga', 'oga', 'ogg', 'wav', 'webm']",
type: 'invalid_request_error',
param: null,
code: null
},
code: null,
param: null,
type: 'invalid_request_error'
}
我不明白为什么它不允许我上传在 iPhone 上录制的音频文件。我的 iPhone 以 audio/mp4 格式发送音频,而根据文档(参见“语音转文本”文档),mp4 被列为受支持的格式。
我尝试这样做,OpenAI API 接受该格式,但文件已损坏并且无法识别我通过语音说出的单词:
// Attempted workaround: copy the uploaded bytes into a fresh File that keeps
// the original MIME type but carries an explicit "audio.mp4" filename.
const audioBytes = await file.arrayBuffer();
const newFile = new File(
  [new Blob([audioBytes], { type: file.type })],
  'audio.mp4',
  { type: file.type },
);
// `transcription` is assigned, not declared, here; it is expected to be declared by the surrounding code.
transcription = await openai.audio.transcriptions.create({
  model: "whisper-1",
  file: newFile,
});
任何有关解决此问题的帮助或指导将不胜感激。
这个问题已在官方OpenAI论坛上讨论过。您遇到的问题特定于 iOS 录制的音频。
有人反馈说,在使用 MediaRecorder 时,向 start() 传入 1000 毫秒(即一秒,而不是一分钟)的 timeslice 参数,让录音按每秒一个数据块输出,即可解决该错误,如下所示:
// Start recording with a timeslice of 1000 (milliseconds, per the MediaRecorder API),
// so data is delivered in roughly one-second chunks rather than as a single blob at stop.
// NOTE(review): `mediaRecorder.current` looks like a React ref holding the recorder — confirm against the component.
mediaRecorder.current.start(1000);
请参阅以下两个讨论以获取更多信息: