概括来说,我想在 Twilio 双向媒体流中使用 Twilio 的 Dial 功能,但 Twilio 的 Media Streams 似乎只支持将原始音频发送回呼叫者。
这里有更多细节:
我目前有一个应用程序,它通过双向媒体流调用 OpenAI Realtime API,在电话中解答来电者的问题和疑虑(我的代码参考了此处的示例)。
这是接收音频并发送回音频的主要双向媒体流代码:
@app.websocket("/media-stream")
async def handle_media_stream(websocket: WebSocket):
"""Handle WebSocket connections between Twilio and OpenAI."""
print("Client connected")
await websocket.accept()
async with websockets.connect(
"wss://api.openai.com/v1/realtime?model=gpt-4o-realtime-preview-2024-10-01",
extra_headers={
"Authorization": f"Bearer {CONFIG.api_key}",
"OpenAI-Beta": "realtime=v1",
},
) as openai_ws:
await initialize_session(openai_ws)
# Connection specific state
stream_sid = None
latest_media_timestamp = 0
last_assistant_item = None
mark_queue = []
response_start_timestamp_twilio = None
async def receive_from_twilio():
"""Receive audio data from Twilio and send it to the OpenAI Realtime API."""
nonlocal stream_sid, latest_media_timestamp
try:
async for message in websocket.iter_text():
data = json.loads(message)
if data["event"] == "media" and openai_ws.open:
latest_media_timestamp = int(data["media"]["timestamp"])
audio_append = {
"type": "input_audio_buffer.append",
"audio": data["media"]["payload"],
}
await openai_ws.send(json.dumps(audio_append))
elif data["event"] == "start":
stream_sid = data["start"]["streamSid"]
print(f"Incoming stream has started {stream_sid}")
response_start_timestamp_twilio = None # noqa: F841
latest_media_timestamp = 0
last_assistant_item = None # noqa: F841
elif data["event"] == "mark":
if mark_queue:
mark_queue.pop(0)
except WebSocketDisconnect:
print("Client disconnected.")
if openai_ws.open:
await openai_ws.close()
async def send_to_twilio():
"""Receive events from the OpenAI Realtime API, send audio back to Twilio."""
nonlocal stream_sid, last_assistant_item, response_start_timestamp_twilio
try:
async for openai_message in openai_ws:
response = json.loads(openai_message)
response_type = response.get("type")
if response_type in CONFIG.log_event_types:
# print(f"Received event: {response['type']}", response)
logging.info(f"Received event: {response['type']}")
match response_type:
case "response.audio.delta":
if "delta" not in response:
continue
audio_payload = base64.b64encode(
base64.b64decode(response["delta"])
).decode("utf-8")
audio_delta = {
"event": "media",
"streamSid": stream_sid,
"media": {"payload": audio_payload},
}
await websocket.send_json(audio_delta)
if response_start_timestamp_twilio is None:
response_start_timestamp_twilio = latest_media_timestamp
if CONFIG.show_timing_math:
print(
f"Setting start timestamp for new response: {response_start_timestamp_twilio}ms"
)
# Update last_assistant_item safely
if response.get("item_id"):
last_assistant_item = response["item_id"]
await send_mark(websocket, stream_sid)
# Trigger an interruption. Your use case might work better using `input_audio_buffer.speech_stopped`, or combining the two.
case "input_audio_buffer.speech_started":
print("Speech started detected.")
if last_assistant_item:
print(
f"Interrupting response with id: {last_assistant_item}"
)
await handle_speech_started_event()
case "response.function_call_arguments.done":
# https://platform.openai.com/docs/api-reference/realtime-server-events/response/function_call_arguments/done
# TODO: eventually migrate domain model to voice/
event = FunctionCallArgumentsEvent(**response)
logging.info(
f"Calling {event.name=} with {event.arguments=}"
)
await call_tool(
event.call_id,
event.name,
json.loads(event.arguments),
openai_ws,
)
except Exception as e:
traceback.print_exc()
print(f"Error in send_to_twilio: {e}")
问题是,如果呼叫者在某个时刻感到不满,我希望能够拨出电话,把他们转接给人工客服(即拨打人工客服的电话号码)。基本上类似于下面这样:
from twilio.twiml.voice_response import Dial, VoiceResponse, Say
# Build an empty TwiML voice response, then add a dial instruction for the
# target phone number to it.
response = VoiceResponse()
response.dial("111-111-1111") # dial out to human
但后者(即 TwiML)只能在处理来电的 Webhook 中使用,而不能在用于双向媒体流的 Webhook 中使用。有谁知道变通办法,或者知道如何在双向媒体流中以编程方式拨出电话?
感谢您花时间阅读本文。
如果我理解正确的话,您希望在移交给人工代理时完全退出媒体流,对吗?
在这种情况下,您可以用新的 TwiML 覆盖该通话最初使用的(即启动 WebSocket 流的)TwiML。为此,您需要知道当前活动通话的 Call SID。
// Download the helper library from https://www.twilio.com/docs/node/install
const twilio = require("twilio"); // Or, for ESM: import twilio from "twilio";
// Find your Account SID and Auth Token at twilio.com/console
// and set the environment variables. See http://twil.io/secure
// Credentials are read from the environment rather than hard-coded.
const accountSid = process.env.TWILIO_ACCOUNT_SID;
const authToken = process.env.TWILIO_AUTH_TOKEN;
// Client built from the two credentials above; used by updateCall() below.
const client = twilio(accountSid, authToken);
/**
 * Replace the TwiML of an existing call with a single <Dial> instruction,
 * then log the SID of the updated call.
 */
async function updateCall() {
  // New instructions for the call: dial the target number.
  const twiml = `<?xml version="1.0" encoding="UTF-8"?>
<Response>
<Dial>111-111-1111</Dial>
</Response>`;
  // NOTE(review): the Call SID looks like a placeholder -- substitute the SID
  // of the call to redirect.
  const targetCall = client.calls("CAaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
  const updated = await targetCall.update({ twiml });
  console.log(updated.sid);
}

updateCall();