我一直在尝试使用 RTP/UDP 通过 pyaudio 通过网络传输音频流。
因为我可以找到任何 RTP 库,所以我在网上文章的帮助下进行了自己的编码。
我现在面临的问题是,当我不使用 RTP 库时一切正常,但当我使用 RTP 编码时存在一些延迟问题,我在接收器端收到切碎的音频。
我在这里分享发送者、接收者和 rtp 代码。
如有任何帮助,我们将不胜感激。
Sender_rtp.py
import pyaudio
import sys
import socket
import datetime
import pyrtp_2 as rtp
import random
HOST = sys.argv[1]
PORT = sys.argv[2]
data = bytes() # Stream of audio bytes
CHUNK_SIZE = 1024
BROADCAST_SIZE = 1024
CHANNELS = 1
FORMAT = pyaudio.paInt16 # 2 bytes size
RATE = 16000
# instantiate PyAudio (1)
p = pyaudio.PyAudio()
# define callback (2)
def pyaudio_callback(in_data, frame_count, time_info, status):
global data
data += in_data
return (None, pyaudio.paContinue)
# open stream (3)
stream = p.open(format=FORMAT,
channels=CHANNELS,
rate=RATE,
input=True,
frames_per_buffer=CHUNK_SIZE,
stream_callback=pyaudio_callback)
# start the stream (4)
stream.start_stream()
sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
#sock.bind((HOST, int(PORT)))
def send_data():
global data
print()
if (len(data) > BROADCAST_SIZE):
packet_vars = {'version' : 2,
'padding' : 0,
'extension' : 0,
'csi_count' : 0,
'marker' : 0,
'payload_type' : 97,
'sequence_number' : random.randint(1,9999),
'timestamp' : random.randint(1,9999),
'ssrc' : 185755418,
'payload' : data}
rtp_packet = rtp.GenerateRTP(packet_vars)
sock.sendto(rtp_packet[:BROADCAST_SIZE], (HOST, int(PORT)))
data = data[BROADCAST_SIZE:]
print(f'Sent {str(BROADCAST_SIZE)} bytes of audio. {datetime.datetime.now().time()}')
try:
while True:
send_data()
except KeyboardInterrupt:
print('\nClosing stream...')
stream.stop_stream()
stream.close()
p.terminate()
sock.close()
Receiver_rtp.py
import pyaudio
import sys
import socket
import pyrtp_2 as rtp
HOST = sys.argv[1]
PORT = sys.argv[2]
data = bytes() # Stream of audio bytes
is_receiving = False
CHUNK_SIZE = 1024 # Size of frame window to write audio (frames_per_buffer)
BROADCAST_SIZE = 1024 # Socket receives audio with this size
BUFFER_SIZE = BROADCAST_SIZE * 4 # Receive this amount of data before playback
CHANNELS = 1
FORMAT = pyaudio.paInt16 # 2 bytes size
RATE = 16000
# instantiate PyAudio (1)
p = pyaudio.PyAudio()
# define callback (2)
def pyaudio_callback(in_data, frame_count, time_info, status):
if not is_receiving:
return (bytes([0] * frame_count * CHANNELS * 2), pyaudio.paContinue)
global data
try:
# Cut the data, if it started to bufferize
if len(data) >= BUFFER_SIZE * 2:
print('Cutting Audio Buffer..')
data = data[-BUFFER_SIZE:]
avail_data_count = min(frame_count * CHANNELS * 2, len(data))
return_data = data[:avail_data_count]
data = data[avail_data_count:]
# Inflate end of the array with zeros, if there is not enough audio.
return_data += bytes([0] * (frame_count * CHANNELS * 2 - avail_data_count))
return (return_data, pyaudio.paContinue)
except:
print('Exception in pyaudio_callback...')
# open stream (3)
stream = p.open(format=FORMAT,
channels=CHANNELS,
rate=RATE,
output=True,
frames_per_buffer=CHUNK_SIZE,
stream_callback=pyaudio_callback)
sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
sock.bind((HOST, int(PORT)))
#sock.listen(1)
#connection, client_address = sock.accept()
print('Socket bind succeed.')
try:
while True:
new_data = sock.recv(BROADCAST_SIZE)
print(f"Incoming raw data : {type(new_data)}")
rtp_packet = rtp.DecodeRTP(new_data)
payload = rtp_packet['payload']
##break
data += rtp_packet['payload']
if len(data) >= BUFFER_SIZE and not is_receiving:
is_receiving = True
# start stream (4)
stream.start_stream()
print(f'Stream started, when {len(data)} bytes of data were received.\nThis causes {str(len(data) / RATE)} seconds of latency')
except KeyboardInterrupt:
print('\nClosing socket and stream...')
sock.close()
stream.stop_stream()
stream.close()
p.terminate()
pyrtp.py
def GenerateRTP(packet_vars):
#The first twelve octates are present in every RTP packet.
#The first octet is the version number of the RTP packet.
#The second octet is the padding bit.
#The third octet is the extension bit.
#The fourth octet is the CSRC count.
#The fifth octet is the marker bit.
#The sixth octet is the payload type.
#The seventh to twelve octets are the sequence number.
#The thirteen to eighteen octets are the timestamp.
#The nineteen to twenty-four octets are the synchronization source (SSRC).
#The remaining octets are the payload data.
#Generate fist byte of the header a binary string:
version = format(packet_vars['version'], 'b').zfill(2)
padding = format(packet_vars['padding'], 'b')
extension = format(packet_vars['extension'], 'b')
csrc_count = format(packet_vars['csi_count'], 'b').zfill(4)
byte1 = format(int((version + padding + extension + csrc_count), 2), 'x').zfill(2)
#Generate second byte of the header as binary string:
marker = format(packet_vars['marker'], 'b')
payload_type = format(packet_vars['payload_type'], 'b').zfill(7)
byte2 = format(int((marker + payload_type), 2), 'x').zfill(2)
sequence_number = format(packet_vars['sequence_number'], 'x').zfill(4)
timestamp = format(packet_vars['timestamp'], 'x').zfill(8)
ssrc = format(packet_vars['ssrc'], 'x').zfill(8)
payload = packet_vars['payload'].hex()
packet = byte1 + byte2 + sequence_number + timestamp + ssrc + payload
return packet.encode()
def DecodeRTP(packet_bytes):
#return dict of variables from the packet
packet_vars = {}
byte1 = packet_bytes[0:2]
byte1 = int(byte1, 16)
byte1 = format(byte1, 'b').zfill(8)
packet_vars['version'] = int(byte1[0:2],2)
packet_vars['padding'] = int(byte1[2:3],2)
packet_vars['extension'] = int(byte1[3:4])
packet_vars['csi_count'] = int(byte1[4:8], 2)
byte2 = packet_bytes[2:4]
byte2 = int(byte2, 16)
byte2 = format(byte2, 'b').zfill(8)
packet_vars['marker'] = int(byte2[0:1])
packet_vars['payload_type'] = int(byte2[1:8], 2)
packet_vars['sequence_number'] = int(packet_bytes[4:8], 16)
packet_vars['timestamp'] = int(packet_bytes[8:16], 16)
packet_vars['ssrc'] = int(packet_bytes[16:24], 16)
payload = packet_bytes[24:]
packet_vars['payload'] = bytes.fromhex(payload.decode())
#print(f"payload 4: {type(payload)} {payload}")
return packet_vars
我为 Sender_rtp.py 添加了一个队列。效果很好