我需要将音频文件划分为小于 25mb 的块。
我不想将文件保存在磁盘上。
这是我现在的代码,但它没有按预期工作:它把音频分割成了大约 2 MB 的块。
def audio_splitter(audio_file):
    """Split an audio file into overlapping chunks and export each as MP3.

    Writes ``chunk_1.mp3``, ``chunk_2.mp3``, ... relative to the current
    working directory. Consecutive chunks overlap by 10 seconds.

    NOTE: the chunk duration is derived from ``len(audio.raw_data)``, i.e.
    the decoded samples held in memory, not from the size of the encoded
    MP3 output. Since the MP3 export is compressed, the exported files come
    out much smaller than the 20 MB target.

    Args:
        audio_file: Value passed straight to ``AudioSegment.from_file``.
    """
    audio = AudioSegment.from_file(audio_file)
    # Set the chunk size and overlap
    target_chunk_size = 20 * 1024 * 1024  # Target chunk size in bytes (20 MB)
    # Overlap in milliseconds (10 seconds)
    overlap_duration = 10 * 1000
    # Estimate the number of bytes per millisecond in the audio
    # (based on the raw sample data, see the NOTE in the docstring)
    bytes_per_ms = len(audio.raw_data) / len(audio)
    # Calculate duration of each chunk in milliseconds
    chunk_duration = int(target_chunk_size / bytes_per_ms)
    chunks = []
    start = 0
    while start < len(audio):
        end = start + chunk_duration
        chunk = audio[start:end]
        chunks.append(chunk)
        # Step back by the overlap so adjacent chunks share 10 seconds.
        start += chunk_duration - overlap_duration
    for i, chunk in enumerate(chunks):
        chunk.export(f"chunk_{i + 1}.mp3", format="mp3")
我认为
len(audio.raw_data)
有问题,因为它似乎没有返回正确的字节大小。
有没有更好的方法来解决这个问题?
您可以使用
io.BytesIO
来估计编码后的大小。在我的测试用例中,我使用了 0.5 MB 的限制,生成的文件大小完全正确:512 KB、512 KB、426 KB、71.1 KB。
from pydub import AudioSegment
import io
import sys
import os
def audio_splitter(audio_file, *, target_chunk_size=20 * 1024 * 1024,
                   overlap_duration=10 * 1000):
    """Split an audio file into overlapping MP3 chunks of a target size.

    The whole file is first encoded to MP3 in memory to measure how many
    encoded bytes one millisecond of audio takes on average. That rate is
    used to pick a chunk duration whose MP3 export should be close to
    ``target_chunk_size``. The chunks are written to ``chunk_1.mp3``,
    ``chunk_2.mp3``, ... relative to the current working directory.

    Args:
        audio_file: Value passed straight to ``AudioSegment.from_file``.
        target_chunk_size: Desired size of each exported chunk in bytes
            (default 20 MB). The size is an estimate based on the average
            encoded rate, so keep some headroom below any hard limit.
        overlap_duration: Overlap between consecutive chunks in
            milliseconds (default 10 seconds).

    Raises:
        ValueError: If the computed chunk duration is not longer than the
            overlap, in which case the split could never advance.
    """
    audio = AudioSegment.from_file(audio_file)
    total_ms = len(audio)
    if total_ms == 0:
        # Nothing to split; also avoids dividing by a zero duration below.
        return

    # Encode once into memory (no file on disk) to measure the MP3 size.
    probe = io.BytesIO()
    audio.export(probe, format="mp3")
    # Count the bytes actually written. sys.getsizeof() would instead report
    # the memory footprint of the BytesIO object, which is not the same thing.
    encoded_size = probe.getbuffer().nbytes

    # Average number of encoded bytes per millisecond (an estimation).
    bytes_per_ms = encoded_size / total_ms
    # Duration of each chunk in milliseconds.
    chunk_duration = int(target_chunk_size / bytes_per_ms)

    step = chunk_duration - overlap_duration
    if step <= 0:
        # A non-positive step would make the loop below run forever.
        raise ValueError(
            f"chunk duration ({chunk_duration} ms) must be longer than the "
            f"overlap ({overlap_duration} ms); increase target_chunk_size "
            "or reduce overlap_duration"
        )

    for index, start in enumerate(range(0, total_ms, step), start=1):
        end = start + chunk_duration
        audio[start:end].export(f"chunk_{index}.mp3", format="mp3")
        if end >= total_ms:
            # This chunk already reaches the end of the audio; a further one
            # would only repeat the overlap region.
            break
if __name__ == "__main__":
    # Expect exactly one argument: the audio file to split.
    if len(sys.argv) != 2:
        sys.exit(f"usage: {sys.argv[0]} AUDIO_FILE")
    audio_splitter(sys.argv[1])