!git clone https://github.com/coqui-ai/TTS.git
%cd TTS
!pip install -r requirements.txt
!pip install .
!pip install numpy==1.24.3
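A quick sanity check after the installs confirms that the NumPy pin took effect and that the runtime actually has a GPU (optional):

import numpy
import torch
print(numpy.__version__)          # expect 1.24.3
print(torch.cuda.is_available())  # expect True on a GPU runtime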
from TTS.tts.configs.xtts_config import XttsConfig
from TTS.tts.models.xtts import Xtts
import torch
import soundfile as sf
from pydub import AudioSegment
import base64
import TTS.tts.layers.xtts.tokenizer as xttsTokenizer
import numpy as np
import io
import os
# Load the XTTS-v2 config and checkpoint from Google Drive, then move the model to the GPU
config_path = "/content/drive/MyDrive/XTTS-v2/config.json"
model_path = "/content/drive/MyDrive/XTTS-v2/"
config = XttsConfig()
config.load_json(config_path)
model = Xtts.init_from_config(config)
model.load_checkpoint(config, checkpoint_dir=model_path, eval=True)
model.cuda()
def TTS_XTTSv2(prompt, speaker_wav_path, id, lang, speed, text_split_length=226):
    # Detect the language first, since split_sentence needs a valid lang
    # (lang_detect and voice_test_path are defined elsewhere in the notebook).
    if lang is None or lang == "":
        lang = lang_detect(prompt)
    split_tts_sentence = xttsTokenizer.split_sentence(text=prompt, lang=lang, text_split_length=text_split_length)
    output_files = []
    for i, part in enumerate(split_tts_sentence):
        splitted_text_voice_output_path = f"{voice_test_path}/{id}_{i+1}.wav"
        outputs = model.synthesize(
            part,
            config=config,
            speaker_wav=speaker_wav_path,
            language=lang,
            speed=speed,
        )
        wav_output = outputs["wav"]
        sf.write(splitted_text_voice_output_path, wav_output, 24000)  # XTTS outputs 24 kHz audio
        output_files.append(splitted_text_voice_output_path)
    return output_files
The error is triggered in the model.synthesize step, which is computationally heavy and runs on the GPU. The function is called from a threaded API: each request runs in its own thread, so the text processing is parallelized across threads.
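For context, the calling pattern is roughly the following (a simplified sketch; handle_tts_request is a hypothetical entry point, while generate_tts_response is the wrapper around TTS_XTTSv2 that appears in the traceback):

import threading

def handle_tts_request(prompt, speaker_wav_path, id, lang, speed):
    # One worker thread per incoming request; every thread ends up calling
    # model.synthesize() on the same shared Xtts instance.
    t = threading.Thread(
        target=generate_tts_response,
        args=(prompt, speaker_wav_path, id, lang, speed),
    )
    t.start()
    return t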
Full error traceback (all three worker threads fail with the same CUDA assert):
Exception in thread Thread-33 (generate_tts_response):
Traceback (most recent call last):
  File "/usr/lib/python3.11/threading.py", line 1045, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.11/threading.py", line 982, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-9-d8f16acdbf2d>", line 1168, in generate_tts_response
  File "<ipython-input-9-d8f16acdbf2d>", line 1034, in TTS_XTTSv2
  File "/usr/local/lib/python3.11/dist-packages/TTS/tts/models/xtts.py", line 419, in synthesize
    return self.full_inference(text, speaker_wav, language, **settings)
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/_contextlib.py", line 116, in decorate_context
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.11/dist-packages/TTS/tts/models/xtts.py", line 488, in full_inference
    return self.inference(
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/_contextlib.py", line 116, in decorate_context
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.11/dist-packages/TTS/tts/models/xtts.py", line 541, in inference
    gpt_codes = self.gpt.generate(
  File "/usr/local/lib/python3.11/dist-packages/TTS/tts/layers/xtts/gpt.py", line 590, in generate
    gen = self.gpt_inference.generate(
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/_contextlib.py", line 116, in decorate_context
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.11/dist-packages/transformers/generation/utils.py", line 2252, in generate
    result = self._sample(
  File "/usr/local/lib/python3.11/dist-packages/transformers/generation/utils.py", line 3310, in _sample
    unfinished_sequences = unfinished_sequences & ~stopping_criteria(input_ids, scores)
  File "/usr/local/lib/python3.11/dist-packages/transformers/generation/stopping_criteria.py", line 494, in __call__
    is_done = torch.full((input_ids.shape[0],), False, device=input_ids.device, dtype=torch.bool)
RuntimeError: CUDA error: device-side assert triggered
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.

Exception in thread Thread-34 (generate_tts_response):
Traceback (most recent call last):
  ... (same call chain as Thread-33 down to transformers generate) ...
  File "/usr/local/lib/python3.11/dist-packages/transformers/generation/utils.py", line 3254, in _sample
    outputs = model_forward(**model_inputs, return_dict=True)
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
  File "/usr/local/lib/python3.11/dist-packages/TTS/tts/layers/xtts/gpt_inference.py", line 94, in forward
    emb = emb + self.pos_embedding.get_fixed_embedding(
  File "/usr/local/lib/python3.11/dist-packages/TTS/tts/layers/xtts/gpt.py", line 40, in get_fixed_embedding
    return self.emb(torch.tensor([ind], device=dev)).unsqueeze(0)
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/sparse.py", line 190, in forward
    return F.embedding(
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/functional.py", line 2551, in embedding
    return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
RuntimeError: CUDA error: device-side assert triggered
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.

Exception in thread Thread-35 (generate_tts_response):
Traceback (most recent call last):
  ... (same call chain as Thread-33 down to transformers generate) ...
  File "/usr/local/lib/python3.11/dist-packages/transformers/generation/utils.py", line 3251, in _sample
    outputs = self(**model_inputs, return_dict=True)
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
  File "/usr/local/lib/python3.11/dist-packages/TTS/tts/layers/xtts/gpt_inference.py", line 97, in forward
    transformer_outputs = self.transformer(
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
  File "/usr/local/lib/python3.11/dist-packages/transformers/models/gpt2/modeling_gpt2.py", line 1133, in forward
    outputs = block(
    ...
RuntimeError: CUDA error: device-side assert triggered
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
What I have already tried:

- Validated the input data: made sure the prompt text, speaker_wav_path, and lang inputs are in the expected format.
- Checked GPU usage: monitored GPU memory with nvidia-smi; memory usage looks normal, with no sign of an overflow.
- Checked tokenization: confirmed that the xttsTokenizer.split_sentence function splits the text correctly and stays within the text_split_length limit (see the sketch below).
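The tokenization check was along these lines (a quick sketch; the sample text and limit are placeholders):

# Inspect the splitter output: every chunk should be non-empty and should
# stay within text_split_length characters.
chunks = xttsTokenizer.split_sentence(
    text="A long paragraph of input text ...", lang="en", text_split_length=226
)
for i, chunk in enumerate(chunks):
    print(i, len(chunk), repr(chunk[:60]))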
Despite these checks, the error persists.

I expected the function to run without errors: split the text, synthesize the audio, and save a WAV file for each text part.
The first thing I would try is compiling PyTorch with TORCH_USE_CUDA_DSA, to see whether there is an underlying problem that does not affect single-threaded runs but becomes more prevalent with multithreading.
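Before rebuilding PyTorch, a cheaper diagnostic is worth a try (my suggestion, not something the error message demands): CUDA errors are reported asynchronously, so the Python frame that raises is often not the operation that actually failed. Forcing synchronous kernel launches makes the traceback point at the real call site:

import os

# Set this before torch touches the GPU (top of the notebook, then restart
# the runtime). Kernel launches become synchronous, so the device-side
# assert is raised at the call that actually triggered it.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"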
Then, serialize access to the shared model with a lock:

import threading

synthesize_lock = threading.Lock()  # create this once, outside your function

def TTS_XTTSv2(prompt, speaker_wav_path, id, lang, speed, text_split_length=226):
    ...
    for i, part in enumerate(split_tts_sentence):
        splitted_text_voice_output_path = f"{voice_test_path}/{id}_{i+1}.wav"
        with synthesize_lock:  # ensure exclusive access to the model
            outputs = model.synthesize(
                part,
                config=config,
                speaker_wav=speaker_wav_path,
                language=lang,
                speed=speed,
            )
        ...
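With the lock in place, GPU inference is fully serialized and the threads only overlap on text processing and file I/O. An equivalent pattern, under the same assumption that one model instance must not be used concurrently, is to let a single worker thread own the model and have request threads submit jobs to it (a sketch; tts_queue and tts_worker are hypothetical names):

import queue
import threading

tts_queue = queue.Queue()

def tts_worker():
    # The only thread that ever touches the model; jobs are
    # (text part, synthesize kwargs, per-request result queue).
    while True:
        part, kwargs, result_q = tts_queue.get()
        try:
            result_q.put(model.synthesize(part, config=config, **kwargs))
        except Exception as e:  # hand failures back to the caller too
            result_q.put(e)
        finally:
            tts_queue.task_done()

threading.Thread(target=tts_worker, daemon=True).start()

# In a request thread:
# result_q = queue.Queue(maxsize=1)
# tts_queue.put((part, dict(speaker_wav=speaker_wav_path, language=lang, speed=speed), result_q))
# outputs = result_q.get()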