Description
```
Traceback (most recent call last):
  File "/workspace/StyleTTS2/train_finetune.py", line 707, in <module>
    main()
  File "/usr/local/lib/python3.10/dist-packages/click/core.py", line 1157, in __call__
    return self.main(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/click/core.py", line 1078, in main
    rv = self.invoke(ctx)
  File "/usr/local/lib/python3.10/dist-packages/click/core.py", line 1434, in invoke
    return ctx.invoke(self.callback, **ctx.params)
  File "/usr/local/lib/python3.10/dist-packages/click/core.py", line 783, in invoke
    return __callback(*args, **kwargs)
  File "/workspace/StyleTTS2/train_finetune.py", line 487, in main
    slm_out = slmadv(i,
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "/workspace/StyleTTS2/Modules/slmadv.py", line 138, in forward
    y_pred = self.model.decoder(en, F0_fake, N_fake, sp[:, :128])
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/parallel/data_parallel.py", line 184, in forward
    replicas = self.replicate(self.module, self.device_ids[:len(inputs)])
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/parallel/data_parallel.py", line 189, in replicate
    return replicate(module, device_ids, not torch.is_grad_enabled())
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/parallel/replicate.py", line 110, in replicate
    param_copies = _broadcast_coalesced_reshape(params, devices, detach)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/parallel/replicate.py", line 83, in _broadcast_coalesced_reshape
    tensor_copies = Broadcast.apply(devices, *tensors)
  File "/usr/local/lib/python3.10/dist-packages/torch/autograd/function.py", line 539, in apply
    return super().apply(*args, **kwargs)  # type: ignore[misc]
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/parallel/_functions.py", line 23, in forward
    outputs = comm.broadcast_coalesced(inputs, ctx.target_gpus)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/parallel/comm.py", line 57, in broadcast_coalesced
    return torch._C._broadcast_coalesced(tensors, devices, buffer_size)
RuntimeError: NCCL Error 3: internal error - please report this issue to the NCCL developers
```
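The crash happens while `nn.DataParallel` replicates the decoder across GPUs: the SLM adversarial step in `Modules/slmadv.py` calls the decoder, and `replicate()` broadcasts its parameters over NCCL, which fails with an internal error. A minimal sketch to isolate whether NCCL itself is broken on the machine, independent of StyleTTS2 (assumptions: a CUDA box; the helper name `check_nccl_broadcast` is hypothetical); it exercises the same `broadcast_coalesced` call that appears at the bottom of the traceback:

```python
# Sketch: reproduce the DataParallel broadcast path outside StyleTTS2.
# If this also raises "NCCL Error 3", the NCCL/driver setup is at fault
# rather than train_finetune.py.
import torch
import torch.cuda.comm as comm


def check_nccl_broadcast() -> None:
    n = torch.cuda.device_count()
    print(f"visible GPUs: {n}, NCCL version: {torch.cuda.nccl.version()}")
    if n < 2:
        print("fewer than 2 GPUs visible; DataParallel would not broadcast")
        return
    devices = list(range(n))
    # A few dummy "parameter" tensors on the source device, standing in for
    # the decoder weights that DataParallel copies to every replica.
    tensors = [torch.randn(1024, device=devices[0]) for _ in range(4)]
    # Same call that fails inside nn.DataParallel.replicate().
    copies = comm.broadcast_coalesced(tensors, devices)
    print("broadcast_coalesced ok:", [c[0].device for c in copies])


if __name__ == "__main__":
    check_nccl_broadcast()
```

Running either this sketch or the training script with `NCCL_DEBUG=INFO` set (an environment variable NCCL itself reads) should print the underlying NCCL failure. As a workaround, restricting training to a single GPU with `CUDA_VISIBLE_DEVICES=0` skips the `DataParallel` replicate/broadcast path entirely.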