在使用 NeMo Docker 镜像进行训练时，出现了以下错误。已确认数据配置正确，并且是按照官方代码进行训练的，请问该错误出现的原因是什么？
- Validation sanity check: 0it [00:00, ?it/s]Traceback (most recent call last):
- File "train.py", line 26, in <module>
- trainer.fit(quartznet)#调用‘fit’方法开始训练
- File "/opt/conda/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py", line 445, in fit
- results = self.accelerator_backend.train()
- File "/opt/conda/lib/python3.6/site-packages/pytorch_lightning/accelerators/gpu_accelerator.py", line 64, in train
- results = self.train_or_test()
- File "/opt/conda/lib/python3.6/site-packages/pytorch_lightning/accelerators/accelerator.py", line 66, in train_or_test
- results = self.trainer.train()
- File "/opt/conda/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py", line 467, in train
- self.run_sanity_check(self.get_model())
- File "/opt/conda/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py", line 659, in run_sanity_check
- _, eval_results = self.run_evaluation(test_mode=False, max_batches=self.num_sanity_val_batches)
- File "/opt/conda/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py", line 579, in run_evaluation
- output = self.evaluation_loop.evaluation_step(test_mode, batch, batch_idx, dataloader_idx)
- File "/opt/conda/lib/python3.6/site-packages/pytorch_lightning/trainer/evaluation_loop.py", line 171, in evaluation_step
- output = self.trainer.accelerator_backend.validation_step(args)
- File "/opt/conda/lib/python3.6/site-packages/pytorch_lightning/accelerators/gpu_accelerator.py", line 88, in validation_step
- output = self.__validation_step(args)
- File "/opt/conda/lib/python3.6/site-packages/pytorch_lightning/accelerators/gpu_accelerator.py", line 96, in __validation_step
- output = self.trainer.model.validation_step(*args)
- File "/opt/conda/lib/python3.6/site-packages/nemo/collections/asr/models/ctc_models.py", line 442, in validation_step
- log_probs, encoded_len, predictions = self.forward(input_signal=signal, input_signal_length=signal_len)
- File "/opt/conda/lib/python3.6/site-packages/nemo/core/classes/common.py", line 511, in __call__
- outputs = wrapped(*args, **kwargs)
- File "/opt/conda/lib/python3.6/site-packages/nemo/collections/asr/models/ctc_models.py", line 396, in forward
- input_signal=input_signal, length=input_signal_length,
- File "/opt/conda/lib/python3.6/site-packages/torch/nn/modules/module.py", line 726, in _call_impl
- result = self.forward(*input, **kwargs)
- File "/opt/conda/lib/python3.6/site-packages/nemo/core/classes/common.py", line 511, in __call__
- outputs = wrapped(*args, **kwargs)
- File "/opt/conda/lib/python3.6/site-packages/torch/autograd/grad_mode.py", line 15, in decorate_context
- return func(*args, **kwargs)
- File "/opt/conda/lib/python3.6/site-packages/nemo/collections/asr/modules/audio_preprocessing.py", line 79, in forward
- processed_signal, processed_length = self.get_features(input_signal, length)
- File "/opt/conda/lib/python3.6/site-packages/nemo/collections/asr/modules/audio_preprocessing.py", line 249, in get_features
- return self.featurizer(input_signal, length)
- File "/opt/conda/lib/python3.6/site-packages/torch/nn/modules/module.py", line 726, in _call_impl
- result = self.forward(*input, **kwargs)
- File "/opt/conda/lib/python3.6/site-packages/torch/autograd/grad_mode.py", line 15, in decorate_context
- return func(*args, **kwargs)
- File "/opt/conda/lib/python3.6/site-packages/nemo/collections/asr/parts/features.py", line 375, in forward
- x = normalize_batch(x, seq_len, normalize_type=self.normalize)
- File "/opt/conda/lib/python3.6/site-packages/nemo/collections/asr/parts/features.py", line 60, in normalize_batch
- "normalize_batch with `per_feature` normalize_type received a tensor of length 1. This will result "
- ValueError: normalize_batch with `per_feature` normalize_type received a tensor of length 1. This will result in torch.std() returning nan
复制代码
|