안녕하세요. 좋은 모델을 배포해 주셔서 감사합니다.
KoBART summarization을 설치한 뒤 fine-tuning을 하기 위해 README에 안내된 아래 명령을 실행했습니다.
[use cpu]
python train.py --gradient_clip_val 1.0 --max_epochs 50 --default_root_dir logs --batch_size 4 --num_workers 4
하지만 Validation sanity check 과정에서 다음과 같은 에러가 발생하였습니다.
INFO:root:Namespace(accelerator=None, accumulate_grad_batches=1, amp_backend='native', amp_level='O2', auto_lr_find=False, auto_scale_batch_size=False, auto_select_gpus=False, batch_size=4, benchmark=False, check_val_every_n_epoch=1, checkpoint_callback=True, checkpoint_path=None, default_root_dir='logs', deterministic=False, distributed_backend=None, fast_dev_run=False, flush_logs_every_n_steps=100, gpus=None, gradient_clip_algorithm='norm', gradient_clip_val=1.0, limit_predict_batches=1.0, limit_test_batches=1.0, limit_train_batches=1.0, limit_val_batches=1.0, log_every_n_steps=50, log_gpu_memory=None, logger=True, lr=3e-05, max_epochs=50, max_len=512, max_steps=None, max_time=None, min_epochs=None, min_steps=None, model_path=None, move_metrics_to_cpu=False, multiple_trainloader_mode='max_size_cycle', num_nodes=1, num_processes=1, num_sanity_val_steps=2, num_workers=4, overfit_batches=0.0, plugins=None, precision=32, prepare_data_per_node=True, process_position=0, profiler=None, progress_bar_refresh_rate=None, reload_dataloaders_every_epoch=False, replace_sampler_ddp=True, resume_from_checkpoint=None, stochastic_weight_avg=False, sync_batchnorm=False, terminate_on_nan=False, test_file='data/test.tsv', tpu_cores=None, track_grad_norm=-1, train_file='data/train.tsv', truncated_bptt_steps=None, val_check_interval=1.0, warmup_ratio=0.1, weights_save_path=None, weights_summary='top')
using cached model
using cached model
using cached model
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
INFO:root:number of workers 4, data length 34242
INFO:root:num_train_steps : 107006
INFO:root:num_warmup_steps : 10700
2021-11-05 10:27:55.060417: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'cudart64_101.dll'; dlerror: cudart64_101.dll not found
2021-11-05 10:27:55.069132: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
| Name | Type | Params
-------------------------------------------------------
0 | model | BartForConditionalGeneration | 123 M
-------------------------------------------------------
123 M Trainable params
0 Non-trainable params
123 M Total params
495.440 Total estimated model params size (MB)
Validation sanity check: 0%| | 0/2 [00:00<?, ?it/s]Traceback (most recent call last):
File "train.py", line 233, in <module>
trainer.fit(model, dm)
File "C:\Users\Newrun\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.7_qbz5n2kfra8p0\LocalCache\local-packages\Python37\site-packages\pytorch_lightning\trainer\trainer.py", line 460, in fit
self._run(model)
File "C:\Users\Newrun\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.7_qbz5n2kfra8p0\LocalCache\local-packages\Python37\site-packages\pytorch_lightning\trainer\trainer.py", line 758, in _run
self.dispatch()
File "C:\Users\Newrun\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.7_qbz5n2kfra8p0\LocalCache\local-packages\Python37\site-packages\pytorch_lightning\trainer\trainer.py", line 799, in dispatch
self.accelerator.start_training(self)
File "C:\Users\Newrun\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.7_qbz5n2kfra8p0\LocalCache\local-packages\Python37\site-packages\pytorch_lightning\accelerators\accelerator.py", line 96, in start_training
self.training_type_plugin.start_training(trainer)
File "C:\Users\Newrun\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.7_qbz5n2kfra8p0\LocalCache\local-packages\Python37\site-packages\pytorch_lightning\plugins\training_type\training_type_plugin.py", line 144, in start_training
self._results = trainer.run_stage()
File "C:\Users\Newrun\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.7_qbz5n2kfra8p0\LocalCache\local-packages\Python37\site-packages\pytorch_lightning\trainer\trainer.py", line 809, in run_stage
return self.run_train()
File "C:\Users\Newrun\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.7_qbz5n2kfra8p0\LocalCache\local-packages\Python37\site-packages\pytorch_lightning\trainer\trainer.py", line 844, in run_train
self.run_sanity_check(self.lightning_module)
File "C:\Users\Newrun\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.7_qbz5n2kfra8p0\LocalCache\local-packages\Python37\site-packages\pytorch_lightning\trainer\trainer.py", line 1112, in run_sanity_check
self.run_evaluation()
File "C:\Users\Newrun\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.7_qbz5n2kfra8p0\LocalCache\local-packages\Python37\site-packages\pytorch_lightning\trainer\trainer.py", line 967, in run_evaluation
output = self.evaluation_loop.evaluation_step(batch, batch_idx, dataloader_idx)
File "C:\Users\Newrun\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.7_qbz5n2kfra8p0\LocalCache\local-packages\Python37\site-packages\pytorch_lightning\trainer\evaluation_loop.py", line 174, in evaluation_step
output = self.trainer.accelerator.validation_step(args)
File "C:\Users\Newrun\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.7_qbz5n2kfra8p0\LocalCache\local-packages\Python37\site-packages\pytorch_lightning\accelerators\accelerator.py", line 226, in validation_step
return self.training_type_plugin.validation_step(*args)
File "C:\Users\Newrun\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.7_qbz5n2kfra8p0\LocalCache\local-packages\Python37\site-packages\pytorch_lightning\plugins\training_type\training_type_plugin.py", line 161, in validation_step
return self.lightning_module.validation_step(*args, **kwargs)
File "train.py", line 195, in validation_step
outs = self(batch)
File "C:\Users\Newrun\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.7_qbz5n2kfra8p0\LocalCache\local-packages\Python37\site-packages\torch\nn\modules\module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "train.py", line 185, in forward
labels=inputs['labels'], return_dict=True)
File "C:\Users\Newrun\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.7_qbz5n2kfra8p0\LocalCache\local-packages\Python37\site-packages\torch\nn\modules\module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "C:\Users\Newrun\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.7_qbz5n2kfra8p0\LocalCache\local-packages\Python37\site-packages\transformers\models\bart\modeling_bart.py", line 1295, in forward
return_dict=return_dict,
File "C:\Users\Newrun\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.7_qbz5n2kfra8p0\LocalCache\local-packages\Python37\site-packages\torch\nn\modules\module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "C:\Users\Newrun\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.7_qbz5n2kfra8p0\LocalCache\local-packages\Python37\site-packages\transformers\models\bart\modeling_bart.py", line 1157, in forward
return_dict=return_dict,
File "C:\Users\Newrun\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.7_qbz5n2kfra8p0\LocalCache\local-packages\Python37\site-packages\torch\nn\modules\module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "C:\Users\Newrun\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.7_qbz5n2kfra8p0\LocalCache\local-packages\Python37\site-packages\transformers\models\bart\modeling_bart.py", line 748, in forward
inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale
File "C:\Users\Newrun\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.7_qbz5n2kfra8p0\LocalCache\local-packages\Python37\site-packages\torch\nn\modules\module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "C:\Users\Newrun\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.7_qbz5n2kfra8p0\LocalCache\local-packages\Python37\site-packages\torch\nn\modules\sparse.py", line 126, in forward
self.norm_type, self.scale_grad_by_freq, self.sparse)
File "C:\Users\Newrun\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.7_qbz5n2kfra8p0\LocalCache\local-packages\Python37\site-packages\torch\nn\functional.py", line 1852, in embedding
return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
RuntimeError: Expected tensor for argument #1 'indices' to have scalar type Long; but got torch.IntTensor instead (while checking arguments for embedding)
정상적으로 작동하게 하려면 어떻게 해야 할까요?
감사합니다