❓ Questions and Help
Hi,
While trying to run the training code with the m4c model, I get the following error:
2021-03-11T03:34:15 | mmf.utils.general: Total Parameters: 90850184. Trained Parameters: 90850184
2021-03-11T03:34:15 | mmf.trainers.core.training_loop: Starting training...
Traceback (most recent call last):
File "C:\Users\kvman\anaconda3\envs\mmf\Scripts\mmf_run-script.py", line 33, in <module>
sys.exit(load_entry_point('mmf', 'console_scripts', 'mmf_run')())
File "d:\project\new folder\mmf\mmf_cli\run.py", line 133, in run
main(configuration, predict=predict)
File "d:\project\new folder\mmf\mmf_cli\run.py", line 56, in main
trainer.train()
File "d:\project\new folder\mmf\mmf\trainers\mmf_trainer.py", line 132, in train
self.training_loop()
File "d:\project\new folder\mmf\mmf\trainers\core\training_loop.py", line 31, in training_loop
self.run_training_epoch()
File "d:\project\new folder\mmf\mmf\trainers\core\training_loop.py", line 74, in run_training_epoch
for idx, batch in enumerate(self.train_loader):
File "C:\Users\kvman\anaconda3\envs\mmf\lib\site-packages\torch\utils\data\dataloader.py", line 363, in __next__
data = self._next_data()
File "C:\Users\kvman\anaconda3\envs\mmf\lib\site-packages\torch\utils\data\dataloader.py", line 989, in _next_data
return self._process_data(data)
File "C:\Users\kvman\anaconda3\envs\mmf\lib\site-packages\torch\utils\data\dataloader.py", line 1014, in _process_data
data.reraise()
File "C:\Users\kvman\anaconda3\envs\mmf\lib\site-packages\torch\_utils.py", line 395, in reraise
raise self.exc_type(msg)
KeyError: Caught KeyError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "d:\project\new folder\mmf\mmf\datasets\databases\readers\feature_readers.py", line 231, in _load
image_id = int(split.split("_")[-1])
ValueError: invalid literal for int() with base 10: 'train\7f14a505b6edcbc5'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\kvman\anaconda3\envs\mmf\lib\site-packages\torch\utils\data\_utils\worker.py", line 185, in _worker_loop
data = fetcher.fetch(index)
File "C:\Users\kvman\anaconda3\envs\mmf\lib\site-packages\torch\utils\data\_utils\fetch.py", line 44, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "C:\Users\kvman\anaconda3\envs\mmf\lib\site-packages\torch\utils\data\_utils\fetch.py", line 44, in <listcomp>
data = [self.dataset[idx] for idx in possibly_batched_index]
File "C:\Users\kvman\anaconda3\envs\mmf\lib\site-packages\torch\utils\data\dataset.py", line 207, in __getitem__
return self.datasets[dataset_idx][sample_idx]
File "d:\project\new folder\mmf\mmf\datasets\builders\textvqa\dataset.py", line 100, in __getitem__
features = self.features_db[idx]
File "d:\project\new folder\mmf\mmf\datasets\databases\features_database.py", line 91, in __getitem__
return self.get(image_info)
File "d:\project\new folder\mmf\mmf\datasets\databases\features_database.py", line 99, in get
return self.from_path(feature_path)
File "d:\project\new folder\mmf\mmf\datasets\databases\features_database.py", line 107, in from_path
features, infos = self._get_image_features_and_info(path)
File "d:\project\new folder\mmf\mmf\datasets\databases\features_database.py", line 80, in _get_image_features_and_info
image_feats, infos = self._read_features_and_info(feat_file)
File "d:\project\new folder\mmf\mmf\datasets\databases\features_database.py", line 65, in _read_features_and_info
feature, info = feature_reader.read(feat_file)
File "d:\project\new folder\mmf\mmf\datasets\databases\readers\feature_readers.py", line 95, in read
return self.feat_reader.read(image_feat_path)
File "d:\project\new folder\mmf\mmf\datasets\databases\readers\feature_readers.py", line 158, in read
image_info = self._load(image_feat_path)
File "d:\project\new folder\mmf\mmf\datasets\databases\readers\feature_readers.py", line 238, in _load
img_id_idx = self.image_id_indices[image_id]
KeyError: b'train\7f14a505b6edcbc5'
When I try with model = "Lorra" instead, I get the following error:
2021-03-11T03:27:37 | mmf.utils.general: Total Parameters: 192497485. Trained Parameters: 192497485
2021-03-11T03:27:37 | mmf.trainers.core.training_loop: Starting training...
Traceback (most recent call last):
File "C:\Users\kvman\anaconda3\envs\mmf\Scripts\mmf_run-script.py", line 33, in <module>
sys.exit(load_entry_point('mmf', 'console_scripts', 'mmf_run')())
File "d:\project\new folder\mmf\mmf_cli\run.py", line 133, in run
main(configuration, predict=predict)
File "d:\project\new folder\mmf\mmf_cli\run.py", line 56, in main
trainer.train()
File "d:\project\new folder\mmf\mmf\trainers\mmf_trainer.py", line 132, in train
self.training_loop()
File "d:\project\new folder\mmf\mmf\trainers\core\training_loop.py", line 31, in training_loop
self.run_training_epoch()
File "d:\project\new folder\mmf\mmf\trainers\core\training_loop.py", line 74, in run_training_epoch
for idx, batch in enumerate(self.train_loader):
File "d:\project\new folder\mmf\mmf\datasets\multi_dataset_loader.py", line 213, in __iter__
return iter(self.loaders[0])
File "C:\Users\kvman\anaconda3\envs\mmf\lib\site-packages\torch\utils\data\dataloader.py", line 291, in __iter__
return _MultiProcessingDataLoaderIter(self)
File "C:\Users\kvman\anaconda3\envs\mmf\lib\site-packages\torch\utils\data\dataloader.py", line 737, in __init__
w.start()
File "C:\Users\kvman\anaconda3\envs\mmf\lib\multiprocessing\process.py", line 112, in start
self._popen = self._Popen(self)
File "C:\Users\kvman\anaconda3\envs\mmf\lib\multiprocessing\context.py", line 223, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "C:\Users\kvman\anaconda3\envs\mmf\lib\multiprocessing\context.py", line 322, in _Popen
return Popen(process_obj)
File "C:\Users\kvman\anaconda3\envs\mmf\lib\multiprocessing\popen_spawn_win32.py", line 89, in __init__
reduction.dump(process_obj, to_child)
File "C:\Users\kvman\anaconda3\envs\mmf\lib\multiprocessing\reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
BrokenPipeError: [Errno 32] Broken pipe
Kindly help me to resolve this issue.
needs more info triaged