- PyTorch-Forecasting version: 0.8.3
- PyTorch version: 1.7.1
- Python version: 3.7.9
- Operating System: Linux
### Expected behaviour
I am training a simple time series forecasting model on a temperature prediction problem, replicating the Stallion tutorial code for my dataset. Training should run without errors, but I get an index-out-of-bounds error as soon as training starts, and I am unable to debug it.
### Code to reproduce the problem
### Dataset and dataloaders
```python
max_prediction_length = 6
max_encoder_length = 24
training_cutoff = train_data["time_idx"].max() - max_prediction_length

training = TimeSeriesDataSet(
    train_data[lambda x: x.time_idx <= training_cutoff],
    time_idx="time_idx",
    target="meantemp",
    group_ids=["group"],
    min_encoder_length=max_encoder_length // 2,  # keep encoder length long (as it is in the validation set)
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,
    time_varying_known_categoricals=["day", "month"],
    time_varying_known_reals=["time_idx", "humidity", "wind_speed"],
    time_varying_unknown_reals=["meantemp"],
    # target_normalizer=GroupNormalizer(
    #     groups=["group"], transformation="softplus"
    # ),  # use softplus and normalize by group
    target_normalizer=EncoderNormalizer(transformation="softplus"),  # use softplus and normalize over the encoder sequence
    add_relative_time_idx=True,
    allow_missings=True,
    add_target_scales=True,
    add_encoder_length=True,
)

validation = TimeSeriesDataSet.from_dataset(
    training,
    train_data,
    predict=True,
    min_prediction_idx=training_cutoff + 1,
    stop_randomization=False,
)

# create dataloaders for the model
batch_size = 32  # set this between 32 and 128
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size, num_workers=4)
```
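For debugging, one can check whether the dataloader ever yields an encoder length above `max_encoder_length` (a diagnostic sketch, assuming each batch is an `(x, y)` tuple whose `x` dict carries an `"encoder_lengths"` tensor, as the traceback below suggests):

```python
# Diagnostic sketch: scan the first few batches for encoder lengths that
# exceed max_encoder_length (24). A length of 25 would overflow the
# 25-slot histogram that interpret_output builds (see traceback below).
for i, (x, y) in enumerate(train_dataloader):
    max_len = x["encoder_lengths"].max().item()
    print(f"batch {i}: max encoder length = {max_len}")
    if i >= 9:  # the first 10 batches are enough for a spot check
        break
```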
### Training code
```python
early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=10, verbose=False, mode="min")
lr_logger = LearningRateMonitor()  # log the learning rate
logger = TensorBoardLogger("lightning_logs")  # log results to tensorboard

trainer = pl.Trainer(
    max_epochs=30,
    gpus=0,
    weights_summary="top",
    gradient_clip_val=0.1,
    limit_train_batches=30,  # comment in for training, running validation every 30 batches
    # fast_dev_run=True,  # comment in to check that network or dataset has no serious bugs
    callbacks=[lr_logger, early_stop_callback],
    logger=logger,
)

tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.1,
    hidden_size=16,
    attention_head_size=1,
    dropout=0.1,
    hidden_continuous_size=8,
    output_size=7,  # 7 quantiles by default
    loss=QuantileLoss(),
    log_interval=10,  # set to 0 when using the learning rate finder; 10 logs every 10 batches
    reduce_on_plateau_patience=4,
)
print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")

# fit network
trainer.fit(
    tft,
    train_dataloader=train_dataloader,
    val_dataloaders=val_dataloader,
)
```
The error is raised inside `trainer.fit`. Full traceback:
```
RuntimeError                              Traceback (most recent call last)
<ipython-input-...> in <module>
3 tft,
4 train_dataloader=train_dataloader,
----> 5 val_dataloaders=val_dataloader,
6 )
~/miniconda3/envs/torch/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py in fit(self, model, train_dataloader, val_dataloaders, datamodule)
508 self.call_hook('on_fit_start')
509
--> 510 results = self.accelerator_backend.train()
511 self.accelerator_backend.teardown()
512
~/miniconda3/envs/torch/lib/python3.7/site-packages/pytorch_lightning/accelerators/accelerator.py in train(self)
55 def train(self):
56 self.trainer.setup_trainer(self.trainer.model)
---> 57 return self.train_or_test()
58
59 def teardown(self):
~/miniconda3/envs/torch/lib/python3.7/site-packages/pytorch_lightning/accelerators/accelerator.py in train_or_test(self)
72 else:
73 self.trainer.train_loop.setup_training()
---> 74 results = self.trainer.train()
75 return results
76
~/miniconda3/envs/torch/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py in train(self)
559 with self.profiler.profile("run_training_epoch"):
560 # run train epoch
--> 561 self.train_loop.run_training_epoch()
562
563 if self.max_steps and self.max_steps <= self.global_step:
~/miniconda3/envs/torch/lib/python3.7/site-packages/pytorch_lightning/trainer/training_loop.py in run_training_epoch(self)
548 # ------------------------------------
549 with self.trainer.profiler.profile("run_training_batch"):
--> 550 batch_output = self.run_training_batch(batch, batch_idx, dataloader_idx)
551
552 # when returning -1 from train_step, we end epoch early
~/miniconda3/envs/torch/lib/python3.7/site-packages/pytorch_lightning/trainer/training_loop.py in run_training_batch(self, batch, batch_idx, dataloader_idx)
716
717 # optimizer step
--> 718 self.optimizer_step(optimizer, opt_idx, batch_idx, train_step_and_backward_closure)
719
720 else:
~/miniconda3/envs/torch/lib/python3.7/site-packages/pytorch_lightning/trainer/training_loop.py in optimizer_step(self, optimizer, opt_idx, batch_idx, train_step_and_backward_closure)
491 on_tpu=self.trainer.use_tpu and TPU_AVAILABLE,
492 using_native_amp=using_native_amp,
--> 493 using_lbfgs=is_lbfgs,
494 )
495
~/miniconda3/envs/torch/lib/python3.7/site-packages/pytorch_lightning/core/lightning.py in optimizer_step(self, epoch, batch_idx, optimizer, optimizer_idx, optimizer_closure, on_tpu, using_native_amp, using_lbfgs)
1296
1297 """
-> 1298 optimizer.step(closure=optimizer_closure)
1299
1300 def optimizer_zero_grad(
~/miniconda3/envs/torch/lib/python3.7/site-packages/pytorch_lightning/core/optimizer.py in step(self, closure, make_optimizer_step, *args, **kwargs)
284
285 if make_optimizer_step:
--> 286 self.__optimizer_step(*args, closure=closure, profiler_name=profiler_name, **kwargs)
287 else:
288 # make sure to call optimizer_closure when accumulating
~/miniconda3/envs/torch/lib/python3.7/site-packages/pytorch_lightning/core/optimizer.py in __optimizer_step(self, closure, profiler_name, *args, **kwargs)
142 else:
143 with trainer.profiler.profile(profiler_name):
--> 144 optimizer.step(closure=closure, *args, **kwargs)
145
146 accelerator_backend = trainer.accelerator_backend
~/miniconda3/envs/torch/lib/python3.7/site-packages/pytorch_forecasting/optim.py in step(self, closure)
129 closure: A closure that reevaluates the model and returns the loss.
130 """
--> 131 _ = closure()
132 loss = None
133 # note - below is commented out b/c I have other work that passes back
~/miniconda3/envs/torch/lib/python3.7/site-packages/pytorch_lightning/trainer/training_loop.py in train_step_and_backward_closure()
711 opt_idx,
712 optimizer,
--> 713 self.trainer.hiddens
714 )
715 return None if result is None else result.loss
~/miniconda3/envs/torch/lib/python3.7/site-packages/pytorch_lightning/trainer/training_loop.py in training_step_and_backward(self, split_batch, batch_idx, opt_idx, optimizer, hiddens)
804 with self.trainer.profiler.profile("training_step_and_backward"):
805 # lightning module hook
--> 806 result = self.training_step(split_batch, batch_idx, opt_idx, hiddens)
807 self._curr_step_result = result
808
~/miniconda3/envs/torch/lib/python3.7/site-packages/pytorch_lightning/trainer/training_loop.py in training_step(self, split_batch, batch_idx, opt_idx, hiddens)
317 model_ref._current_fx_name = 'training_step'
318 model_ref._results = Result()
--> 319 training_step_output = self.trainer.accelerator_backend.training_step(args)
320 self.trainer.logger_connector.cache_logged_metrics()
321
~/miniconda3/envs/torch/lib/python3.7/site-packages/pytorch_lightning/accelerators/cpu_accelerator.py in training_step(self, args)
60
61 def training_step(self, args):
---> 62 return self._step(self.trainer.model.training_step, args)
63
64 def validation_step(self, args):
~/miniconda3/envs/torch/lib/python3.7/site-packages/pytorch_lightning/accelerators/cpu_accelerator.py in _step(self, model_step, args)
56 output = model_step(*args)
57 else:
---> 58 output = model_step(*args)
59 return output
60
~/miniconda3/envs/torch/lib/python3.7/site-packages/pytorch_forecasting/models/base_model.py in training_step(self, batch, batch_idx)
263 """
264 x, y = batch
--> 265 log, _ = self.step(x, y, batch_idx)
266 # log loss
267 self.log("train_loss", log["loss"], on_step=True, on_epoch=True, prog_bar=True)
~/miniconda3/envs/torch/lib/python3.7/site-packages/pytorch_forecasting/models/temporal_fusion_transformer/__init__.py in step(self, x, y, batch_idx)
545 """
546 # extract data and run model
--> 547 log, out = super().step(x, y, batch_idx)
548 # calculate interpretations etc for latter logging
549 if self.log_interval > 0:
~/miniconda3/envs/torch/lib/python3.7/site-packages/pytorch_forecasting/models/base_model.py in step(self, x, y, batch_idx, **kwargs)
378 self.log_metrics(x, y, out)
379 if self.log_interval > 0:
--> 380 self.log_prediction(x, out, batch_idx)
381 log = {"loss": loss, "n_samples": x["decoder_lengths"].size(0)}
382
~/miniconda3/envs/torch/lib/python3.7/site-packages/pytorch_forecasting/models/base_model.py in log_prediction(self, x, out, batch_idx)
487 log_indices = [0]
488 for idx in log_indices:
--> 489 fig = self.plot_prediction(x, out, idx=idx, add_loss_to_title=True)
490 tag = f"{['Val', 'Train'][self.training]} prediction"
491 if self.training:
~/miniconda3/envs/torch/lib/python3.7/site-packages/pytorch_forecasting/models/temporal_fusion_transformer/__init__.py in plot_prediction(self, x, out, idx, plot_attention, add_loss_to_title, show_future_observed, ax)
708 # add attention on secondary axis
709 if plot_attention:
--> 710 interpretation = self.interpret_output(out)
711 for f in to_list(fig):
712 ax = f.axes[0]
~/miniconda3/envs/torch/lib/python3.7/site-packages/pytorch_forecasting/models/temporal_fusion_transformer/__init__.py in interpret_output(self, out, reduction, attention_prediction_horizon, attention_as_autocorrelation)
596
597 # histogram of decode and encode lengths
--> 598 encoder_length_histogram = integer_histogram(out["encoder_lengths"], min=0, max=self.hparams.max_encoder_length)
599 decoder_length_histogram = integer_histogram(
600 out["decoder_lengths"], min=1, max=out["decoder_variables"].size(1)
~/miniconda3/envs/torch/lib/python3.7/site-packages/pytorch_forecasting/utils.py in integer_histogram(data, min, max)
32 max = uniques.max()
33 hist = torch.zeros(max - min + 1, dtype=torch.long, device=data.device).scatter(
---> 34 dim=0, index=uniques - min, src=counts
35 )
36 return hist
RuntimeError: index 25 is out of bounds for dimension 0 with size 25
```
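For context, `integer_histogram` allocates `max - min + 1` slots; with `min=0` and `max=max_encoder_length=24` that is 25 slots (valid indices 0 to 24), so a single sample with an encoder length of 25 pushes the `scatter` index out of bounds. A standalone sketch of the utility with a hypothetical batch reproduces the exact error:

```python
import torch

# Simplified copy of pytorch_forecasting.utils.integer_histogram:
# counts occurrences of each integer in [min, max] via scatter.
def integer_histogram(data, min, max):
    uniques, counts = torch.unique(data, return_counts=True)
    return torch.zeros(max - min + 1, dtype=torch.long).scatter(
        dim=0, index=uniques - min, src=counts
    )

# Hypothetical batch: one encoder length of 25 while max_encoder_length=24.
encoder_lengths = torch.tensor([12, 24, 25])
integer_histogram(encoder_lengths, min=0, max=24)
# -> RuntimeError: index 25 is out of bounds for dimension 0 with size 25
```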
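Since the crash happens only inside the plotting path guarded by `if self.log_interval > 0` (see `base_model.step` in the traceback), a possible stopgap (untested, and not a fix for whatever produces the over-long encoder length in the first place) is to disable prediction logging. Whether the root cause is related to `allow_missings=True` is unclear to me.

```python
# Hypothetical stopgap: with log_interval <= 0, base_model.step never calls
# log_prediction, so the interpret_output call that raises is skipped.
# The underlying encoder-length inconsistency remains, however.
tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.1,
    hidden_size=16,
    attention_head_size=1,
    dropout=0.1,
    hidden_continuous_size=8,
    output_size=7,
    loss=QuantileLoss(),
    log_interval=-1,  # disable prediction logging / plotting
    reduce_on_plateau_patience=4,
)
```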