Error passing CUDA tensor to nn.utils.rnn.pack_padded_sequence
See original GitHub issue
- PyTorch-Forecasting version: v0.5.3
- PyTorch version: 1.7.0
- Python version: 3.7.9
- Operating System: Ubuntu 20.04.1 LTS
Expected behavior
I ran the code below to find the optimal learning rate (or to fit the network) and expected to get the results shown at pytorch-forecasting.readthedocs.io. The only difference was passing gpus=1 to pl.Trainer.
# configure network and trainer
pl.seed_everything(42)
trainer = pl.Trainer(
gpus=1,
# clipping gradients is a hyperparameter and important to prevent divergence
# of the gradient for recurrent neural networks
gradient_clip_val=0.1,
)
tft = TemporalFusionTransformer.from_dataset(
training,
# not meaningful for finding the learning rate but otherwise very important
learning_rate=0.03,
hidden_size=16, # most important hyperparameter apart from learning rate
# number of attention heads. Set to up to 4 for large datasets
attention_head_size=1,
dropout=0.1, # between 0.1 and 0.3 are good values
hidden_continuous_size=8, # set to <= hidden_size
output_size=7, # 7 quantiles by default
loss=QuantileLoss(),
# reduce learning rate if no improvement in validation loss after x epochs
reduce_on_plateau_patience=4,
)
print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")
# find optimal learning rate
res = trainer.tuner.lr_find(
tft,
train_dataloader=train_dataloader,
val_dataloaders=val_dataloader,
max_lr=10.0,
min_lr=1e-6,
)
print(f"suggested learning rate: {res.suggestion()}")
fig = res.plot(show=True, suggest=True)
fig.show()
Actual behavior
However, it raises a RuntimeError, as shown below:
RuntimeError Traceback (most recent call last)
<ipython-input-11-a92b5627800b> in <module>
5 val_dataloaders=val_dataloader,
6 max_lr=10.0,
----> 7 min_lr=1e-6,
8 )
9
~/repo/emart-promo/env/lib/python3.7/site-packages/pytorch_lightning/tuner/tuning.py in lr_find(self, model, train_dataloader, val_dataloaders, min_lr, max_lr, num_training, mode, early_stop_threshold, datamodule)
128 mode,
129 early_stop_threshold,
--> 130 datamodule,
131 )
132
~/repo/emart-promo/env/lib/python3.7/site-packages/pytorch_lightning/tuner/lr_finder.py in lr_find(trainer, model, train_dataloader, val_dataloaders, min_lr, max_lr, num_training, mode, early_stop_threshold, datamodule)
173 train_dataloader=train_dataloader,
174 val_dataloaders=val_dataloaders,
--> 175 datamodule=datamodule)
176
177 # Prompt if we stopped early
~/repo/emart-promo/env/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py in fit(self, model, train_dataloader, val_dataloaders, datamodule)
437 self.call_hook('on_fit_start')
438
--> 439 results = self.accelerator_backend.train()
440 self.accelerator_backend.teardown()
441
~/repo/emart-promo/env/lib/python3.7/site-packages/pytorch_lightning/accelerators/gpu_accelerator.py in train(self)
52
53 # train or test
---> 54 results = self.train_or_test()
55 return results
56
~/repo/emart-promo/env/lib/python3.7/site-packages/pytorch_lightning/accelerators/accelerator.py in train_or_test(self)
64 results = self.trainer.run_test()
65 else:
---> 66 results = self.trainer.train()
67 return results
68
~/repo/emart-promo/env/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py in train(self)
459
460 def train(self):
--> 461 self.run_sanity_check(self.get_model())
462
463 # enable train mode
~/repo/emart-promo/env/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py in run_sanity_check(self, ref_model)
645
646 # run eval step
--> 647 _, eval_results = self.run_evaluation(test_mode=False, max_batches=self.num_sanity_val_batches)
648
649 # allow no returns from eval
~/repo/emart-promo/env/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py in run_evaluation(self, test_mode, max_batches)
565
566 # lightning module methods
--> 567 output = self.evaluation_loop.evaluation_step(test_mode, batch, batch_idx, dataloader_idx)
568 output = self.evaluation_loop.evaluation_step_end(output)
569
~/repo/emart-promo/env/lib/python3.7/site-packages/pytorch_lightning/trainer/evaluation_loop.py in evaluation_step(self, test_mode, batch, batch_idx, dataloader_idx)
169 output = self.trainer.accelerator_backend.test_step(args)
170 else:
--> 171 output = self.trainer.accelerator_backend.validation_step(args)
172
173 # track batch size for weighted average
~/repo/emart-promo/env/lib/python3.7/site-packages/pytorch_lightning/accelerators/gpu_accelerator.py in validation_step(self, args)
76 output = self.__validation_step(args)
77 else:
---> 78 output = self.__validation_step(args)
79
80 return output
~/repo/emart-promo/env/lib/python3.7/site-packages/pytorch_lightning/accelerators/gpu_accelerator.py in __validation_step(self, args)
84 batch = self.to_device(batch)
85 args[0] = batch
---> 86 output = self.trainer.model.validation_step(*args)
87 return output
88
~/repo/emart-promo/env/lib/python3.7/site-packages/pytorch_forecasting/models/base_model.py in validation_step(self, batch, batch_idx)
138 def validation_step(self, batch, batch_idx):
139 x, y = batch
--> 140 log, _ = self.step(x, y, batch_idx, label="val") # log loss
141 self.log("val_loss", log["loss"], on_step=False, on_epoch=True, prog_bar=True)
142 return log
~/repo/emart-promo/env/lib/python3.7/site-packages/pytorch_forecasting/models/temporal_fusion_transformer/__init__.py in step(self, x, y, batch_idx, label)
566 """
567 # extract data and run model
--> 568 log, out = super().step(x, y, batch_idx, label=label)
569 # calculate interpretations etc for latter logging
570 if self.log_interval(label == "train") > 0:
~/repo/emart-promo/env/lib/python3.7/site-packages/pytorch_forecasting/models/base_model.py in step(self, x, y, batch_idx, label)
194 loss = loss * (1 + monotinicity_loss)
195 else:
--> 196 out = self(x)
197 out["prediction"] = self.transform_output(out)
198
~/repo/emart-promo/env/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
~/repo/emart-promo/env/lib/python3.7/site-packages/pytorch_forecasting/models/temporal_fusion_transformer/__init__.py in forward(self, x)
489 encoder_output, (hidden, cell) = self.lstm_encoder(
490 rnn.pack_padded_sequence(
--> 491 embeddings_varying_encoder, lstm_encoder_lengths, enforce_sorted=False, batch_first=True
492 ),
493 (input_hidden, input_cell),
~/repo/emart-promo/env/lib/python3.7/site-packages/torch/nn/utils/rnn.py in pack_padded_sequence(input, lengths, batch_first, enforce_sorted)
242
243 data, batch_sizes = \
--> 244 _VF._pack_padded_sequence(input, lengths, batch_first)
245 return _packed_sequence_init(data, batch_sizes, sorted_indices, None)
246
RuntimeError: 'lengths' argument should be a 1D CPU int64 tensor, but got 1D cuda:0 Long tensor
Seems related to these issues:
Issue Analytics
- State:
- Created 3 years ago
- Comments:6 (2 by maintainers)
Top Results From Across the Web
Error with Lengths in pack_padded_sequence - PyTorch Forums
Hi, I am trying to set up an RNN capable of utilizing a GPU but packed_padded_sequence gives me a RuntimeError: 'lengths' argument should...
Read more >
python - Pytorch with CUDA throws RuntimeError when using ...
If I understand well, pack_padded_sequence need the tensor to be on CPU rather than GPU. Unfortunately the pack_padded_sequence is called by my ...
Read more >
torch.nn.utils.rnn
Batch sizes represent the number elements at each sequence step in the batch, not the varying sequence lengths passed to :func:`pack_padded_sequence`.
Read more >
PyTorch Release v1.2.0 | Exxact Blog
Sparse Tensors: in-place shape modifications of Dense Tensor Constructor Arguments will no longer modify the Sparse Tensor itself (20614).
Read more >
module 2 - | notebook.community
print("CUDA Version: ") print(torch.version.cuda) print("cuDNN version is: ... Tensor(), we are unable to pass a dtype to the constructor.
Read more >
Top Related Medium Post
No results found
Top Related StackOverflow Question
No results found
Troubleshoot Live Code
Lightrun enables developers to add logs, metrics and snapshots to live code - no restarts or redeploys required.
Start Free
Top Related Reddit Thread
No results found
Top Related Hackernoon Post
No results found
Top Related Tweet
No results found
Top Related Dev.to Post
No results found
Top Related Hashnode Post
No results found
Good point. Thanks for reporting this! The behaviour might have changed with PyTorch 1.7 again. Currently there are no tests on GPU (and it used to work a couple of weeks ago) but I am planning some soonish.
Missed this one. Fixed in #169