PyTorch Lightning adapter fails to log hyperparams when a git repo is initialized
🐛 Bug
Starting from version 3.6, the PyTorch Lightning adapter fails to log hyperparameters when a git repo is initialized.
To reproduce
1 - git init
2 - Run any PyTorch Lightning script that logs hyperparameters through AimLogger, for example:
import os
import torch
from torch import nn
import torch.nn.functional as F
from torchvision import transforms
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader, random_split
import pytorch_lightning as pl
from aim.pytorch_lightning import AimLogger
from aim import Run, Figure
from typing import Optional, Any
from pytorch_lightning.callbacks import RichProgressBar, ModelCheckpoint
class MNISTDataModule(pl.LightningDataModule):
    def __init__(self, data_dir: str = "./"):
        super().__init__()
        self.data_dir = data_dir
        self.transform = transforms.Compose(
            [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
        )
        # Setting default dims here because we know them.
        # Could optionally be assigned dynamically in dm.setup()
        self.dims = (1, 28, 28)

    def prepare_data(self):
        # download
        MNIST(self.data_dir, train=True, download=True)
        MNIST(self.data_dir, train=False, download=True)

    def setup(self, stage: Optional[str] = None):
        # Assign train/val datasets for use in dataloaders
        if stage == "fit" or stage is None:
            mnist_full = MNIST(self.data_dir, train=True, transform=self.transform)
            self.mnist_train, self.mnist_val = random_split(mnist_full, [55000, 5000])
            # Optionally...
            # self.dims = tuple(self.mnist_train[0][0].shape)

        # Assign test dataset for use in dataloader(s)
        if stage == "test" or stage is None:
            self.mnist_test = MNIST(self.data_dir, train=False, transform=self.transform)
            # Optionally...
            # self.dims = tuple(self.mnist_test[0][0].shape)

    def train_dataloader(self):
        return DataLoader(self.mnist_train, batch_size=32, num_workers=32)

    def val_dataloader(self):
        return DataLoader(self.mnist_val, batch_size=32, num_workers=32)

    def test_dataloader(self):
        return DataLoader(self.mnist_test, batch_size=32, num_workers=32)
class LitAutoEncoder(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.encoder = nn.Sequential(nn.Linear(28 * 28, 64), nn.ReLU(), nn.Linear(64, 3))
        self.decoder = nn.Sequential(nn.Linear(3, 64), nn.ReLU(), nn.Linear(64, 28 * 28))

    def forward(self, x):
        # in lightning, forward defines the prediction/inference actions
        embedding = self.encoder(x)
        return embedding

    def training_step(self, batch, batch_idx):
        # training_step defines the train loop.
        # It is independent of forward
        x, y = batch
        x = x.view(x.size(0), -1)
        z = self.encoder(x)
        x_hat = self.decoder(z)
        loss = F.mse_loss(x_hat, x)
        # Logging to TensorBoard by default
        self.log("train_loss", loss)
        return loss

    def validation_step(self, batch, batch_idx):
        # validation_step defines the validation loop.
        # It is independent of forward
        x, y = batch
        x = x.view(x.size(0), -1)
        z = self.encoder(x)
        x_hat = self.decoder(z)
        loss = F.mse_loss(x_hat, x)
        self.log("val_loss", loss)
        return {"val_loss": loss}

    def test_step(self, batch, batch_idx):
        # test_step defines the test loop.
        # It is independent of forward
        x, y = batch
        x = x.view(x.size(0), -1)
        z = self.encoder(x)
        x_hat = self.decoder(z)
        loss = F.mse_loss(x_hat, x)
        # Logging to TensorBoard by default
        self.log("test_loss", loss)
        return {"test_loss": loss}

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        return optimizer
datamodule = MNISTDataModule(os.getcwd() + "/mnist_data/")

# init model
autoencoder = LitAutoEncoder()

aim_logger = AimLogger(experiment="test")
aim_logger.log_hyperparams({
    'learning_rate': 0.001,
    'batch_size': 64
})

ckpt = ModelCheckpoint(monitor='val_loss', dirpath="checkpoint/", filename="mycheck")
ckpt.test_score = 26.51

trainer = pl.Trainer(callbacks=[RichProgressBar()], logger=aim_logger,
                     check_val_every_n_epoch=1, gpus=1, max_epochs=2)
trainer.fit(autoencoder, datamodule)
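Based on the traceback below, the AimLogger creates the underlying Run lazily on the first experiment access, and the crash happens inside Run.__init__ itself. Under that assumption (not verified here), the failure should be reproducible without Lightning at all by running a much smaller script inside a freshly git init-ed directory:

# Hypothetical minimal reproduction (an assumption based on the traceback,
# not part of the original report): Run() alone triggers get_git_info(),
# so Lightning should not be required to hit the ValueError.
from aim import Run

run = Run(experiment="test")  # expected to raise inside get_git_info()
run["hparams"] = {"learning_rate": 0.001, "batch_size": 64}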
Error when calling `get_git_info`
File "/home/rdeffaye/workspace/playground/pl_test.py", line 118, in <module>
aim_logger.log_hyperparams({
File "/home/rdeffaye/.rl4rec39/lib/python3.9/site-packages/pytorch_lightning/utilities/distributed.py", line 50, in wrapped_fn
return fn(*args, **kwargs)
File "/home/rdeffaye/.rl4rec39/lib/python3.9/site-packages/aim/sdk/adapters/pytorch_lightning.py", line 74, in log_hyperparams
self.experiment.set(('hparams', key), value, strict=False)
File "/home/rdeffaye/.rl4rec39/lib/python3.9/site-packages/pytorch_lightning/loggers/base.py", line 43, in experiment
return get_experiment() or DummyExperiment()
File "/home/rdeffaye/.rl4rec39/lib/python3.9/site-packages/pytorch_lightning/utilities/distributed.py", line 50, in wrapped_fn
return fn(*args, **kwargs)
File "/home/rdeffaye/.rl4rec39/lib/python3.9/site-packages/pytorch_lightning/loggers/base.py", line 41, in get_experiment
return fn(self)
File "/home/rdeffaye/.rl4rec39/lib/python3.9/site-packages/aim/sdk/adapters/pytorch_lightning.py", line 51, in experiment
self._run = Run(
File "/home/rdeffaye/.rl4rec39/lib/python3.9/site-packages/aim/sdk/run.py", line 287, in __init__
'git_info': get_git_info(),
File "/home/rdeffaye/.rl4rec39/lib/python3.9/site-packages/aim/ext/utils.py", line 41, in get_git_info
commit_hash, commit_timestamp, commit_author = results[1].split('/')
ValueError: not enough values to unpack (expected 3, got 1)
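The last frame points at the likely root cause: aim/ext/utils.py splits the output of a git command on '/', but in a repository that was just created with git init and has no commits yet, git log has no record to format, so the split yields a single element and the three-way unpack fails. Below is a sketch of that failure mode with a guard added; the exact git invocation is an assumption, only the failing split line is taken from the traceback above.

# Sketch only -- not Aim's actual implementation. Assumes get_git_info shells
# out to git and splits a '/'-separated format string, as the traceback suggests.
import subprocess

def get_git_info_sketch():
    proc = subprocess.run(
        ["git", "log", "-1", "--format=%H/%cI/%an"],  # hypothetical format string
        capture_output=True, text=True,
    )
    output = proc.stdout.strip()
    if proc.returncode != 0 or output.count("/") < 2:
        # On a fresh `git init` repo, `git log` fails ("does not have any commits
        # yet"), output is empty, and split('/') returns one element -- the
        # ValueError seen above. Returning None here avoids the crash.
        return None
    commit_hash, commit_timestamp, commit_author = output.split("/", 2)
    return {"hash": commit_hash, "timestamp": commit_timestamp, "author": commit_author}

If that reading is right, a simple workaround on an affected Aim version is to make at least one commit (even an empty one) before constructing the logger, or to stay on a version earlier than 3.6.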
Environment
- Aim version: >= 3.6
- Python version: 3.9
- pip version: 22.0.4
- OS: RedHat 4.8.5
- git version: 2.26
Issue Analytics
- Created 2 years ago
- Comments: 6 (5 by maintainers)
Top GitHub Comments
Hey @RomDeffayet ! Thanks for reporting this. @devfox-se please look into this.
Closing due to inactivity, feel free to reopen in case this still persists.