
PyTorch Lightning adapter fails to log hyperparams when a git repo is initialized


🐛 Bug

Starting from Aim 3.6, the PyTorch Lightning adapter fails to log hyperparameters when a git repository has been initialized.

To reproduce

1. `git init`
2. Run any PL script with Aim logging hyperparameters.

For example:
import os
import torch
from torch import nn
import torch.nn.functional as F
from torchvision import transforms
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader, random_split
import pytorch_lightning as pl
from aim.pytorch_lightning import AimLogger
from aim import Run, Figure
from typing import Optional, Any
from pytorch_lightning.callbacks import RichProgressBar, ModelCheckpoint

class MNISTDataModule(pl.LightningDataModule):
    def __init__(self, data_dir: str = "./"):
        super().__init__()
        self.data_dir = data_dir
        self.transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])

        # Setting default dims here because we know them.
        # Could optionally be assigned dynamically in dm.setup()
        self.dims = (1, 28, 28)

    def prepare_data(self):
        # download
        MNIST(self.data_dir, train=True, download=True)
        MNIST(self.data_dir, train=False, download=True)

    def setup(self, stage: Optional[str] = None):

        # Assign train/val datasets for use in dataloaders
        if stage == "fit" or stage is None:
            mnist_full = MNIST(self.data_dir, train=True, transform=self.transform)
            self.mnist_train, self.mnist_val = random_split(mnist_full, [55000, 5000])

            # Optionally...
            # self.dims = tuple(self.mnist_train[0][0].shape)

        # Assign test dataset for use in dataloader(s)
        if stage == "test" or stage is None:
            self.mnist_test = MNIST(self.data_dir, train=False, transform=self.transform)

            # Optionally...
            # self.dims = tuple(self.mnist_test[0][0].shape)

    def train_dataloader(self):
        return DataLoader(self.mnist_train, batch_size=32, num_workers=32)

    def val_dataloader(self):
        return DataLoader(self.mnist_val, batch_size=32, num_workers=32)

    def test_dataloader(self):
        return DataLoader(self.mnist_test, batch_size=32, num_workers=32)

class LitAutoEncoder(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.encoder = nn.Sequential(nn.Linear(28 * 28, 64), nn.ReLU(), nn.Linear(64, 3))
        self.decoder = nn.Sequential(nn.Linear(3, 64), nn.ReLU(), nn.Linear(64, 28 * 28))

    def forward(self, x):
        # in lightning, forward defines the prediction/inference actions
        embedding = self.encoder(x)
        return embedding

    def training_step(self, batch, batch_idx):
        # training_step defines the train loop.
        # It is independent of forward
        x, y = batch
        x = x.view(x.size(0), -1)
        z = self.encoder(x)
        x_hat = self.decoder(z)
        loss = F.mse_loss(x_hat, x)
        # Logged via the trainer's logger (AimLogger in this script)
        self.log("train_loss", loss)
        return loss
    
    def validation_step(self, batch, batch_idx):
        # validation_step defines the validation loop.
        # It is independent of forward
        x, y = batch
        x = x.view(x.size(0), -1)
        z = self.encoder(x)
        x_hat = self.decoder(z)
        loss = F.mse_loss(x_hat, x)
        self.log("val_loss", loss)
        return {"val_loss" : loss}
    
    def test_step(self, batch, batch_idx):
        # test_step defines the test loop.
        # It is independent of forward
        x, y = batch
        x = x.view(x.size(0), -1)
        z = self.encoder(x)
        x_hat = self.decoder(z)
        loss = F.mse_loss(x_hat, x)
        # Logged via the trainer's logger (AimLogger in this script)
        self.log("test_loss", loss)
        return {"test_loss" : loss}

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        return optimizer



datamodule = MNISTDataModule(os.getcwd() + "/mnist_data/")

# init model
autoencoder = LitAutoEncoder()

aim_logger = AimLogger(experiment="test")
aim_logger.log_hyperparams({
        'learning_rate': 0.001,
        'batch_size': 64
})

ckpt = ModelCheckpoint(monitor="val_loss", dirpath="checkpoint/", filename="mycheck")
ckpt.test_score = 26.51
trainer = pl.Trainer(callbacks=[RichProgressBar()], logger=aim_logger,
                     check_val_every_n_epoch=1, gpus=1, max_epochs=2)

trainer.fit(autoencoder, datamodule)
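
The traceback below originates in aim.sdk.Run.__init__ rather than in PyTorch Lightning itself, so the failure can most likely be reproduced without Lightning at all. A minimal sketch, assuming Aim >= 3.6 and a freshly initialized repository with no commits; the temporary directory and the Run(repo=...) call are illustrative, not taken from the issue:

# Hypothetical standalone reproduction (not from the original issue).
# Assumes Aim >= 3.6 and a git repository that has no commits yet.
import os
import subprocess
import tempfile

from aim import Run

workdir = tempfile.mkdtemp()
subprocess.run(["git", "init"], cwd=workdir, check=True)  # repo with zero commits

os.chdir(workdir)        # assuming git info is collected from the current working directory
run = Run(repo=workdir)  # expected to raise the same ValueError on affected versions
run["hparams"] = {"learning_rate": 0.001, "batch_size": 64}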
Error when calling `get_git_info`
File "/home/rdeffaye/workspace/playground/pl_test.py", line 118, in <module>
    aim_logger.log_hyperparams({
  File "/home/rdeffaye/.rl4rec39/lib/python3.9/site-packages/pytorch_lightning/utilities/distributed.py", line 50, in wrapped_fn
    return fn(*args, **kwargs)
  File "/home/rdeffaye/.rl4rec39/lib/python3.9/site-packages/aim/sdk/adapters/pytorch_lightning.py", line 74, in log_hyperparams
    self.experiment.set(('hparams', key), value, strict=False)
  File "/home/rdeffaye/.rl4rec39/lib/python3.9/site-packages/pytorch_lightning/loggers/base.py", line 43, in experiment
    return get_experiment() or DummyExperiment()
  File "/home/rdeffaye/.rl4rec39/lib/python3.9/site-packages/pytorch_lightning/utilities/distributed.py", line 50, in wrapped_fn
    return fn(*args, **kwargs)
  File "/home/rdeffaye/.rl4rec39/lib/python3.9/site-packages/pytorch_lightning/loggers/base.py", line 41, in get_experiment
    return fn(self)
  File "/home/rdeffaye/.rl4rec39/lib/python3.9/site-packages/aim/sdk/adapters/pytorch_lightning.py", line 51, in experiment
    self._run = Run(
  File "/home/rdeffaye/.rl4rec39/lib/python3.9/site-packages/aim/sdk/run.py", line 287, in __init__
    'git_info': get_git_info(),
  File "/home/rdeffaye/.rl4rec39/lib/python3.9/site-packages/aim/ext/utils.py", line 41, in get_git_info
    commit_hash, commit_timestamp, commit_author = results[1].split('/')
ValueError: not enough values to unpack (expected 3, got 1)
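
The unpack fails because the string being split does not contain the three expected slash-separated fields. One plausible explanation is that, right after `git init`, HEAD does not point to any commit, so a `git log`-style query for the latest commit returns nothing (or fails outright), leaving the helper to split an empty string. A more defensive sketch of such a helper, assuming a slash-separated `%H/%ct/%an` format; the exact command and field order are assumptions, not Aim's actual implementation:

# Hypothetical defensive git-info helper, not Aim's real code.
# The git command and the '%H/%ct/%an' format string are assumptions.
import subprocess

def get_git_info_safe():
    try:
        out = subprocess.check_output(
            ["git", "log", "-1", "--format=%H/%ct/%an"],
            stderr=subprocess.DEVNULL,
        ).decode().strip()
    except (subprocess.CalledProcessError, FileNotFoundError):
        return None  # not a git repo, git missing, or no commits yet

    parts = out.split("/")
    if len(parts) != 3:
        return None  # unexpected output, e.g. an empty string
    commit_hash, commit_timestamp, commit_author = parts
    return {
        "commit_hash": commit_hash,
        "commit_timestamp": commit_timestamp,
        "commit_author": commit_author,
    }

On the user side, a likely workaround until the helper tolerates empty repositories is to create at least one commit before starting the run; `git rev-parse --verify HEAD` exits non-zero while the repository is still empty, which makes for an easy pre-flight check.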

Environment

  • Aim Version >= 3.6
  • Python version 3.9
  • pip version 22.0.4
  • OS: RedHat 4.8.5
  • git version 2.26

Issue Analytics

  • State: closed
  • Created 2 years ago
  • Comments:6 (5 by maintainers)

Top GitHub Comments

1 reaction
alberttorosyan commented, Mar 18, 2022

Hey @RomDeffayet ! Thanks for reporting this. @devfox-se please look into this.

0 reactions
devfox-se commented, Apr 14, 2022

Closing due to inactivity, feel free to reopen in case this still persists.


Top Results From Across the Web

  • Logging tensors as hparam fails · Issue #9022 · Lightning-AI ...: When a trainer starts fit() and logs hyperparameters, an error occurs if one of the hyperparameters is a Tensor.
  • Integration guides — Aim 3.15.2 documentation - Read the Docs: Aim integrates seamlessly with your favorite ML frameworks - Pytorch Ignite, Pytorch Lightning, Hugging Face and others.
  • How to save hparams when not provided as argument ...: In Lightning, the hyperparameters are the set of arguments in the LightningModule's init method.
  • Using PyTorch Lightning with Tune — Ray 1.11.0: Let's first start with the basic PyTorch Lightning implementation of an MNIST classifier.
  • Modify a PyTorch Lightning Script - Amazon SageMaker: Learn how to modify a PyTorch Lightning training script to adapt the SageMaker distributed data parallel library.
