
[Bug] forward() missing input during inference, with a hybrid GP-MLP model


šŸ› Bug

To reproduce

**Code snippet to reproduce**

I am training a multiple-input ("hybrid") model that should fit the following:

$y = f(X) + \eta(D) +\varepsilon$

where $f(\cdot)$ is some non-linear function of $X$ ($n \times p$), $\eta(\cdot)$ is a $GP(0, K)$ where $K$ is a standard RBF kernel over a 2D grid of locations $D$ ($n \times 2$), and $\varepsilon$ is standard $N(0, \sigma^2_e)$ noise. So marginally $y$ is a $GP(f(X), K)$:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.spatial.distance import pdist, squareform
from sklearn.model_selection import train_test_split

import torch
import gpytorch
import tqdm

p = 10
N = 10000
sig2e = 1.0
lengthscale = 1
q = 1000
n_per_cat = 30

X = np.random.uniform(-1, 1, N * p).reshape((N, p))
betas = np.ones(p)
Xbeta = 1.0 + X @ betas
fX = Xbeta * np.cos(Xbeta) + 2 * X[:, 0] * X[:, 1]
e = np.random.normal(0, np.sqrt(sig2e), N)

coords = np.stack([np.random.uniform(-10, 10, q), np.random.uniform(-10, 10, q)], axis=1)
dist_matrix = squareform(pdist(coords)) ** 2
D = np.exp(-dist_matrix / (2 * lengthscale))
b = np.random.multivariate_normal(np.zeros(q), D, 1)[0]
fs = np.random.poisson(n_per_cat, q) + 1
fs_sum = fs.sum()
ps = fs/fs_sum
ns = np.random.multinomial(N, ps)
Z_idx = np.repeat(range(q), ns)
gZb = np.repeat(b, ns)

y = fX + gZb + e

x_cols = ['X' + str(i) for i in range(p)]
x_cols.extend(['D1', 'D2'])
df = pd.concat([pd.DataFrame(X), pd.DataFrame(coords[Z_idx])], axis=1)
df.columns = x_cols
df['y'] = y

X_train, X_test, y_train, y_test = train_test_split(df.drop('y', axis=1), df['y'], test_size=0.2)
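To spell out the marginal implied by this simulation (my notation; assuming $b$ and $\varepsilon$ are independent, and writing $Z$ for the $n \times q$ one-hot location-assignment matrix encoded by Z_idx above):

$$y \mid X, D \sim \mathcal{N}\left(f(X),\; Z K_D Z^{\top} + \sigma^2_e I_n\right)$$

where $K_D$ is the $q \times q$ RBF kernel over the simulated coordinates (the matrix called D in the code).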

So my model has two inputs: a standard MLP on the p = 10 X-feature input (X0, …, X9), and a standard GP with an RBF kernel on the 2-D "locations" input (D1, D2); the two are summed and trained via the marginal log-likelihood:

x_cols_mlp = X_train.columns[X_train.columns.str.startswith('X')]
x_cols_gp = X_train.columns[X_train.columns.str.startswith('D')]

train_x_mlp = torch.Tensor(X_train[x_cols_mlp].values)
train_x_gp = torch.Tensor(X_train[x_cols_gp].values)
train_y = torch.Tensor(y_train.values)
test_x_mlp = torch.Tensor(X_test[x_cols_mlp].values)
test_x_gp = torch.Tensor(X_test[x_cols_gp].values)
test_y = torch.Tensor(y_test.values)

if torch.cuda.is_available():
    train_x_mlp, train_x_gp, train_y, test_x_mlp, test_x_gp, test_y = train_x_mlp.cuda(), train_x_gp.cuda(), train_y.cuda(), test_x_mlp.cuda(), test_x_gp.cuda(), test_y.cuda()

class GPMLPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x_gp, train_y, likelihood, mlp):
        super(GPMLPModel, self).__init__(train_x_gp, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.GridInterpolationKernel(
            gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel()),
            num_dims=2, grid_size=100
        )
        self.mlp = mlp

    def forward(self, x_gp, x_mlp):
        projected_x = self.mlp(x_mlp)
        mean_x = self.mean_module(x_gp) + projected_x.view(1, -1) #notice the mlp input and GP input are summed in the mean
        covar_x = self.covar_module(x_gp)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

class MLP(torch.nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.nn = torch.nn.Sequential(
            torch.nn.Linear(p, 100),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.25),
            torch.nn.Linear(100, 50),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.25),
            torch.nn.Linear(50, 25),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.25),
            torch.nn.Linear(25, 12),
            torch.nn.ReLU(),
            torch.nn.Linear(12, 1)
        )

    def forward(self, x):
        logits = self.nn(x)
        return logits

likelihood = gpytorch.likelihoods.GaussianLikelihood()
model = GPMLPModel(train_x_gp, train_y, likelihood, MLP())

if torch.cuda.is_available():
    model = model.cuda()
    likelihood = likelihood.cuda()

All goes well during training:

epochs = 100
model.train()
likelihood.train()

optimizer = torch.optim.Adam([
    {'params': model.mlp.parameters()},
    {'params': model.covar_module.base_kernel.parameters()},
    {'params': model.mean_module.parameters()},
    {'params': likelihood.parameters()},
], lr=0.01)

mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

def train():
    iterator = tqdm.notebook.tqdm(range(epochs))
    train_loss = []
    for i in iterator:
        optimizer.zero_grad()
        output = model(train_x_gp, train_x_mlp) # notice the two inputs entering here
        loss = -mll(output, train_y)
        loss.backward()
        iterator.set_postfix(loss=loss.item())
        train_loss.append(loss.item())
        optimizer.step()
    return output, train_loss
        
%time train_out, train_loss = train()

I can see the MLL loss decreasing nicely:

plt.plot(np.arange(epochs), train_loss)
plt.show()

[image: training loss decreasing over epochs]

And I can see a reasonable fit on training data, a reasonable MSE, a reasonable likelihood noise:

pred_y_train = train_out.loc.view(-1,1)
plt.scatter(train_y, pred_y_train.detach().numpy())
plt.show()
print('Train MSE: {}'.format(torch.mean(torch.square(pred_y_train.flatten() - train_y))))
print(f'Actual noise value: {likelihood.noise}')

[image: predicted vs. actual y on the training data]

But when it comes to inference on the test data…

model.eval()
likelihood.eval()
with torch.no_grad(), gpytorch.settings.use_toeplitz(False), gpytorch.settings.fast_pred_var():
    out_test = model(test_x_gp, test_x_mlp)

See the stack trace below. Apparently GPyTorch falls back to the inputs registered with the ExactGP constructor, i.e. only the first input, so forward() is missing the second input. If this is not a bug, is there any way to circumvent it?

**Stack trace/error message**

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
[<ipython-input-27-08f4c5bb5b1e>](https://localhost:8080/#) in <module>()
      2 likelihood.eval()
      3 with torch.no_grad(), gpytorch.settings.use_toeplitz(False), gpytorch.settings.fast_pred_var():
----> 4     out_test = model(test_x_gp, test_x_mlp)

1 frames
[/usr/local/lib/python3.7/dist-packages/gpytorch/models/exact_gp.py](https://localhost:8080/#) in __call__(self, *args, **kwargs)
    278             # Get the terms that only depend on training data
    279             if self.prediction_strategy is None:
--> 280                 train_output = super().__call__(*train_inputs, **kwargs)
    281 
    282                 # Create the prediction strategy for

[/usr/local/lib/python3.7/dist-packages/gpytorch/module.py](https://localhost:8080/#) in __call__(self, *inputs, **kwargs)
     28 
     29     def __call__(self, *inputs, **kwargs):
---> 30         outputs = self.forward(*inputs, **kwargs)
     31         if isinstance(outputs, list):
     32             return [_validate_module_outputs(output) for output in outputs]

TypeError: forward() missing 1 required positional argument: 'x_mlp'
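For what it's worth, a hedged way to see what happened here (assuming the model from the snippet above has been built): ExactGP keeps whatever was passed to its constructor in model.train_inputs and replays that tuple into forward() when it builds the prediction strategy in eval mode. Since only train_x_gp was registered, the tuple holds a single tensor:

print(len(model.train_inputs))        # 1, only train_x_gp was registered
print(model.train_inputs[0].shape)    # torch.Size([8000, 2]), the GP locations
# forward(x_gp, x_mlp) is therefore replayed with one positional argument,
# which is exactly the TypeError in the trace above.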

Expected Behavior

The model() call should work seamlessly on standard test data. If there's something inherently wrong with the model, it should probably not train at all. But I'd expect any model that can compute on the training set to be able to predict on the test set.

System information

Please complete the following information:

  • GPyTorch Version 1.6.0
  • PyTorch Version 1.10.0+cu111
  • Linux on Google Colab

Additional context

I just want to say how much I appreciate GPyTorch, and I apologize if this isn't in fact a bug but something wrong with my code.

Issue Analytics

  • State:closed
  • Created 2 years ago
  • Comments:10 (3 by maintainers)

Top GitHub Comments

2 reactions
gsimchoni commented, Mar 28, 2022

Going back to this after some time:

To anyone interested: the wrapper above actually WON'T work. Following advice from @wjmaddox and @gpleiss I got it to work without a wrapper, see below. It can also work for ApproximateGP with stochastic gradients, it's just a little more complicated. So closing this, and thank you. Obviously not a bug.

The model:

class DKLModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood, mlp):
        super(DKLModel, self).__init__(train_x, train_y, likelihood,)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.GridInterpolationKernel(
            gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel()),
            num_dims=2, grid_size=100
        )
        self.mlp = mlp
    
    def forward(self, x_gp, x_mlp):
        projected_x = self.mlp(x_mlp)
        mean_x = self.mean_module(x_gp) + projected_x.flatten()
        covar_x = self.covar_module(x_gp)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

Instantiation (notice the difference):

model = DKLModel((train_x_gp, train_x_mlp), train_y, likelihood, MLP())

During training:

output = model(train_x_gp, train_x_mlp)
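A usage sketch of the inference step that failed originally (my addition, not from the thread; it assumes the fixed DKLModel above and the test tensors defined earlier), with both test inputs passed positionally just as during training:

model.eval()
likelihood.eval()
with torch.no_grad(), gpytorch.settings.use_toeplitz(False), gpytorch.settings.fast_pred_var():
    out_test = likelihood(model(test_x_gp, test_x_mlp))  # both inputs, same order as training
pred_y_test = out_test.mean
print('Test MSE: {}'.format(torch.mean(torch.square(pred_y_test - test_y))))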
0 reactions
gsimchoni commented, Feb 17, 2022

Awkward.

Anyway, now getting during prediction:

[/usr/local/lib/python3.7/dist-packages/gpytorch/models/exact_gp.py](https://localhost:8080/#) in __call__(self, *args, **kwargs)
    317             # Make the prediction
    318             with settings._use_eval_tolerance():
--> 319                 predictive_mean, predictive_covar = self.prediction_strategy.exact_prediction(full_mean, full_covar)
    320 
    321             # Reshape predictive mean to match the appropriate event shape

[/usr/local/lib/python3.7/dist-packages/gpytorch/models/exact_prediction_strategies.py](https://localhost:8080/#) in exact_prediction(self, joint_mean, joint_covar)
    606         return (
    607             self.exact_predictive_mean(test_mean, test_train_covar),
--> 608             self.exact_predictive_covar(test_test_covar, test_train_covar),
    609         )
    610 

[/usr/local/lib/python3.7/dist-packages/gpytorch/models/exact_prediction_strategies.py](https://localhost:8080/#) in exact_predictive_covar(self, test_test_covar, test_train_covar)
    635             return res
    636         else:
--> 637             precomputed_cache = self.covar_cache
    638             fps = settings.fast_pred_samples.on()
    639             if (fps and precomputed_cache[0] is None) or (not fps and precomputed_cache[1] is None):

[/usr/local/lib/python3.7/dist-packages/gpytorch/utils/memoize.py](https://localhost:8080/#) in g(self, *args, **kwargs)
     57         kwargs_pkl = pickle.dumps(kwargs)
     58         if not _is_in_cache(self, cache_name, *args, kwargs_pkl=kwargs_pkl):
---> 59             return _add_to_cache(self, cache_name, method(self, *args, **kwargs), *args, kwargs_pkl=kwargs_pkl)
     60         return _get_from_cache(self, cache_name, *args, kwargs_pkl=kwargs_pkl)
     61 

[/usr/local/lib/python3.7/dist-packages/gpytorch/models/exact_prediction_strategies.py](https://localhost:8080/#) in covar_cache(self)
    575         # Get inverse root
    576         train_train_covar_inv_root = train_train_covar_plus_noise.root_inv_decomposition(
--> 577             initial_vectors=probe_vectors, test_vectors=test_vectors
    578         ).root
    579         train_train_covar_inv_root = train_train_covar_inv_root.evaluate()

[/usr/local/lib/python3.7/dist-packages/gpytorch/utils/memoize.py](https://localhost:8080/#) in g(self, *args, **kwargs)
     57         kwargs_pkl = pickle.dumps(kwargs)
     58         if not _is_in_cache(self, cache_name, *args, kwargs_pkl=kwargs_pkl):
---> 59             return _add_to_cache(self, cache_name, method(self, *args, **kwargs), *args, kwargs_pkl=kwargs_pkl)
     60         return _get_from_cache(self, cache_name, *args, kwargs_pkl=kwargs_pkl)
     61 

[/usr/local/lib/python3.7/dist-packages/gpytorch/lazy/lazy_tensor.py](https://localhost:8080/#) in root_inv_decomposition(self, initial_vectors, test_vectors, method)
   1754                     raise RuntimeError(
   1755                         "LazyTensor (size={}) and initial_vectors (size={}) should have the same number "
-> 1756                         "of dimensions.".format(self.shape, initial_vectors.shape)
   1757                     )
   1758                 elif self.batch_shape != initial_vectors.shape[:-2] or self.shape[-1] != initial_vectors.shape[-2]:

RuntimeError: LazyTensor (size=torch.Size([1, 8000, 8000])) and initial_vectors (size=torch.Size([8000, 1])) should have the same number of dimensions.

If I specifically write likelihood(model(test_x_gp, x_mlp=test_x_mlp)) I get:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
[<ipython-input-34-69b9a720ba8a>](https://localhost:8080/#) in <module>()
      2 likelihood.eval()
      3 with torch.no_grad(), gpytorch.settings.use_toeplitz(False), gpytorch.settings.fast_pred_var():
----> 4     out_test = likelihood(model(test_x_gp, x_mlp=test_x_mlp))

2 frames
[/usr/local/lib/python3.7/dist-packages/gpytorch/models/exact_gp.py](https://localhost:8080/#) in __call__(self, *args, **kwargs)
    303 
    304             # Get the joint distribution for training/test data
--> 305             full_output = super(ExactGP, self).__call__(*full_inputs, **kwargs)
    306             if settings.debug().on():
    307                 if not isinstance(full_output, MultivariateNormal):

[/usr/local/lib/python3.7/dist-packages/gpytorch/module.py](https://localhost:8080/#) in __call__(self, *inputs, **kwargs)
     28 
     29     def __call__(self, *inputs, **kwargs):
---> 30         outputs = self.forward(*inputs, **kwargs)
     31         if isinstance(outputs, list):
     32             return [_validate_module_outputs(output) for output in outputs]

[<ipython-input-15-79e6ace07566>](https://localhost:8080/#) in forward(self, x_gp, x_mlp)
     11     def forward(self, x_gp, x_mlp):
     12         projected_x = self.mlp(x_mlp)
---> 13         mean_x = self.mean_module(x_gp) + projected_x.view(1, -1)
     14         covar_x = self.covar_module(x_gp)
     15         return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

RuntimeError: The size of tensor a (10000) must match the size of tensor b (2000) at non-singleton dimension 1

If I further write likelihood(model(x_gp=test_x_gp, x_mlp=test_x_mlp)) I get during prediction:

/usr/local/lib/python3.7/dist-packages/gpytorch/models/exact_gp.py:275: GPInputWarning: The input matches the stored training data. Did you forget to call model.train()?
  GPInputWarning,
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
[<ipython-input-35-2cec92cd44ca>](https://localhost:8080/#) in <module>()
      2 likelihood.eval()
      3 with torch.no_grad(), gpytorch.settings.use_toeplitz(False), gpytorch.settings.fast_pred_var():
----> 4     out_test = likelihood(model(x_gp=test_x_gp, x_mlp=test_x_mlp))

6 frames
[/usr/local/lib/python3.7/dist-packages/gpytorch/models/exact_gp.py](https://localhost:8080/#) in __call__(self, *args, **kwargs)
    317             # Make the prediction
    318             with settings._use_eval_tolerance():
--> 319                 predictive_mean, predictive_covar = self.prediction_strategy.exact_prediction(full_mean, full_covar)
    320 
    321             # Reshape predictive mean to match the appropriate event shape

[/usr/local/lib/python3.7/dist-packages/gpytorch/models/exact_prediction_strategies.py](https://localhost:8080/#) in exact_prediction(self, joint_mean, joint_covar)
    601         # Find the components of the distribution that contain test data
    602         test_mean = joint_mean[..., self.num_train :]
--> 603         test_test_covar = joint_covar[..., self.num_train :, self.num_train :].evaluate_kernel()
    604         test_train_covar = joint_covar[..., self.num_train :, : self.num_train].evaluate_kernel()
    605 

[/usr/local/lib/python3.7/dist-packages/gpytorch/utils/memoize.py](https://localhost:8080/#) in g(self, *args, **kwargs)
     57         kwargs_pkl = pickle.dumps(kwargs)
     58         if not _is_in_cache(self, cache_name, *args, kwargs_pkl=kwargs_pkl):
---> 59             return _add_to_cache(self, cache_name, method(self, *args, **kwargs), *args, kwargs_pkl=kwargs_pkl)
     60         return _get_from_cache(self, cache_name, *args, kwargs_pkl=kwargs_pkl)
     61 

[/usr/local/lib/python3.7/dist-packages/gpytorch/lazy/lazy_evaluated_kernel_tensor.py](https://localhost:8080/#) in evaluate_kernel(self)
    335                 diag=False,
    336                 last_dim_is_batch=self.last_dim_is_batch,
--> 337                 **self.params,
    338             )
    339             self.kernel.active_dims = temp_active_dims

[/usr/local/lib/python3.7/dist-packages/gpytorch/kernels/kernel.py](https://localhost:8080/#) in __call__(self, x1, x2, diag, last_dim_is_batch, **params)
    400                 res = LazyEvaluatedKernelTensor(x1_, x2_, kernel=self, last_dim_is_batch=last_dim_is_batch, **params)
    401             else:
--> 402                 res = lazify(super(Kernel, self).__call__(x1_, x2_, last_dim_is_batch=last_dim_is_batch, **params))
    403             return res
    404 

[/usr/local/lib/python3.7/dist-packages/gpytorch/module.py](https://localhost:8080/#) in __call__(self, *inputs, **kwargs)
     28 
     29     def __call__(self, *inputs, **kwargs):
---> 30         outputs = self.forward(*inputs, **kwargs)
     31         if isinstance(outputs, list):
     32             return [_validate_module_outputs(output) for output in outputs]

[/usr/local/lib/python3.7/dist-packages/gpytorch/kernels/grid_interpolation_kernel.py](https://localhost:8080/#) in forward(self, x1, x2, diag, last_dim_is_batch, **params)
    153             else:
    154                 x = torch.cat([x1.reshape(-1, self.num_dims), x2.reshape(-1, self.num_dims)])
--> 155             x_maxs = x.max(0)[0].tolist()
    156             x_mins = x.min(0)[0].tolist()
    157 

IndexError: max(): Expected reduction dim 0 to have non-zero size.