Help with SKI Kernel and Fixed Noise Likelihood
Hello. I am playing around with some simple examples to build my familiarity with GPyTorch’s implementation of the SKI method, and I have run into behavior that I’m not sure how to interpret or fix.
tl;dr: the SKI kernel seems to break with the FixedNoiseGaussianLikelihood (or I’m using it incorrectly).
Below is my code, where I compare the results of exact vs. SKI regression with an RBF kernel:
import math
import torch
import gpytorch
from matplotlib import pyplot as plt
import numpy as np

# Make plots inline
%matplotlib inline


class RBF(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super(RBF, self).__init__(train_x, train_y, likelihood)
        # Subtracted mean already
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)


class RBF_SKI(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super(RBF_SKI, self).__init__(train_x, train_y, likelihood)
        grid_size = gpytorch.utils.grid.choose_grid_size(train_x, 1.0)
        # Subtracted mean already
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.GridInterpolationKernel(
            gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel()),
            grid_size=grid_size,
            num_dims=1,
        )

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
First, testing with the RBF kernel:
noise_level = 1e-3
train_x = torch.tensor([1, 2, 3, 2.5], dtype=torch.float)
train_y = torch.tensor([3, 3.5, 2, 7], dtype=torch.float)
train_y_err = torch.ones(train_x.shape[0]) * noise_level
plt.errorbar(train_x.detach().numpy(), train_y.detach().numpy(), train_y_err.detach().numpy(), fmt='o')
plt.show()
likelihood = gpytorch.likelihoods.FixedNoiseGaussianLikelihood(noise=train_y_err, learn_additional_noise=False)
model = RBF(train_x, train_y, likelihood)
model.eval(), likelihood.eval()
with torch.no_grad(), gpytorch.settings.fast_pred_var():
    test_x = torch.linspace(.5, 3.5, 51)
    test_y_hat = model(test_x)
    observed_pred = likelihood(test_y_hat, noise=torch.ones(test_x.shape[0]) * noise_level)
    lower, upper = observed_pred.confidence_region()
log_likelihood = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)
print("Loss: ", - log_likelihood(model(train_x), train_y).item())
plt.plot(test_x.detach().numpy(), observed_pred.mean.detach().numpy(), color="blue")
plt.scatter(train_x, train_y, color="k")
plt.fill_between(test_x, lower.detach().numpy(), upper.detach().numpy(), alpha=0.5, color="C0")
plt.show()
This gives the expected result for nearly noiseless observations. Next, testing with the SKI kernel wrapped around the RBF kernel:
noise_level = 1e-3
train_x = torch.tensor([1, 2, 3, 2.5], dtype=torch.float)
train_y = torch.tensor([3, 3.5, 2, 7], dtype=torch.float)
train_y_err = torch.ones(train_x.shape[0]) * noise_level
plt.errorbar(train_x.detach().numpy(), train_y.detach().numpy(), train_y_err.detach().numpy(), fmt='o')
plt.show()
likelihood = gpytorch.likelihoods.FixedNoiseGaussianLikelihood(noise=train_y_err, learn_additional_noise=False)
model = RBF_SKI(train_x, train_y, likelihood)
model.eval(), likelihood.eval()
with torch.no_grad(), gpytorch.settings.fast_pred_var():
    test_x = torch.linspace(.5, 3.5, 51)
    test_y_hat = model(test_x)
    observed_pred = likelihood(test_y_hat, noise=torch.ones(test_x.shape[0]) * noise_level)
    lower, upper = observed_pred.confidence_region()
log_likelihood = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)
print("Loss: ", - log_likelihood(model(train_x), train_y).item())
plt.plot(test_x.detach().numpy(), observed_pred.mean.detach().numpy(), color="blue")
plt.scatter(train_x, train_y, color="k")
plt.fill_between(test_x, lower.detach().numpy(), upper.detach().numpy(), alpha=0.5, color="C0")
plt.show()
This results in something completely off. I thought that maybe, since there is so little data, the grid might be sparse and the interpolation points inaccurate, so I added one more data point:
train_x = torch.tensor([1, 2, 3, 2.5, 3.5], dtype=torch.float)
train_y = torch.tensor([3, 3.5, 2, 7, 4], dtype=torch.float)
This gives the expected result for the plain RBF kernel, but an error for the SKI kernel:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-19-4807188b9256> in <module>
21
22 log_likelihood = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)
---> 23 print("Loss: ", - log_likelihood(model(train_x), train_y).item())
24
25 plt.plot(test_x.detach().numpy(), observed_pred.mean.detach().numpy(), color="blue")
~/.conda/envs/mypy/lib/python3.6/site-packages/gpytorch/module.py in __call__(self, *inputs, **kwargs)
20
21 def __call__(self, *inputs, **kwargs):
---> 22 outputs = self.forward(*inputs, **kwargs)
23 if isinstance(outputs, list):
24 return [_validate_module_outputs(output) for output in outputs]
~/.conda/envs/mypy/lib/python3.6/site-packages/gpytorch/mlls/exact_marginal_log_likelihood.py in forward(self, output, target, *params)
26 # Get the log prob of the marginal distribution
27 output = self.likelihood(output, *params)
---> 28 res = output.log_prob(target)
29
30 # Add terms for SGPR / when inducing points are learned
~/.conda/envs/mypy/lib/python3.6/site-packages/gpytorch/distributions/multivariate_normal.py in log_prob(self, value)
127
128 # Get log determininat and first part of quadratic form
--> 129 inv_quad, logdet = covar.inv_quad_logdet(inv_quad_rhs=diff.unsqueeze(-1), logdet=True)
130
131 res = -0.5 * sum([inv_quad, logdet, diff.size(-1) * math.log(2 * math.pi)])
~/.conda/envs/mypy/lib/python3.6/site-packages/gpytorch/lazy/lazy_tensor.py in inv_quad_logdet(self, inv_quad_rhs, logdet, reduce_inv_quad)
990 from .chol_lazy_tensor import CholLazyTensor
991
--> 992 cholesky = CholLazyTensor(self.cholesky())
993 return cholesky.inv_quad_logdet(inv_quad_rhs=inv_quad_rhs, logdet=logdet, reduce_inv_quad=reduce_inv_quad)
994
~/.conda/envs/mypy/lib/python3.6/site-packages/gpytorch/lazy/lazy_tensor.py in cholesky(self, upper)
716 (LazyTensor) Cholesky factor (lower triangular)
717 """
--> 718 res = self._cholesky()
719 if upper:
720 res = res.transpose(-1, -2)
~/.conda/envs/mypy/lib/python3.6/site-packages/gpytorch/utils/memoize.py in g(self, *args, **kwargs)
32 cache_name = name if name is not None else method
33 if not is_in_cache(self, cache_name):
---> 34 add_to_cache(self, cache_name, method(self, *args, **kwargs))
35 return get_from_cache(self, cache_name)
36
~/.conda/envs/mypy/lib/python3.6/site-packages/gpytorch/lazy/lazy_tensor.py in _cholesky(self)
401 evaluated_mat.register_hook(_ensure_symmetric_grad)
402
--> 403 cholesky = psd_safe_cholesky(evaluated_mat.double()).to(self.dtype)
404 return NonLazyTensor(cholesky)
405
~/.conda/envs/mypy/lib/python3.6/site-packages/gpytorch/utils/cholesky.py in psd_safe_cholesky(A, upper, out, jitter)
45 continue
46
---> 47 raise e
48
49
~/.conda/envs/mypy/lib/python3.6/site-packages/gpytorch/utils/cholesky.py in psd_safe_cholesky(A, upper, out, jitter)
19 """
20 try:
---> 21 L = torch.cholesky(A, upper=upper, out=out)
22 # TODO: Remove once fixed in pytorch (#16780)
23 if A.dim() > 2 and A.is_cuda:
RuntimeError: cholesky_cpu: U(1,1) is zero, singular U.
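To convince myself that the matrix being factorized really is singular, here is a small diagnostic sketch (my addition, not part of the run above; it simply mirrors what the MLL call does and inspects the eigenvalues of the resulting covariance; torch.symeig is what the PyTorch version in my environment provides, newer versions would use torch.linalg.eigvalsh):
# Diagnostic sketch: build the same marginal training distribution that the MLL
# call above constructs, evaluate its dense covariance matrix, and check whether
# it is numerically positive definite. A smallest eigenvalue at or below zero
# would explain the "singular U" Cholesky failure.
with torch.no_grad():
    train_dist = likelihood(model(train_x))
    K = train_dist.covariance_matrix                # dense n x n marginal covariance
    evals, _ = torch.symeig(K, eigenvectors=False)  # eigenvalues in ascending order
    print("Smallest eigenvalue:", evals[0].item())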
I then tried increasing the number of grid points,
grid_size = gpytorch.utils.grid.choose_grid_size(train_x, 2.0)
which results in essentially the same error, with only the final line differing:
RuntimeError: cholesky_cpu: U(3,3) is zero, singular U.
Finally, I changed my likelihood to stop fixing the noise model. I changed
likelihood = gpytorch.likelihoods.FixedNoiseGaussianLikelihood(noise=train_y_err, learn_additional_noise=False)
to
likelihood = gpytorch.likelihoods.GaussianLikelihood()
This finally gave me semi-consistent results between the RBF and the SKI RBF fits.
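(Side note: unlike the fixed-noise setup, GaussianLikelihood treats the noise as a learnable hyperparameter, so in a real run I would fit the hyperparameters before calling eval(). A minimal training-loop sketch along the lines of the GPyTorch tutorials, reusing the names from the code above:)
# Minimal hyperparameter-training sketch (not part of the comparison above):
# with GaussianLikelihood the noise is learned, so it should be fit to the data
# before switching to eval mode for prediction.
model.train(), likelihood.train()
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)  # includes the likelihood's parameters
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)
for i in range(50):
    optimizer.zero_grad()
    output = model(train_x)
    loss = -mll(output, train_y)
    loss.backward()
    optimizer.step()
model.eval(), likelihood.eval()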
However, when I go back to the 4-data-point example instead of the 5-point one, I get inconsistent results again between the RBF and the SKI RBF fits.
Doubling the number of grid points seems to recover the expected behavior, so perhaps the “step function” inconsistency is due to the small data size.
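(As a quick sanity check on what "doubling" means here: as far as I can tell, the ratio argument to choose_grid_size roughly scales the number of grid points per dimension. A small sketch to print the sizes it picks for this training set:)
# Quick check (my addition, not in the runs above): how many grid points does
# choose_grid_size pick for this training set at different ratios?
for ratio in (1.0, 2.0, 4.0):
    print(ratio, gpytorch.utils.grid.choose_grid_size(train_x, ratio))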
However, the more important issue is that the SKI kernel seems to break when used with a fixed noise model. I am wondering if anyone can help me understand what is happening in these tests: specifically, how I can use a fixed noise model (this is required for the real data I am working with), and what the expected behavior of the SKI method is as data points are added or removed.
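In the meantime, one workaround I have not tried yet (just a sketch, not verified) would be to keep the fixed per-point noise but let the likelihood learn a small additional homoskedastic term, which adds a positive constant to the diagonal and might keep the SKI training covariance positive definite:
# Possible workaround (untested sketch): fixed per-point noise plus a small
# learned additional noise term for extra diagonal regularization.
likelihood = gpytorch.likelihoods.FixedNoiseGaussianLikelihood(
    noise=train_y_err,
    learn_additional_noise=True,
)
model = RBF_SKI(train_x, train_y, likelihood)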
Top GitHub Comments
If this is really the case, I propose we update the documentation here:
https://gpytorch.readthedocs.io/en/latest/examples/04_Scalable_GP_Regression_1D/KISSGP_Regression_1D.html
I imagine that's where the misconfiguration stems from. At least it was in my case.
Ah yes that’s a good point. I’ll make sure to fix the tutorials (probably once we push a new stable release, which should be soon).