Not positive definite covar while initializing variational distribution
When running SVGP or any other custom variational GP model, I encounter an error saying that the covariance matrix of the variational distribution is not positive definite during initialization. Unlike in #620, I have left the noise at its default value.
To reproduce
import torch
import gpytorch
from gpytorch.models import AbstractVariationalGP
from gpytorch.variational import CholeskyVariationalDistribution
from gpytorch.variational import VariationalStrategy
from gpytorch.mlls.variational_elbo import VariationalELBO
from math import exp,pi
N = 100
M = 50
RND_SEED = 550
torch.manual_seed(RND_SEED)
train_x = torch.tensor([100.0000, 29.1457, 89.9497, 70.8543, 79.3970, 69.3467, 64.8241,
35.6784, 45.7286, 60.8040, 21.6080, 51.7588, 83.9196, 29.6482,
47.7387, 4.5226, 92.4623, 75.3769, 10.5528, 2.0101, 90.9548,
28.1407, 33.6683, 9.5477, 72.8643, 88.9447, 63.3166, 96.9849,
3.0151, 25.1256, 41.2060, 68.8442, 80.4020, 26.1307, 35.1759,
59.2965, 76.8844, 91.4573, 40.7035, 96.4824, 49.2462, 18.0905,
85.9296, 40.2010, 48.2412, 59.7990, 50.7538, 67.3367, 38.6935,
99.4975, 77.8895, 44.2211, 34.1709, 38.1910, 2.5126, 93.4673,
69.8492, 24.6231, 49.7487, 98.4925, 47.2362, 88.4422, 87.4372,
67.8392, 95.9799, 68.3417, 31.1558, 15.0754, 32.1608, 75.8794,
93.9698, 5.5276, 58.7940, 81.4070, 30.1508, 11.5578, 26.6332,
15.5779, 54.7739, 83.4171, 81.9095, 58.2915, 39.1960, 17.5879,
97.4874, 6.0302, 56.2814, 36.1809, 7.5377, 31.6583, 46.2312,
94.4724, 17.0854, 54.2714, 21.1055, 77.3869, 78.3920, 7.0352,
13.5678, 78.8945])
train_y = torch.tensor([-0.5250, 2.0168, -0.9815, 0.2925, 0.3771, 0.7594, 2.2385, -1.2605,
0.7819, 0.5837, 0.3003, 0.5934, 0.3341, 0.7612, -0.4951, 0.6401,
-0.7095, -1.3961, -1.1374, -1.7018, -1.6455, 0.3058, 0.5909, -0.1134,
-1.4203, -1.2134, 1.2836, -1.7483, -0.5434, 0.4256, -0.0286, -0.0166,
1.7111, 0.8741, -1.1740, 0.7600, -1.1979, -1.6259, -0.6183, -2.0811,
-0.6157, -0.6461, -0.8148, -0.9866, 0.3464, -0.1768, 0.9389, 0.4415,
-1.4143, -0.5450, -1.9407, 0.8691, 0.1064, -1.6399, -1.6331, -0.1329,
1.1218, 0.3822, 0.8884, -0.7801, -0.5491, 0.0157, 0.4199, -0.0407,
-1.2449, -0.2507, 0.8594, 0.7633, -0.1199, -2.3864, -0.4349, 0.3531,
-0.4498, 2.4584, 0.9470, -0.6287, 0.8534, 0.2332, -1.9791, 0.6449,
3.0610, 0.4349, -1.7569, -0.3121, -1.8278, 1.2081, -0.4701, -0.9048,
0.4833, 0.9413, -0.1512, -1.3624, -0.4590, -0.8729, -0.1536, -3.0318,
-1.6477, 0.8311, 0.0656, -0.4176])
class SVGPRegressionModel(AbstractVariationalGP):
    def __init__(self, inducing_points):
        variational_distribution = CholeskyVariationalDistribution(inducing_points.size(-1))
        variational_strategy = VariationalStrategy(self,
                                                   inducing_points,
                                                   variational_distribution,
                                                   learn_inducing_locations=True)
        super(SVGPRegressionModel, self).__init__(variational_strategy)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        latent_pred = gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
        return latent_pred
inducing_points = torch.tensor([17.0854, 47.7387, 59.7990, 93.4673, 39.1960, 95.9799, 89.9497, 47.2362,
2.0101, 93.9698, 34.1709, 24.6231, 97.4874, 77.8895, 56.2814, 45.7286,
49.2462, 85.9296, 46.2312, 60.8040, 30.1508, 54.2714, 17.5879, 94.4724,
33.6683, 35.1759, 32.1608, 59.2965, 48.2412, 36.1809, 29.6482, 58.7940,
18.0905, 7.0352, 99.4975, 88.9447, 96.4824, 28.1407, 83.4171, 92.4623,
26.6332, 38.6935, 38.1910, 40.7035, 64.8241, 69.8492, 2.5126, 13.5678,
67.8392, 63.3166])
model = SVGPRegressionModel(inducing_points)
likelihood = gpytorch.likelihoods.GaussianLikelihood()
def train(model, train_x, train_y, train_steps):
    model.set_train_data(train_x, train_y, strict=False)
    model.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(model.likelihood, model)
    for i in range(train_steps):
        optimizer.zero_grad()
        output = model(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.step()
    model.eval()
inducing_points = train_x[:M]
model = SVGPRegressionModel(inducing_points)
likelihood = gpytorch.likelihoods.GaussianLikelihood()
model.train()
likelihood.train()
# Use the adam optimizer
optimizer = torch.optim.Adam([
    {'params': model.parameters()},
    {'params': likelihood.parameters()}
], lr=0.01)
mll = VariationalELBO(likelihood, model, train_y.size(0), combine_terms=False)
def train():
    num_iter = 200
    for i in range(num_iter):
        optimizer.zero_grad()
        output = model(train_x)
        # Calc loss and backprop gradients
        log_lik, kl_div, log_prior = mll(output, train_y)
        loss = -(log_lik - kl_div + log_prior)
        loss.backward()
        if i % 50 == 0:
            print('Iter %d - Loss: %.3f [%.3f, %.3f, %.3f]' % (i + 1, loss.item(), log_lik.item(), kl_div.item(), log_prior.item()))
        optimizer.step()
train()
Stack trace/error message
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-1-e55cac5faeb1> in <module>
116 print('Iter %d - Loss: %.3f [%.3f, %.3f, %.3f]' % (i + 1, loss.item(), log_lik.item(), kl_div.item(), log_prior.item()))
117 optimizer.step()
--> 118 train()
<ipython-input-1-e55cac5faeb1> in train()
108 for i in range(num_iter):
109 optimizer.zero_grad()
--> 110 output = model(train_x)
111 # Calc loss and backprop gradients
112 log_lik, kl_div, log_prior = mll(output, train_y)
~\.conda\envs\antixk_full\lib\site-packages\gpytorch\models\abstract_variational_gp.py in __call__(self, inputs, **kwargs)
20 inputs = inputs.unsqueeze(-1)
21
---> 22 return self.variational_strategy(inputs)
~\.conda\envs\antixk_full\lib\site-packages\gpytorch\variational\variational_strategy.py in __call__(self, x)
179
180 def __call__(self, x):
--> 181 self.initialize_variational_dist()
182 if self.training:
183 if hasattr(self, "_memoize_cache"):
~\.conda\envs\antixk_full\lib\site-packages\gpytorch\variational\variational_strategy.py in initialize_variational_dist(self)
87 eval_prior_dist = torch.distributions.MultivariateNormal(
88 loc=prior_dist.mean,
---> 89 covariance_matrix=psd_safe_cholesky(prior_dist.covariance_matrix),
90 )
91 self.variational_distribution.initialize_variational_distribution(eval_prior_dist)
~\.conda\envs\antixk_full\lib\site-packages\torch\distributions\multivariate_normal.py in __init__(self, loc, covariance_matrix, precision_matrix, scale_tril, validate_args)
129 if precision_matrix is not None:
130 self.covariance_matrix = torch.inverse(precision_matrix).expand_as(loc_)
--> 131 self._unbroadcasted_scale_tril = torch.cholesky(self.covariance_matrix)
132
133 def expand(self, batch_shape, _instance=None):
RuntimeError: Lapack Error in potrf : the leading minor of order 22 is not positive definite at c:\a\w\1\s\tmp_conda_3.6_090826\conda\conda-bld\pytorch_1550394668685\work\aten\src\th\generic/THTensorLapack.cpp:658
System information
- GPyTorch version: 0.2.1
- PyTorch version: 1.0.1
Additional context
This seems to depend on the data and on the number of inducing points M relative to the total number of training points N. (In the above code, it works when M < 22.) But how do I go about resolving this issue for any given real-world dataset?
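For reference, here is a quick check (not part of the original report) of whether the prior kernel at the inducing locations is the matrix that fails: densify it and attempt the same Cholesky factorization that the variational strategy performs at initialization. This assumes the GPyTorch 0.2.x LazyTensor API, where calling a kernel returns a lazy tensor with an .evaluate() method, and it omits the jitter that psd_safe_cholesky would add.

with torch.no_grad():
    # Prior covariance at the inducing locations; kernels expect a trailing feature dimension.
    K = model.covar_module(inducing_points.unsqueeze(-1)).evaluate()
    try:
        torch.cholesky(K)
        print('prior kernel at the inducing points is positive definite')
    except RuntimeError as err:
        print('Cholesky failed:', err)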
Top GitHub Comments
Just to echo what Geoff said, by far the most likely thing happening here is you are using the default lengthscales initialization but have features on the order of tens to hundreds, which means your prior kernel matrix is all ones. If you normalize your data (or initialize your lengthscales from the data), your problem will almost certainly go away.
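As a sketch of that suggestion (this code is not from the original thread and assumes the SVGPRegressionModel class defined in the reproduction above): either z-score the data before building the model, or initialize the lengthscale from the scale of the raw inputs. The lengthscale property setter is available in newer GPyTorch releases; on 0.2.x the underlying raw parameter may need to be set through initialize() instead.

# Option 1 (assumed fix): z-score inputs and targets so the default lengthscale is sensible.
train_x_n = (train_x - train_x.mean()) / train_x.std()
train_y_n = (train_y - train_y.mean()) / train_y.std()
model = SVGPRegressionModel(train_x_n[:M])

# Option 2 (assumed fix): keep the raw inputs but set the lengthscale from the data scale
# so that the prior kernel at the inducing points is better conditioned.
model_raw = SVGPRegressionModel(train_x[:M])
model_raw.covar_module.base_kernel.lengthscale = train_x.std().item()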
Thanks for the super detailed error! However, I’m not able to reproduce the bug from your example. Could you try upgrading PyTorch?
How are you choosing the inducing points? I would recommend z-scoring your data (normalizing it so both X and Y have mean 0 and standard deviation 1). Additionally, for SVGP models, I have found that choosing the inducing points with torch.randn is usually the best bet. Doing this in my experience tends to be more numerically stable.
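A minimal sketch of that suggestion (not from the original thread), assuming the z-scored data from the previous snippet and the SVGPRegressionModel class from the reproduction:

# With z-scored inputs, standard-normal draws cover roughly the same range as the data,
# so they make reasonable initial inducing locations (they are refined during training).
inducing_points = torch.randn(M)
model = SVGPRegressionModel(inducing_points)
likelihood = gpytorch.likelihoods.GaussianLikelihood()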