Using GMMConv in an object detection task, but the model doesn't converge as I expect
🐛 Describe the bug
I try to use GMMConv in an object detection task. The node features are the proposal features (one feature vector per object). The sparse adjacency matrix is calculated as follows: first, the proposal features are transformed into a latent space Z by two fully-connected layers with ReLU activations; then the adjacency matrix is calculated by matrix multiplication (cosine similarity); finally, for each proposal we only retain the top-32 most relevant nodes as its neighbourhood. A more detailed description can be found in the figure below:
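Roughly, the construction looks like this (a minimal sketch with illustrative names and dimensions, not my exact code):

import torch
import torch.nn as nn

# hypothetical sketch of the adjacency construction described above;
# proposal_feats is assumed to have shape [N, C] (N proposals of one image)
latent_dim, top_k = 256, 32
to_latent = nn.Sequential(
    nn.Linear(1024, latent_dim), nn.ReLU(),      # C = 1024 is an assumed input size
    nn.Linear(latent_dim, latent_dim), nn.ReLU())

def build_graph(proposal_feats):
    z = to_latent(proposal_feats)                    # [N, latent_dim], the latent space Z
    sim = torch.matmul(z, z.t())                     # [N, N] similarity e_ij = ZZ^T
    vals, idx = sim.topk(top_k, dim=-1)              # keep the top-32 neighbours per proposal
    row = torch.arange(sim.size(0), device=sim.device).repeat_interleave(top_k)
    edge_index = torch.stack([row, idx.reshape(-1)]) # [2, N * top_k]
    edge_sim = vals.reshape(-1, 1)                   # e_ij of each retained edge
    return edge_index, edge_sim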
To capture the pairwise spatial information between objects, we use a pairwise polar pseudo-coordinate function u(i, j).
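For concreteness, a minimal sketch of such a polar pseudo-coordinate (the box format and the centre definition are my assumptions here):

import math
import torch

def polar_pseudo_coords(boxes, edge_index):
    # boxes: [N, 4] as (x1, y1, x2, y2); edge_index: [2, E]
    # returns u(i, j) = (distance, angle) for every edge (i, j)
    centers = torch.stack([(boxes[:, 0] + boxes[:, 2]) / 2,
                           (boxes[:, 1] + boxes[:, 3]) / 2], dim=-1)  # [N, 2]
    src, dst = edge_index
    delta = centers[dst] - centers[src]                               # [E, 2]
    rho = delta.norm(dim=-1)                                          # radial distance
    theta = torch.atan2(delta[:, 1], delta[:, 0]) % (2 * math.pi)     # angle in [0, 2*pi)
    return torch.stack([rho, theta], dim=-1)                          # [E, 2]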
Then we use GMMConv to calculate the spatial weight (w_k(u(i, j)) in the figure below):
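For reference, the kernel weight GMMConv computes (with the diagonal covariance implied by the mu/sigma parameters in my code below) is:

w_k\big(u(i,j)\big) \;=\; \exp\!\Big(-\tfrac{1}{2}\sum_{d=1}^{D}\frac{\big(u(i,j)_d - \mu_{k,d}\big)^2}{\sigma_{k,d}^2}\Big), \qquad k = 1,\dots,K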
The output of the graph convolution above (which can be seen as enhanced features) is concatenated to the original proposal features to improve both classification and localization.
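i.e. roughly (names are illustrative):

fused_feats = torch.cat([proposal_feats, enhanced_feats], dim=-1)  # [N, C + C_graph]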
The overall process is as follows:
So I changed the GMMConv code as follows (mainly editing the message function):
from typing import Union, Tuple
from torch_geometric.typing import OptPairTensor, Adj, OptTensor, Size
import torch
from torch import Tensor
from torch.nn import Parameter
from torch_geometric.nn.conv import MessagePassing
from torch_geometric.nn.dense.linear import Linear
from torch_geometric.nn.inits import zeros, glorot
import math
# Adapted GMMConv: edge weight = spatial relation * feature similarity
class SGRNGMMConvSumNorm(MessagePassing):
r"""Adapted GMMConv whose edge weight combines the spatial Gaussian kernel weight with the learned feature similarity.
Args:
in_channels (int or tuple): Size of each input sample, or :obj:`-1` to
derive the size from the first input(s) to the forward method.
A tuple corresponds to the sizes of source and target
dimensionalities.
out_channels (int): Size of each output sample.
dim (int): Pseudo-coordinate dimensionality.
kernel_size (int): Number of kernels :math:`K`.
separate_gaussians (bool, optional): If set to :obj:`True`, will
learn separate GMMs for every pair of input and output channel,
inspired by traditional CNNs. (default: :obj:`False`)
aggr (string, optional): The aggregation operator to use
(:obj:`"add"`, :obj:`"mean"`, :obj:`"max"`).
(default: :obj:`"mean"`)
root_weight (bool, optional): If set to :obj:`False`, the layer will
not add transformed root node features to the output.
(default: :obj:`True`)
bias (bool, optional): If set to :obj:`False`, the layer will not learn
an additive bias. (default: :obj:`True`)
**kwargs (optional): Additional arguments of
:class:`torch_geometric.nn.conv.MessagePassing`.
"""
def __init__(self, in_channels: Union[int, Tuple[int, int]],
out_channels: int, dim: int, kernel_size: int,
separate_gaussians: bool = False, aggr: str = 'mean',
root_weight: bool = True, bias: bool = True, **kwargs):
super(SGRNGMMConvSumNorm, self).__init__(aggr=aggr, flow="target_to_source", **kwargs)
self.in_channels = in_channels
self.out_channels = out_channels
self.dim = dim
self.kernel_size = kernel_size
self.separate_gaussians = separate_gaussians
self.root_weight = root_weight
if isinstance(in_channels, int):
in_channels = (in_channels, in_channels)
self.rel_in_channels = in_channels[0]
if in_channels[0] > 0:
# [2049, 512*25]
self.g = Parameter(
Tensor(in_channels[0], out_channels * kernel_size))
if not self.separate_gaussians:
self.mu = Parameter(Tensor(kernel_size, dim))
self.sigma = Parameter(Tensor(kernel_size, dim))
if self.separate_gaussians:
self.mu = Parameter(
Tensor(in_channels[0], out_channels, kernel_size, dim))
self.sigma = Parameter(
Tensor(in_channels[0], out_channels, kernel_size, dim))
else:
self.g = torch.nn.parameter.UninitializedParameter()
self.mu = torch.nn.parameter.UninitializedParameter()
self.sigma = torch.nn.parameter.UninitializedParameter()
self._hook = self.register_forward_pre_hook(
self.initialize_parameters)
# TODO: this could be reported as a bug upstream: if self.root_weight=False, self.root is left undefined in the original GMMConv
self.root = None
if root_weight:
self.root = Linear(in_channels[1], out_channels * kernel_size, bias=False,
weight_initializer='glorot')
if bias:
self.bias = Parameter(torch.Tensor(out_channels * kernel_size))
else:
self.register_parameter('bias', None)
self.edge_weight_norm = None
self.reset_parameters()
def reset_parameters(self):
if not isinstance(self.g, torch.nn.UninitializedParameter):
glorot(self.g)
glorot(self.mu)
glorot(self.sigma)
if self.root_weight:
self.root.reset_parameters()
zeros(self.bias)
def forward(self, x: Union[Tensor, OptPairTensor], edge_index: Adj,
edge_attr: OptTensor = None, size: Size = None, batch_size=None, proposal_num=None, debug=False):
""""""
if isinstance(x, Tensor):
x: OptPairTensor = (x, x)
# propagate_type: (x: OptPairTensor, edge_attr: OptTensor)
if not self.separate_gaussians:
# The linear transform here serves two purposes: it changes the feature dimension, and it lets the K kernels be processed in parallel (K convolutions at once)
out: OptPairTensor = (torch.matmul(x[0], self.g), x[1])
out = self.propagate(edge_index, x=out, edge_attr=edge_attr,
size=size, batch_size=batch_size, proposal_num=proposal_num, debug=debug)
if debug:
print("out: ")
print(out)
print("out.mean: ", out[out.nonzero(as_tuple=True)].mean())
print("out.max: ", out.max())
print("out.std: ", out.std())
print("mu: ")
print(self.mu)
print("sigma: ")
print(self.sigma)
else:
out = self.propagate(edge_index, x=x, edge_attr=edge_attr,
size=size)
x_r = x[1]
if x_r is not None and self.root is not None:
out += self.root(x_r)
if self.bias is not None:
out += self.bias
return out
# message: j flows to i
def message(self, x_j: Tensor, edge_attr: Tensor, batch_size=None, proposal_num=None, debug=False):
# normalize the pseudo-coordinates of each pair of objects, u(i, j)
# shape: N, 2
edge_spatial = edge_attr[:, :2]
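# assumption: 1333 is the maximum image side length (to normalize the distance) and the angle lies in [0, 2*pi)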
edge_spatial = edge_spatial/torch.tensor([1333, 2*math.pi]).unsqueeze(0).cuda()
# e_ij, calculated by ZZ^T
# shape: N, 1
edge_feature_sim = edge_attr[:, 2].unsqueeze(-1)
EPS = 1e-15
# F: 1025, M: 512, E: 16384, D: 2, K: 16
F, M = self.rel_in_channels, self.out_channels
(E, D), K = edge_spatial.size(), self.kernel_size
if not self.separate_gaussians:
gaussian = -0.5 * (edge_spatial.view(E, 1, D) -
self.mu.view(1, K, D)).pow(2)
gaussian = gaussian / (EPS + self.sigma.view(1, K, D).pow(2))
gaussian = torch.exp(gaussian.sum(dim=-1)) # [E, K]
# shape: 16384, 16, 512/16=32
edge_weight = gaussian * edge_feature_sim
assert batch_size is not None and proposal_num is not None
edge_weight_norm = edge_weight.view(batch_size, -1, proposal_num, K)
if debug:
print("edge_weight before norm: ")
print(edge_weight_norm[0, :, 76, :].t())
edge_feature_sim_norm = edge_feature_sim.squeeze(-1).view(batch_size, -1, proposal_num)
print("edge_feature_sim before norm: ")
print(edge_feature_sim_norm[0, :, 76])
gaussian_norm = gaussian.view(batch_size, -1, proposal_num, K)
print("gaussian before norm: ")
print(gaussian_norm[0, :, 76, :].t())
# normalize the weight
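# (sum over dim=1, i.e. the retained neighbours of each proposal, so that for every target node and kernel the neighbour weights sum to 1)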
edge_weight_norm = edge_weight_norm/(edge_weight_norm.sum(dim=1).unsqueeze(-3) + EPS)
if debug:
print("edge_weight after norm: ")
print(edge_weight_norm[0, :, 76, :].t())
edge_weight_norm = edge_weight_norm.view(E, K)
self.edge_weight_norm = edge_weight_norm.view(batch_size, -1, K).sum(dim=2)
enhanced_feature = x_j.view(E, K, M) * edge_weight_norm.view(E, K, 1)
# concatenate outputs of each gaussian kernel
enhanced_feature = enhanced_feature.view(E, -1)
return enhanced_feature
else:
gaussian = -0.5 * (edge_spatial.view(E, 1, 1, 1, D) -
self.mu.view(1, F, M, K, D)).pow(2)
gaussian = gaussian / (EPS + self.sigma.view(1, F, M, K, D).pow(2))
gaussian = torch.exp(gaussian.sum(dim=-1)) # [E, F, M, K]
gaussian = gaussian * self.g.view(1, F, M, K)
gaussian = gaussian.sum(dim=-1) # [E, F, M]
return (x_j.view(E, F, 1) * gaussian).sum(dim=-2) # [E, M]
@torch.no_grad()
def initialize_parameters(self, module, input):
if isinstance(self.g, torch.nn.parameter.UninitializedParameter):
x = input[0][0] if isinstance(input, tuple) else input[0]
in_channels = x.size(-1)
out_channels, kernel_size = self.out_channels, self.kernel_size
self.g.materialize((in_channels, out_channels * kernel_size))
if not self.separate_gaussians:
self.mu.materialize((kernel_size, self.dim))
self.sigma.materialize((kernel_size, self.dim))
else:
self.mu.materialize(
(in_channels, out_channels, kernel_size, self.dim))
self.sigma.materialize(
(in_channels, out_channels, kernel_size, self.dim))
glorot(self.g)
glorot(self.mu)
glorot(self.sigma)
module._hook.remove()
delattr(module, '_hook')
def __repr__(self) -> str:
return '{}({}, {}, dim={})'.format(self.__class__.__name__,
self.in_channels, self.out_channels,
self.dim)
I set the parameters of GMMConv as follows:
# I use two gmm_conv layers
self.gmm_conv_1 = SGRNGMMConvSumNorm(self.bbox_head[0].fc_cls.weight.shape[1]+1, int(512/16), dim=2, kernel_size=16, aggr='add', root_weight=False)
self.gmm_conv_2 = SGRNGMMConvSumNorm(512, int(256/16), dim=2, kernel_size=16, aggr='add', root_weight=False)
enhanced_feats = self.gmm_conv_1(x=batch.x, edge_index=batch.edge_index, edge_attr=batch.edge_attr,
batch_size=batch_size, proposal_num=proposal_num, debug=debug)
enhanced_feats = self.gmm_conv_2(x=enhanced_feats, edge_index=batch.edge_index, edge_attr=batch.edge_attr,
batch_size=batch_size, proposal_num=proposal_num, debug=debug)
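(Shape note: because my modified message function concatenates the K kernel outputs instead of summing over the kernels, each layer effectively returns out_channels * kernel_size features, i.e. 32 * 16 = 512 from gmm_conv_1, which is why gmm_conv_2 is built with in_channels=512 and returns 16 * 16 = 256.)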
Actually I get some improvements with GMMConv, but the learned e_ij (edge_feature_sim in the code) all become 1 by the time the model has converged. I'm really confused and have been stuck for almost a month because of this problem. Please take a moment to help me take a look. A thousand thanks! Some key outputs are below:
# learned adjacency matrix before top-32 selection; almost all entries equal 1
# shape 512*512
eps before norm:
tensor([[1.0000, 0.9998, 1.0000, ..., 0.9512, 1.0000, 0.9860],
[0.9998, 1.0000, 0.9999, ..., 0.9511, 0.9998, 0.9859],
[1.0000, 0.9999, 1.0000, ..., 0.9512, 1.0000, 0.9860],
...,
[0.9512, 0.9511, 0.9512, ..., 1.0000, 0.9512, 0.9785],
[1.0000, 0.9998, 1.0000, ..., 0.9512, 1.0000, 0.9860],
[0.9860, 0.9859, 0.9860, ..., 0.9785, 0.9860, 1.0000]],
device='cuda:0')
eps.mean(): tensor(0.9917, device='cuda:0')
eps.std(): tensor(0.0114, device='cuda:0')
# learned Gaussian weights of the 16 kernels
# shape: 16*32
gaussian before norm:
tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00],
[9.8621e-01, 9.5153e-01, 9.1268e-01, 7.7990e-01, 7.9041e-01, 8.9888e-01,
4.4540e-01, 4.4663e-01, 4.2892e-01, 4.4877e-01, 4.5897e-01, 4.4313e-01,
4.5612e-01, 4.2930e-01, 4.2967e-01, 4.3281e-01, 4.4936e-01, 4.4166e-01,
4.5978e-01, 4.3069e-01, 7.8683e-01, 4.3227e-01, 4.3058e-01, 4.7086e-01,
9.2124e-01, 7.1439e-01, 4.5731e-01, 4.6405e-01, 9.1296e-01, 8.9270e-01,
8.9178e-01, 9.6949e-01],
[9.1900e-01, 8.6933e-01, 8.5632e-01, 7.1747e-01, 8.0397e-01, 8.5630e-01,
5.2810e-01, 5.2875e-01, 4.9564e-01, 5.1966e-01, 5.0314e-01, 5.2646e-01,
5.2128e-01, 4.9749e-01, 4.9732e-01, 4.9864e-01, 5.1737e-01, 5.1553e-01,
5.0304e-01, 4.9732e-01, 8.0230e-01, 4.9983e-01, 4.9810e-01, 6.2467e-01,
8.5850e-01, 7.2425e-01, 5.1605e-01, 5.0237e-01, 7.4426e-01, 7.0415e-01,
7.0243e-01, 8.7274e-01],
[9.5580e-01, 9.2650e-01, 9.0826e-01, 8.3535e-01, 8.5668e-01, 9.0339e-01,
6.7469e-01, 6.7534e-01, 6.6091e-01, 6.7432e-01, 6.7518e-01, 6.7339e-01,
6.7772e-01, 6.6146e-01, 6.6158e-01, 6.6320e-01, 6.7409e-01, 6.7048e-01,
6.7550e-01, 6.6202e-01, 8.5517e-01, 6.6322e-01, 6.6213e-01, 7.0708e-01,
9.1186e-01, 8.1439e-01, 6.7713e-01, 6.7714e-01, 8.9891e-01, 8.8553e-01,
8.8482e-01, 9.3965e-01],
[9.1225e-01, 8.7639e-01, 8.5348e-01, 7.8824e-01, 8.0097e-01, 8.4727e-01,
6.4957e-01, 6.5012e-01, 6.3947e-01, 6.4983e-01, 6.5184e-01, 6.4853e-01,
6.5278e-01, 6.3983e-01, 6.3995e-01, 6.4131e-01, 6.4979e-01, 6.4666e-01,
6.5214e-01, 6.4035e-01, 7.9957e-01, 6.4124e-01, 6.4039e-01, 6.7276e-01,
8.5784e-01, 7.6544e-01, 6.5263e-01, 6.5369e-01, 8.7001e-01, 8.6053e-01,
8.5986e-01, 8.9853e-01],
[3.1417e-01, 5.5450e-01, 7.3099e-01, 9.7314e-01, 9.2127e-01, 7.7294e-01,
5.5127e-01, 5.5428e-01, 5.3514e-01, 5.7033e-01, 6.1781e-01, 5.4621e-01,
5.9072e-01, 5.3444e-01, 5.3572e-01, 5.4384e-01, 5.7451e-01, 5.5327e-01,
6.2032e-01, 5.3878e-01, 9.2216e-01, 5.4103e-01, 5.3766e-01, 5.0787e-01,
7.0027e-01, 9.6038e-01, 5.9973e-01, 6.3367e-01, 3.5694e-01, 3.5752e-01,
3.6003e-01, 3.5161e-01],
[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00],
[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00],
[8.3350e-01, 7.7149e-01, 7.7079e-01, 6.0476e-01, 7.4303e-01, 7.7823e-01,
4.5586e-01, 4.5628e-01, 4.1425e-01, 4.4139e-01, 4.1253e-01, 4.5440e-01,
4.4055e-01, 4.1684e-01, 4.1641e-01, 4.1699e-01, 4.3767e-01, 4.3836e-01,
4.1207e-01, 4.1597e-01, 7.4197e-01, 4.1900e-01, 4.1718e-01, 6.0412e-01,
7.6995e-01, 6.4397e-01, 4.3220e-01, 4.0936e-01, 5.8806e-01, 5.3854e-01,
5.3653e-01, 7.6337e-01],
[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00],
[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00],
[2.1019e-44, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00],
[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00],
[9.9569e-01, 9.9820e-01, 9.8614e-01, 9.3473e-01, 9.3231e-01, 9.7991e-01,
7.3100e-01, 7.3193e-01, 7.1724e-01, 7.3313e-01, 7.3916e-01, 7.2928e-01,
7.3848e-01, 7.1766e-01, 7.1791e-01, 7.2029e-01, 7.3344e-01, 7.2777e-01,
7.3971e-01, 7.1866e-01, 9.3055e-01, 7.1999e-01, 7.1863e-01, 7.4314e-01,
9.8936e-01, 9.0037e-01, 7.3901e-01, 7.4257e-01, 9.7202e-01, 9.5997e-01,
9.5957e-01, 9.9661e-01],
[1.1665e-24, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 4.0501e-27, 1.5441e-27,
7.0279e-28, 7.8993e-28],
[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00]], device='cuda:0')
# learned mu and sigma
mu:
Parameter containing:
tensor([[-0.1083, 0.0030],
[-0.2634, -0.0258],
[-0.4494, -0.1646],
[-0.6424, -0.2236],
[-0.8206, -0.6540],
[ 0.4184, 0.5671],
[-0.0959, -0.0965],
[-0.1730, 0.1889],
[-0.5670, -0.2202],
[-0.2420, -0.1420],
[-0.0190, -0.2056],
[-0.0918, 0.1071],
[-0.1862, -0.1815],
[ 0.0568, 0.0996],
[ 0.0703, -0.2952],
[-0.2051, -0.1300]], device='cuda:0', requires_grad=True)
sigma:
Parameter containing:
tensor([[ 2.9739e-03, -1.2365e-01],
[-1.6083e+00, 8.3923e-01],
[-1.1475e+00, -1.3210e+00],
[ 2.4356e+00, 1.5482e+00],
[ 3.0892e+00, -1.9447e+00],
[ 1.0421e+00, -3.8633e-01],
[-1.3531e-05, 3.0792e-01],
[-4.2538e-03, 1.6721e-01],
[-9.6744e-01, 1.5292e+00],
[-1.1451e-02, -5.5723e-01],
[ 1.1270e-03, -2.0825e-01],
[ 6.4788e-03, -1.7011e-01],
[-2.1620e-01, 4.7752e-04],
[ 1.6017e+00, -1.1597e+00],
[-2.5537e-01, 2.8126e-02],
[-1.0400e-01, -6.4232e-03]], device='cuda:0', requires_grad=True)
Environment
- PyG version:
- PyTorch version:
- OS:
- Python version:
- CUDA/cuDNN version:
- How you installed PyTorch and PyG (conda, pip, source):
- Any other relevant information (e.g., version of torch-scatter):
Top GitHub Comments
Maybe. Actually I'm reproducing a paper that uses MoNet. Just today I got the source code, so I may be able to check whether the problem lies with PyG or with me. I'll come back to update this issue later. Thanks!
It's probably a problem with the paper itself, not PyG. Thanks!