Stuck on an issue?

Lightrun Answers was designed to reduce the constant googling that comes with debugging third-party libraries. It collects links to all the places you might look while hunting down a tough bug.

And, if you’re still stuck at the end, we’re happy to hop on a call to see how we can help out.

cuDNN error: CUDNN_STATUS_EXECUTION_FAILED

See original GitHub issue

I get the following error every time I run a forward pass with Apex (amp) enabled:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-20-c83117740453> in <module>
      1 #%%pixie_debugger
      2 while True:
----> 3     train(verbose=False, optimize_memory=True, optimize_feature=False)
      4     with open('temp/memory.pkl', 'wb') as f:
      5         pickle.dump(net.memory_model.memory, f)

<ipython-input-19-7e6a3b51254d> in train(verbose, optimize_memory, optimize_feature)
     11         optimizer_both.zero_grad()
     12 
---> 13         similarities = net(batch_data)
     14 
     15         values, indices = similarities.max(1)

~/miniconda3/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    487             result = self._slow_forward(*input, **kwargs)
    488         else:
--> 489             result = self.forward(*input, **kwargs)
    490         for hook in self._forward_hooks.values():
    491             hook_result = hook(self, input, result)

<ipython-input-13-fa199304f042> in forward(self, images)
     23         queries = self.feature_model(images)
     24         #print(queries)
---> 25         similarities = self.memory_model(queries)
     26 #        print(sorted(similarities, reverse=True))
     27         return similarities

~/miniconda3/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    487             result = self._slow_forward(*input, **kwargs)
    488         else:
--> 489             result = self.forward(*input, **kwargs)
    490         for hook in self._forward_hooks.values():
    491             hook_result = hook(self, input, result)

<ipython-input-12-ea8dad5c6180> in forward(self, queries)
     44 
     45     def forward(self, queries):
---> 46         sim_vector = self.get_similarity_vectors(queries)
     47         return sim_vector

<ipython-input-12-ea8dad5c6180> in get_similarity_vectors(self, queries)
     39 
     40     def get_similarity_vectors(self, queries):
---> 41         similarity = self.apply_combined(queries, self.memory, self.head_model)
     42 #        print(similarity)
     43         return nn.functional.log_softmax(similarity * 10000) # multiply because of rounding errors

<ipython-input-12-ea8dad5c6180> in apply_combined(self, x, y, func)
     34         assert x.shape == y.shape
     35 
---> 36         res = func(x, y)
     37         res = res.view(n, m)
     38         return res

~/miniconda3/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    487             result = self._slow_forward(*input, **kwargs)
    488         else:
--> 489             result = self.forward(*input, **kwargs)
    490         for hook in self._forward_hooks.values():
    491             hook_result = hook(self, input, result)

~/Projects/Personal/Kaggle/humpwin/pancho111203/siamese/model.py in forward(self, x, y)
    131         out = nn.functional.relu(out, inplace=True)
    132         out = out.permute((0, 3, 1, 2))
--> 133         out = self.conv2(out)
    134         out = out.view(batch_size, n_features)
    135 

~/miniconda3/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    487             result = self._slow_forward(*input, **kwargs)
    488         else:
--> 489             result = self.forward(*input, **kwargs)
    490         for hook in self._forward_hooks.values():
    491             hook_result = hook(self, input, result)

~/miniconda3/lib/python3.6/site-packages/torch/nn/modules/conv.py in forward(self, input)
    318     def forward(self, input):
    319         return F.conv2d(input, self.weight, self.bias, self.stride,
--> 320                         self.padding, self.dilation, self.groups)
    321 
    322 

~/miniconda3/lib/python3.6/site-packages/apex-0.1-py3.6-linux-x86_64.egg/apex/amp/wrap.py in wrapper(*args, **kwargs)
     24                                      args,
     25                                      kwargs)
---> 26         return orig_fn(*new_args, **kwargs)
     27     return wrapper
     28 

RuntimeError: cuDNN error: CUDNN_STATUS_EXECUTION_FAILED

cuDNN logs: https://gist.github.com/pancho111203/3e91f0b46ab0be3b04f1edc9c1405684