Stuck on an issue?

Lightrun Answers was designed to reduce the constant googling that comes with debugging 3rd party libraries. It collects links to all the places you might be looking at while hunting down a tough bug.

And, if you’re still stuck at the end, we’re happy to hop on a call to see how we can help out.

RuntimeError: nms_impl: implementation for device cuda:0 not found.

See original GitHub issue

When I run /usr/src/app/demo/inference_demo.ipynb, an error reported:

/usr/src/app/mmdet/datasets/utils.py:65: UserWarning: "ImageToTensor" pipeline is replaced by "DefaultFormatBundle" for batch inference. It is recommended to manually replace it in the test data pipeline in your config file.
  warnings.warn(
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-4-4d78d4937daf> in <module>
      1 # test a single image
      2 img = 'demo.jpg'
----> 3 result = inference_detector(model, img)

/usr/src/app/mmdet/apis/inference.py in inference_detector(model, imgs)
    145     # forward the model
    146     with torch.no_grad():
--> 147         results = model(return_loss=False, rescale=True, **data)
    148 
    149     if not is_batch:

/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1013         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1014                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1015             return forward_call(*input, **kwargs)
   1016         # Do not call functions when jit is used
   1017         full_backward_hooks, non_full_backward_hooks = [], []

/opt/conda/lib/python3.8/site-packages/mmcv/runner/fp16_utils.py in new_func(*args, **kwargs)
     96                                 'method of nn.Module')
     97             if not (hasattr(args[0], 'fp16_enabled') and args[0].fp16_enabled):
---> 98                 return old_func(*args, **kwargs)
     99 
    100             # get the arg spec of the decorated method

/usr/src/app/mmdet/models/detectors/base.py in forward(self, img, img_metas, return_loss, **kwargs)
    172             return self.forward_train(img, img_metas, **kwargs)
    173         else:
--> 174             return self.forward_test(img, img_metas, **kwargs)
    175 
    176     def _parse_losses(self, losses):

/usr/src/app/mmdet/models/detectors/base.py in forward_test(self, imgs, img_metas, **kwargs)
    145             if 'proposals' in kwargs:
    146                 kwargs['proposals'] = kwargs['proposals'][0]
--> 147             return self.simple_test(imgs[0], img_metas[0], **kwargs)
    148         else:
    149             assert imgs[0].size(0) == 1, 'aug test does not support ' \

/usr/src/app/mmdet/models/detectors/two_stage.py in simple_test(self, img, img_metas, proposals, rescale)
    177         x = self.extract_feat(img)
    178         if proposals is None:
--> 179             proposal_list = self.rpn_head.simple_test_rpn(x, img_metas)
    180         else:
    181             proposal_list = proposals

/usr/src/app/mmdet/models/dense_heads/dense_test_mixins.py in simple_test_rpn(self, x, img_metas)
    128         """
    129         rpn_outs = self(x)
--> 130         proposal_list = self.get_bboxes(*rpn_outs, img_metas=img_metas)
    131         return proposal_list
    132 

/opt/conda/lib/python3.8/site-packages/mmcv/runner/fp16_utils.py in new_func(*args, **kwargs)
    184                                 'method of nn.Module')
    185             if not (hasattr(args[0], 'fp16_enabled') and args[0].fp16_enabled):
--> 186                 return old_func(*args, **kwargs)
    187             # get the arg spec of the decorated method
    188             args_info = getfullargspec(old_func)

/usr/src/app/mmdet/models/dense_heads/base_dense_head.py in get_bboxes(self, cls_scores, bbox_preds, score_factors, img_metas, cfg, rescale, with_nms, **kwargs)
     91                 score_factor_list = [None for _ in range(num_levels)]
     92 
---> 93             results = self._get_bboxes_single(cls_score_list, bbox_pred_list,
     94                                               score_factor_list, mlvl_priors,
     95                                               img_meta, cfg, rescale, with_nms,

/usr/src/app/mmdet/models/dense_heads/rpn_head.py in _get_bboxes_single(self, cls_score_list, bbox_pred_list, score_factor_list, mlvl_anchors, img_meta, cfg, rescale, with_nms, **kwargs)
    183                                 dtype=torch.long))
    184 
--> 185         return self._bbox_post_process(mlvl_scores, mlvl_bbox_preds,
    186                                        mlvl_valid_anchors, level_ids, cfg,
    187                                        img_shape)

/usr/src/app/mmdet/models/dense_heads/rpn_head.py in _bbox_post_process(self, mlvl_scores, mlvl_bboxes, mlvl_valid_anchors, level_ids, cfg, img_shape, **kwargs)
    230 
    231         if proposals.numel() > 0:
--> 232             dets, _ = batched_nms(proposals, scores, ids, cfg.nms)
    233         else:
    234             return proposals.new_zeros(0, 5)

/opt/conda/lib/python3.8/site-packages/mmcv/ops/nms.py in batched_nms(boxes, scores, idxs, nms_cfg, class_agnostic)
    305     # Won't split to multiple nms nodes when exporting to onnx
    306     if boxes_for_nms.shape[0] < split_thr or torch.onnx.is_in_onnx_export():
--> 307         dets, keep = nms_op(boxes_for_nms, scores, **nms_cfg_)
    308         boxes = boxes[keep]
    309         # -1 indexing works abnormal in TensorRT

/opt/conda/lib/python3.8/site-packages/mmcv/utils/misc.py in new_func(*args, **kwargs)
    338 
    339             # apply converted arguments to the decorated method
--> 340             output = old_func(*args, **kwargs)
    341             return output
    342 

/opt/conda/lib/python3.8/site-packages/mmcv/ops/nms.py in nms(boxes, scores, iou_threshold, offset, score_threshold, max_num)
    169         inds = ext_module.nms(*indata_list, **indata_dict)
    170     else:
--> 171         inds = NMSop.apply(boxes, scores, iou_threshold, offset,
    172                            score_threshold, max_num)
    173     dets = torch.cat((boxes[inds], scores[inds].reshape(-1, 1)), dim=1)

/opt/conda/lib/python3.8/site-packages/mmcv/ops/nms.py in forward(ctx, bboxes, scores, iou_threshold, offset, score_threshold, max_num)
     24                 valid_mask, as_tuple=False).squeeze(dim=1)
     25 
---> 26         inds = ext_module.nms(
     27             bboxes, scores, iou_threshold=float(iou_threshold), offset=offset)
     28 

RuntimeError: nms_impl: implementation for device cuda:0 not found.

My environment:

sys.platform: linux
Python: 3.8.8 (default, Feb 24 2021, 21:46:12) [GCC 7.3.0]
CUDA available: True
GPU 0: NVIDIA GeForce RTX 3090
CUDA_HOME: /usr/local/cuda
NVCC: Build cuda_11.3.r11.3/compiler.29745058_0
GCC: gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0
PyTorch: 1.9.0a0+2ecb2c7
PyTorch compiling details: PyTorch built with:
  - GCC 9.3
  - C++ Version: 201402
  - Intel(R) Math Kernel Library Version 2019.0.4 Product Build 20190411 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v1.8.0 (Git Hash N/A)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - NNPACK is enabled
  - CPU capability usage: AVX2
  - CUDA Runtime 11.3
  - NVCC architecture flags: -gencode;arch=compute_52,code=sm_52;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_86,code=compute_86
  - CuDNN 8.2
  - Magma 2.5.2
  - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.3, CUDNN_VERSION=8.2.0, CXX_COMPILER=/usr/bin/c++, CXX_FLAGS= -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, FORCE_FALLBACK_CUDA_MPI=1, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.9.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=ON, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, 

TorchVision: 0.9.0a0
OpenCV: 3.4.11
MMCV: 1.4.0
MMCV Compiler: GCC 9.3
MMCV CUDA Compiler: not available
MMDetection: 2.19.0+f3817df

Issue Analytics

State:
Created 2 years ago
Comments:8 (1 by maintainers)

Top GitHub Comments

5reactions

daydayup-bitcommented, Dec 15, 2021

Run python mmdet/utils/collect_env.py

2reactions

gaojundongcommented, Feb 21, 2022

hello, did you solve this problem? I also meet this problem, and I am sure I installed mmcv-full, but it is also not work. this is my envy: sys.platform: linux Python: 3.7.12 | packaged by conda-forge | (default, Oct 26 2021, 06:08:21) [GCC 9.4.0] CUDA available: True GPU 0: NVIDIA GeForce RTX 2080 Ti CUDA_HOME: None GCC: gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-44) PyTorch: 1.10.2 PyTorch compiling details: PyTorch built with:

GCC 7.3
C++ Version: 201402
Intel® oneAPI Math Kernel Library Version 2022.0-Product Build 20211112 for Intel® 64 architecture applications
Intel® MKL-DNN v2.2.3 (Git Hash 7336ca9f055cf1bfa13efb658fe15dc9b41f0740)
OpenMP 201511 (a.k.a. OpenMP 4.5)
LAPACK is enabled (usually provided by MKL)
NNPACK is enabled
CPU capability usage: AVX512
CUDA Runtime 10.2
NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_37,code=compute_37
CuDNN 7.6.5
Magma 2.5.2
Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=10.2, CUDNN_VERSION=7.6.5, CXX_COMPILER=/opt/rh/devtoolset-7/root/usr/bin/c++, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DEDGE_PROFILER_USE_KINETO -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.10.2, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON,