RuntimeError: nms_impl: implementation for device cuda:0 not found.
When I run /usr/src/app/demo/inference_demo.ipynb, the following error is reported:
/usr/src/app/mmdet/datasets/utils.py:65: UserWarning: "ImageToTensor" pipeline is replaced by "DefaultFormatBundle" for batch inference. It is recommended to manually replace it in the test data pipeline in your config file.
warnings.warn(
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-4-4d78d4937daf> in <module>
1 # test a single image
2 img = 'demo.jpg'
----> 3 result = inference_detector(model, img)
/usr/src/app/mmdet/apis/inference.py in inference_detector(model, imgs)
145 # forward the model
146 with torch.no_grad():
--> 147 results = model(return_loss=False, rescale=True, **data)
148
149 if not is_batch:
/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1013 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1014 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1015 return forward_call(*input, **kwargs)
1016 # Do not call functions when jit is used
1017 full_backward_hooks, non_full_backward_hooks = [], []
/opt/conda/lib/python3.8/site-packages/mmcv/runner/fp16_utils.py in new_func(*args, **kwargs)
96 'method of nn.Module')
97 if not (hasattr(args[0], 'fp16_enabled') and args[0].fp16_enabled):
---> 98 return old_func(*args, **kwargs)
99
100 # get the arg spec of the decorated method
/usr/src/app/mmdet/models/detectors/base.py in forward(self, img, img_metas, return_loss, **kwargs)
172 return self.forward_train(img, img_metas, **kwargs)
173 else:
--> 174 return self.forward_test(img, img_metas, **kwargs)
175
176 def _parse_losses(self, losses):
/usr/src/app/mmdet/models/detectors/base.py in forward_test(self, imgs, img_metas, **kwargs)
145 if 'proposals' in kwargs:
146 kwargs['proposals'] = kwargs['proposals'][0]
--> 147 return self.simple_test(imgs[0], img_metas[0], **kwargs)
148 else:
149 assert imgs[0].size(0) == 1, 'aug test does not support ' \
/usr/src/app/mmdet/models/detectors/two_stage.py in simple_test(self, img, img_metas, proposals, rescale)
177 x = self.extract_feat(img)
178 if proposals is None:
--> 179 proposal_list = self.rpn_head.simple_test_rpn(x, img_metas)
180 else:
181 proposal_list = proposals
/usr/src/app/mmdet/models/dense_heads/dense_test_mixins.py in simple_test_rpn(self, x, img_metas)
128 """
129 rpn_outs = self(x)
--> 130 proposal_list = self.get_bboxes(*rpn_outs, img_metas=img_metas)
131 return proposal_list
132
/opt/conda/lib/python3.8/site-packages/mmcv/runner/fp16_utils.py in new_func(*args, **kwargs)
184 'method of nn.Module')
185 if not (hasattr(args[0], 'fp16_enabled') and args[0].fp16_enabled):
--> 186 return old_func(*args, **kwargs)
187 # get the arg spec of the decorated method
188 args_info = getfullargspec(old_func)
/usr/src/app/mmdet/models/dense_heads/base_dense_head.py in get_bboxes(self, cls_scores, bbox_preds, score_factors, img_metas, cfg, rescale, with_nms, **kwargs)
91 score_factor_list = [None for _ in range(num_levels)]
92
---> 93 results = self._get_bboxes_single(cls_score_list, bbox_pred_list,
94 score_factor_list, mlvl_priors,
95 img_meta, cfg, rescale, with_nms,
/usr/src/app/mmdet/models/dense_heads/rpn_head.py in _get_bboxes_single(self, cls_score_list, bbox_pred_list, score_factor_list, mlvl_anchors, img_meta, cfg, rescale, with_nms, **kwargs)
183 dtype=torch.long))
184
--> 185 return self._bbox_post_process(mlvl_scores, mlvl_bbox_preds,
186 mlvl_valid_anchors, level_ids, cfg,
187 img_shape)
/usr/src/app/mmdet/models/dense_heads/rpn_head.py in _bbox_post_process(self, mlvl_scores, mlvl_bboxes, mlvl_valid_anchors, level_ids, cfg, img_shape, **kwargs)
230
231 if proposals.numel() > 0:
--> 232 dets, _ = batched_nms(proposals, scores, ids, cfg.nms)
233 else:
234 return proposals.new_zeros(0, 5)
/opt/conda/lib/python3.8/site-packages/mmcv/ops/nms.py in batched_nms(boxes, scores, idxs, nms_cfg, class_agnostic)
305 # Won't split to multiple nms nodes when exporting to onnx
306 if boxes_for_nms.shape[0] < split_thr or torch.onnx.is_in_onnx_export():
--> 307 dets, keep = nms_op(boxes_for_nms, scores, **nms_cfg_)
308 boxes = boxes[keep]
309 # -1 indexing works abnormal in TensorRT
/opt/conda/lib/python3.8/site-packages/mmcv/utils/misc.py in new_func(*args, **kwargs)
338
339 # apply converted arguments to the decorated method
--> 340 output = old_func(*args, **kwargs)
341 return output
342
/opt/conda/lib/python3.8/site-packages/mmcv/ops/nms.py in nms(boxes, scores, iou_threshold, offset, score_threshold, max_num)
169 inds = ext_module.nms(*indata_list, **indata_dict)
170 else:
--> 171 inds = NMSop.apply(boxes, scores, iou_threshold, offset,
172 score_threshold, max_num)
173 dets = torch.cat((boxes[inds], scores[inds].reshape(-1, 1)), dim=1)
/opt/conda/lib/python3.8/site-packages/mmcv/ops/nms.py in forward(ctx, bboxes, scores, iou_threshold, offset, score_threshold, max_num)
24 valid_mask, as_tuple=False).squeeze(dim=1)
25
---> 26 inds = ext_module.nms(
27 bboxes, scores, iou_threshold=float(iou_threshold), offset=offset)
28
RuntimeError: nms_impl: implementation for device cuda:0 not found.
My environment:
sys.platform: linux
Python: 3.8.8 (default, Feb 24 2021, 21:46:12) [GCC 7.3.0]
CUDA available: True
GPU 0: NVIDIA GeForce RTX 3090
CUDA_HOME: /usr/local/cuda
NVCC: Build cuda_11.3.r11.3/compiler.29745058_0
GCC: gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0
PyTorch: 1.9.0a0+2ecb2c7
PyTorch compiling details: PyTorch built with:
- GCC 9.3
- C++ Version: 201402
- Intel(R) Math Kernel Library Version 2019.0.4 Product Build 20190411 for Intel(R) 64 architecture applications
- Intel(R) MKL-DNN v1.8.0 (Git Hash N/A)
- OpenMP 201511 (a.k.a. OpenMP 4.5)
- NNPACK is enabled
- CPU capability usage: AVX2
- CUDA Runtime 11.3
- NVCC architecture flags: -gencode;arch=compute_52,code=sm_52;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_86,code=compute_86
- CuDNN 8.2
- Magma 2.5.2
- Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.3, CUDNN_VERSION=8.2.0, CXX_COMPILER=/usr/bin/c++, CXX_FLAGS= -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, FORCE_FALLBACK_CUDA_MPI=1, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.9.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=ON, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON,
TorchVision: 0.9.0a0
OpenCV: 3.4.11
MMCV: 1.4.0
MMCV Compiler: GCC 9.3
MMCV CUDA Compiler: not available
MMDetection: 2.19.0+f3817df
Run python mmdet/utils/collect_env.py
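For reference, the environment dump above already contains the decisive line: "MMCV CUDA Compiler: not available". That means the installed mmcv has no CUDA-compiled ops, which is exactly why ext_module.nms finds no implementation for cuda:0. A quick check, assuming mmcv-full 1.x is importable in the same environment (the two helpers below live in mmcv.ops):

# prints the compiler and the CUDA version the mmcv ops were built with;
# "not available" for the CUDA version reproduces this issue
python -c "from mmcv.ops import get_compiler_version, get_compiling_cuda_version; print(get_compiler_version()); print(get_compiling_cuda_version())"

If the import itself fails, the lite mmcv package (without compiled ops) is installed instead of mmcv-full.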
Hello, did you solve this problem? I hit the same error. I am sure I installed mmcv-full, but it still does not work. This is my environment:
sys.platform: linux
Python: 3.7.12 | packaged by conda-forge | (default, Oct 26 2021, 06:08:21) [GCC 9.4.0]
CUDA available: True
GPU 0: NVIDIA GeForce RTX 2080 Ti
CUDA_HOME: None
GCC: gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-44)
PyTorch: 1.10.2
PyTorch compiling details: PyTorch built with:
TorchVision: 0.11.3
OpenCV: 4.5.5
MMCV: 1.4.5
MMCV Compiler: GCC 7.3
MMCV CUDA Compiler: not available
MMDetection: 2.19.0+f08548b
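Both environments show the same symptom: "MMCV CUDA Compiler: not available", i.e. mmcv was installed without CUDA ops. A sketch of the usual fix, assuming the OpenMMLab wheel index is reachable; the cu113/torch1.10.0 parts of the URL are illustrative and must match your own torch.version.cuda and PyTorch version:

pip uninstall -y mmcv mmcv-full
pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu113/torch1.10.0/index.html

For a non-standard PyTorch build such as 1.9.0a0 in the NGC-style container above, a matching prebuilt wheel likely does not exist, so building mmcv-full from source inside that container is the safer route:

git clone https://github.com/open-mmlab/mmcv.git
cd mmcv
MMCV_WITH_OPS=1 pip install -e .   # FORCE_CUDA=1 may also be needed if no GPU is visible at build time

After reinstalling, the check above should print a CUDA version (e.g. 11.3) instead of "not available". In the second environment, CUDA_HOME: None would also have to be fixed (point it at an installed CUDA toolkit) before a source build can succeed.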