When batch size > 32, the backward pass of DeformConv fails.
Thanks for reporting the unexpected results and we appreciate it a lot.
Checklist
- I have searched related issues but cannot get the expected help.
- I have read the FAQ documentation but cannot get the expected help.
- The unexpected results still exist in the latest version.
Describe the Issue
When I use DeformConv for a classic classification task, the forward pass works fine when the batch size is > 32, but the backward pass raises an error like:
Exception has occurred: RuntimeError
shape '[1, 1, 640, 640, 3]' is invalid for input of size 3686400
File "/home/yuan/code/mmdetection/mmcv/ops/deform_conv.py", line 145, in backward
im2col_step=cur_im2col_step)
File "/home/yuan/code/DeepEMD/tmp3.py", line 238, in <module>
loss.backward()
However, if the batch size is <= 32, everything works fine. Does DeformConv not support batch sizes > 32?
Reproduction
- What command, code, or script did you run?
import torch
import torch.nn as nn
import torch.nn.functional as F
from mmcv.ops import DeformConv2dPack
def conv3x3(in_planes, out_planes, stride=1):
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=1, bias=False)
def deform_conv3x3(in_planes, out_planes, stride=1):
"""3x3 deform convolution with padding"""
return DeformConv2dPack(in_planes, out_planes, kernel_size=3, stride=stride,
padding=1, bias=False)
class BasicBlock(nn.Module):
"""
    Similar to the Bottleneck block in the official ResNet.
"""
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None, drop_rate=0.0, drop_block=False, block_size=1):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(inplanes, planes)
self.bn1 = nn.BatchNorm2d(planes)
self.relu = nn.LeakyReLU(0.1)
self.conv2 = conv3x3(planes, planes)
self.bn2 = nn.BatchNorm2d(planes)
self.conv3 = conv3x3(planes, planes)
self.bn3 = nn.BatchNorm2d(planes)
self.maxpool = nn.MaxPool2d(stride)
self.downsample = downsample
self.stride = stride
self.drop_rate = drop_rate
self.num_batches_tracked = 0
self.drop_block = drop_block
self.block_size = block_size
def forward(self, x):
self.num_batches_tracked += 1
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
out = self.maxpool(out)
if self.drop_rate > 0:
out = F.dropout(out, p=self.drop_rate,
training=self.training, inplace=True)
return out
class DeformBasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None, drop_rate=0.0, drop_block=False, block_size=1):
super(DeformBasicBlock, self).__init__()
self.conv1 = deform_conv3x3(inplanes, planes)
self.bn1 = nn.BatchNorm2d(planes)
self.relu = nn.LeakyReLU(0.1)
self.conv2 = deform_conv3x3(planes, planes)
self.bn2 = nn.BatchNorm2d(planes)
self.conv3 = deform_conv3x3(planes, planes)
self.bn3 = nn.BatchNorm2d(planes)
self.maxpool = nn.MaxPool2d(stride)
self.downsample = downsample
self.stride = stride
self.drop_rate = drop_rate
self.num_batches_tracked = 0
self.drop_block = drop_block
self.block_size = block_size
def forward(self, x):
self.num_batches_tracked += 1
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
out = self.maxpool(out)
if self.drop_rate > 0:
out = F.dropout(out, p=self.drop_rate,
training=self.training, inplace=True)
return out
class ResNet(nn.Module):
def __init__(self, args, block=BasicBlock, keep_prob=1.0, avg_pool=False, drop_rate=0.0, dropblock_size=5):
self.inplanes = 3
super(ResNet, self).__init__()
self.layer1 = self._make_layer(
block, 64, stride=2, drop_rate=drop_rate)
if args.use_deformconv:
            # All layers except layer1 (i.e. layer2, 3, 4) use DeformBasicBlock
block = DeformBasicBlock
self.layer2 = self._make_layer(
block, 160, stride=2, drop_rate=drop_rate)
self.layer3 = self._make_layer(block, 320, stride=2, drop_rate=drop_rate, drop_block=True,
block_size=dropblock_size)
self.layer4 = self._make_layer(block, 640, stride=2, drop_rate=drop_rate, drop_block=True,
block_size=dropblock_size)
if avg_pool:
self.avgpool = nn.AvgPool2d(5, stride=1)
self.keep_prob = keep_prob
self.keep_avg_pool = avg_pool
self.dropout = nn.Dropout(p=1 - self.keep_prob, inplace=False)
self.drop_rate = drop_rate
self.args = args
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(
m.weight, mode='fan_out', nonlinearity='leaky_relu')
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
def _make_layer(self, block, planes, stride=1, drop_rate=0.0, drop_block=False, block_size=1):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.inplanes, planes * block.expansion,
kernel_size=1, stride=1, bias=False),
nn.BatchNorm2d(planes * block.expansion),
)
layers = []
layers.append(block(self.inplanes, planes, stride,
downsample, drop_rate, drop_block, block_size))
self.inplanes = planes * block.expansion
return nn.Sequential(*layers)
def forward(self, x):
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x) # [bs, 640, 5, 5]
return x
class Args:
def __init__(self) -> None:
pass
if __name__ == '__main__':
args = Args()
args.use_deformconv = True
args.lr = 0.1
fc = nn.Linear(640,64).cuda()
model = ResNet(args).cuda()
optimizer = torch.optim.SGD([{'params': model.parameters(), 'lr': args.lr},
{'params': fc.parameters(), 'lr': args.lr}
], momentum=0.9, nesterov=True, weight_decay=0.0005)
for i in range(5):
input = torch.randn(64, 3, 84, 84).cuda()
label = torch.randint(64,(64,)).cuda()
out = model(input)
out = F.adaptive_avg_pool2d(out, 1).squeeze(-1).squeeze(-1)
out = fc(out)
loss = F.cross_entropy(out, label)
optimizer.zero_grad()
loss.backward()
optimizer.step()
- Did you make any modifications on the code? Did you understand what you have modified? No modifications.
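To help localize the problem, it may also be worth checking whether a single DeformConv2dPack layer reproduces the failure on its own, which would rule out interactions with the rest of the ResNet. Below is a minimal diagnostic sketch (assuming mmcv is built with CUDA ops and a GPU is available; the batch sizes tried are just illustrative):
import torch
from mmcv.ops import DeformConv2dPack

# One deformable conv layer, configured like deform_conv3x3 above.
layer = DeformConv2dPack(64, 64, kernel_size=3, stride=1, padding=1, bias=False).cuda()

for bs in (16, 32, 33, 64):
    x = torch.randn(bs, 64, 21, 21, device='cuda', requires_grad=True)
    try:
        layer(x).sum().backward()  # the reported error appears in backward
        print(f'batch size {bs}: ok')
    except RuntimeError as err:
        print(f'batch size {bs}: failed: {err}')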
Environment
- Please run
python -c "from mmcv.utils import collect_env; print(collect_env())"
to collect necessary environment information and paste it here.
{'sys.platform': 'linux', 'Python': '3.7.5 (default, Oct 25 2019, 15:51:11) [GCC 7.3.0]', 'CUDA available': True, 'GPU 0': 'GeForce RTX 2080 Ti', 'CUDA_HOME': '/usr/local/cuda-10.1', 'NVCC': 'Cuda compilation tools, release 10.1, V10.1.243', 'GCC': 'gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0', 'PyTorch': '1.7.1', 'PyTorch compiling details': 'PyTorch built with:\n - GCC 7.3\n - C++ Version: 201402\n - Intel(R) Math Kernel Library Version 2020.0.4 Product Build 20200917 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v1.6.0 (Git Hash 5ef631a030a6f73131c77892041042805a06064f)\n - OpenMP 201511 (a.k.a. OpenMP 4.5)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 10.1\n - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_37,code=compute_37\n - CuDNN 7.6.3\n - Magma 2.5.2\n - Build settings: BLAS=MKL, BUILD_TYPE=Release, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DUSE_VULKAN_WRAPPER -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, USE_CUDA=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, \n', 'TorchVision': '0.8.2', 'OpenCV': '4.5.1', 'MMCV': '1.3.5', 'MMCV Compiler': 'GCC 7.3', 'MMCV CUDA Compiler': '10.1'}
- You may add additional information that may be helpful for locating the problem, such as:
  - How you installed PyTorch [e.g., pip, conda, source]: conda install pytorch, as given by the official instructions.
  - Other environment variables that may be related (such as $PATH, $LD_LIBRARY_PATH, $PYTHONPATH, etc.)
Error traceback
If applicable, paste the error traceback here.
A placeholder for traceback.
Bug fix
If you have already identified the reason, you can provide the information here. If you are willing to create a PR to fix it, please also leave a comment here and that would be much appreciated!
The reason may be related to the batch size.
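Until a fix is available, one interim workaround that keeps the effective batch size at 64 while never pushing more than 32 samples through the deformable conv layers at once is gradient accumulation. Below is a minimal sketch adapted from the training loop above; the 32-sample chunk size is based purely on the observation that batch sizes <= 32 work, not on anything documented by mmcv:
micro_bs = 32  # largest batch size observed to work with DeformConv2dPack here
for i in range(5):
    input = torch.randn(64, 3, 84, 84).cuda()
    label = torch.randint(64, (64,)).cuda()
    optimizer.zero_grad()
    n_chunks = (input.size(0) + micro_bs - 1) // micro_bs
    for inp, lab in zip(input.chunk(n_chunks), label.chunk(n_chunks)):
        out = model(inp)
        out = F.adaptive_avg_pool2d(out, 1).squeeze(-1).squeeze(-1)
        out = fc(out)
        # Scale the loss so the accumulated gradient matches one 64-sample batch.
        loss = F.cross_entropy(out, lab) / n_chunks
        loss.backward()
    optimizer.step()
Note that BatchNorm statistics are updated per micro-batch, so this is not numerically identical to a true 64-sample forward pass.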
I found that forward() in class DeformConv2dFunction(Function) has a parameter that looks like im2col_step=32. When I set im2col_step = BATCH_SIZE manually, e.g. im2col_step=128, both the forward and backward passes worked properly and the gradients propagated with no error. I don't know whether this is the right way to use DeformConv2dFunction. Will it cause some unexpected computation if I set the im2col_step parameter manually?
Thanks for your bug report, we have fixed it in PR #1212, you can try it.
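For anyone who wants to try the im2col_step route before upgrading: whether DeformConv2d / DeformConv2dPack accepts im2col_step as a constructor argument depends on the mmcv version, so the sketch below does not assume it and simply falls back to the default if the argument is rejected. Setting it to the batch size follows the comment above; treat this as an experiment, not a documented API:
from mmcv.ops import DeformConv2dPack

BATCH_SIZE = 64

def make_deform_conv(in_planes, out_planes):
    # Hypothetical helper: pass im2col_step only if this mmcv build accepts it.
    try:
        return DeformConv2dPack(in_planes, out_planes, kernel_size=3, stride=1,
                                padding=1, bias=False, im2col_step=BATCH_SIZE)
    except TypeError:
        # Older mmcv: im2col_step is not exposed here; fall back to the default
        # (and keep the batch size at 32 or below, or accumulate gradients).
        return DeformConv2dPack(in_planes, out_planes, kernel_size=3, stride=1,
                                padding=1, bias=False)

layer = make_deform_conv(64, 64)
Once the fix mentioned above (PR #1212) is in your installed mmcv, this workaround should no longer be needed.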