Training error: Target size must be the same as input size
See original GitHub issue.
To train Cascade Mask R-CNN, the config file is the following:
model = dict(
type='CascadeRCNN',
num_stages=3,
pretrained='open-mmlab://resnext101_64x4d',
backbone=dict(
type='ResNeXt',
depth=101,
groups=64,
base_width=4,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
style='pytorch'),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_scales=[8],
anchor_ratios=[0.5, 1.0, 2.0],
anchor_strides=[4, 8, 16, 32, 64],
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0],
use_sigmoid_cls=True),
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=[
dict(
type='SharedFCBBoxHead',
num_fcs=2,
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=2,
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2],
reg_class_agnostic=True),
dict(
type='SharedFCBBoxHead',
num_fcs=2,
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=2,
target_means=[0., 0., 0., 0.],
target_stds=[0.05, 0.05, 0.1, 0.1],
reg_class_agnostic=True),
dict(
type='SharedFCBBoxHead',
num_fcs=2,
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=2,
target_means=[0., 0., 0., 0.],
target_stds=[0.033, 0.033, 0.067, 0.067],
reg_class_agnostic=True)
],
mask_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
mask_head=dict(
type='FCNMaskHead',
num_convs=4,
in_channels=256,
conv_out_channels=256,
num_classes=2))
# model training and testing settings
train_cfg = dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=0,
pos_weight=-1,
smoothl1_beta=1 / 9.0,
debug=False),
rcnn=[
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
mask_size=28,
pos_weight=-1,
debug=False),
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.6,
neg_iou_thr=0.6,
min_pos_iou=0.6,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
mask_size=28,
pos_weight=-1,
debug=False),
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.7,
min_pos_iou=0.7,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
mask_size=28,
pos_weight=-1,
debug=False)
],
stage_loss_weights=[1, 0.5, 0.25])
test_cfg = dict(
rpn=dict(
nms_across_levels=False,
nms_pre=2000,
nms_post=2000,
max_num=2000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
score_thr=0.05,
nms=dict(type='nms', iou_thr=0.5),
max_per_img=100,
mask_thr_binary=0.5),
keep_all_stages=False)
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
imgs_per_gpu=2,
workers_per_gpu=1,
train=dict(
type=dataset_type,
ann_file=data_root + 'annotations/train.json',
img_prefix=data_root + 'train/',
img_scale=(512, 512),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0.5,
with_mask=True,
with_crowd=True,
with_label=True),
val=dict(
type=dataset_type,
ann_file=data_root + 'annotations/val.json',
img_prefix=data_root + 'val/',
img_scale=(512, 512),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=True,
with_crowd=True,
with_label=True),
test=dict(
type=dataset_type,
ann_file=data_root + 'annotations/test.json',
img_prefix=data_root + 'test/',
img_scale=(1024, 1024),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=True,
with_label=False,
test_mode=True))
# optimizer
optimizer = dict(type='SGD', lr=0.005, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=300,
warmup_ratio=1.0 / 3,
step=[8, 10])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=20,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/cm_rcnn_10164'
load_from = None
resume_from = None
workflow = [('train', 1)]
Then I got the following error about mismatched tensor sizes:
2019-04-15 17:56:39,451 - INFO - Distributed training: True
2019-04-15 17:56:41,176 - INFO - load model from: open-mmlab://resnext101_64x4d
2019-04-15 17:56:41,399 - WARNING - missing keys in source state_dict: layer3.4.bn3.num_batches_tracked, layer3.16.bn2.num_batches_tracked, layer3.6.bn2.num_batches_tracked, layer1.0.bn3.num_batches_tracked, layer1.0.bn2.num_batches_tracked, layer2.3.bn1.num_batches_tracked, layer3.1.bn3.num_batches_tracked, layer4.0.bn2.num_batches_tracked, layer2.1.bn3.num_batches_tracked, layer3.6.bn1.num_batches_tracked, layer3.21.bn1.num_batches_tracked, layer3.18.bn3.num_batches_tracked, layer3.5.bn3.num_batches_tracked, layer3.8.bn1.num_batches_tracked, layer4.2.bn1.num_batches_tracked, layer3.1.bn2.num_batches_tracked, layer2.2.bn1.num_batches_tracked, layer2.1.bn1.num_batches_tracked, layer3.16.bn1.num_batches_tracked, layer3.0.downsample.1.num_batches_tracked, layer2.0.bn3.num_batches_tracked, layer4.0.bn1.num_batches_tracked, layer3.13.bn3.num_batches_tracked, layer3.7.bn2.num_batches_tracked, layer3.0.bn2.num_batches_tracked, layer3.20.bn2.num_batches_tracked, layer3.9.bn2.num_batches_tracked, layer3.18.bn2.num_batches_tracked, layer3.6.bn3.num_batches_tracked, layer3.17.bn3.num_batches_tracked, layer3.0.bn3.num_batches_tracked, layer4.2.bn3.num_batches_tracked, layer3.9.bn1.num_batches_tracked, layer3.12.bn3.num_batches_tracked, layer4.2.bn2.num_batches_tracked, layer3.20.bn3.num_batches_tracked, bn1.num_batches_tracked, layer3.5.bn2.num_batches_tracked, layer3.22.bn2.num_batches_tracked, layer1.1.bn3.num_batches_tracked, layer3.14.bn3.num_batches_tracked, layer3.1.bn1.num_batches_tracked, layer3.12.bn2.num_batches_tracked, layer3.2.bn1.num_batches_tracked, layer3.14.bn2.num_batches_tracked, layer1.2.bn1.num_batches_tracked, layer4.1.bn2.num_batches_tracked, layer1.2.bn2.num_batches_tracked, layer1.0.bn1.num_batches_tracked, layer3.16.bn3.num_batches_tracked, layer1.2.bn3.num_batches_tracked, layer3.4.bn1.num_batches_tracked, layer2.2.bn3.num_batches_tracked, layer2.0.downsample.1.num_batches_tracked, layer3.17.bn1.num_batches_tracked, 
layer3.19.bn3.num_batches_tracked, layer3.8.bn3.num_batches_tracked, layer3.14.bn1.num_batches_tracked, layer3.10.bn1.num_batches_tracked, layer2.1.bn2.num_batches_tracked, layer2.0.bn2.num_batches_tracked, layer3.3.bn1.num_batches_tracked, layer3.11.bn3.num_batches_tracked, layer3.15.bn3.num_batches_tracked, layer3.13.bn1.num_batches_tracked, layer3.15.bn2.num_batches_tracked, layer3.21.bn2.num_batches_tracked, layer4.0.downsample.1.num_batches_tracked, layer3.20.bn1.num_batches_tracked, layer3.11.bn2.num_batches_tracked, layer2.3.bn3.num_batches_tracked, layer3.8.bn2.num_batches_tracked, layer3.7.bn3.num_batches_tracked, layer3.2.bn2.num_batches_tracked, layer3.11.bn1.num_batches_tracked, layer3.0.bn1.num_batches_tracked, layer3.22.bn3.num_batches_tracked, layer3.5.bn1.num_batches_tracked, layer3.22.bn1.num_batches_tracked, layer1.1.bn1.num_batches_tracked, layer4.1.bn3.num_batches_tracked, layer3.3.bn3.num_batches_tracked, layer3.17.bn2.num_batches_tracked, layer1.1.bn2.num_batches_tracked, layer3.12.bn1.num_batches_tracked, layer3.10.bn3.num_batches_tracked, layer3.3.bn2.num_batches_tracked, layer3.19.bn2.num_batches_tracked, layer3.2.bn3.num_batches_tracked, layer3.13.bn2.num_batches_tracked, layer1.0.downsample.1.num_batches_tracked, layer4.0.bn3.num_batches_tracked, layer3.7.bn1.num_batches_tracked, layer3.18.bn1.num_batches_tracked, layer3.19.bn1.num_batches_tracked, layer2.2.bn2.num_batches_tracked, layer2.0.bn1.num_batches_tracked, layer3.4.bn2.num_batches_tracked, layer2.3.bn2.num_batches_tracked, layer3.10.bn2.num_batches_tracked, layer3.21.bn3.num_batches_tracked, layer3.9.bn3.num_batches_tracked, layer4.1.bn1.num_batches_tracked, layer3.15.bn1.num_batches_tracked
loading annotations into memory...
loading annotations into memory...
Done (t=0.03s)
creating index...
Done (t=0.03s)
creating index...
index created!
index created!
loading annotations into memory...
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Done (t=0.00s)
creating index...
index created!
2019-04-15 17:56:44,931 - INFO - Start running, host: forestriveral@HanazawaKana, work_dir: /home/forestriveral/DeepLearning/object-detection/mmdetection/work_dirs/cm_rcnn_10164
2019-04-15 17:56:44,932 - INFO - workflow: [('train', 1)], max: 12 epochs
THCudaCheck FAIL file=/pytorch/aten/src/THC/THCGeneral.cpp line=405 error=11 : invalid argument
Traceback (most recent call last):
File "./tools/train.py", line 90, in <module>
main()
File "./tools/train.py", line 86, in main
logger=logger)
File "/home/forestriveral/DeepLearning/object-detection/mmdetection/mmdet/apis/train.py", line 57, in train_detector
_dist_train(model, dataset, cfg, validate=validate)
File "/home/forestriveral/DeepLearning/object-detection/mmdetection/mmdet/apis/train.py", line 96, in _dist_train
runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
File "/home/forestriveral/anaconda3/envs/mmdetection/lib/python3.6/site-packages/mmcv/runner/runner.py", line 355, in run
epoch_runner(data_loaders[i], **kwargs)
File "/home/forestriveral/anaconda3/envs/mmdetection/lib/python3.6/site-packages/mmcv/runner/runner.py", line 261, in train
self.model, data_batch, train_mode=True, **kwargs)
File "/home/forestriveral/DeepLearning/object-detection/mmdetection/mmdet/apis/train.py", line 37, in batch_processor
losses = model(**data)
File "/home/forestriveral/anaconda3/envs/mmdetection/lib/python3.6/site-packages/torch/nn/modules/module.py", line 489, in __call__
result = self.forward(*input, **kwargs)
File "/home/forestriveral/anaconda3/envs/mmdetection/lib/python3.6/site-packages/mmcv/parallel/distributed.py", line 50, in forward
return self.module(*inputs[0], **kwargs[0])
File "/home/forestriveral/anaconda3/envs/mmdetection/lib/python3.6/site-packages/torch/nn/modules/module.py", line 489, in __call__
result = self.forward(*input, **kwargs)
File "/home/forestriveral/DeepLearning/object-detection/mmdetection/mmdet/models/detectors/base.py", line 80, in forward
return self.forward_train(img, img_meta, **kwargs)
File "/home/forestriveral/DeepLearning/object-detection/mmdetection/mmdet/models/detectors/cascade_rcnn.py", line 125, in forward_train
*rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
File "/home/forestriveral/DeepLearning/object-detection/mmdetection/mmdet/models/anchor_heads/rpn_head.py", line 51, in loss
gt_bboxes_ignore=gt_bboxes_ignore)
File "/home/forestriveral/DeepLearning/object-detection/mmdetection/mmdet/models/anchor_heads/anchor_head.py", line 210, in loss
cfg=cfg)
File "/home/forestriveral/DeepLearning/object-detection/mmdetection/mmdet/core/utils/misc.py", line 24, in multi_apply
return tuple(map(list, zip(*map_results)))
File "/home/forestriveral/DeepLearning/object-detection/mmdetection/mmdet/models/anchor_heads/anchor_head.py", line 155, in loss_single
cls_score, labels, label_weights, avg_factor=num_total_samples)
File "/home/forestriveral/DeepLearning/object-detection/mmdetection/mmdet/core/loss/losses.py", line 30, in weighted_binary_cross_entropy
reduction='sum')[None] / avg_factor
File "/home/forestriveral/anaconda3/envs/mmdetection/lib/python3.6/site-packages/torch/nn/functional.py", line 2075, in binary_cross_entropy_with_logits
raise ValueError("Target size ({}) must be the same as input size ({})".format(target.size(), input.size()))
ValueError: Target size (torch.Size([98304])) must be the same as input size (torch.Size([98304, 1]))
Traceback (most recent call last):
File "./tools/train.py", line 90, in <module>
main()
File "./tools/train.py", line 86, in main
logger=logger)
File "/home/forestriveral/DeepLearning/object-detection/mmdetection/mmdet/apis/train.py", line 57, in train_detector
_dist_train(model, dataset, cfg, validate=validate)
File "/home/forestriveral/DeepLearning/object-detection/mmdetection/mmdet/apis/train.py", line 96, in _dist_train
runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
File "/home/forestriveral/anaconda3/envs/mmdetection/lib/python3.6/site-packages/mmcv/runner/runner.py", line 355, in run
epoch_runner(data_loaders[i], **kwargs)
File "/home/forestriveral/anaconda3/envs/mmdetection/lib/python3.6/site-packages/mmcv/runner/runner.py", line 261, in train
self.model, data_batch, train_mode=True, **kwargs)
File "/home/forestriveral/DeepLearning/object-detection/mmdetection/mmdet/apis/train.py", line 37, in batch_processor
losses = model(**data)
File "/home/forestriveral/anaconda3/envs/mmdetection/lib/python3.6/site-packages/torch/nn/modules/module.py", line 489, in __call__
result = self.forward(*input, **kwargs)
File "/home/forestriveral/anaconda3/envs/mmdetection/lib/python3.6/site-packages/mmcv/parallel/distributed.py", line 50, in forward
return self.module(*inputs[0], **kwargs[0])
File "/home/forestriveral/anaconda3/envs/mmdetection/lib/python3.6/site-packages/torch/nn/modules/module.py", line 489, in __call__
result = self.forward(*input, **kwargs)
File "/home/forestriveral/DeepLearning/object-detection/mmdetection/mmdet/models/detectors/base.py", line 80, in forward
return self.forward_train(img, img_meta, **kwargs)
File "/home/forestriveral/DeepLearning/object-detection/mmdetection/mmdet/models/detectors/cascade_rcnn.py", line 125, in forward_train
*rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
File "/home/forestriveral/DeepLearning/object-detection/mmdetection/mmdet/models/anchor_heads/rpn_head.py", line 51, in loss
gt_bboxes_ignore=gt_bboxes_ignore)
File "/home/forestriveral/DeepLearning/object-detection/mmdetection/mmdet/models/anchor_heads/anchor_head.py", line 210, in loss
cfg=cfg)
File "/home/forestriveral/DeepLearning/object-detection/mmdetection/mmdet/core/utils/misc.py", line 24, in multi_apply
return tuple(map(list, zip(*map_results)))
File "/home/forestriveral/DeepLearning/object-detection/mmdetection/mmdet/models/anchor_heads/anchor_head.py", line 155, in loss_single
cls_score, labels, label_weights, avg_factor=num_total_samples)
File "/home/forestriveral/DeepLearning/object-detection/mmdetection/mmdet/core/loss/losses.py", line 30, in weighted_binary_cross_entropy
reduction='sum')[None] / avg_factor
File "/home/forestriveral/anaconda3/envs/mmdetection/lib/python3.6/site-packages/torch/nn/functional.py", line 2075, in binary_cross_entropy_with_logits
raise ValueError("Target size ({}) must be the same as input size ({})".format(target.size(), input.size()))
ValueError: Target size (torch.Size([98304])) must be the same as input size (torch.Size([98304, 1]))
Any ideas or suggestions? Thanks
Issue Analytics
- State:
- Created 4 years ago
- Comments:7
Top Results From Across the Web
ValueError: Target size (torch.Size([16])) must be the same as ...
Since the error says ValueError: Target size (torch.Size([16])) must be the same as input size (torch.Size([16, 1])) . I don't understand, where ...
Read more >
ValueError: Target size (torch.Size([8])) must be the same as ...
I'm having trouble getting my model to train. It keeps returning the error: ValueError: Target size (torch.Size([8])) must be the same as ......
Read more >
Target size that is different to the input size - PyTorch Lightning
fit(model, data_module) Igot this error : Using a target size (torch. Size([16])) that is different to the input size (torch. Size([16, 10])) ...
Read more >Target size (torch.Size([10])) must be the same as input size ...
I think this error is coming from your loss function - loss = loss_func(output, b_y) . The size of output and b_y is...
Read more >
Target size is not the same as input size fast ai - Zindi
Thank you for the reply, but when i change my input to 64 then it throws an error. Target size (torch.Size([64])) must be...
Read more >
Top Related Medium Post
No results found
Top Related StackOverflow Question
No results found
Troubleshoot Live Code
Lightrun enables developers to add logs, metrics and snapshots to live code - no restarts or redeploys required.
Start Free
Top Related Reddit Thread
No results found
Top Related Hackernoon Post
No results found
Top Related Tweet
No results found
Top Related Dev.to Post
No results found
Top Related Hashnode Post
No results found
Top GitHub Comments
When you pulled the latest master, did you run the setup again? I just tried Mask R-CNN with a ResNet-50 backbone and it works fine.
Hi @yhcao6
Thank you for pointing it out 😃 I didn’t know I would need to set it up again. 😅