vitdet use faster_rcnn #12033

twisti14 · 2024-11-06T15:38:09Z

I want to use Faster R-CNN for object detection with ViTDet, instead of Mask R-CNN for instance segmentation. Training always fails with an out-of-GPU-memory error, even though my GPU still has free memory. Could there be a problem with my base configuration file? Here is my full config:
# General settings of the dumped config: LR auto-scaling reference, backbone
# norm layer, batch-level augmentations, custom hooks/imports, dataset root.
auto_scale_lr = dict(base_batch_size=2)
backbone_norm_cfg = dict(requires_grad=True, type='LN')
backend_args = None
# NOTE(review): this top-level pad size is (512, 512), while the
# `batch_augments` entry inside `model.data_preprocessor` of this same config
# uses (1024, 1024) -- confirm which one is intended.
batch_augments = [
    dict(pad_mask=True, size=(512, 512), type='BatchFixedSizePad'),
]
# NOTE(review): the hook name is spelled with three 's' characters;
# presumably this matches the name registered by `projects.ViTDet.vitdet`
# -- verify before "correcting" it.
custom_hooks = [dict(type='Fp16CompresssionHook')]
custom_imports = dict(imports=['projects.ViTDet.vitdet'])
data_root = 'data/coco/'
dataset_type = 'CocoDataset'
# Runtime settings: default hooks, registry scope, environment, logging and
# schedule-length constants.
default_hooks = dict(
    checkpoint=dict(
        by_epoch=True,
        interval=1,
        max_keep_ckpts=5,
        save_last=True,
        type='CheckpointHook'),
    logger=dict(interval=2, type='LoggerHook'),
    param_scheduler=dict(type='ParamSchedulerHook'),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    timer=dict(type='IterTimerHook'),
    visualization=dict(type='DetVisualizationHook'))
default_scope = 'mmdet'
dynamic_intervals = [(180001, 184375)]
env_cfg = dict(
    cudnn_benchmark=False,
    dist_cfg=dict(backend='nccl'),
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
image_size = (1024, 1024)
interval = 5000
launcher = 'none'
load_from = None
log_level = 'INFO'
log_processor = dict(by_epoch=False, type='LogProcessor', window_size=50)
max_epochs = 20
max_iters = None
# Detector definition: a `FasterRCNN` with a `ViT` backbone, a `SimpleFPN`
# neck, an `RPNHead` and a `StandardRoIHead` that only has a bbox branch.
model = dict(
    backbone=dict(
        depth=12,
        drop_path_rate=0.1,
        embed_dim=768,
        img_size=1024,
        init_cfg=dict(
            checkpoint='mae_pretrain_vit_base.pth', type='Pretrained'),
        mlp_ratio=4,
        norm_cfg=dict(requires_grad=True, type='LN'),
        num_heads=12,
        patch_size=16,
        qkv_bias=True,
        type='ViT',
        use_rel_pos=True,
        window_block_indexes=[0, 1, 3, 4, 6, 7, 9, 10],
        window_size=14),
    data_preprocessor=dict(
        # NOTE(review): `pad_mask=True` is kept from the original although
        # the data pipelines in this config load no masks -- confirm.
        batch_augments=[
            dict(pad_mask=True, size=(1024, 1024), type='BatchFixedSizePad'),
        ],
        bgr_to_rgb=True,
        mean=[123.675, 116.28, 103.53],
        pad_size_divisor=32,
        std=[58.395, 57.12, 57.375],
        type='DetDataPreprocessor'),
    neck=dict(
        backbone_channel=768,
        in_channels=[192, 384, 768, 768],
        norm_cfg=dict(requires_grad=True, type='LN2d'),
        num_outs=5,
        out_channels=256,
        type='SimpleFPN'),
    roi_head=dict(
        bbox_head=dict(
            bbox_coder=dict(
                target_means=[0.0, 0.0, 0.0, 0.0],
                target_stds=[0.1, 0.1, 0.2, 0.2],
                type='DeltaXYWHBBoxCoder'),
            conv_out_channels=256,
            fc_out_channels=1024,
            in_channels=256,
            loss_bbox=dict(loss_weight=1.0, type='L1Loss'),
            loss_cls=dict(
                loss_weight=1.0, type='CrossEntropyLoss', use_sigmoid=False),
            norm_cfg=dict(requires_grad=True, type='LN2d'),
            num_classes=4,
            reg_class_agnostic=False,
            roi_feat_size=7,
            type='Shared4Conv1FCBBoxHead'),
        bbox_roi_extractor=dict(
            featmap_strides=[4, 8, 16, 32],
            out_channels=256,
            roi_layer=dict(output_size=7, sampling_ratio=0, type='RoIAlign'),
            type='SingleRoIExtractor'),
        type='StandardRoIHead'),
    rpn_head=dict(
        anchor_generator=dict(
            ratios=[0.5, 1.0, 2.0],
            scales=[8],
            strides=[4, 8, 16, 32, 64],
            type='AnchorGenerator'),
        bbox_coder=dict(
            target_means=[0.0, 0.0, 0.0, 0.0],
            target_stds=[1.0, 1.0, 1.0, 1.0],
            type='DeltaXYWHBBoxCoder'),
        feat_channels=256,
        in_channels=256,
        loss_bbox=dict(loss_weight=1.0, type='L1Loss'),
        loss_cls=dict(
            loss_weight=1.0, type='CrossEntropyLoss', use_sigmoid=True),
        num_convs=2,
        type='RPNHead'),
    test_cfg=dict(
        rcnn=dict(
            max_per_img=100,
            nms=dict(iou_threshold=0.5, type='nms'),
            score_thr=0.05),
        rpn=dict(
            max_per_img=1000,
            min_bbox_size=0,
            nms=dict(iou_threshold=0.7, type='nms'),
            nms_pre=1000)),
    train_cfg=dict(
        rcnn=dict(
            assigner=dict(
                ignore_iof_thr=-1,
                match_low_quality=False,
                min_pos_iou=0.5,
                neg_iou_thr=0.5,
                pos_iou_thr=0.5,
                type='MaxIoUAssigner'),
            debug=False,
            pos_weight=-1,
            sampler=dict(
                add_gt_as_proposals=True,
                neg_pos_ub=-1,
                num=512,
                pos_fraction=0.25,
                type='RandomSampler')),
        rpn=dict(
            allowed_border=-1,
            assigner=dict(
                ignore_iof_thr=-1,
                match_low_quality=True,
                min_pos_iou=0.3,
                neg_iou_thr=0.3,
                pos_iou_thr=0.7,
                type='MaxIoUAssigner'),
            debug=False,
            pos_weight=-1,
            sampler=dict(
                add_gt_as_proposals=False,
                neg_pos_ub=-1,
                num=256,
                pos_fraction=0.5,
                type='RandomSampler')),
        rpn_proposal=dict(
            max_per_img=1000,
            min_bbox_size=0,
            nms=dict(iou_threshold=0.7, type='nms'),
            nms_pre=2000)),
    type='FasterRCNN')
# Optimisation settings: neck/head norm layer, AdamW wrapped in
# `AmpOptimWrapper` with layer-wise decay, and the LR schedule.
norm_cfg = dict(requires_grad=True, type='LN2d')
optim_wrapper = dict(
    constructor='LayerDecayOptimizerConstructor',
    optimizer=dict(
        betas=(0.9, 0.999), lr=0.0001, type='AdamW', weight_decay=0.01),
    paramwise_cfg=dict(decay_rate=0.7, decay_type='layer_wise', num_layers=12),
    type='AmpOptimWrapper')
# NOTE(review): the LinearLR entry covers begin=0..end=20 epochs, i.e. the
# same range as the MultiStepLR entry and the whole 20-epoch run -- confirm
# that a ramp over the entire training (rather than a short warm-up) is
# intended.
param_scheduler = [
    dict(begin=0, by_epoch=True, end=20, start_factor=0.001, type='LinearLR'),
    dict(
        begin=0,
        by_epoch=True,
        end=20,
        gamma=0.1,
        milestones=[15, 18],
        type='MultiStepLR'),
]
resume = False
test_cfg = dict(type='TestLoop')
# Test-time data loader (COCO val2017 annotations) and its bbox evaluator.
test_dataloader = dict(
    batch_size=2,
    dataset=dict(
        ann_file='annotations/instances_val2017.json',
        data_prefix=dict(img='val2017/'),
        data_root='data/coco/',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(keep_ratio=True, scale=(1024, 1024), type='Resize'),
            dict(
                pad_val=dict(img=(114, 114, 114)),
                size=(1024, 1024),
                type='Pad'),
            dict(type='LoadAnnotations', with_bbox=True, with_mask=False),
            dict(
                meta_keys=(
                    'img_id',
                    'img_path',
                    'ori_shape',
                    'img_shape',
                    'scale_factor',
                ),
                type='PackDetInputs'),
        ],
        test_mode=True,
        type='CocoDataset'),
    drop_last=False,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(shuffle=False, type='DefaultSampler'))
test_evaluator = dict(
    ann_file='data/coco/annotations/instances_val2017.json',
    format_only=False,
    metric=['bbox'],
    type='CocoMetric')
# Training loop: epoch-based, 20 epochs, `val_interval=1`.
# NOTE(review): the `dynamic_intervals` pair (180001, 184375) looks like
# iteration counts, yet the loop is epoch-based with max_epochs=20 -- confirm
# whether this entry is still meaningful here.
train_cfg = dict(
    dynamic_intervals=[(180001, 184375)],
    max_epochs=20,
    type='EpochBasedTrainLoop',
    val_interval=1)
# Training data loader (COCO train2017 annotations).
#
# Fix: `LoadAnnotations` now runs directly after `LoadImageFromFile`, i.e.
# BEFORE `Resize` and `Pad`. The geometric transforms can only rescale
# ground-truth boxes that are already present in the sample; with the
# previous order (annotations loaded after Resize/Pad) the image was resized
# to 1024x1024 while the boxes stayed in original-image coordinates, so the
# training targets did not match the input image.
#
# NOTE(review): batch_size=8 with 1024x1024 inputs is presumably the reason
# for the reported out-of-memory error -- consider lowering it; the value is
# left unchanged here.
train_dataloader = dict(
    batch_size=8,
    dataset=dict(
        ann_file='annotations/instances_train2017.json',
        data_prefix=dict(img='train2017/'),
        data_root='data/coco/',
        pipeline=[
            dict(type='LoadImageFromFile'),
            # Must precede every geometric transform (see header comment).
            dict(type='LoadAnnotations', with_bbox=True, with_mask=False),
            dict(keep_ratio=True, scale=(1024, 1024), type='Resize'),
            dict(
                pad_val=dict(img=(114, 114, 114)),
                size=(1024, 1024),
                type='Pad'),
            dict(
                meta_keys=(
                    'img_id',
                    'img_path',
                    'ori_shape',
                    'img_shape',
                    'scale_factor',
                ),
                type='PackDetInputs'),
        ],
        test_mode=False,
        type='CocoDataset'),
    drop_last=False,
    num_workers=8,
    persistent_workers=True,
    sampler=dict(shuffle=True, type='DefaultSampler'))
# Bbox `CocoMetric` evaluator pointing at the train2017 annotation file.
train_evaluator = dict(
    ann_file='data/coco/annotations/instances_train2017.json',
    format_only=False,
    metric=['bbox'],
    type='CocoMetric')
# Output directory for logs and checkpoints.
# A forward slash is used as the separator: in the previous value the
# sequence backslash + "f" inside a normal string literal was parsed as a
# form-feed control character, corrupting the directory name.
work_dir = './work_dirs/faster_vitdet'

twisti14 · 2024-11-07T07:57:05Z

When I use the Mask R-CNN ViTDet config instead, the mAP is 0. I would appreciate any advice.

twisti14 added the reimplementation Issues in model reimplementation label Nov 6, 2024

mm-assistant bot assigned jbwang1997 Nov 6, 2024

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

vitdet use faster_rcnn #12033

vitdet use faster_rcnn #12033

twisti14 commented Nov 6, 2024

twisti14 commented Nov 7, 2024

vitdet use faster_rcnn #12033

vitdet use faster_rcnn #12033

Comments

twisti14 commented Nov 6, 2024

twisti14 commented Nov 7, 2024