Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

vitdet use faster_rcnn #12033

Open
twisti14 opened this issue Nov 6, 2024 · 1 comment
Open

vitdet use faster_rcnn #12033

twisti14 opened this issue Nov 6, 2024 · 1 comment
Assignees
Labels
reimplementation Issues in model reimplementation

Comments

@twisti14
Copy link

twisti14 commented Nov 6, 2024

I want to use faster-rcnn to do object detection instead of mask rcnn for splitting, now it always shows insufficient video memory, in fact my video memory still has space, maybe there is a problem with my base configuration file? Here's my full profile
auto_scale_lr = dict(base_batch_size=2)
backbone_norm_cfg = dict(requires_grad=True, type='LN')
backend_args = None
batch_augments = [
dict(pad_mask=True, size=(
512,
512,
), type='BatchFixedSizePad'),
]
custom_hooks = [
dict(type='Fp16CompresssionHook'),
]
custom_imports = dict(imports=[
'projects.ViTDet.vitdet',
])
data_root = 'data/coco/'
dataset_type = 'CocoDataset'
default_hooks = dict(
checkpoint=dict(
by_epoch=True,
interval=1,
max_keep_ckpts=5,
save_last=True,
type='CheckpointHook'),
logger=dict(interval=2, type='LoggerHook'),
param_scheduler=dict(type='ParamSchedulerHook'),
sampler_seed=dict(type='DistSamplerSeedHook'),
timer=dict(type='IterTimerHook'),
visualization=dict(type='DetVisualizationHook'))
default_scope = 'mmdet'
dynamic_intervals = [
(
180001,
184375,
),
]
env_cfg = dict(
cudnn_benchmark=False,
dist_cfg=dict(backend='nccl'),
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
image_size = (
1024,
1024,
)
interval = 5000
launcher = 'none'
load_from = None
log_level = 'INFO'
log_processor = dict(by_epoch=False, type='LogProcessor', window_size=50)
max_epochs = 20
max_iters = None
model = dict(
backbone=dict(
depth=12,
drop_path_rate=0.1,
embed_dim=768,
img_size=1024,
init_cfg=dict(
checkpoint='mae_pretrain_vit_base.pth', type='Pretrained'),
mlp_ratio=4,
norm_cfg=dict(requires_grad=True, type='LN'),
num_heads=12,
patch_size=16,
qkv_bias=True,
type='ViT',
use_rel_pos=True,
window_block_indexes=[
0,
1,
3,
4,
6,
7,
9,
10,
],
window_size=14),
data_preprocessor=dict(
batch_augments=[
dict(pad_mask=True, size=(
1024,
1024,
), type='BatchFixedSizePad'),
],
bgr_to_rgb=True,
mean=[
123.675,
116.28,
103.53,
],
pad_size_divisor=32,
std=[
58.395,
57.12,
57.375,
],
type='DetDataPreprocessor'),
neck=dict(
backbone_channel=768,
in_channels=[
192,
384,
768,
768,
],
norm_cfg=dict(requires_grad=True, type='LN2d'),
num_outs=5,
out_channels=256,
type='SimpleFPN'),
roi_head=dict(
bbox_head=dict(
bbox_coder=dict(
target_means=[
0.0,
0.0,
0.0,
0.0,
],
target_stds=[
0.1,
0.1,
0.2,
0.2,
],
type='DeltaXYWHBBoxCoder'),
conv_out_channels=256,
fc_out_channels=1024,
in_channels=256,
loss_bbox=dict(loss_weight=1.0, type='L1Loss'),
loss_cls=dict(
loss_weight=1.0, type='CrossEntropyLoss', use_sigmoid=False),
norm_cfg=dict(requires_grad=True, type='LN2d'),
num_classes=4,
reg_class_agnostic=False,
roi_feat_size=7,
type='Shared4Conv1FCBBoxHead'),
bbox_roi_extractor=dict(
featmap_strides=[
4,
8,
16,
32,
],
out_channels=256,
roi_layer=dict(output_size=7, sampling_ratio=0, type='RoIAlign'),
type='SingleRoIExtractor'),
type='StandardRoIHead'),
rpn_head=dict(
anchor_generator=dict(
ratios=[
0.5,
1.0,
2.0,
],
scales=[
8,
],
strides=[
4,
8,
16,
32,
64,
],
type='AnchorGenerator'),
bbox_coder=dict(
target_means=[
0.0,
0.0,
0.0,
0.0,
],
target_stds=[
1.0,
1.0,
1.0,
1.0,
],
type='DeltaXYWHBBoxCoder'),
feat_channels=256,
in_channels=256,
loss_bbox=dict(loss_weight=1.0, type='L1Loss'),
loss_cls=dict(
loss_weight=1.0, type='CrossEntropyLoss', use_sigmoid=True),
num_convs=2,
type='RPNHead'),
test_cfg=dict(
rcnn=dict(
max_per_img=100,
nms=dict(iou_threshold=0.5, type='nms'),
score_thr=0.05),
rpn=dict(
max_per_img=1000,
min_bbox_size=0,
nms=dict(iou_threshold=0.7, type='nms'),
nms_pre=1000)),
train_cfg=dict(
rcnn=dict(
assigner=dict(
ignore_iof_thr=-1,
match_low_quality=False,
min_pos_iou=0.5,
neg_iou_thr=0.5,
pos_iou_thr=0.5,
type='MaxIoUAssigner'),
debug=False,
pos_weight=-1,
sampler=dict(
add_gt_as_proposals=True,
neg_pos_ub=-1,
num=512,
pos_fraction=0.25,
type='RandomSampler')),
rpn=dict(
allowed_border=-1,
assigner=dict(
ignore_iof_thr=-1,
match_low_quality=True,
min_pos_iou=0.3,
neg_iou_thr=0.3,
pos_iou_thr=0.7,
type='MaxIoUAssigner'),
debug=False,
pos_weight=-1,
sampler=dict(
add_gt_as_proposals=False,
neg_pos_ub=-1,
num=256,
pos_fraction=0.5,
type='RandomSampler')),
rpn_proposal=dict(
max_per_img=1000,
min_bbox_size=0,
nms=dict(iou_threshold=0.7, type='nms'),
nms_pre=2000)),
type='FasterRCNN')
norm_cfg = dict(requires_grad=True, type='LN2d')
optim_wrapper = dict(
constructor='LayerDecayOptimizerConstructor',
optimizer=dict(
betas=(
0.9,
0.999,
), lr=0.0001, type='AdamW', weight_decay=0.01),
paramwise_cfg=dict(decay_rate=0.7, decay_type='layer_wise', num_layers=12),
type='AmpOptimWrapper')
param_scheduler = [
dict(begin=0, by_epoch=True, end=20, start_factor=0.001, type='LinearLR'),
dict(
begin=0,
by_epoch=True,
end=20,
gamma=0.1,
milestones=[
15,
18,
],
type='MultiStepLR'),
]
resume = False
test_cfg = dict(type='TestLoop')
test_dataloader = dict(
batch_size=2,
dataset=dict(
ann_file='annotations/instances_val2017.json',
data_prefix=dict(img='val2017/'),
data_root='data/coco/',
pipeline=[
dict(type='LoadImageFromFile'),
dict(keep_ratio=True, scale=(
1024,
1024,
), type='Resize'),
dict(
pad_val=dict(img=(
114,
114,
114,
)),
size=(
1024,
1024,
),
type='Pad'),
dict(type='LoadAnnotations', with_bbox=True, with_mask=False),
dict(
meta_keys=(
'img_id',
'img_path',
'ori_shape',
'img_shape',
'scale_factor',
),
type='PackDetInputs'),
],
test_mode=True,
type='CocoDataset'),
drop_last=False,
num_workers=2,
persistent_workers=True,
sampler=dict(shuffle=False, type='DefaultSampler'))
test_evaluator = dict(
ann_file='data/coco/annotations/instances_val2017.json',
format_only=False,
metric=[
'bbox',
],
type='CocoMetric')
train_cfg = dict(
dynamic_intervals=[
(
180001,
184375,
),
],
max_epochs=20,
type='EpochBasedTrainLoop',
val_interval=1)
train_dataloader = dict(
batch_size=8,
dataset=dict(
ann_file='annotations/instances_train2017.json',
data_prefix=dict(img='train2017/'),
data_root='data/coco/',
pipeline=[
dict(type='LoadImageFromFile'),
dict(keep_ratio=True, scale=(
1024,
1024,
), type='Resize'),
dict(
pad_val=dict(img=(
114,
114,
114,
)),
size=(
1024,
1024,
),
type='Pad'),
dict(type='LoadAnnotations', with_bbox=True, with_mask=False),
dict(
meta_keys=(
'img_id',
'img_path',
'ori_shape',
'img_shape',
'scale_factor',
),
type='PackDetInputs'),
],
test_mode=False,
type='CocoDataset'),
drop_last=False,
num_workers=8,
persistent_workers=True,
sampler=dict(shuffle=True, type='DefaultSampler'))
train_evaluator = dict(
ann_file='data/coco/annotations/instances_train2017.json',
format_only=False,
metric=[
'bbox',
],
type='CocoMetric')
work_dir = './work_dirs\faster_vitdet'

@twisti14 twisti14 added the reimplementation Issues in model reimplementation label Nov 6, 2024
@twisti14
Copy link
Author

twisti14 commented Nov 7, 2024

When I use mask-vitdet,mAP shows 0,I hope get some advice.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
reimplementation Issues in model reimplementation
Projects
None yet
Development

No branches or pull requests

2 participants