# config.py
import yaml


DEFAULTS = {
    # random seed for reproducibility; a large number is preferred
    "init_rand_seed": 12345678,
    # dataset loader; specify the dataset here
    "dataset_name": "epic",
    "devices": ['cuda:0'],  # default: single gpu
    "train_split": ('training',),
    "val_split": ('validation',),
    "model_name": "LocPointTransformer",
    "dataset": {
        # temporal stride of the feats
        "feat_stride": 16,
        # number of frames for each feat
        "num_frames": 32,
        # default fps (may vary across datasets); set to None to read it from the json file
        "default_fps": None,
        # input video feat dim
        "input_vid_dim": 2304,
        # input text feat dim
        "input_txt_dim": 512,
        # number of classes
        "num_classes": 1,
        # downsampling rate of features; 1 to use the original resolution
        "downsample_rate": 1,
        # max sequence length during training
        "max_seq_len": 2560,
        "enable_temporal_jittering": False,
    },
    "loader": {
        "batch_size": 2,
        "num_workers": 4,
    },
    # network architecture
    "model": {
        # type of backbone (convTransformer | conv)
        "backbone_type": 'convTransformer',
        # type of FPN (fpn | identity)
        "fpn_type": "identity",
        "backbone_arch": (2, 2, 2, 0, 6),
        # scale factor between pyramid levels
        "scale_factor": 2,
        # regression range for pyramid levels
        "regression_range": [(0, 4), (4, 8), (8, 16), (16, 32), (32, 64), (64, 10000)],
        # number of heads in self-attention
        "n_head": 4,
        # window size for self-attention; <= 1 to use the full sequence (i.e., global attention)
        "n_mha_win_size": -1,
        # kernel size for the embedding network
        "embd_kernel_size": 3,
        # (output) feature dim for the embedding network
        "embd_dim": 512,
        # whether to attach layer norm to the embedding network
        "embd_with_ln": True,
        # feat dim for FPN
        "fpn_dim": 512,
        # whether to add layer norm at the end of the fpn outputs
        "fpn_with_ln": True,
        # starting level for fpn
        "fpn_start_level": 0,
        # feat dim for the heads
        "head_dim": 512,
        # kernel size for reg/cls/center heads
        "head_kernel_size": 3,
        # number of layers in each head (including the final one)
        "head_num_layers": 3,
        # whether to attach layer norm to the heads
        "head_with_ln": True,
        # defines the max length of the buffered points
        "max_buffer_len_factor": 4.0,
        # whether to use abs position encoding (added to the input embedding)
        "use_abs_pe": False,
        # whether to use rel position encoding (added to self-attention)
        "use_rel_pe": False,
    },
"train_cfg": {
# radius | none (if to use center sampling)
"center_sample": "radius",
"center_sample_radius": 1.5,
"loss_weight": 1.0, # on reg_loss, use -1 to enable auto balancing
"cls_prior_prob": 0.01,
"init_loss_norm": 2000,
# gradient cliping, not needed for pre-LN transformer
"clip_grad_l2norm": -1,
# cls head without data (a fix to epic-kitchens / thumos)
"head_empty_cls": [],
# dropout ratios for tranformers
"dropout": 0.0,
# ratio for drop path
"droppath": 0.1,
# if to use label smoothing (>0.0)
"label_smoothing": 0.0,
},
"test_cfg": {
"pre_nms_thresh": 0.001,
"pre_nms_topk": 5000,
"iou_threshold": 0.1,
"min_score": 0.01,
"max_seg_num": 1000,
"nms_method": 'soft', # soft | hard | none
"nms_sigma": 0.5,
"duration_thresh": 0.05,
"multiclass_nms": True,
"ext_score_file": None,
"voting_thresh": 0.75,
},
    # optimizer (for training)
    "opt": {
        # solver
        "type": "AdamW",  # SGD or AdamW
        # solver params
        "momentum": 0.9,
        "weight_decay": 0.0,
        "learning_rate": 1e-3,
        "backbone_lr_weight": 1,
        # number of training epochs, excluding the warmup epochs
        "epochs": 30,
        # lr scheduler: cosine / multistep
        "warmup": True,
        "warmup_epochs": 5,
        "schedule_type": "cosine",
        # multistep milestones, in #epochs excluding warmup
        "schedule_steps": [],
        "schedule_gamma": 0.1,
    }
}
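
# For reference, a user YAML file only needs to list the keys it overrides;
# everything else is filled in from DEFAULTS by _merge below. An illustrative
# (hypothetical) override file might look like:
#
#   dataset_name: "epic"
#   dataset:
#     max_seq_len: 1024
#   opt:
#     learning_rate: 1.0e-4
#     epochs: 20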


def _merge(src, dst):
    # recursively fill dst with entries from src without overwriting keys
    # that dst already defines; nested dicts are merged key by key
    for k, v in src.items():
        if k in dst:
            if isinstance(v, dict):
                _merge(src[k], dst[k])
        else:
            dst[k] = v
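
# Illustrative behavior of _merge (hypothetical values): with
#   src = {"lr": 1e-3, "opt": {"type": "AdamW", "epochs": 30}}
#   dst = {"opt": {"epochs": 50}}
# _merge(src, dst) leaves dst as
#   {"opt": {"epochs": 50, "type": "AdamW"}, "lr": 1e-3}
# i.e., values already in dst win, and missing defaults are filled in.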


def load_default_config():
    # note: this returns the module-level DEFAULTS dict itself, not a copy,
    # so callers should treat it as read-only
    config = DEFAULTS
    return config


def _update_config(config):
    # fill in derived fields: the model consumes these directly, so they are
    # mirrored from the dataset / train / test sections
    config["model"]["input_vid_dim"] = config["dataset"]["input_vid_dim"]
    config["model"]["input_txt_dim"] = config["dataset"]["input_txt_dim"]
    config["model"]["num_classes"] = config["dataset"]["num_classes"]
    config["model"]["max_seq_len"] = config["dataset"]["max_seq_len"]
    config["model"]["train_cfg"] = config["train_cfg"]
    config["model"]["test_cfg"] = config["test_cfg"]
    return config


def load_config(config_file, defaults=DEFAULTS):
    with open(config_file, "r") as fd:
        config = yaml.load(fd, Loader=yaml.FullLoader)
    # user-provided values take precedence; defaults fill the gaps
    _merge(defaults, config)
    config = _update_config(config)
    return config
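

if __name__ == "__main__":
    # Minimal usage sketch (illustrative only): "configs/example.yaml" is a
    # placeholder path, not a file shipped with this repo; any YAML file that
    # overrides a subset of DEFAULTS will work.
    import pprint
    cfg = load_config("configs/example.yaml")
    pprint.pprint(cfg["model"])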