
update to new version
zhreshold committed Jun 26, 2017
1 parent 5c59ddc commit b550a8f
Showing 20 changed files with 903 additions and 68 deletions.
13 changes: 12 additions & 1 deletion README.md
@@ -16,6 +16,8 @@ remarkable traits of MXNet.
* The results are almost identical to the original version, although they may differ slightly due to implementation details.

### What's new
* Added multiple trained models.
* Added a much simpler way to compose SSD networks from mainstream classification networks (resnet, inception, ...); see the [Guide](symbol/README.md).
* Updated to the latest version, matching the Caffe implementation, with a 5% mAP increase.
* Use the C++ record iterator, backed by the multi-threaded back-end engine, for a large speed-up in multi-GPU environments.
* Monitor validation mAP during training.
@@ -103,7 +105,7 @@ tar -xvf VOCtrainval_11-May-2012.tar
tar -xvf VOCtrainval_06-Nov-2007.tar
tar -xvf VOCtest_06-Nov-2007.tar
```
* We are goint to use `trainval` set in VOC2007/2012 as a common strategy.
* We are going to use `trainval` set in VOC2007/2012 as a common strategy.
The suggested directory structure is to store `VOC2007` and `VOC2012` directories
in the same `VOCdevkit` folder.
* Then link `VOCdevkit` folder to `data/VOCdevkit` by default:
@@ -160,3 +162,12 @@ python convert_model.py deploy.prototxt name_of_pretrained_caffe_model.caffemode
python demo.py --prefix ssd_converted --epoch 1 --deploy
```
There is no guarantee that conversion will always work, but at least it's good for now.

### Legacy models
Since the new interface for composing networks was introduced, the old models have inconsistent weight names.
You can still load a previous model by renaming its symbol file to `legacy_xxx.py`
and calling `python train/demo.py --network legacy_xxx`.
For example:
```
python demo.py --network 'legacy_vgg16_ssd_300.py' --prefix model/ssd_300 --epoch 0
```
3 changes: 2 additions & 1 deletion config/config.py
@@ -53,7 +53,7 @@
cfg.train.rand_mirror_prob = 0.5
cfg.train.shuffle = True
cfg.train.seed = 233
cfg.train.preprocess_threads = 6
cfg.train.preprocess_threads = 48
cfg.train = config_as_dict(cfg.train) # convert to normal dict

# validation
@@ -64,4 +64,5 @@
cfg.valid.rand_mirror_prob = 0
cfg.valid.shuffle = False
cfg.valid.seed = 0
cfg.valid.preprocess_threads = 32
cfg.valid = config_as_dict(cfg.valid) # convert to normal dict
17 changes: 17 additions & 0 deletions dataset/imdb.py
@@ -91,3 +91,20 @@ def progress_bar(count, total, suffix=''):
f.write(line)
else:
raise RuntimeError("No image in imdb")

def _load_class_names(self, filename, dirname):
"""
load class names from text file
Parameters:
----------
filename: str
file stores class names
dirname: str
file directory
"""
full_path = osp.join(dirname, filename)
classes = []
with open(full_path, 'r') as f:
classes = [l.strip() for l in f.readlines()]
return classes
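
The new `_load_class_names` helper is what the dataset classes below now call instead of hard-coding class lists. A minimal usage sketch, assuming a names file with one class name per line (the `ToyDataset` class and its arguments are purely illustrative):
```
# Hypothetical Imdb subclass showing how _load_class_names is intended to be
# used; the dataset name 'toy' and the default names file are illustrative.
import os.path as osp
from dataset.imdb import Imdb

class ToyDataset(Imdb):
    def __init__(self, names='pascal_voc.names'):
        super(ToyDataset, self).__init__('toy')
        # resolve the names directory the same way Coco and PascalVoc do
        names_dir = osp.join(osp.dirname(__file__), 'names')
        self.classes = self._load_class_names(names, names_dir)
        self.num_classes = len(self.classes)

# With dataset/names/pascal_voc.names in place, ToyDataset().classes would be
# ['aeroplane', 'bicycle', ..., 'tvmonitor'].
```
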
20 changes: 3 additions & 17 deletions dataset/mscoco.py
@@ -18,13 +18,14 @@ class Coco(Imdb):
whether initially shuffle image list
"""
def __init__(self, anno_file, image_dir, shuffle=True, names='mscoco.txt'):
def __init__(self, anno_file, image_dir, shuffle=True, names='mscoco.names'):
assert os.path.isfile(anno_file), "Invalid annotation file: " + anno_file
basename = os.path.splitext(os.path.basename(anno_file))[0]
super(Coco, self).__init__('coco_' + basename)
self.image_dir = image_dir

self._load_class_names(names, os.path.join(os.path.dirname(__file__), 'names'))
self.classes = self._load_class_names(names,
os.path.join(os.path.dirname(__file__), 'names'))

self.num_classes = len(self.classes)
self._load_all(anno_file, shuffle)
@@ -112,18 +113,3 @@ def _load_all(self, anno_file, shuffle):
# store the results
self.image_set_index = image_set_index
self.labels = labels

def _load_class_names(self, filename, dirname):
"""
load class names from text file
Parameters:
----------
filename: str
file stores class names
dirname: str
file directory
"""
full_path = os.path.join(dirname, filename)
with open(full_path, 'r') as f:
self.classes = [l.strip() for l in f.readlines()]
20 changes: 20 additions & 0 deletions dataset/names/pascal_voc.names
@@ -0,0 +1,20 @@
aeroplane
bicycle
bird
boat
bottle
bus
car
cat
chair
cow
diningtable
dog
horse
motorbike
person
pottedplant
sheep
sofa
train
tvmonitor
10 changes: 4 additions & 6 deletions dataset/pascal_voc.py
@@ -24,7 +24,8 @@ class PascalVoc(Imdb):
is_train : boolean
if true, will load annotations
"""
def __init__(self, image_set, year, devkit_path, shuffle=False, is_train=False):
def __init__(self, image_set, year, devkit_path, shuffle=False, is_train=False,
names='pascal_voc.names'):
super(PascalVoc, self).__init__('voc_' + year + '_' + image_set)
self.image_set = image_set
self.year = year
@@ -33,11 +34,8 @@ def __init__(self, image_set, year, devkit_path, shuffle=False, is_train=False):
self.extension = '.jpg'
self.is_train = is_train

self.classes = ['aeroplane', 'bicycle', 'bird', 'boat',
'bottle', 'bus', 'car', 'cat', 'chair',
'cow', 'diningtable', 'dog', 'horse',
'motorbike', 'person', 'pottedplant',
'sheep', 'sofa', 'train', 'tvmonitor']
self.classes = self._load_class_names(names,
os.path.join(os.path.dirname(__file__), 'names'))

self.config = {'use_difficult': True,
'comp_id': 'comp4',}
14 changes: 2 additions & 12 deletions dataset/yolo_format.py
@@ -102,7 +102,7 @@ def label_from_index(self, index):
ground-truths of this image
"""
assert self.labels is not None, "Labels not processed"
return self.labels[index, :, :]
return self.labels[index]

def _label_path_from_index(self, index):
"""
@@ -130,7 +130,6 @@ def _load_image_labels(self):
labels packed in [num_images x max_num_objects x 5] tensor
"""
temp = []
max_objects = 0

# load ground-truths
for idx in self.image_set_index:
@@ -151,13 +150,4 @@ def _load_image_labels(self):
ymax = y + half_height
label.append([cls_id, xmin, ymin, xmax, ymax])
temp.append(np.array(label))
max_objects = max(max_objects, len(label))
# add padding to labels so that the dimensions match in each batch
assert max_objects > 0, "No objects found for any of the images"
self.padding = max_objects
labels = []
for label in temp:
label = np.lib.pad(label, ((0, max_objects-label.shape[0]), (0,0)), \
'constant', constant_values=(-1, -1))
labels.append(label)
return np.array(labels)
return temp
8 changes: 4 additions & 4 deletions evaluate/eval_voc.py
@@ -51,8 +51,8 @@ def voc_ap(rec, prec, use_07_metric=False):
ap += p / 11.
else:
# append sentinel values at both ends
mrec = np.concatenate([0.], rec, [1.])
mpre = np.concatenate([0.], prec, [0.])
mrec = np.concatenate(([0.], rec, [1.]))
mpre = np.concatenate(([0.], prec, [0.]))

# compute precision integration ladder
for i in range(mpre.size - 1, 0, -1):
@@ -93,10 +93,10 @@ def voc_eval(detpath, annopath, imageset_file, classname, cache_dir, ovthresh=0.
if ind % 100 == 0:
print('reading annotations for {:d}/{:d}'.format(ind + 1, len(image_filenames)))
print('saving annotations cache to {:s}'.format(cache_file))
with open(cache_file, 'w') as f:
with open(cache_file, 'wb') as f:
pickle.dump(recs, f)
else:
with open(cache_file, 'r') as f:
with open(cache_file, 'rb') as f:
recs = pickle.load(f)

# extract objects in :param classname:
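
Two small but important fixes here: `np.concatenate` expects a single sequence of arrays, so the sentinel values are now passed as one tuple, and the annotation cache is written and read in binary mode, which pickle requires under Python 3. A self-contained sketch of the integrated-AP branch that those sentinels feed into (the recall/precision values below are made up):
```
# Sketch of the non-11-point VOC AP computation; rec/prec are dummy values.
import numpy as np

def voc_ap_sketch(rec, prec):
    mrec = np.concatenate(([0.], rec, [1.]))   # sentinels at both ends
    mpre = np.concatenate(([0.], prec, [0.]))
    # make precision monotonically non-increasing (the precision "ladder")
    for i in range(mpre.size - 1, 0, -1):
        mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
    # integrate the area under the envelope where recall changes
    idx = np.where(mrec[1:] != mrec[:-1])[0]
    return np.sum((mrec[idx + 1] - mrec[idx]) * mpre[idx + 1])

print(voc_ap_sketch(np.array([0.2, 0.5, 0.9]), np.array([1.0, 0.8, 0.6])))  # 0.68
```
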
6 changes: 3 additions & 3 deletions evaluate/evaluate_net.py
@@ -7,6 +7,7 @@
from config.config import cfg
from evaluate.eval_metric import MApMetric, VOC07MApMetric
import logging
from symbol.symbol_factory import get_symbol

def evaluate_net(net, path_imgrec, num_classes, mean_pixels, data_shape,
model_prefix, epoch, ctx=mx.cpu(), batch_size=1,
@@ -71,9 +72,8 @@ class names in string, must correspond to num_classes if set
if net is None:
net = load_net
else:
sys.path.append(os.path.join(cfg.ROOT_DIR, 'symbol'))
net = importlib.import_module("symbol_" + net) \
.get_symbol(num_classes, nms_thresh, force_nms)
net = get_symbol(net, data_shape[1], num_classes=num_classes,
nms_thresh=nms_thresh, force_suppress=force_nms)
if not 'label' in net.list_arguments():
label = mx.sym.Variable(name='label')
net = mx.sym.Group([net, label])
49 changes: 49 additions & 0 deletions symbol/README.md
@@ -0,0 +1,49 @@
## How to compose SSD network on top of mainstream classification networks

1. Have the base network ready in this directory as `name.py`, such as `inceptionv3.py`.
2. Add the configuration to `symbol_factory.py`; for example:
```
if network == 'vgg16_reduced':
if data_shape >= 448:
from_layers = ['relu4_3', 'relu7', '', '', '', '', '']
num_filters = [512, -1, 512, 256, 256, 256, 256]
strides = [-1, -1, 2, 2, 2, 2, 1]
pads = [-1, -1, 1, 1, 1, 1, 1]
sizes = [[.07, .1025], [.15,.2121], [.3, .3674], [.45, .5196], [.6, .6708], \
[.75, .8216], [.9, .9721]]
ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3], \
[1,2,.5,3,1./3], [1,2,.5], [1,2,.5]]
normalizations = [20, -1, -1, -1, -1, -1, -1]
steps = [] if data_shape != 512 else [x / 512.0 for x in
[8, 16, 32, 64, 128, 256, 512]]
else:
from_layers = ['relu4_3', 'relu7', '', '', '', '']
num_filters = [512, -1, 512, 256, 256, 256]
strides = [-1, -1, 2, 2, 1, 1]
pads = [-1, -1, 1, 1, 0, 0]
sizes = [[.1, .141], [.2,.272], [.37, .447], [.54, .619], [.71, .79], [.88, .961]]
ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3], \
[1,2,.5], [1,2,.5]]
normalizations = [20, -1, -1, -1, -1, -1]
steps = [] if data_shape != 300 else [x / 300.0 for x in [8, 16, 32, 64, 100, 300]]
return locals()
elif network == 'inceptionv3':
from_layers = ['ch_concat_mixed_7_chconcat', 'ch_concat_mixed_10_chconcat', '', '', '', '']
num_filters = [-1, -1, 512, 256, 256, 128]
strides = [-1, -1, 2, 2, 2, 2]
pads = [-1, -1, 1, 1, 1, 1]
sizes = [[.1, .141], [.2,.272], [.37, .447], [.54, .619], [.71, .79], [.88, .961]]
ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3], \
[1,2,.5], [1,2,.5]]
normalizations = -1
steps = []
return locals()
```
Here `from_layers` indicates the feature layers you would like to extract from the base network.
`''` indicates that we want to add an extra new layer on top of the last feature layer,
and the number of filters for it must be specified in `num_filters`. Similarly, `strides` and `pads`
are required to compose these new layers. `sizes` and `ratios` are the parameters controlling
the anchor generation algorithm. `normalizations` is used to normalize and rescale the feature map if
not `-1`. `steps` is optional and is used to calculate the anchor sliding steps.

3. Train or test with arguments `--network name --data-shape xxx --pretrained pretrained_model`
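
As an illustration of step 2, a hypothetical configuration for a `resnet50`-style base network at `data_shape` 300 could look like the sketch below. The tapped layer names (`'_plus12'`, `'_plus15'`), filter counts and anchor parameters are placeholders and would need to be checked against the real symbol's internal output names:
```
# Hypothetical entry for symbol_factory.py: a resnet50-style base network at
# data_shape 300. Layer names, filter counts and anchor settings are
# placeholders, not values verified against the actual resnet symbol.
def get_resnet50_config(data_shape=300):
    from_layers = ['_plus12', '_plus15', '', '', '', '']  # two taps + four extra layers
    num_filters = [-1, -1, 512, 256, 256, 128]            # -1 keeps a tapped layer unchanged
    strides = [-1, -1, 2, 2, 2, 2]
    pads = [-1, -1, 1, 1, 1, 1]
    sizes = [[.1, .141], [.2, .272], [.37, .447], [.54, .619], [.71, .79], [.88, .961]]
    ratios = [[1, 2, .5], [1, 2, .5, 3, 1. / 3], [1, 2, .5, 3, 1. / 3],
              [1, 2, .5, 3, 1. / 3], [1, 2, .5], [1, 2, .5]]
    normalizations = -1  # no L2 normalization on any tapped layer
    steps = []           # leave empty to let the anchor layers infer the steps
    return locals()
```
With such an entry in place, training would follow step 3, e.g. something like `python train.py --network resnet50 --data-shape 300 --pretrained <pretrained_model>`.
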
79 changes: 67 additions & 12 deletions symbol/common.py
@@ -29,20 +29,74 @@ def conv_act_layer(from_layer, name, num_filter, kernel=(1,1), pad=(0,0), \
----------
(conv, relu) mx.Symbols
"""
assert not use_batchnorm, "batchnorm not yet supported"
bias = mx.symbol.Variable(name="conv{}_bias".format(name),
init=mx.init.Constant(0.0), attr={'__lr_mult__': '2.0'})
conv = mx.symbol.Convolution(data=from_layer, bias=bias, kernel=kernel, pad=pad, \
stride=stride, num_filter=num_filter, name="conv{}".format(name))
relu = mx.symbol.Activation(data=conv, act_type=act_type, \
name="{}{}".format(act_type, name))
conv = mx.symbol.Convolution(data=from_layer, kernel=kernel, pad=pad, \
stride=stride, num_filter=num_filter, name="{}_conv".format(name))
if use_batchnorm:
relu = mx.symbol.BatchNorm(data=relu, name="bn{}".format(name))
return conv, relu
conv = mx.symbol.BatchNorm(data=conv, name="{}_bn".format(name))
relu = mx.symbol.Activation(data=conv, act_type=act_type, \
name="{}_{}".format(name, act_type))
return relu

def multi_layer_feature(body, from_layers, num_filters, strides, pads, min_filter=128):
"""Wrapper function to extract features from base network, attaching extra
layers and SSD specific layers
Parameters
----------
from_layers : list of str
feature extraction layers; use '' to add extra layers
For example:
from_layers = ['relu4_3', 'fc7', '', '', '', '']
which means extract feature from relu4_3 and fc7, adding 4 extra layers
on top of fc7
num_filters : list of int
number of filters for extra layers; you can use -1 for extracted features.
However, if normalization and scaling are applied, the number of filters for
that layer must be provided.
For example:
num_filters = [512, -1, 512, 256, 256, 256]
strides : list of int
strides for the 3x3 convolution appended, -1 can be used for extracted
feature layers
pads : list of int
paddings for the 3x3 convolution, -1 can be used for extracted layers
min_filter : int
minimum number of filters used in 1x1 convolution
Returns
-------
list of mx.Symbols
"""
# arguments check
assert len(from_layers) > 0
assert isinstance(from_layers[0], str) and len(from_layers[0].strip()) > 0
assert len(from_layers) == len(num_filters) == len(strides) == len(pads)

internals = body.get_internals()
layers = []
for k, params in enumerate(zip(from_layers, num_filters, strides, pads)):
from_layer, num_filter, s, p = params
if from_layer.strip():
# extract from base network
layer = internals[from_layer.strip() + '_output']
layers.append(layer)
else:
# attach from last feature layer
assert len(layers) > 0
assert num_filter > 0
layer = layers[-1]
num_1x1 = max(min_filter, num_filter // 2)
conv_1x1 = conv_act_layer(layer, 'multi_feat_%d_conv_1x1' % (k),
num_1x1, kernel=(1, 1), pad=(0, 0), stride=(1, 1), act_type='relu')
conv_3x3 = conv_act_layer(conv_1x1, 'multi_feat_%d_conv_3x3' % (k),
num_filter, kernel=(3, 3), pad=(p, p), stride=(s, s), act_type='relu')
layers.append(conv_3x3)
return layers
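
For orientation, the sketch below shows how `multi_layer_feature` could be called with the `vgg16_reduced` settings listed in `symbol/README.md`; the base-network import path and `get_symbol` signature are assumptions made for the example:
```
# Illustrative call of multi_layer_feature using the vgg16_reduced settings
# from symbol/README.md; the base-network import and get_symbol signature
# below are assumed, not verified against the repository.
from symbol.common import multi_layer_feature
from symbol.vgg16_reduced import get_symbol as get_base

body = get_base(num_classes=1000)                    # base classification network
from_layers = ['relu4_3', 'relu7', '', '', '', '']   # two taps + four extra layers
num_filters = [512, -1, 512, 256, 256, 256]
strides = [-1, -1, 2, 2, 1, 1]
pads = [-1, -1, 1, 1, 0, 0]
layers = multi_layer_feature(body, from_layers, num_filters, strides, pads)
# layers -> six feature maps (relu4_3, relu7 and four extra conv blocks),
# ready to be passed to multibox_layer below.
```
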

def multibox_layer(from_layers, num_classes, sizes=[.2, .95],
ratios=[1], normalization=-1, num_channels=[],
clip=True, interm_layer=0, steps=[]):
clip=False, interm_layer=0, steps=[]):
"""
the basic aggregation module for SSD detection. Takes in multiple layers,
generate multiple object detection targets by customized layers
@@ -106,7 +160,7 @@ def multibox_layer(from_layers, num_classes, sizes=[.2, .95],
normalization = [normalization] * len(from_layers)
assert len(normalization) == len(from_layers)

assert sum(x > 0 for x in normalization) == len(num_channels), \
assert sum(x > 0 for x in normalization) <= len(num_channels), \
"must provide number of channels for each normalized layer"

if steps:
@@ -125,7 +179,8 @@
mode="channel", name="{}_norm".format(from_name))
scale = mx.symbol.Variable(name="{}_scale".format(from_name),
shape=(1, num_channels.pop(0), 1, 1),
init=mx.init.Constant(normalization[k]))
init=mx.init.Constant(normalization[k]),
attr={'__wd_mult__': '0.1'})
from_layer = mx.symbol.broadcast_mul(lhs=scale, rhs=from_layer)
if interm_layer > 0:
from_layer = mx.symbol.Convolution(data=from_layer, kernel=(3,3), \
(Diffs for the remaining 9 changed files are not shown.)