From 682db0d61af10bcdd80cfd1362cdf3f92a447e4a Mon Sep 17 00:00:00 2001 From: avBuffer Date: Thu, 16 Jul 2020 17:15:49 +0800 Subject: [PATCH] change relu from leaky relu into mesh in yolov5 --- core/config.py | 26 +++--- core/yolov4.py | 52 +++++------ core/yolov5.py | 91 ++++++++----------- ..._layer_img.py => show_layer_feature_map.py | 8 +- train.py | 34 +++---- 5 files changed, 99 insertions(+), 112 deletions(-) rename show_layer_img.py => show_layer_feature_map.py (83%) diff --git a/core/config.py b/core/config.py index 9e6fb71..1df3484 100644 --- a/core/config.py +++ b/core/config.py @@ -14,27 +14,27 @@ # Set the class name __C.YOLO.NET_TYPE = 'darknet53' # 'darknet53' 'mobilenetv2' -__C.YOLO.CLASSES = "./data/classes/coco.names" -__C.YOLO.ANCHORS = "./data/anchors/coco_anchors.txt" # yolov3/5 : yolo_anchors.txt; yolov4 : yolov4_anchors.txt +__C.YOLO.CLASSES = './data/classes/coco.names' +__C.YOLO.ANCHORS = './data/anchors/coco_anchors.txt' # yolov3/5 : yolo_anchors.txt; yolov4 : yolov4_anchors.txt __C.YOLO.MOVING_AVE_DECAY = 0.9995 __C.YOLO.STRIDES = [8, 16, 32] __C.YOLO.STRIDES_TINY = [16, 32] __C.YOLO.ANCHOR_PER_SCALE = 3 __C.YOLO.IOU_LOSS_THRESH = 0.5 -__C.YOLO.UPSAMPLE_METHOD = "resize" +__C.YOLO.UPSAMPLE_METHOD = 'resize' __C.YOLO.WIDTH_SCALE_V5 = 0.50 # yolov5 small:0.50 / middle:0.75 / large:1.00 / extend:1.25 __C.YOLO.DEPTH_SCALE_V5 = 0.33 # yolov5 small:0.33(1/3) / middle:0.67(2/3) / large:1.00 / extend:1.33(4/3) -__C.YOLO.ORIGINAL_WEIGHT = "./checkpoint/yolov3_coco.ckpt" -__C.YOLO.DEMO_WEIGHT = "./checkpoint/yolov3_coco_demo.ckpt" +__C.YOLO.ORIGINAL_WEIGHT = './checkpoint/yolov3_coco.ckpt' +__C.YOLO.DEMO_WEIGHT = './checkpoint/yolov3_coco_demo.ckpt' # Train options __C.TRAIN = edict() -__C.TRAIN.ANNOT_PATH = "./data/COCO/2017/train.txt" -__C.TRAIN.BATCH_SIZE = 6 +__C.TRAIN.ANNOT_PATH = './data/COCO/2017/train.txt' +__C.TRAIN.BATCH_SIZE = 16 if __C.YOLO.NET_TYPE == 'mobilenetv2' else 4 __C.TRAIN.INPUT_SIZE = [320, 352, 384, 416, 448, 480, 512, 544, 576, 608] __C.TRAIN.DATA_AUG = True __C.TRAIN.LEARN_RATE_INIT = 1e-4 @@ -42,21 +42,21 @@ __C.TRAIN.WARMUP_EPOCHS = 2 __C.TRAIN.FISRT_STAGE_EPOCHS = 20 __C.TRAIN.SECOND_STAGE_EPOCHS = 30 -__C.TRAIN.INITIAL_WEIGHT = "./weights/yolov4_coco.ckpt" -__C.TRAIN.CKPT_PATH = "./checkpoint" +__C.TRAIN.INITIAL_WEIGHT = './weights/yolov4_coco.ckpt' +__C.TRAIN.CKPT_PATH = './checkpoint' # TEST options __C.TEST = edict() -__C.TEST.ANNOT_PATH = "./data/dataset/voc_test.txt" +__C.TEST.ANNOT_PATH = './data/dataset/voc_test.txt' __C.TEST.BATCH_SIZE = 2 -__C.TEST.INPUT_SIZE = 544 +__C.TEST.INPUT_SIZE = 416 __C.TEST.DATA_AUG = False __C.TEST.WRITE_IMAGE = True -__C.TEST.WRITE_IMAGE_PATH = "./data/detection/" +__C.TEST.WRITE_IMAGE_PATH = './data/detection/' __C.TEST.WRITE_IMAGE_SHOW_LABEL = True -__C.TEST.WEIGHT_FILE = "./checkpoint/yolov3_test_loss=9.2099.ckpt-5" +__C.TEST.WEIGHT_FILE = './checkpoint/yolov3_test_loss=9.2099.ckpt-5' __C.TEST.SHOW_LABEL = True __C.TEST.SCORE_THRESHOLD = 0.3 __C.TEST.IOU_THRESHOLD = 0.45 diff --git a/core/yolov4.py b/core/yolov4.py index 641de59..e313a46 100644 --- a/core/yolov4.py +++ b/core/yolov4.py @@ -17,7 +17,7 @@ def mish(inputs): def conv(input_data, filters_shape, trainable, name, downsample=False, activate=True, bn=True, act_fun='leaky_relu'): - """Define Conv layer""" + '''Define Conv layer''' with tf.variable_scope(name): if downsample: pad_h, pad_w = (filters_shape[0] - 2) // 2 + 1, (filters_shape[1] - 2) // 2 + 1 @@ -57,7 +57,7 @@ def res_block(input_data, input_channel, filter_num1, filter_num2, trainable, na input_data = conv(input_data, filters_shape=(1, 1, input_channel, filter_num1), trainable=trainable, name='conv1', act_fun='mish') input_data = conv(input_data, filters_shape=(3, 3, filter_num1, filter_num2), - trainable=trainable, name='conv2', act_fun='mish') + trainable=trainable, name='conv2', act_fun='mish') residual_ouput = input_data + short_cut return residual_ouput @@ -78,11 +78,11 @@ def upsample(input_data, name, method='deconv'): def cspfirst_stage(input_data, trainable, filters): - """First csp stage. + '''First csp stage. param input_data: The input tensor param trainable: A bool parameter, True ==> training, False ==> not train. param filters: Filter nums - return: Output tensors and the last Conv layer counter of this stage""" + return: Output tensors and the last Conv layer counter of this stage''' c = filters route = input_data route = conv(route, (1, 1, c, c), trainable=trainable, name='conv2', act_fun='mish') @@ -98,7 +98,7 @@ def cspfirst_stage(input_data, trainable, filters): def cspstage(input_data, trainable, filters, loop, layer_nums, route_nums, res_nums): - """CSPNets stage + '''CSPNets stage param input_data: The input tensor param trainable: A bool parameter, True ==> training, False ==> not train. param filters: Filter nums @@ -106,7 +106,7 @@ def cspstage(input_data, trainable, filters, loop, layer_nums, route_nums, res_n param layer_nums: Counter of Conv layers param route_nums: Counter of route nums param res_nums: Counter of ResBlock nums - return: Output tensors and the last Conv layer counter of this stage""" + return: Output tensors and the last Conv layer counter of this stage''' c = filters out_layer = layer_nums + 1 + loop + 1 route = input_data @@ -122,10 +122,10 @@ def cspstage(input_data, trainable, filters, loop, layer_nums, route_nums, res_n def cspdarknet53(input_data, trainable): - """CSPDarknet53 body; source: https://arxiv.org/pdf/1911.11929.pdf + '''CSPDarknet53 body; source: https://arxiv.org/pdf/1911.11929.pdf param input_data: Input tensor param trainable: A bool parameter, True ==> training, False ==> not train. - return: Three stage tensors""" + return: Three stage tensors''' input_data = conv(input_data, (3, 3, 3, 32), trainable=trainable, name='conv0', act_fun='mish') input_data = conv(input_data, (3, 3, 32, 64), trainable=trainable, name='conv1', downsample=True, act_fun='mish') @@ -181,8 +181,8 @@ def __init__(self, input_data, trainable): def __build_network(self, input_data): - """Build yolov4 body, including SPP, PAN, Yolov3 Head/Neck. - param input_data: Input tensor, return: Three stage outputs""" + '''Build yolov4 body, including SPP, PAN, Yolov3 Head/Neck. + param input_data: Input tensor, return: Three stage outputs''' route_1, route_2, input_data = cspdarknet53(input_data, self.trainable) # 19 x 19 head @@ -261,11 +261,11 @@ def __build_network(self, input_data): def decode(self, conv_ouput, anchors, strides): - """Decode yolov4, use sigmoid decode conv_output. + '''Decode yolov4, use sigmoid decode conv_output. param conv_ouput: The output of yolov4 body. param anchors: The anchors param strides: Three dimensions, default [8, 16, 32] - return: The predict of conv_output""" + return: The predict of conv_output''' conv_shape = tf.shape(conv_ouput) batch_size = conv_shape[0] output_size = conv_shape[1] @@ -294,10 +294,10 @@ def decode(self, conv_ouput, anchors, strides): def bbox_iou(self, boxes1, boxes2): - """Calculate bbox iou; source: + '''Calculate bbox iou; source: param boxes1: Tensor, shape=(i1,...,iN, 4), xywh param boxes2: Tensor, shape=(j, 4), xywh - return: Tensor, shape=(i1,...,iN, j)""" + return: Tensor, shape=(i1,...,iN, j)''' boxes1_area = boxes1[..., 2] * boxes1[..., 3] boxes2_area = boxes2[..., 2] * boxes2[..., 3] @@ -316,10 +316,10 @@ def bbox_iou(self, boxes1, boxes2): def bbox_giou(self, boxes1, boxes2): - """Calculate giou loss; source: https://arxiv.org/abs/1902.09630 + '''Calculate giou loss; source: https://arxiv.org/abs/1902.09630 param boxes1: Tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh param boxes2: Tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh - return: Tensor, shape=(batch, feat_w, feat_h, anchor_num, 1)""" + return: Tensor, shape=(batch, feat_w, feat_h, anchor_num, 1)''' boxes1 = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5, boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1) boxes2 = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5, boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1) @@ -347,10 +347,10 @@ def bbox_giou(self, boxes1, boxes2): def bbox_diou(self, boxes1, boxes2): - """Calculate diou; source: https://arxiv.org/pdf/1911.08287v1.pdf + '''Calculate diou; source: https://arxiv.org/pdf/1911.08287v1.pdf param boxes1: Tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh param boxes2: Tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh - return: Tensor, shape=(batch, feat_w, feat_h, anchor_num, 1)""" + return: Tensor, shape=(batch, feat_w, feat_h, anchor_num, 1)''' boxes1_center, boxes2_center = boxes1[..., :2], boxes2[..., :2] boxes1 = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5, boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1) boxes2 = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5, boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1) @@ -380,10 +380,10 @@ def bbox_diou(self, boxes1, boxes2): def bbox_ciou(self, boxes1, boxes2): - """Calculate ciou; source: https://arxiv.org/pdf/1911.08287v1.pdf + '''Calculate ciou; source: https://arxiv.org/pdf/1911.08287v1.pdf param boxes1: Tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh param boxes2: Tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh - return: Tensor, shape=(batch, feat_w, feat_h, anchor_num, 1)""" + return: Tensor, shape=(batch, feat_w, feat_h, anchor_num, 1)''' boxes1_1, boxes2_1 = boxes1, boxes2 boxes1_center, boxes2_center = boxes1[..., :2], boxes2[..., :2] @@ -419,24 +419,24 @@ def bbox_ciou(self, boxes1, boxes2): def focal_loss(self, y_true, y_pred, gamma=2.0, alpha=1): - """Compute focal loss; source:https://arxiv.org/abs/1708.02002 + '''Compute focal loss; source:https://arxiv.org/abs/1708.02002 param y_true: Ground truth targets, tensor of shape (?, num_boxes, num_classes). param y_pred: Predicted logits, tensor of shape (?, num_boxes, num_classes). param gamma: Exponent of the modulating factor (1 - p_t) ^ gamma. param alpha: Optional alpha weighting factor to balance positives vs negatives. - return: Focal factor""" + return: Focal factor''' focal_loss = alpha * tf.pow(tf.abs(y_true - y_pred), gamma) return focal_loss def _label_smoothing(self, y_true, label_smoothing): - """Label smoothing. source: https://arxiv.org/pdf/1906.02629.pdf""" + '''Label smoothing. source: https://arxiv.org/pdf/1906.02629.pdf''' label_smoothing = tf.constant(label_smoothing, dtype=tf.float32) return y_true * (1.0 - label_smoothing) + 0.5 * label_smoothing def yolov4_loss(self, conv, pred, label, bboxes, stride, iou_use=1, focal_use=False, label_smoothing=0): - """Reture yolov4_loss tensor. + '''Reture yolov4_loss tensor. param conv: The outputs of yolov4 body, conv_sbbox, conv_mbbox, conv_lbbox param pred: The outputs of decode, pred_sbbox, pred_mbbox, pred_lbbox param label: The input label boxes @@ -445,7 +445,7 @@ def yolov4_loss(self, conv, pred, label, bboxes, stride, iou_use=1, focal_use=Fa param iou_use: The iou loss (0, 1, 2) ==> (giou, diou, ciou) param focal_use: The focal loss (0, 1, 2) ==> (normal, sigmoid_focal, focal) param label_smoothing: The label smoothing - return: Tensor, shape=(1,)""" + return: Tensor, shape=(1,)''' conv_shape = tf.shape(conv) batch_size = conv_shape[0] output_size = conv_shape[1] @@ -498,7 +498,7 @@ def yolov4_loss(self, conv, pred, label, bboxes, stride, iou_use=1, focal_use=Fa def compute_loss(self, label_sbbox, label_mbbox, label_lbbox, true_sbbox, true_mbbox, true_lbbox, iou_use, focal_use, label_smoothing): - """Compute loss; location loss, confidence loss, class prob loss """ + '''Compute loss; location loss, confidence loss, class prob loss ''' with tf.name_scope('smaller_box_loss'): loss_sbbox = self.yolov4_loss(self.conv_sbbox, self.pred_sbbox, label_sbbox, true_sbbox, stride=self.strides[0], iou_use=iou_use, focal_use=focal_use, label_smoothing=label_smoothing) diff --git a/core/yolov5.py b/core/yolov5.py index 06ebdca..07c00cd 100644 --- a/core/yolov5.py +++ b/core/yolov5.py @@ -54,9 +54,9 @@ def res_block(input_data, input_channel, filter_num1, filter_num2, trainable, na short_cut = input_data with tf.variable_scope(name): input_data = conv(input_data, filters_shape=(1, 1, input_channel, filter_num1), - trainable=trainable, name='conv1') + trainable=trainable, name='conv1', act_fun='mish') input_data = conv(input_data, filters_shape=(3, 3, filter_num1, filter_num2), - trainable=trainable, name='conv2', ) + trainable=trainable, name='conv2', act_fun='mish') residual_ouput = input_data + short_cut return residual_ouput @@ -76,28 +76,8 @@ def upsample(input_data, name, method='deconv'): return output -def cspfirst_stage(input_data, trainable, filters): - """First csp stage. - param input_data: The input tensor - param trainable: A bool parameter, True ==> training, False ==> not train. - param filters: Filter nums - return: Output tensors and the last Conv layer counter of this stage""" - c = filters - route = input_data - route = conv(route, (1, 1, c, c), trainable=trainable, name='conv2') - input_data = conv(input_data, (1, 1, c, c), trainable=trainable, name='conv3') - - for i in range(1): - input_data = res_block(input_data, c, c / 2, c, trainable=trainable, name='residual%d' % (i + 0)) - - input_data = conv(input_data, (1, 1, c, c), trainable=trainable, name='conv6') - input_data = tf.concat([input_data, route], axis=-1) - layer_nums = 6 - return input_data, layer_nums - - def cspstage(input_data, trainable, filters, loop, layer_nums, route_nums, res_nums): - """CSPNets stage + '''CSPNets stage param input_data: The input tensor param trainable: A bool parameter, True ==> training, False ==> not train. param filters: Filter nums @@ -105,54 +85,58 @@ def cspstage(input_data, trainable, filters, loop, layer_nums, route_nums, res_n param layer_nums: Counter of Conv layers param route_nums: Counter of route nums param res_nums: Counter of ResBlock nums - return: Output tensors and the last Conv layer counter of this stage""" + return: Output tensors and the last Conv layer counter of this stage''' c = filters out_layer = layer_nums + 1 + loop + 1 route = input_data - route = conv(route, (1, 1, c, c / 2), trainable=trainable, name='conv_route%d' % route_nums) - input_data = conv(input_data, (1, 1, c, c / 2), trainable=trainable, name='conv%d' % (layer_nums + 1)) + route = conv(route, (1, 1, c, c / 2), trainable=trainable, name='conv_route%d' % route_nums, act_fun='mish') + input_data = conv(input_data, (1, 1, c, c / 2), trainable=trainable, name='conv%d' % (layer_nums + 1), act_fun='mish') for i in range(loop): input_data = res_block(input_data, c / 2, c / 2, c / 2, trainable=trainable, name='residual%d' % (i + res_nums)) - input_data = conv(input_data, (1, 1, c / 2, c / 2), trainable=trainable, name='conv%d' % out_layer) + input_data = conv(input_data, (1, 1, c / 2, c / 2), trainable=trainable, name='conv%d' % out_layer, act_fun='mish') input_data = tf.concat([input_data, route], axis=-1) return input_data, out_layer def cspdarknet53(input_data, trainable, init_width_size, init_depth_size): - """CSPDarknet53 body; source: https://arxiv.org/pdf/1911.11929.pdf + '''CSPDarknet53 body; source: https://arxiv.org/pdf/1911.11929.pdf param input_data: Input tensor param trainable: A bool parameter, True ==> training, False ==> not train. - return: Three stage tensors""" + return: Three stage tensors''' # for debug to print net layers' shape, need to remark while train/val/test phase #input_data = tf.reshape(input_data, [-1, 608, 608, 3]) # 3x608x608 -> 64x608x608 - input_data = conv(input_data, (3, 3, 3, init_width_size), trainable=trainable, name='conv0') + input_data = conv(input_data, (3, 3, 3, init_width_size), trainable=trainable, name='conv0', act_fun='mish') # 64x608x608 -> 128x304x304 - input_data = conv(input_data, (1, 1, init_width_size, 2*init_width_size), trainable=trainable, name='conv1', downsample=True) + input_data = conv(input_data, (1, 1, init_width_size, 2*init_width_size), trainable=trainable, name='conv1', + downsample=True, act_fun='mish') layer_num = 1 input_data, layer_num = cspstage(input_data, trainable, 2*init_width_size, init_depth_size, layer_num, 1, 1) # 128x304x304 -> 256x152x152 - input_data = conv(input_data, (3, 3, 2*init_width_size, 4*init_width_size), trainable=trainable, name='conv%d' % (layer_num + 1), downsample=True) + input_data = conv(input_data, (3, 3, 2*init_width_size, 4*init_width_size), trainable=trainable, + name='conv%d' % (layer_num + 1), downsample=True, act_fun='mish') route_1 = input_data layer_num = layer_num + 1 input_data, layer_num = cspstage(input_data, trainable, 4*init_width_size, 3*init_depth_size, layer_num, 2, 1+init_depth_size) # 256x152x152 -> 512x76x76 - input_data = conv(input_data, (3, 3, 4*init_width_size, 8*init_width_size), trainable=trainable, name='conv%d' % (layer_num + 1), downsample=True) + input_data = conv(input_data, (3, 3, 4*init_width_size, 8*init_width_size), trainable=trainable, + name='conv%d' % (layer_num + 1), downsample=True, act_fun='mish') route_2 = input_data layer_num = layer_num + 1 input_data, layer_num = cspstage(input_data, trainable, 8*init_width_size, 3*init_depth_size, layer_num, 3, 1+4*init_depth_size) # 512x76x76 -> 1024x38x38 - input_data = conv(input_data, (3, 3, 8*init_width_size, 16*init_width_size), trainable=trainable, name='conv%d' % (layer_num + 1), downsample=True) + input_data = conv(input_data, (3, 3, 8*init_width_size, 16*init_width_size), trainable=trainable, + name='conv%d' % (layer_num + 1), downsample=True, act_fun='mish') route_3 = input_data #SPP @@ -162,7 +146,8 @@ def cspdarknet53(input_data, trainable, init_width_size, init_depth_size): input_data = tf.concat([maxpool1, maxpool2, maxpool3, input_data], axis=-1) # 4096x38x38 -> 1024x38x38 - input_data = conv(input_data, (1, 1, 64*init_width_size, 16*init_width_size), trainable=trainable, name='conv%d' % (layer_num + 2), downsample=True) + input_data = conv(input_data, (1, 1, 64*init_width_size, 16*init_width_size), trainable=trainable, + name='conv%d' % (layer_num + 2), downsample=True, act_fun='mish') last_layer_num = layer_num + 2 return route_1, route_2, route_3, input_data, last_layer_num @@ -195,8 +180,8 @@ def __init__(self, input_data, trainable): def __build_network(self, input_data): - """Build yolov5 body, including SPP, PAN, Yolov3/v4 Head/Neck. - param input_data: Input tensor, return: Three stage outputs""" + '''Build yolov5 body, including SPP, PAN, Yolov3/v4 Head/Neck. + param input_data: Input tensor, return: Three stage outputs''' init_width_size = int(64 * self.width_scale) if self.depth_scale == 0.33: @@ -294,11 +279,11 @@ def __build_network(self, input_data): def decode(self, conv_ouput, anchors, strides): - """Decode yolov5, use sigmoid decode conv_output. + '''Decode yolov5, use sigmoid decode conv_output. param conv_ouput: The output of yolov5 body. param anchors: The anchors param strides: Three dimensions, default [8, 16, 32] - return: The predict of conv_output""" + return: The predict of conv_output''' conv_shape = tf.shape(conv_ouput) batch_size = conv_shape[0] output_size = conv_shape[1] @@ -327,10 +312,10 @@ def decode(self, conv_ouput, anchors, strides): def bbox_iou(self, boxes1, boxes2): - """Calculate bbox iou; source: + '''Calculate bbox iou; source: param boxes1: Tensor, shape=(i1,...,iN, 4), xywh param boxes2: Tensor, shape=(j, 4), xywh - return: Tensor, shape=(i1,...,iN, j)""" + return: Tensor, shape=(i1,...,iN, j)''' boxes1_area = boxes1[..., 2] * boxes1[..., 3] boxes2_area = boxes2[..., 2] * boxes2[..., 3] @@ -349,10 +334,10 @@ def bbox_iou(self, boxes1, boxes2): def bbox_giou(self, boxes1, boxes2): - """Calculate giou loss; source: https://arxiv.org/abs/1902.09630 + '''Calculate giou loss; source: https://arxiv.org/abs/1902.09630 param boxes1: Tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh param boxes2: Tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh - return: Tensor, shape=(batch, feat_w, feat_h, anchor_num, 1)""" + return: Tensor, shape=(batch, feat_w, feat_h, anchor_num, 1)''' boxes1 = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5, boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1) boxes2 = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5, boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1) @@ -380,10 +365,10 @@ def bbox_giou(self, boxes1, boxes2): def bbox_diou(self, boxes1, boxes2): - """Calculate diou; source: https://arxiv.org/pdf/1911.08287v1.pdf + '''Calculate diou; source: https://arxiv.org/pdf/1911.08287v1.pdf param boxes1: Tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh param boxes2: Tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh - return: Tensor, shape=(batch, feat_w, feat_h, anchor_num, 1)""" + return: Tensor, shape=(batch, feat_w, feat_h, anchor_num, 1)''' boxes1_center, boxes2_center = boxes1[..., :2], boxes2[..., :2] boxes1 = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5, boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1) boxes2 = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5, boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1) @@ -413,10 +398,10 @@ def bbox_diou(self, boxes1, boxes2): def bbox_ciou(self, boxes1, boxes2): - """Calculate ciou; source: https://arxiv.org/pdf/1911.08287v1.pdf + '''Calculate ciou; source: https://arxiv.org/pdf/1911.08287v1.pdf param boxes1: Tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh param boxes2: Tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh - return: Tensor, shape=(batch, feat_w, feat_h, anchor_num, 1)""" + return: Tensor, shape=(batch, feat_w, feat_h, anchor_num, 1)''' boxes1_1, boxes2_1 = boxes1, boxes2 boxes1_center, boxes2_center = boxes1[..., :2], boxes2[..., :2] @@ -452,18 +437,18 @@ def bbox_ciou(self, boxes1, boxes2): def focal_loss(self, y_true, y_pred, gamma=2.0, alpha=1): - """Compute focal loss; source:https://arxiv.org/abs/1708.02002 + '''Compute focal loss; source:https://arxiv.org/abs/1708.02002 param y_true: Ground truth targets, tensor of shape (?, num_boxes, num_classes). param y_pred: Predicted logits, tensor of shape (?, num_boxes, num_classes). param gamma: Exponent of the modulating factor (1 - p_t) ^ gamma. param alpha: Optional alpha weighting factor to balance positives vs negatives. - return: Focal factor""" + return: Focal factor''' focal_loss = alpha * tf.pow(tf.abs(y_true - y_pred), gamma) return focal_loss def _label_smoothing(self, y_true, label_smoothing): - """Label smoothing. source: https://arxiv.org/pdf/1906.02629.pdf""" + '''Label smoothing. source: https://arxiv.org/pdf/1906.02629.pdf''' label_smoothing = tf.constant(label_smoothing, dtype=tf.float32) #return y_true * (1.0 - label_smoothing) + 0.5 * label_smoothing @@ -476,7 +461,7 @@ def _label_smoothing(self, y_true, label_smoothing): def yolov5_loss(self, conv, pred, label, bboxes, stride, iou_use=1, focal_use=False, label_smoothing=0): - """Reture yolov5_loss tensor. + '''Reture yolov5_loss tensor. param conv: The outputs of yolov5 body, conv_sbbox, conv_mbbox, conv_lbbox param pred: The outputs of decode, pred_sbbox, pred_mbbox, pred_lbbox param label: The input label boxes @@ -485,7 +470,7 @@ def yolov5_loss(self, conv, pred, label, bboxes, stride, iou_use=1, focal_use=Fa param iou_use: The iou loss (0, 1, 2) ==> (giou, diou, ciou) param focal_use: The focal loss (0, 1, 2) ==> (normal, sigmoid_focal, focal) param label_smoothing: The label smoothing - return: Tensor, shape=(1,)""" + return: Tensor, shape=(1,)''' conv_shape = tf.shape(conv) batch_size = conv_shape[0] output_size = conv_shape[1] @@ -538,7 +523,7 @@ def yolov5_loss(self, conv, pred, label, bboxes, stride, iou_use=1, focal_use=Fa def compute_loss(self, label_sbbox, label_mbbox, label_lbbox, true_sbbox, true_mbbox, true_lbbox, iou_use, focal_use, label_smoothing): - """Compute loss; location loss, confidence loss, class prob loss """ + '''Compute loss; location loss, confidence loss, class prob loss ''' with tf.name_scope('smaller_box_loss'): loss_sbbox = self.yolov5_loss(self.conv_sbbox, self.pred_sbbox, label_sbbox, true_sbbox, stride=self.strides[0], iou_use=iou_use, focal_use=focal_use, label_smoothing=label_smoothing) diff --git a/show_layer_img.py b/show_layer_feature_map.py similarity index 83% rename from show_layer_img.py rename to show_layer_feature_map.py index d5f251d..1eae5e8 100644 --- a/show_layer_img.py +++ b/show_layer_feature_map.py @@ -14,7 +14,7 @@ if __name__ == '__main__': argv = sys.argv if len(argv) < 5: - print('usage: python show_layer_img.py gpu_id pb_file img_file out_path') + print('usage: python show_layer_feature_map.py gpu_id pb_file img_file out_path') sys.exit() gpu_id = argv[1] @@ -33,7 +33,8 @@ out_path = argv[4] if not os.path.exists(out_path): os.makedirs(out_path) - print('show_layer_img gpu_id=%s, pb_file=%s, img_file=%s, out_path=%s' % (gpu_id, pb_file, img_file, out_path)) + print('show_layer_feature_map gpu_id=%s, pb_file=%s, img_file=%s, out_path=%s' % + (gpu_id, pb_file, img_file, out_path)) input_size = 416 img = cv2.imread(img_file) @@ -57,7 +58,8 @@ features = np.array(conv.eval({return_tensors[0]: image_data})) print('\n[%d/%d] %s' % (idx, len(conv_layer_names), layer_name), ' features.shape=', features.shape) - out_layer_path = os.path.join(out_path, '%s-%s' % (layer_name.replace('/', '_'), str(features.shape[3]))) + out_layer_path = os.path.join(out_path, '%s-%sx%sx%s' % (layer_name.replace('/', '_'), str(features.shape[1]), + str(features.shape[2]), str(features.shape[3]))) if not os.path.exists(out_layer_path): os.makedirs(out_layer_path) diff --git a/train.py b/train.py index c6031f4..5526108 100644 --- a/train.py +++ b/train.py @@ -38,7 +38,7 @@ def __init__(self, net_type): self.moving_ave_decay = cfg.YOLO.MOVING_AVE_DECAY self.max_bbox_per_scale = 150 - self.train_logdir = ("./%s/log/train" % net_type) + self.train_logdir = ('./%s/log/train' % net_type) if not os.path.exists(self.train_logdir): os.makedirs(self.train_logdir) @@ -68,7 +68,7 @@ def __init__(self, net_type): self.true_lbboxes = tf.placeholder(dtype=tf.float32, name='lbboxes') self.trainable = tf.placeholder(dtype=tf.bool, name='training') - with tf.name_scope("define_loss"): + with tf.name_scope('define_loss'): if self.net_type == 'tiny': self.model = YOLOV3Tiny(self.input_data, self.trainable) self.net_var = tf.global_variables() @@ -113,10 +113,10 @@ def __init__(self, net_type): (1 + tf.cos((self.global_step - warmup_steps) / (train_steps - warmup_steps) * np.pi))) global_step_update = tf.assign_add(self.global_step, 1.0) - with tf.name_scope("define_weight_decay"): + with tf.name_scope('define_weight_decay'): moving_ave = tf.train.ExponentialMovingAverage(self.moving_ave_decay).apply(tf.trainable_variables()) - with tf.name_scope("define_first_stage_train"): + with tf.name_scope('define_first_stage_train'): self.first_stage_trainable_var_list = [] for var in tf.trainable_variables(): var_name = var.op.name @@ -136,7 +136,7 @@ def __init__(self, net_type): with tf.control_dependencies([moving_ave]): self.train_op_with_frozen_variables = tf.no_op() - with tf.name_scope("define_second_stage_train"): + with tf.name_scope('define_second_stage_train'): second_stage_trainable_var_list = tf.trainable_variables() second_stage_optimizer = tf.train.AdamOptimizer(self.learn_rate).minimize(self.loss, var_list=second_stage_trainable_var_list) @@ -150,13 +150,13 @@ def __init__(self, net_type): self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=1000) with tf.name_scope('summary'): - tf.summary.scalar("learn_rate", self.learn_rate) - tf.summary.scalar("iou_loss", self.iou_loss) - tf.summary.scalar("conf_loss", self.conf_loss) - tf.summary.scalar("prob_loss", self.prob_loss) - tf.summary.scalar("total_loss", self.loss) + tf.summary.scalar('learn_rate', self.learn_rate) + tf.summary.scalar('iou_loss', self.iou_loss) + tf.summary.scalar('conf_loss', self.conf_loss) + tf.summary.scalar('prob_loss', self.prob_loss) + tf.summary.scalar('total_loss', self.loss) - logdir = ("./%s/log/" % self.net_type) + logdir = ('./%s/log/' % self.net_type) if os.path.exists(logdir): shutil.rmtree(logdir) os.makedirs(logdir) @@ -172,7 +172,7 @@ def train(self): self.loader.restore(self.sess, self.initial_weight) except: print('=> %s does not exist !!!' % self.initial_weight) - print('=> Now it starts to train YOLOV4 from scratch ...') + print('=> Now it starts to train YOLO-%s from scratch ...' % self.net_type) self.first_stage_epochs = 0 saving = 0.0 @@ -203,7 +203,7 @@ def train(self): train_epoch_loss.append(train_step_loss) self.summary_writer.add_summary(summary, global_step_val) - pbar.set_description("train loss: %.2f" %train_step_loss) + pbar.set_description('train loss: %.2f' %train_step_loss) for test_data in self.testset: if net_type == 'tiny': @@ -223,20 +223,20 @@ def train(self): train_epoch_loss, test_epoch_loss = np.mean(train_epoch_loss), np.mean(test_epoch_loss) train_epoch_loss = np.mean(train_epoch_loss) - ckpt_file = os.path.join(self.ckpt_path, "yolov4_test_loss=%.4f.ckpt" % test_epoch_loss) + ckpt_file = os.path.join(self.ckpt_path, 'yolov4_test_loss=%.4f.ckpt' % test_epoch_loss) log_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) if saving == 0.0: saving = train_epoch_loss - print("=> Epoch: %2d Time: %s Train loss: %.2f" % (epoch, log_time, train_epoch_loss)) + print('=> Epoch: %2d Time: %s Train loss: %.2f' % (epoch, log_time, train_epoch_loss)) elif saving > train_epoch_loss: - print("=> Epoch: %2d Time: %s Train loss: %.2f Test loss: %.2f Saving %s ..." % + print('=> Epoch: %2d Time: %s Train loss: %.2f Test loss: %.2f Saving %s ...' % (epoch, log_time, train_epoch_loss, test_epoch_loss, ckpt_file)) self.saver.save(self.sess, ckpt_file, global_step=epoch) saving = train_epoch_loss else: - print("=> Epoch: %2d Time: %s Train loss: %.2f" % (epoch, log_time, train_epoch_loss)) + print('=> Epoch: %2d Time: %s Train loss: %.2f' % (epoch, log_time, train_epoch_loss)) self.saver.save(self.sess, '')