fizyr · madisi98 · Sep 20, 2020 · Sep 20, 2020 · Sep 20, 2020 · Oct 4, 2020
diff --git a/.gitignore b/.gitignore
@@ -20,4 +20,7 @@ __pycache__/
 .coverage
 .coverage.*
 coverage.xml
-*.cover
+*.cover
+
+# IDE
+.idea
diff --git a/keras_retinanet/bin/build_hdf5.py b/keras_retinanet/bin/build_hdf5.py
@@ -0,0 +1,115 @@
+import argparse
+
+import h5py
+import numpy as np
+from tqdm import tqdm
 import progressbar 
 import progressbar 
 import progressbar 
+
+from ..preprocessing.csv_generator import CSVGenerator
+from ..models import backbone
+
+
+def parse():
+    """ Parse the command line arguments of the HDF5 dataset builder.
+
+    The arguments are read by argparse from the command line; the three options
+    declared with required=True must be given.
+
+    Returns
+        The argparse namespace holding the CSV paths, the destination file,
+        the backbone name and the image resizing options.
+    """
+    cli = argparse.ArgumentParser(description='Simple script for building an HDF5 file for retinanet training.')
+
+    # Input CSV files and the output file.
+    cli.add_argument('--train-annotations', required=True,
+                     help='Path to CSV file containing annotations for training.')
+    cli.add_argument('--val-annotations',
+                     help='Path to CSV file containing annotations for validation (optional).')
+    cli.add_argument('--classes', required=True,
+                     help='Path to a CSV file containing class label mapping.')
+    cli.add_argument('--dest-file', required=True,
+                     help='Path to destination HDF5 file.')
+
+    # Backbone name and the image resizing options.
+    cli.add_argument('--backbone-to-use', type=str, default='resnet50',
+                     help='Backbone that will be used in training.')
+    cli.add_argument('--image-min-side', type=int, default=800,
+                     help='Rescale the image so the smallest side is min_side.')
+    cli.add_argument('--image-max-side', type=int, default=1333,
+                     help='Rescale the image if the largest side is larger than max_side.')
+    cli.add_argument('--no-resize', action='store_true',
+                     help='Don\'t rescale the image.')
+
+    parsed = cli.parse_args()
+    return parsed
+
+
+def main():
+    """ Build an HDF5 dataset file from the CSV annotations given on the command line.
+
+    Each split that has an annotations file is loaded, filtered and preprocessed once, then stored
+    under '<split>/img', '<split>/shapes', '<split>/labels' and '<split>/bboxes' ('classes' holds the names).
+    """
+    args = parse()
+    annotations_csv = {
+        'train': args.train_annotations,
+        'val': args.val_annotations,
+    }
+
+    common_args = {
+        'batch_size'       : 1,
+        'image_min_side'   : args.image_min_side,
+        'image_max_side'   : args.image_max_side,
+        'no_resize'        : args.no_resize,
+        'preprocess_image' : backbone(args.backbone_to_use).preprocess_image,
+    }
+
+    # Special dtypes, because the flattened arrays have a different length per image.
+    dt = h5py.special_dtype(vlen=np.dtype('float64'))
+    st = h5py.special_dtype(vlen=str)
+    gzip_opts = {'compression': 'gzip', 'compression_opts': 9}
+
+    for split in ['train', 'val']:
+        if not annotations_csv[split]:
+            continue
+
+        generator = CSVGenerator(
+            annotations_csv[split],
+            args.classes,
+            transform_generator=None,
+            visual_effect_generator=None,
+            **common_args
+        )
+
+        # H5py does not allow variable length arrays of more than 1 dimension, so the shapes are saved to
+        # rebuild them. Preprocessed images are saved so they don't have to be preprocessed every time.
+        all_images_group, labels_group, bboxes_group, shapes_group = [], [], [], []
+        for i in tqdm(range(generator.size()), desc=f'{split}: '):
+            group = [i]
+            image_group = generator.load_image_group(group)
+            annotations_group = generator.load_annotations_group(group)
+            image_group, annotations_group = generator.filter_annotations(image_group, annotations_group, group)
+            image_group, annotations_group = generator.preprocess_group(image_group, annotations_group)
+            shapes_group.append(image_group[0].shape)
+            all_images_group.append(image_group[0].reshape(-1))
+            labels_group.append(annotations_group[0]['labels'])
+            bboxes_group.append(annotations_group[0]['bboxes'].reshape(-1))
+
+        # Names ordered by label id so that enumerating the stored list gives back each name's label
+        # (assumes generator.classes maps name -> label id; TODO confirm against CSVGenerator).
+        save_classes = sorted(generator.classes, key=generator.classes.get)
+        print(f'Saving {split}...')
+        with h5py.File(args.dest_file, 'a') as hf:
+            # create_dataset raises on names that already exist; drop what a previous run left behind.
+            if split in hf:
+                del hf[split]
+            hf.create_dataset(f'{split}/img', data=all_images_group, dtype=dt, **gzip_opts)
+            hf.create_dataset(f'{split}/shapes', data=shapes_group, **gzip_opts)
+            hf.create_dataset(f'{split}/labels', data=labels_group, dtype=dt, **gzip_opts)
+            hf.create_dataset(f'{split}/bboxes', data=bboxes_group, dtype=dt, **gzip_opts)
+            if split == 'train':
+                if 'classes' in hf:
+                    del hf['classes']
+                hf.create_dataset('classes', data=np.array(save_classes, dtype='S'), dtype=st, **gzip_opts)
+        print(f'[OK] {split}')
+
diff --git a/keras_retinanet/bin/train.py b/keras_retinanet/bin/train.py
@@ -39,6 +39,7 @@
 from ..callbacks.eval import Evaluate
 from ..models.retinanet import retinanet_bbox
 from ..preprocessing.csv_generator import CSVGenerator
+from ..preprocessing.hdf5_generator import HDF5Generator
 from ..preprocessing.kitti import KittiGenerator
 from ..preprocessing.open_images import OpenImagesGenerator
 from ..preprocessing.pascal_voc import PascalVocGenerator
@@ -352,6 +353,23 @@ def create_generators(args, preprocess_image):
             shuffle_groups=False,
             **common_args
         )
+
+    elif args.dataset_type == 'hdf5':
+        train_generator = HDF5Generator(
+            args.dataset_file,
+            'train',
+            transform_generator=transform_generator,
+            visual_effect_generator=visual_effect_generator,
+            **common_args,
+        )
+        try:
+            validation_generator = HDF5Generator(
+                args.dataset_file,
+                'val',
+                **common_args)
+        except KeyError:
+            validation_generator = None
+
     else:
         raise ValueError('Invalid data type received: {}'.format(args.dataset_type))
 
@@ -421,6 +439,9 @@ def csv_list(string):
     csv_parser.add_argument('classes', help='Path to a CSV file containing class label mapping.')
     csv_parser.add_argument('--val-annotations', help='Path to CSV file containing annotations for validation (optional).')
 
+    hdf5_parser = subparsers.add_parser('hdf5')
+    hdf5_parser.add_argument('dataset_file', help='Path to HDF5 file containing dataset for training.')
+
     group = parser.add_mutually_exclusive_group()
     group.add_argument('--snapshot',          help='Resume training from a snapshot.')
     group.add_argument('--imagenet-weights',  help='Initialize the model with pretrained imagenet weights. This is the default behaviour.', action='store_const', const=True, default=True)

diff --git a/keras_retinanet/preprocessing/hdf5_generator.py b/keras_retinanet/preprocessing/hdf5_generator.py
@@ -0,0 +1,104 @@
+from collections import OrderedDict
+
+import h5py
+
+from .generator import Generator
+
+
+class HDF5Generator(Generator):
+    """ Generate data from a dataset stored in an HDF5 file.
+
+    The requested partition is read into memory when the generator is created, and
+    compute_input_output uses the stored images without filtering or preprocessing them.
+    """
+
+    def __init__(self, hdf5_file, partition, **kwargs):
+        """ Initialize an HDF5 data generator.
+
+        Args
+            hdf5_file : Path to the HDF5 file holding the dataset.
+            partition : Name of the group to read, e.g. 'train' or 'val'.
+            **kwargs  : Passed on unchanged to the Generator constructor.
+        """
+        with h5py.File(hdf5_file, 'r') as hf:
+            self.images = list(hf[partition]['img'])
+            shapes = list(hf[partition]['shapes'])
+            self.labels = list(hf[partition]['labels'])
+            self.bboxes = list(hf[partition]['bboxes'])
+            names = list(hf['classes'])
+
+        # hdf5 only allows storage of unidimensional arrays if they have different lengths
+        self.images = [img.reshape(shapes[i]) for i, img in enumerate(self.images)]
+        self.bboxes = [box.reshape(-1, 4) for box in self.bboxes]
+
+        # h5py >= 3 reads variable length strings as bytes; decode them so class names are str.
+        names = [name.decode('utf-8') if isinstance(name, bytes) else name for name in names]
+        self.classes = OrderedDict((name, label) for label, name in enumerate(names))
+        self.labels_dict = {label: name for name, label in self.classes.items()}
+
+        super(HDF5Generator, self).__init__(**kwargs)
+
+    def size(self):
+        """ Size of the dataset. """
+        return len(self.images)
+
+    def num_classes(self):
+        """ Number of classes in the dataset. """
+        return max(self.classes.values()) + 1
+
+    def image_aspect_ratio(self, image_index):
+        """ Compute the aspect ratio for an image with image_index. """
+        return float(self.images[image_index].shape[1]) / float(self.images[image_index].shape[0])
+
+    def get_image_group(self, group):
+        """ Return the stored images for every index in group. """
+        return [self.load_image(image_index) for image_index in group]
+
+    def get_annotations_group(self, group):
+        """ Return the stored annotations for every index in group. """
+        return [self.load_annotations(image_index) for image_index in group]
+
+    def has_label(self, label):
+        """ Return True if label is a known label. """
+        return label in self.labels_dict
+
+    def has_name(self, name):
+        """ Returns True if name is a known class. """
+        return name in self.classes
+
+    def name_to_label(self, name):
+        """ Map name to label. """
+        return self.classes[name]
+
+    def label_to_name(self, label):
+        """ Map label to name. """
+        return self.labels_dict[label]
+
+    def image_path(self, image_index):
+        """ Images have no path of their own, so the index is returned as a string. """
+        return str(image_index)
+
+    def load_image(self, image_index):
+        """ Return the stored image at the image_index. """
+        return self.images[image_index]
+
+    def load_annotations(self, image_index):
+        """ Return the stored labels and bboxes for an image_index. """
+        return {'labels': self.labels[image_index],
+                'bboxes': self.bboxes[image_index]}
+
+    def compute_input_output(self, group):
+        """ Compute inputs and target outputs for the network. """
+        # load images and annotations
+        image_group = self.get_image_group(group)
+        annotations_group = self.get_annotations_group(group)
+
+        # only the random augmentations are applied here; no filtering or preprocessing is done
+        image_group, annotations_group = self.random_visual_effect_group(image_group, annotations_group)
+        image_group, annotations_group = self.random_transform_group(image_group, annotations_group)
+
+        # compute network inputs and targets
+        inputs = self.compute_inputs(image_group)
+        targets = self.compute_targets(image_group, annotations_group)
+        return inputs, targets
+
diff --git a/setup.py b/setup.py
@@ -60,6 +60,7 @@ def run(self, *args, **kwargs):
             'retinanet-evaluate=keras_retinanet.bin.evaluate:main',
             'retinanet-debug=keras_retinanet.bin.debug:main',
             'retinanet-convert-model=keras_retinanet.bin.convert_model:main',
+            'retinanet-build-hdf5=keras_retinanet.bin.build_hdf5:main',
         ],
     },
     ext_modules    = extensions,