diff --git a/.gitignore b/.gitignore
index 0293450db..472a25638 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,4 +20,7 @@ __pycache__/
 .coverage
 .coverage.*
 coverage.xml
-*.cover
\ No newline at end of file
+*.cover
+
+# IDE
+.idea
diff --git a/keras_retinanet/bin/build_hdf5.py b/keras_retinanet/bin/build_hdf5.py
new file mode 100644
index 000000000..3a2f31b43
--- /dev/null
+++ b/keras_retinanet/bin/build_hdf5.py
@@ -0,0 +1,137 @@
+""" Build an HDF5 file with preprocessed images and annotations for retinanet training.
+"""
+
+import argparse
+
+import h5py
+import numpy as np
+from progressbar import progressbar
+
+from ..preprocessing.csv_generator import CSVGenerator
+from ..models import backbone
+
+
+def parse(args=None):
+    """ Parse the command line arguments of the script.
+
+    Args
+        args: Optional list of argument strings; sys.argv[1:] is used when None.
+
+    Returns
+        The argparse.Namespace holding the parsed options.
+    """
+    parser = argparse.ArgumentParser(description='Simple script for building an HDF5 file for retinanet training.')
+
+    parser.add_argument('--train-annotations',
+                        help='Path to CSV file containing annotations for training.',
+                        required=True)
+    parser.add_argument('--val-annotations',
+                        help='Path to CSV file containing annotations for validation (optional).')
+    parser.add_argument('--classes',
+                        help='Path to a CSV file containing class label mapping.',
+                        required=True)
+    parser.add_argument('--dest-file',
+                        help='Path to destination HDF5 file.',
+                        required=True)
+
+    parser.add_argument('--backbone-to-use',
+                        help='Backbone that will be used in training.',
+                        default='resnet50',
+                        type=str)
+    parser.add_argument('--image-min-side',
+                        help='Rescale the image so the smallest side is min_side.',
+                        type=int,
+                        default=800)
+    parser.add_argument('--image-max-side',
+                        help='Rescale the image if the largest side is larger than max_side.',
+                        type=int,
+                        default=1333)
+    parser.add_argument('--no-resize',
+                        help='Don\'t rescale the image.',
+                        action='store_true')
+
+    return parser.parse_args(args)
+
+
+def main():
+    """ Preprocess the annotated images and store them in the destination HDF5 file.
+
+    Every split ('train' and, when given, 'val') is loaded with a CSVGenerator, filtered and
+    preprocessed image by image, and saved under its own group of the file given by --dest-file.
+    """
+    args = parse()
+    annotations_csv = {
+        'train': args.train_annotations,
+        'val': args.val_annotations,
+    }
+    classes_csv = args.classes
+    dataset_file = args.dest_file
+
+    common_args = {
+        'batch_size'       : 1,
+        'image_min_side'   : args.image_min_side,
+        'image_max_side'   : args.image_max_side,
+        'no_resize'        : args.no_resize,
+        'preprocess_image' : backbone(args.backbone_to_use).preprocess_image,
+    }
+
+    transform_generator = None
+    visual_effect_generator = None
+
+    # Special dtypes are needed because the stored arrays have variable lengths.
+    dt = h5py.special_dtype(vlen=np.dtype('float64'))
+    st = h5py.special_dtype(vlen=str)
+
+    for split in ['train', 'val']:
+        if not annotations_csv[split]:
+            continue
+
+        generator = CSVGenerator(
+            annotations_csv[split],
+            classes_csv,
+            transform_generator=transform_generator,
+            visual_effect_generator=visual_effect_generator,
+            **common_args
+        )
+
+        # Computing the data that will be stored.
+        # H5py does not allow variable length arrays of more than 1 dimension,
+        # so we flatten them and save the image shapes to be able to reconstruct them.
+        # Also preprocessed images are saved so they don't have to be preprocessed every time they are used in training.
+        all_images_group = []
+        labels_group = []
+        bboxes_group = []
+        shapes_group = []
+
+        for i in progressbar(range(generator.size()), prefix=f'{split}: '):
+            group = [i]
+            image_group = generator.load_image_group(group)
+            annotations_group = generator.load_annotations_group(group)
+
+            image_group, annotations_group = generator.filter_annotations(image_group, annotations_group, group)
+            image_group, annotations_group = generator.preprocess_group(image_group, annotations_group)
+
+            shapes_group.append(image_group[0].shape)
+            all_images_group.append(image_group[0].reshape(-1))
+            labels_group.append(annotations_group[0]['labels'])
+            bboxes_group.append(annotations_group[0]['bboxes'].reshape(-1))
+
+        # HDF5Generator derives the label of a class from its position in this list,
+        # so the names have to be ordered by label rather than by their order in the classes CSV.
+        save_classes = sorted(generator.classes, key=generator.classes.get)
+
+        # Creating and filling the hdf5 file.
+        print(f'Saving {split}...')
+        with h5py.File(dataset_file, 'a') as hf:
+            hf.create_dataset(f'{split}/img', data=all_images_group, compression='gzip', compression_opts=9, dtype=dt)
+            hf.create_dataset(f'{split}/shapes', data=shapes_group, compression='gzip', compression_opts=9)
+            hf.create_dataset(f'{split}/labels', data=labels_group, compression='gzip', compression_opts=9, dtype=dt)
+            hf.create_dataset(f'{split}/bboxes', data=bboxes_group, compression='gzip', compression_opts=9, dtype=dt)
+            if split == 'train':
+                # np.string_ was removed in NumPy 2.0; an object array of str is written as variable length strings.
+                hf.create_dataset('classes', data=np.array(save_classes, dtype=object), compression='gzip', compression_opts=9, dtype=st)
+        print(f'[OK] {split}')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/keras_retinanet/bin/train.py b/keras_retinanet/bin/train.py
index 2626fed1d..23735bf25 100755
--- a/keras_retinanet/bin/train.py
+++ b/keras_retinanet/bin/train.py
@@ -39,6 +39,7 @@
 from ..callbacks.eval import Evaluate
 from ..models.retinanet import retinanet_bbox
 from ..preprocessing.csv_generator import CSVGenerator
+from ..preprocessing.hdf5_generator import HDF5Generator
 from ..preprocessing.kitti import KittiGenerator
 from ..preprocessing.open_images import OpenImagesGenerator
 from ..preprocessing.pascal_voc import PascalVocGenerator
@@ -352,6 +353,23 @@ def create_generators(args, preprocess_image):
             shuffle_groups=False,
             **common_args
         )
+
+    elif args.dataset_type == 'hdf5':
+        train_generator = HDF5Generator(
+            args.dataset_file,
+            'train',
+            transform_generator=transform_generator,
+            visual_effect_generator=visual_effect_generator,
+            **common_args,
+        )
+        try:
+            validation_generator = HDF5Generator(
+                args.dataset_file,
+                'val',
+                **common_args)
+        except KeyError:
+            validation_generator = None
+
     else:
         raise ValueError('Invalid data type received: {}'.format(args.dataset_type))
 
@@ -421,6 +439,9 @@ def csv_list(string):
     csv_parser.add_argument('classes', help='Path to a CSV file containing class label mapping.')
     csv_parser.add_argument('--val-annotations', help='Path to CSV file containing annotations for validation (optional).')
 
+    hdf5_parser = subparsers.add_parser('hdf5')
+    hdf5_parser.add_argument('dataset_file', help='Path to HDF5 file containing dataset for training.')
+
     group = parser.add_mutually_exclusive_group()
     group.add_argument('--snapshot', help='Resume training from a snapshot.')
     group.add_argument('--imagenet-weights', help='Initialize the model with pretrained imagenet weights. This is the default behaviour.', action='store_const', const=True, default=True)
diff --git a/keras_retinanet/preprocessing/hdf5_generator.py b/keras_retinanet/preprocessing/hdf5_generator.py
new file mode 100644
index 000000000..bdbf92812
--- /dev/null
+++ b/keras_retinanet/preprocessing/hdf5_generator.py
@@ -0,0 +1,134 @@
+from collections import OrderedDict
+
+import h5py
+
+from .generator import Generator
+
+
+class HDF5Generator(Generator):
+    """ Generate data from an HDF5 file of preprocessed images and annotations.
+
+    The whole partition is read into memory when the generator is created.
+    """
+
+    def __init__(
+        self,
+        hdf5_file,
+        partition,
+        **kwargs
+    ):
+        """ Initialize an HDF5 data generator.
+
+        Args
+            hdf5_file: Path to the HDF5 file containing the dataset.
+            partition: Name of the group of the file to read ('train' or 'val').
+            **kwargs: Forwarded to the base Generator.
+
+        Raises
+            KeyError: If the partition or one of its datasets is missing from the file.
+        """
+        with h5py.File(hdf5_file, 'r') as hf:
+            images = list(hf[partition]['img'])
+            shapes = list(hf[partition]['shapes'])
+            self.labels = list(hf[partition]['labels'])
+            bboxes = list(hf[partition]['bboxes'])
+            class_names = list(hf['classes'])
+
+        # hdf5 only allows storage of unidimensional arrays if they have different lengths,
+        # so the flattened images and boxes are restored to their original shapes.
+        self.images = [image.reshape(shape) for image, shape in zip(images, shapes)]
+        self.bboxes = [boxes.reshape(-1, 4) for boxes in bboxes]
+
+        # h5py >= 3.0 reads variable length strings as bytes; decode them so class names are always str.
+        class_names = [name.decode('utf-8') if isinstance(name, bytes) else name for name in class_names]
+        self.classes = OrderedDict((name, label) for label, name in enumerate(class_names))
+        self.labels_dict = {label: name for name, label in self.classes.items()}
+
+        super(HDF5Generator, self).__init__(**kwargs)
+
+    def size(self):
+        """ Size of the dataset.
+        """
+        return len(self.images)
+
+    def num_classes(self):
+        """ Number of classes in the dataset.
+        """
+        return max(self.classes.values()) + 1
+
+    def image_aspect_ratio(self, image_index):
+        """ Compute the aspect ratio for an image with image_index.
+        """
+        height, width = self.images[image_index].shape[:2]
+        return float(width) / float(height)
+
+    def get_image_group(self, group):
+        """ Load the images of all indices in group.
+        """
+        return [self.load_image(image_index) for image_index in group]
+
+    def get_annotations_group(self, group):
+        """ Load the annotations of all indices in group.
+        """
+        return [self.load_annotations(image_index) for image_index in group]
+
+    def has_label(self, label):
+        """ Return True if label is a known label.
+        """
+        return label in self.labels_dict
+
+    def has_name(self, name):
+        """ Returns True if name is a known class.
+        """
+        return name in self.classes
+
+    def name_to_label(self, name):
+        """ Map name to label.
+        """
+        return self.classes[name]
+
+    def label_to_name(self, label):
+        """ Map label to name.
+        """
+        return self.labels_dict[label]
+
+    def image_path(self, image_index):
+        """ Return a pseudo path for image_index: the file stores no path, so the index is used.
+        """
+        return str(image_index)
+
+    def load_image(self, image_index):
+        """ Load the preprocessed image at image_index.
+        """
+        return self.images[image_index]
+
+    def load_annotations(self, image_index):
+        """ Load the annotations for the image at image_index.
+        """
+        return {'labels': self.labels[image_index],
+                'bboxes': self.bboxes[image_index]}
+
+    def compute_input_output(self, group):
+        """ Compute inputs and target outputs for the network.
+
+        The build_hdf5 script stores images that are already filtered and preprocessed,
+        so only the random visual effects and transforms are applied here.
+        """
+        # load images and annotations
+        image_group = self.get_image_group(group)
+        annotations_group = self.get_annotations_group(group)
+
+        # randomly apply visual effect
+        # NOTE(review): effects and transforms run on already preprocessed images here - confirm they support that.
+        image_group, annotations_group = self.random_visual_effect_group(image_group, annotations_group)
+
+        # randomly transform data
+        image_group, annotations_group = self.random_transform_group(image_group, annotations_group)
+
+        # compute network inputs
+        inputs = self.compute_inputs(image_group)
+
+        # compute network targets
+        targets = self.compute_targets(image_group, annotations_group)
+
+        return inputs, targets
diff --git a/setup.py b/setup.py
index 13fe6e5cb..88870c86c 100644
--- a/setup.py
+++ b/setup.py
@@ -60,6 +60,7 @@ def run(self, *args, **kwargs):
             'retinanet-evaluate=keras_retinanet.bin.evaluate:main',
             'retinanet-debug=keras_retinanet.bin.debug:main',
             'retinanet-convert-model=keras_retinanet.bin.convert_model:main',
+            'retinanet-build-hdf5=keras_retinanet.bin.build_hdf5:main',
         ],
     },
     ext_modules = extensions,