diff --git a/.ci/macos-steps.yaml b/.ci/macos-steps.yaml index d867d975..61974f9c 100644 --- a/.ci/macos-steps.yaml +++ b/.ci/macos-steps.yaml @@ -29,7 +29,7 @@ steps: # Configure CMake Args for models. - script: | - mkdir data && mkdir build && cd build && cmake $(CMakeArgs-models) .. + mkdir build && cd build && cmake $(CMakeArgs-models) .. displayName: 'CMake for models' # Build mlpack diff --git a/.ci/windows-steps.yaml b/.ci/windows-steps.yaml index 57e1be52..dfbf0bf8 100644 --- a/.ci/windows-steps.yaml +++ b/.ci/windows-steps.yaml @@ -101,7 +101,6 @@ steps: - powershell: | mkdir build - mkdir data cp $(Agent.ToolsDirectory)\boost_libs\*.* build\ cp $(Agent.ToolsDirectory)\OpenBLAS.0.2.14.1\lib\native\lib\x64\*.* build\ cp $(Agent.ToolsDirectory)\OpenBLAS.0.2.14.1\lib\native\bin\x64\*.* build\ diff --git a/.gitignore b/.gitignore index 609ef60d..6fc3b1f2 100644 --- a/.gitignore +++ b/.gitignore @@ -3,9 +3,15 @@ xcode* .DS_Store .idea cmake-build-* +data/* *.csv *.tar *.zip *.tar.gz +*.xml +*.jpeg +*.jpg +*.png +*.txt .travis/configs.hpp Testing/* diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000..9e6bdfd6 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,25 @@ +sudo: required +dist: xenial +language: cpp + +before_install: + - sudo apt-get update -qq + - sudo apt-get install -qq --no-install-recommends cmake binutils-dev libopenblas-dev liblapack-dev build-essential libboost-all-dev + - curl -O http://masterblaster.mlpack.org:5005/armadillo-8.400.0.tar.gz -o armadillo-8.400.0.tar.gz && tar xvzf armadillo-8.400.0.tar.gz && cd armadillo-8.400.0 + - cmake . && make && sudo make install + - cd $TRAVIS_BUILD_DIR && git clone https://github.com/mlpack/mlpack.git --depth 1 + - cd mlpack && mkdir mlpack_build && cd mlpack_build && cmake -DUSE_OPENMP=OFF -DBUILD_CLI_EXECUTABLES=OFF -DBUILD_JULIA_BINDINGS=OFF -DBUILD_PYTHON_BINDINGS=OFF -DBUILD_TESTS=OFF .. 
&& make -j2 && sudo make install + +install: + - cd $TRAVIS_BUILD_DIR && mkdir build && cd build && cmake -DUSE_OPENMP=OFF .. +script: + - make -j2 + +notifications: + email: + - mlpack-git@lists.mlpack.org + irc: + channels: + - "chat.freenode.net#mlpack" + on_success: change +on_failure: always diff --git a/CMakeLists.txt b/CMakeLists.txt index 8cb103c2..d5a32184 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -164,6 +164,7 @@ find_package(Boost 1.49 COMPONENTS filesystem system + regex program_options serialization unit_test_framework diff --git a/augmentation/CMakeLists.txt b/augmentation/CMakeLists.txt new file mode 100644 index 00000000..40510d03 --- /dev/null +++ b/augmentation/CMakeLists.txt @@ -0,0 +1,19 @@ +cmake_minimum_required(VERSION 3.1.0 FATAL_ERROR) +project(augmentation) + +option(DEBUG "DEBUG" OFF) + +set(DIR_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/) +include_directories("${CMAKE_CURRENT_SOURCE_DIR}/../../") + +set(SOURCES + augmentation.hpp +) + +foreach(file ${SOURCES}) + set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file}) +endforeach() + +# Append sources (with directory name) to list of all models sources (used at +# the parent scope). +set(DIRS ${DIRS} ${DIR_SRCS} PARENT_SCOPE) diff --git a/augmentation/augmentation.hpp b/augmentation/augmentation.hpp new file mode 100644 index 00000000..3ed7763d --- /dev/null +++ b/augmentation/augmentation.hpp @@ -0,0 +1,172 @@ +/** + * @file augmentation.hpp + * @author Kartik Dutt + * + * Definition of Augmentation class for augmenting data. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. 
+ */ + +#include +#include + +#ifndef MODELS_AUGMENTATION_HPP +#define MODELS_AUGMENTATION_HPP + +/** + * Augmentation class used to perform augmentations / transform the data. + * For the list of supported augmentation, take a look at our wiki page. + * + * @code + * Augmentation<> augmentation({"horizontal-flip", "resize = (224, 224)"}, 0.2); + * augmentation.Transform(dataloader.TrainFeatures); + * @endcode + * + * @tparam DatasetType Datatype on which augmentation will be done. + */ +template +class Augmentation +{ + public: + //! Create the augmentation class object. + Augmentation(); + + /** + * Constructor for augmentation class. + * + * @param augmentations List of strings containing one of the supported + * augmentations. + * @param augmentationProbability Probability of applying augmentation on + * the dataset. + * NOTE : This doesn't apply to augmentations + * such as resize. + */ + Augmentation(const std::vector& augmentations, + const double augmentationProbability); + + /** + * Applies augmentation to the passed dataset. + * + * @param dataset Dataset on which augmentation will be applied. + * @param datapointWidth Width of a single data point i.e. + * Since each column represents a seperate data + * point. + * @param datapointHeight Height of a single data point. + * @param datapointDepth Depth of a single data point. For 2-dimensional + * data point, set it to 1. Defaults to 1. + */ + void Transform(DatasetType& dataset, + const size_t datapointWidth, + const size_t datapointHeight, + const size_t datapointDepth = 1); + + /** + * Applies resize transform to the entire dataset. + * + * @param dataset Dataset on which augmentation will be applied. + * @param datapointWidth Width of a single data point i.e. + * Since each column represents a seperate data + * point. + * @param datapointHeight Height of a single data point. + * @param datapointDepth Depth of a single data point. For 2-dimensional + * data point, set it to 1. Defaults to 1. 
+ * @param augmentation String containing the transform. + */ + void ResizeTransform(DatasetType& dataset, + const size_t datapointWidth, + const size_t datapointHeight, + const size_t datapointDepth, + const std::string& augmentation); + + private: + /** + * Initializes augmentation map for the class. + */ + void InitializeAugmentationMap(); + + /** + * Function to determine if augmentation has Resize function. + * @param augmentation Optional argument to check if a string has + * resize substring. + */ + bool HasResizeParam(const std::string& augmentation = "") + { + if (augmentation.length()) + return augmentation.find("resize") != std::string::npos; + + + // Search in augmentation vector. + return augmentations.size() <= 0 ? false : + augmentations[0].find("resize") != std::string::npos; + } + + /** + * Sets size of output width and output height of the new data. + * + * @param outWidth Output width of resized data point. + * @param outHeight Output height of resized data point. + * @param augmentation String from which output width and height + * are extracted. + */ + void GetResizeParam(size_t& outWidth, + size_t& outHeight, + const std::string& augmentation) + { + if (!HasResizeParam()) + return; + + + outWidth = 0; + outHeight = 0; + + // Use regex to find one / two numbers. If only one provided + // set output width equal to output height. + boost::regex regex{"[0-9]+"}; + + // Create an iterator to find matches. + boost::sregex_token_iterator matches(augmentation.begin(), + augmentation.end(), regex, 0), end; + + size_t matchesCount = std::distance(matches, end); + + if (matchesCount == 0) + { + mlpack::Log::Fatal << "Invalid size / shape in " << + augmentation << std::endl; + } + + if (matchesCount == 1) + { + outWidth = std::stoi(*matches); + outHeight = outWidth; + } + else + { + outWidth = std::stoi(*matches); + matches++; + outHeight = std::stoi(*matches); + } + } + + //! Locally held augmentations / transforms that need to be applied. 
+ std::vector augmentations; + + //! Locally held value of augmentation probability. + double augmentationProbability; + + //! Locally help map for mapping functions and strings. + std::unordered_map augmentationMap; + + // The dataloader class should have access to internal functions of + // the dataloader. + template + friend class DataLoader; +}; + +#include "augmentation_impl.hpp" // Include implementation. + +#endif diff --git a/augmentation/augmentation_impl.hpp b/augmentation/augmentation_impl.hpp new file mode 100644 index 00000000..3d966571 --- /dev/null +++ b/augmentation/augmentation_impl.hpp @@ -0,0 +1,104 @@ +/** + * @file augmentation_impl.hpp + * @author Kartik Dutt + * + * Implementation of Augmentation class for augmenting data. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ + +// Incase it has not been included already. +#include "augmentation.hpp" + +#ifndef MODELS_AUGMENTATION_IMPL_HPP +#define MODELS_AUGMENTATION_IMPL_HPP + +template +Augmentation::Augmentation() : + augmentations(std::vector()), + augmentationProbability(0.2) +{ + // Nothing to do here. +} + +template +Augmentation::Augmentation( + const std::vector& augmentations, + const double augmentationProbability) : + augmentations(augmentations), + augmentationProbability(augmentationProbability) +{ + // Sort the vector to place resize parameter to the front of the string. + // This prevents constant lookups for resize. + sort(this->augmentations.begin(), this->augmentations.end(), []( + std::string& str1, std::string& str2) + { + return str1.find("resize") != std::string::npos; + }); + + // Fill augmentation map with supported augmentations other than resize. 
+ InitializeAugmentationMap(); +} + +template +void Augmentation::Transform(DatasetType& dataset, + const size_t datapointWidth, + const size_t datapointHeight, + const size_t datapointDepth) +{ + size_t i = 0; + if (this->HasResizeParam()) + { + this->ResizeTransform(dataset, datapointWidth, datapointHeight, + datapointDepth, augmentations[0]); + i++; + } + + for (; i < augmentations.size(); i++) + { + if (augmentationMap.count(augmentations[i])) + { + augmentationMap[augmentations[i]](dataset, datapointWidth, + datapointHeight, datapointDepth, augmentations[i]); + } + } +} + +template +void Augmentation::ResizeTransform( + DatasetType& dataset, + const size_t datapointWidth, + const size_t datapointHeight, + const size_t datapointDepth, + const std::string& augmentation) +{ + if (!this->HasResizeParam(augmentation)) + return; + + size_t outputWidth = 0, outputHeight = 0; + + // Get output width and output height. + GetResizeParam(outputWidth, outputHeight, augmentation); + + // We will use mlpack's bilinear interpolation layer to + // resize the input. + mlpack::ann::BilinearInterpolation resizeLayer( + datapointWidth, datapointHeight, outputWidth, outputHeight, + datapointDepth); + + // Not sure how to avoid a copy here. + DatasetType output; + resizeLayer.Forward(dataset, output); + dataset = std::move(output); +} + +template +void Augmentation::InitializeAugmentationMap() +{ + // Fill the map here. 
+} + +#endif diff --git a/data/PASCAL-VOC-Test.tar.gz b/data/PASCAL-VOC-Test.tar.gz new file mode 100644 index 00000000..82fec91a Binary files /dev/null and b/data/PASCAL-VOC-Test.tar.gz differ diff --git a/data/cifar-test.tar.gz b/data/cifar-test.tar.gz new file mode 100644 index 00000000..d472018a Binary files /dev/null and b/data/cifar-test.tar.gz differ diff --git a/dataloader/dataloader.hpp b/dataloader/dataloader.hpp index 9b0e9c9d..b8a40977 100644 --- a/dataloader/dataloader.hpp +++ b/dataloader/dataloader.hpp @@ -10,10 +10,14 @@ * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ #include +#include #include #include -#include +#include +#include #include +#include +#include #include #include @@ -117,6 +121,127 @@ class DataLoader std::vector(), const double augmentationProbability = 0.2); + /** + * Loads object detection dataset. It requires a single annotation file in XML format. + * Each XML file should correspond to a single image in images folder. + * + * XML file should containg the following : + * 1. Each XML file should be wrapped in annotation tag. + * 2. Filename of image in images folder will be depicted by filename tag. + * 3. Object tag depicting characteristics of bounding box. + * 4. Each object tag should contain name tag i.e. class of the object. + * 5. Each object tag should contain bndbox tag containing xmin, ymin, xmax, ymax. + * + * NOTE : Labels are assigned using classes vector. Set verbose to 1 to print labels + * and their corresponding class. + * + * @param pathToAnnotations Path to the folder containing XML type annotation files. + * @param pathToImages Path to folder containing images corresponding to annotations. + * @param classes Vector of strings containing list of classes. Labels are assigned + * according to this vector. + * @param validRatio Ratio of dataset that will be used for validation. + * @param shuffle Boolean to determine whether the dataset is shuffled. 
+ * @param augmentation Vector strings of augmentations supported by mlpack. + * @param augmentationProbability Probability of applying augmentation to a particular cell. + * @param absolutePath Boolean to determine if absolute path is used. Defaults to false. + * @param baseXMLTag XML tag name which wraps around the annotation file. + * @param imageNameXMLTag XML tag name which holds the value of image filename. + * @param objectXMLTag XML tag name which holds details of bounding box i.e. class and + * coordinates of bounding box. + * @param bndboxXMLTag XML tag name which holds coordinates of bounding box. + * @param classNameXMLTag XML tag name inside objectXMLTag which holds the name of the + * class of bounding box. + * @param x1XMLTag XML tag name inside bndboxXMLTag which hold value of lower most + * x coordinate of bounding box. + * @param y1XMLTag XML tag name inside bndboxXMLTag which hold value of lower most + * y coordinate of bounding box. + * @param x2XMLTag XML tag name inside bndboxXMLTag which hold value of upper most + * x coordinate of bounding box. + * @param y2XMLTag XML tag name inside bndboxXMLTag which hold value of upper most + * y coordinate of bounding box. 
+ */ + void LoadObjectDetectionDataset(const std::string& pathToAnnotations, + const std::string& pathToImages, + const std::vector& classes, + const double validRatio = 0.2, + const bool shuffle = true, + const std::vector& augmentation = + std::vector(), + const double augmentationProbability = 0.2, + const bool absolutePath = false, + const std::string& baseXMLTag = "annotation", + const std::string& imageNameXMLTag = + "filename", + const std::string& sizeXMLTag = "size", + const std::string& objectXMLTag = "object", + const std::string& bndboxXMLTag = "bndbox", + const std::string& classNameXMLTag = "name", + const std::string& x1XMLTag = "xmin", + const std::string& y1XMLTag = "ymin", + const std::string& x2XMLTag = "xmax", + const std::string& y2XMLTag = "ymax"); + + /** + * Load all images from directory. + * + * @param imagesPath Path to all images. + * @param dataset Armadillo type where images will be loaded. + * @param labels Armadillo type where labels will be loaded. + * @param imageWidth Width of images in dataset. + * @param imageHeight Height of images in dataset. + * @param imageDepth Depth of images in dataset. + * @param label Label which will be assigned to image. + * @param augmentation Vector strings of augmentations supported by mlpack. + * @param augmentationProbability Probability of applying augmentation to a particular cell. 
+ */ + void LoadAllImagesFromDirectory(const std::string& imagesPath, + DatasetX& dataset, + DatasetY& labels, + const size_t imageWidth, + const size_t imageHeight, + const size_t imageDepth, + const size_t label = 0); + + void LoadXMLImageClassification(const std::string& pathToAnnotations, + const std::string& pathToImages, + const double validRatio, + const std::vector& classes, + const std::vector& augmentation = + std::vector(), + const double augmentationProbability = 0.2, + const bool absolutePath = false, + const std::string& baseXMLTag = "annotation", + const std::string& imageNameXMLTag = + "filename", + const std::string& sizeXMLTag = "size", + const std::string& objectXMLTag = "object", + const std::string& classNameXMLTag = "name"); + + /** + * Load all images from directory. + * + * @param pathToDataset Path to all folders containing all images. + * @param imageWidth Width of images in dataset. + * @param imageHeight Height of images in dataset. + * @param imageDepth Depth of images in dataset. + * @param trainData Determines whether data is training set or test set. + * @param shuffle Boolean to determine whether or not to shuffle the data. + * @param validRatio Ratio of dataset to be used for validation set. + * @param augmentation Vector strings of augmentations supported by mlpack. + * @param augmentationProbability Probability of applying augmentation to a particular cell. + */ + void LoadImageDatasetFromDirectory(const std::string& pathToDataset, + const size_t imageWidth, + const size_t imageHeight, + const size_t imageDepth, + const bool trainData = true, + const double validRatio = 0.2, + const bool shuffle = true, + const std::vector& + augmentation = std::vector(), + const double augmentationProbability = + 0.2); + //! Get the training dataset features. 
DatasetX TrainFeatures() const { return trainFeatures; } @@ -181,7 +306,10 @@ class DataLoader { if (datasetMap[dataset].zipFile && (!Utils::PathExists( datasetMap[dataset].trainPath) || - !Utils::PathExists(datasetMap[dataset].testPath))) + !Utils::PathExists(datasetMap[dataset].testPath) || + !Utils::PathExists(datasetMap[dataset].trainingImagesPath) || + !Utils::PathExists(datasetMap[dataset].trainingAnnotationPath) || + !Utils::PathExists(datasetMap[dataset].testingImagesPath))) { Utils::DownloadFile(datasetMap[dataset].datasetURL, datasetMap[dataset].datasetPath, dataset + "_training_data.", @@ -233,6 +361,8 @@ class DataLoader void InitializeDatasets() { datasetMap.insert({"mnist", Datasets::MNIST()}); + datasetMap.insert({"voc-detection", + Datasets::VOCDetection()}); } /** diff --git a/dataloader/dataloader_impl.hpp b/dataloader/dataloader_impl.hpp index d58f4939..b1ccfbac 100644 --- a/dataloader/dataloader_impl.hpp +++ b/dataloader/dataloader_impl.hpp @@ -47,7 +47,7 @@ template< // Use utility functions to download the dataset. DownloadDataset(dataset); - if (datasetMap[dataset].loadCSV) + if (datasetMap[dataset].datasetType == "csv") { LoadCSV(datasetMap[dataset].trainPath, true, shuffle, validRatio, useScaler, datasetMap[dataset].dropHeader, @@ -61,6 +61,43 @@ template< datasetMap[dataset].startTestingInputFeatures, datasetMap[dataset].endTestingInputFeatures); } + else if (datasetMap[dataset].datasetType == "image-detection") + { + std::vector augmentations = augmentation; + + // If user doesn't set size for images, set size of images to {64, 64}. + if (augmentations.size() == 0) + { + augmentations.push_back("resize = {64, 64}"); + } + + LoadObjectDetectionDataset(datasetMap[dataset].trainingAnnotationPath, + datasetMap[dataset].trainingImagesPath, datasetMap[dataset].classes, + validRatio, shuffle, augmentations, augmentationProbability); + + // Load testing data if any. Most object detection dataset + // have private evaluation servers. 
+ if (datasetMap[dataset].testingImagesPath.length() > 0) + { + LoadAllImagesFromDirectory(datasetMap[dataset].testingImagesPath, + testFeatures, testLabels, datasetMap[dataset].imageWidth, + datasetMap[dataset].imageHeight, datasetMap[dataset].imageDepth); + } + } + else if (datasetMap[dataset].datasetType == "image-classification") + { + LoadImageDatasetFromDirectory(datasetMap[dataset].trainingImagesPath, + datasetMap[dataset].imageWidth, datasetMap[dataset].imageHeight, + datasetMap[dataset].imageDepth, true, validRatio, shuffle, + augmentation, augmentationProbability); + + if (datasetMap[dataset].testingImagesPath.length() > 0) + { + LoadAllImagesFromDirectory(datasetMap[dataset].testingImagesPath, + testFeatures, testLabels, datasetMap[dataset].imageWidth, + datasetMap[dataset].imageHeight, datasetMap[dataset].imageDepth); + } + } // Preprocess the dataset. datasetMap[dataset].PreProcess(trainFeatures, trainLabels, @@ -128,7 +165,11 @@ template< scaler.Transform(trainFeatures, trainFeatures); scaler.Transform(validFeatures, validFeatures); } - // TODO : Add support for augmentation here. + + Augmentation augmentations(augmentation, + augmentationProbability); + augmentations.Transform(trainFeatures, 1, dataset.n_rows, 1); + mlpack::Log::Info << "Training Dataset Loaded." 
<< std::endl; } else @@ -145,4 +186,304 @@ template< } } +template< + typename DatasetX, + typename DatasetY, + class ScalerType +> void DataLoader< + DatasetX, DatasetY, ScalerType +>::LoadObjectDetectionDataset(const std::string& pathToAnnotations, + const std::string& pathToImages, + const std::vector& classes, + const double validRatio, + const bool shuffle, + const std::vector& augmentations, + const double augmentationProbability, + const bool absolutePath, + const std::string& baseXMLTag, + const std::string& imageNameXMLTag, + const std::string& sizeXMLTag, + const std::string& objectXMLTag, + const std::string& bndboxXMLTag, + const std::string& classNameXMLTag, + const std::string& x1XMLTag, + const std::string& y1XMLTag, + const std::string& x2XMLTag, + const std::string& y2XMLTag) +{ + Augmentation augmentation(augmentations, augmentationProbability); + + std::vector annotationsDirectory; + + DatasetX dataset; + DatasetY labels; + + // Fill the directory. + Utils::ListDir(pathToAnnotations, annotationsDirectory, absolutePath); + + // Create a map for labels and corresponding class name. + // This provides faster access to class labels. + std::unordered_map classMap; + for (size_t i = 0; i < classes.size(); i++) + classMap.insert(std::make_pair(classes[i], i)); + + // Map to insert values in a column vector. + std::unordered_map indexMap; + indexMap.insert(std::make_pair(classNameXMLTag, 0)); + indexMap.insert(std::make_pair(x1XMLTag, 1)); + indexMap.insert(std::make_pair(y1XMLTag, 2)); + indexMap.insert(std::make_pair(x2XMLTag, 3)); + indexMap.insert(std::make_pair(y2XMLTag, 4)); + + // Keep track of files loaded. + size_t totalFiles = annotationsDirectory.size(), loadedFiles = 0; + size_t imageWidth = 0, imageHeight = 0, imageDepth = 0; + + // Read the XML file. 
+ for (boost::filesystem::path annotationFile : annotationsDirectory) + { + if (annotationFile.string().length() <= 3 || + annotationFile.string().substr( + annotationFile.string().length() - 3) != "xml") + { + continue; + } + + loadedFiles++; + Log::Info << "Files Loaded : " << loadedFiles << " out of " << + totalFiles << "\r" << std::endl; + + // Read the XML file. + boost::property_tree::ptree xmlFile; + boost::property_tree::read_xml(annotationFile.string(), xmlFile); + + // Get annotation from XML file. + boost::property_tree::ptree annotation = xmlFile.get_child(baseXMLTag); + + // Read properties inside annotation file. + // Get image name. + std::string imgName = annotation.get_child(imageNameXMLTag).data(); + + // If image doesn't exist then skip the current XML file. + if (!Utils::PathExists(pathToImages + imgName, absolutePath)) + { + mlpack::Log::Warn << "Image not found! Tried finding " << + pathToImages + imgName << std::endl; + continue; + } + + // Get the size of image to create image info required + // by mlpack::data::Load function. + boost::property_tree::ptree sizeInfo = annotation.get_child(sizeXMLTag); + imageWidth = std::stoi(sizeInfo.get_child("width").data()); + imageHeight = std::stoi(sizeInfo.get_child("height").data()); + imageDepth = std::stoi(sizeInfo.get_child("depth").data()); + mlpack::data::ImageInfo imageInfo(imageWidth, imageHeight, imageDepth); + + // Load the image. + // The image loaded here will be in column format i.e. Output will + // be matrix with the following shape {1, cols * rows * slices} in + // column major format. + DatasetX image; + mlpack::data::Load(pathToImages + imgName, image, imageInfo); + + if (augmentation.HasResizeParam()) + { + augmentation.ResizeTransform(image, imageWidth, imageHeight, imageDepth, + augmentation.augmentations[0]); + augmentation.GetResizeParam(imageWidth, imageHeight, + augmentation.augmentations[0]); + } + + // Iterate over all object in annotation. 
+ BOOST_FOREACH(boost::property_tree::ptree::value_type const& object, + annotation) + { + arma::vec predictions(5); + // Iterate over property of the object to get class label and + // bounding box coordinates. + if (object.first == objectXMLTag) + { + if (classMap.count(object.second.get_child(classNameXMLTag).data())) + { + predictions(indexMap[classNameXMLTag]) = classMap[ + object.second.get_child(classNameXMLTag).data()]; + boost::property_tree::ptree const &boundingBox = + object.second.get_child(bndboxXMLTag); + + BOOST_FOREACH(boost::property_tree::ptree::value_type + const& coordinate, boundingBox) + { + if (indexMap.count(coordinate.first)) + { + predictions(indexMap[coordinate.first]) = + std::stoi(coordinate.second.data()); + } + } + + // Add object to training set. + dataset.insert_cols(0, image); + labels.insert_cols(0, predictions); + } + } + } + } + + // Add data split here. + arma::mat completeDataset = arma::join_cols(dataset, labels); + arma::mat trainingData, validationData; + mlpack::data::Split(completeDataset, trainingData, validationData, + validRatio, shuffle); + + // Features are all rows except the last 5 rows which correspond + // to bounding box. + trainFeatures = trainingData.rows(0, trainingData.n_rows - 6); + trainLabels = trainingData.rows(trainingData.n_rows - 5, + trainingData.n_rows - 1); + + validFeatures = validationData.rows(0, validationData.n_rows - 6); + validLabels = validationData.rows(validationData.n_rows - 5, + validationData.n_rows - 1); + + // Augment the training data. 
+ augmentation.Transform(trainFeatures, imageWidth, imageHeight, + imageDepth); +} + +template< + typename DatasetX, + typename DatasetY, + class ScalerType +> void DataLoader< + DatasetX, DatasetY, ScalerType +>::LoadAllImagesFromDirectory(const std::string& imagesPath, + DatasetX& dataset, + DatasetY& labels, + const size_t imageWidth, + const size_t imageHeight, + const size_t imageDepth, + const size_t label) +{ + // Get all files in given directory. + std::vector imagesDirectory; + Utils::ListDir(imagesPath, imagesDirectory); + + std::set supportedExtentions = {".jpg", ".png", ".tga", + ".bmp", ".psd", ".gif", ".hdr", ".pic", ".pnm"}; + + // We use to endls here as one of them will be replaced by print + // command below. + Log::Info << "Found " << imagesDirectory.size() << " belonging to " << + label << " class." << std::endl << std::endl; + + size_t loadedImages = 0; + for (boost::filesystem::path imageName : imagesDirectory) + { + if (imageName.string().length() <= 3 || + !boost::filesystem::is_regular_file(imageName) || + !supportedExtentions.count(imageName.extension().string())) + { + continue; + } + + mlpack::data::ImageInfo imageInfo(imageWidth, imageHeight, imageDepth); + + // Load the image. + // The image loaded here will be in column format i.e. Output will + // be matrix with the following shape {1, cols * rows * slices} in + // column major format. + DatasetX image; + mlpack::data::Load(imageName.string(), image, imageInfo); + + // Add object to training set. 
+ dataset.insert_cols(0, image); + labels.insert_cols(0, arma::vec(1).fill(label)); + + loadedImages++; + mlpack::Log::Info << "Loaded " << loadedImages << " out of " << + imagesDirectory.size() << "\r" << std::endl; + } +} + +template< + typename DatasetX, + typename DatasetY, + class ScalerType +> void DataLoader< + DatasetX, DatasetY, ScalerType +>::LoadImageDatasetFromDirectory(const std::string& pathToDataset, + const size_t imageWidth, + const size_t imageHeight, + const size_t imageDepth, + const bool trainData, + const double validRatio, + const bool shuffle, + const std::vector& augmentation, + const double augmentationProbability) +{ + Augmentation augmentations(augmentation, augmentationProbability); + size_t totalClasses = 0; + std::map classMap; + + // Fill classes in the vector. + std::vector classes; + Utils::ListDir(pathToDataset, classes); + + DatasetX dataset; + DatasetY labels; + + // Iterate the directory. + for (boost::filesystem::path className : classes) + { + if (boost::filesystem::is_directory(className)) + { + LoadAllImagesFromDirectory(className.string() + + "/", dataset, labels, imageWidth, imageHeight, imageDepth, + totalClasses); + classMap[className.string()] = totalClasses; + totalClasses++; + } + } + + if (!trainData) + { + testFeatures = std::move(dataset); + testLabels = std::move(labels); + + // Only resize augmentation will be applied on test set. + if (augmentations.HasResizeParam()) + { + augmentations.ResizeTransform(testFeatures, imageWidth, imageHeight, + imageDepth, augmentations.augmentations[0]); + } + + return; + } + + // Train-validation data split. 
+ arma::mat completeDataset = arma::join_cols(dataset, labels); + arma::mat trainingData, validationData; + mlpack::data::Split(completeDataset, trainingData, validationData, + validRatio, shuffle); + + trainFeatures = trainingData.rows(0, trainingData.n_rows - 2); + trainLabels = trainingData.rows(trainingData.n_rows - 1, + trainingData.n_rows - 1); + + validFeatures = validationData.rows(0, validationData.n_rows - 2); + validLabels = validationData.rows(validationData.n_rows - 1, + validationData.n_rows - 1); + + augmentations.Transform(trainFeatures, imageWidth, imageHeight, imageDepth); + + mlpack::Log::Info << "Found " << totalClasses << " classes." << std::endl; + + // Print class-label mappings for ease. + for (std::pair classMapping : classMap) + { + mlpack::Log::Info << classMapping.first << " : " << classMapping.second << + std::endl; + } +} + #endif diff --git a/dataloader/datasets.hpp b/dataloader/datasets.hpp index 9b83403e..81048f30 100644 --- a/dataloader/datasets.hpp +++ b/dataloader/datasets.hpp @@ -42,9 +42,8 @@ struct DatasetDetails //! CRC-32 checksum for testing data file. std::string testHash; - //! Locally stored boolean to determine if dataset is of CSV or similar - //! format. - bool loadCSV; + //! Locally stored stored to determine type of dataset. + std::string datasetType; //! Locally stored path to file / directory for training data. std::string trainPath; @@ -90,6 +89,29 @@ struct DatasetDetails //! Whether or not to drop the first row from CSV. bool dropHeader; + // The following data members corresponds to image classification / detection + // type of datasets. + //! Locally stored path to images. + std::string trainingImagesPath; + + //! Locally stored path to testing images. + std::string testingImagesPath; + + //! Locally stored path to training annotations in xml format. + std::string trainingAnnotationPath; + + //! Locally stored classes of image classification /detection. + std::vector classes; + + //! 
Locally stored width of images. + size_t imageWidth; + + //! Locally stored heightof images. + size_t imageHeight; + + //! Locally stored depth of images. + size_t imageDepth; + // Default constructor. DatasetDetails() : datasetName(""), @@ -97,7 +119,7 @@ struct DatasetDetails testDownloadURL(""), trainHash(""), testHash(""), - loadCSV(false), + datasetType("none"), trainPath(""), testPath(""), zipFile(false), @@ -111,7 +133,14 @@ struct DatasetDetails endTrainingPredictionFeatures(0), startTestingInputFeatures(0), endTestingInputFeatures(0), - dropHeader(false) + dropHeader(false), + trainingImagesPath(""), + testingImagesPath(""), + trainingAnnotationPath(""), + classes(std::vector()), + imageWidth(0), + imageHeight(0), + imageDepth(0) {/* Nothing to do here. */} /** @@ -124,7 +153,7 @@ struct DatasetDetails * @param testDownloadURL URL for downloading testing data. * @param trainHash CRC-32 checksum for training data. * @param testHash CRC-32 checksum for testing data. - * @param loadCSV Determines if the format of dataset is similar to CSV. + * @param datasetType Determines if the format of dataset is similar to CSV. * @param trainPath Path for training dataset. * @param testPath Path for testing dataset. 
*/ @@ -133,7 +162,7 @@ struct DatasetDetails const std::string& testDownloadURL, const std::string& trainHash, const std::string& testHash, - const bool loadCSV, + const std::string& datasetType, const std::string& trainPath, const std::string& testPath) : datasetName(datasetName), @@ -141,7 +170,7 @@ struct DatasetDetails testDownloadURL(testDownloadURL), trainHash(trainHash), testHash(testHash), - loadCSV(loadCSV), + datasetType(datasetType), trainPath(trainPath), testPath(testPath), zipFile(false), @@ -154,7 +183,14 @@ struct DatasetDetails endTrainingPredictionFeatures(0), startTestingInputFeatures(0), endTestingInputFeatures(0), - dropHeader(false) + dropHeader(false), + trainingImagesPath(""), + testingImagesPath(""), + trainingAnnotationPath(""), + classes(std::vector()), + imageWidth(0), + imageHeight(0), + imageDepth(0) { // Nothing to do here. } @@ -165,10 +201,12 @@ struct DatasetDetails * @param datasetName Name of dataset used for identification during * dataloader call. * @param zipFile Boolean to determine if dataset is stored in zip format. + * NOTE: For large dataset type such as images always set to + * true. * @param datasetURL URL for downloading dataset. * @param datasetPath Path where the dataset will be downloaded. * @param datasetHash CRC-32 checksum for dataset. - * @param loadCSV Determines if the format of dataset is similar to CSV. + * @param datasetType Determines the format of dataset. * @param trainPath Path for training dataset. * @param testPath Path for testing dataset. 
*/ @@ -177,15 +215,15 @@ struct DatasetDetails const std::string& datasetURL, const std::string& datasetPath, const std::string& datasetHash, - const bool loadCSV, - const std::string& trainPath, - const std::string& testPath) : + const std::string& datasetType, + const std::string& trainPath = "", + const std::string& testPath = "") : datasetName(datasetName), zipFile(zipFile), datasetURL(datasetURL), datasetHash(datasetHash), datasetPath(datasetPath), - loadCSV(loadCSV), + datasetType(datasetType), trainPath(trainPath), testPath(testPath), trainDownloadURL(""), @@ -199,7 +237,14 @@ struct DatasetDetails endTrainingPredictionFeatures(0), startTestingInputFeatures(0), endTestingInputFeatures(0), - dropHeader(false) + dropHeader(false), + trainingImagesPath(""), + testingImagesPath(""), + trainingAnnotationPath(""), + classes(std::vector()), + imageWidth(0), + imageHeight(0), + imageDepth(0) { // Nothing to do here. } @@ -227,7 +272,7 @@ class Datasets "/datasets/mnist.tar.gz", "./../data/mnist.tar.gz", "33470ca3", - true, + "csv", "./../data/mnist-dataset/mnist_train.csv", "./../data/mnist-dataset/mnist_test.csv"); @@ -244,6 +289,32 @@ class Datasets mnistDetails.dropHeader = true; return mnistDetails; } + + const static DatasetDetails VOCDetection() + { + DatasetDetails VOCDetectionDetail( + "voc-detection", + true, + "/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar", + "./../data/VOCtrainval_11-May-2012.tar", + "504b9278", + "image-detection"); + + VOCDetectionDetail.trainingImagesPath = + "./../data/VOCdevkit/VOC2012/JPEGImages/"; + VOCDetectionDetail.trainingAnnotationPath = + "./../data/VOCdevkit/VOC2012/Annotations/"; + VOCDetectionDetail.serverName = "http://host.robots.ox.ac.uk"; + VOCDetectionDetail.PreProcess = PreProcessor::PascalVOC; + + // Set classes for dataset. 
+    VOCDetectionDetail.classes = {"background", "aeroplane", "bicycle",
+        "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow",
+        "diningtable", "dog", "horse", "motorbike", "person", "pottedplant",
+        "sheep", "sofa", "train", "tvmonitor"};
+
+    return VOCDetectionDetail;
+  }
+};
 
 #endif
diff --git a/dataloader/preprocessor.hpp b/dataloader/preprocessor.hpp
index 98dcfe68..0a2ee941 100644
--- a/dataloader/preprocessor.hpp
+++ b/dataloader/preprocessor.hpp
@@ -37,6 +37,15 @@ class PreProcessor
     trainY = trainY + 1;
     validY = validY + 1;
   }
+
+  static void PascalVOC(DatasetX& /* trainX */,
+                        DatasetY& /* trainY */,
+                        DatasetY& /* validX */,
+                        DatasetY& /* validY */,
+                        DatasetX& /* testX */)
+  {
+    // Nothing to do here. Added to match the rest of the codebase.
+  }
 };
 
 #endif
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 4414aa31..ded2245a 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -7,6 +7,7 @@ set(MODEL_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/)
 include_directories("${CMAKE_CURRENT_SOURCE_DIR}/../")
 
 add_executable(models_test
+  augmentation_tests.cpp
   dataloader_tests.cpp
   utils_tests.cpp
 )
@@ -19,6 +20,7 @@ target_link_libraries(models_test
   ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY}
   ${Boost_SYSTEM_LIBRARY}
   ${Boost_SERIALIZATION_LIBRARY}
+  ${Boost_REGEX_LIBRARY}
   ${MLPACK_LIBRARIES}
 )
diff --git a/tests/augmentation_tests.cpp b/tests/augmentation_tests.cpp
new file mode 100644
index 00000000..4ac52366
--- /dev/null
+++ b/tests/augmentation_tests.cpp
@@ -0,0 +1,58 @@
+/**
+ * @file augmentation_tests.cpp
+ * @author Kartik Dutt
+ *
+ * Tests for various functionalities of the Augmentation class.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license. You should have received a copy of the
+ * 3-clause BSD license along with mlpack. If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#define BOOST_TEST_DYN_LINK
+#include <augmentation/augmentation.hpp>
+#include <mlpack/core.hpp>
+#include <boost/test/unit_test.hpp>
+using namespace boost::unit_test;
+
+BOOST_AUTO_TEST_SUITE(AugmentationTest);
+
+BOOST_AUTO_TEST_CASE(ResizeAugmentationTest)
+{
+  Augmentation<> augmentation(std::vector<std::string>(1,
+      "resize (5, 4)"), 0.2);
+
+  // Test on a square matrix.
+  arma::mat input;
+  size_t inputWidth = 2;
+  size_t inputHeight = 2;
+  size_t depth = 1;
+  input.zeros(inputWidth * inputHeight * depth, 2);
+
+  // Resize function called.
+  augmentation.Transform(input, inputWidth, inputHeight, depth);
+
+  // Check correctness of input.
+  BOOST_REQUIRE_EQUAL(input.n_cols, 2);
+  BOOST_REQUIRE_EQUAL(input.n_rows, 5 * 4);
+
+  // Test on rectangular matrix.
+  inputWidth = 5;
+  inputHeight = 7;
+  depth = 1;
+  input.zeros(inputWidth * inputHeight * depth, 2);
+
+  // Rectangular input to square output.
+  std::vector<std::string> augmentationVector = {"horizontal-flip",
+      "resize : 8"};
+  Augmentation<> augmentation2(augmentationVector, 0.2);
+
+  // Resize function called.
+  augmentation2.Transform(input, inputWidth, inputHeight, depth);
+
+  // Check correctness of input.
+  BOOST_REQUIRE_EQUAL(input.n_cols, 2);
+  BOOST_REQUIRE_EQUAL(input.n_rows, 8 * 8);
+}
+
+BOOST_AUTO_TEST_SUITE_END();
diff --git a/tests/dataloader_tests.cpp b/tests/dataloader_tests.cpp
index 912cb1ab..93b77676 100644
--- a/tests/dataloader_tests.cpp
+++ b/tests/dataloader_tests.cpp
@@ -86,4 +86,73 @@ BOOST_AUTO_TEST_CASE(MNISTDataLoaderTest)
   Utils::RemoveFile("./../data/mnist.tar.gz");
 }
 
+/**
+ * Simple test for object detection dataloader.
+ */
+BOOST_AUTO_TEST_CASE(ObjectDetectionDataLoader)
+{
+  DataLoader<> dataloader;
+  Utils::ExtractFiles("./../data/PASCAL-VOC-Test.tar.gz", "./../data/");
+
+  // Set paths for dataset.
+  std::string basePath = "./../data/PASCAL-VOC-Test/";
+  std::string annotaionPath = "Annotations/";
+  std::string imagesPath = "Images/";
+  double validRatio = 0.2;
+  bool shuffle = true;
+
+  // Classes in the dataset.
+  std::vector<std::string> classes = {"background", "aeroplane", "bicycle",
+      "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow",
+      "diningtable", "dog", "horse", "motorbike", "person", "pottedplant",
+      "sheep", "sofa", "train", "tvmonitor"};
+
+  // Resize the image to 64 x 64.
+  std::vector<std::string> augmentation = {"resize (64, 64)"};
+  dataloader.LoadObjectDetectionDataset(basePath + annotaionPath,
+      basePath + imagesPath, classes, validRatio, shuffle, augmentation);
+
+  // 12 of the 15 total objects fall into the training split (validRatio 0.2).
+  BOOST_REQUIRE_EQUAL(dataloader.TrainLabels().n_cols, 12);
+  // They correspond to class name, x1, y1, x2, y2.
+  BOOST_REQUIRE_EQUAL(dataloader.TrainLabels().n_rows, 5);
+
+  // Rows will be equal to shape image depth * image width * image height.
+  BOOST_REQUIRE_EQUAL(dataloader.TrainFeatures().n_rows, 64 * 64 * 3);
+  // One column per object in the training split.
+  BOOST_REQUIRE_EQUAL(dataloader.TrainFeatures().n_cols, 12);
+
+  // The remaining 3 of the 15 objects fall into the validation split.
+  BOOST_REQUIRE_EQUAL(dataloader.ValidLabels().n_cols, 3);
+  // They correspond to class name, x1, y1, x2, y2.
+  BOOST_REQUIRE_EQUAL(dataloader.ValidLabels().n_rows, 5);
+
+  // Rows will be equal to shape image depth * image width * image height.
+  BOOST_REQUIRE_EQUAL(dataloader.ValidFeatures().n_rows, 64 * 64 * 3);
+  // One column per object in the validation split.
+  BOOST_REQUIRE_EQUAL(dataloader.ValidFeatures().n_cols, 3);
+}
+
+BOOST_AUTO_TEST_CASE(LoadImageDatasetFromDirectoryTest)
+{
+  DataLoader<> dataloader;
+  Utils::ExtractFiles("./../data/cifar-test.tar.gz", "./../data/");
+  dataloader.LoadImageDatasetFromDirectory("./../data/cifar-test/",
+      32, 32, 3, true);
+
+  // Check correctness of training data.
+  BOOST_REQUIRE_EQUAL(dataloader.TrainFeatures().n_cols, 24);
+  BOOST_REQUIRE_EQUAL(dataloader.TrainFeatures().n_rows, 32 * 32 * 3);
+
+  BOOST_REQUIRE_EQUAL(dataloader.TrainLabels().n_cols, 24);
+  BOOST_REQUIRE_EQUAL(dataloader.TrainLabels().n_rows, 1);
+
+  // Check correctness of validation data.
+ BOOST_REQUIRE_EQUAL(dataloader.ValidFeatures().n_cols, 6); + BOOST_REQUIRE_EQUAL(dataloader.ValidFeatures().n_rows, 32 * 32 * 3); + + BOOST_REQUIRE_EQUAL(dataloader.ValidLabels().n_cols, 6); + BOOST_REQUIRE_EQUAL(dataloader.ValidLabels().n_rows, 1); +} + BOOST_AUTO_TEST_SUITE_END(); diff --git a/tests/utils_tests.cpp b/tests/utils_tests.cpp index 4a314496..7af8cc22 100644 --- a/tests/utils_tests.cpp +++ b/tests/utils_tests.cpp @@ -91,4 +91,25 @@ BOOST_AUTO_TEST_CASE(ExtractFilesTest) Utils::RemoveFile("./../data/USCensus1990.tar.gz"); } +/** + * Simple test for downloading using curl. + */ +BOOST_AUTO_TEST_CASE(CurlDownloadTest) +{ + std::string serverName = "https://raw.githubusercontent.com/kartikdutt18/"; + std::string path = + "mlpack-models-weights-and-datasets/master/2007_000243.jpg"; + + // Download file from an https server. + Utils::DownloadFile(path, "./../data/test_image.jpg", "", false, true, + serverName); + + // Check whether or not the image was downloaded. If yes, perform a checksum. + BOOST_REQUIRE(Utils::PathExists("./../data/test_image.jpg")); + BOOST_REQUIRE(Utils::CompareCRC32("./../data/test_image.jpg", "6d2473f8")); + + // Clean up. + Utils::RemoveFile("./../data/test_image.jpg"); +} + BOOST_AUTO_TEST_SUITE_END(); diff --git a/utils/utils.hpp b/utils/utils.hpp index 7ddb6d61..ce91aff2 100644 --- a/utils/utils.hpp +++ b/utils/utils.hpp @@ -109,6 +109,35 @@ class Utils const bool zipFile = false, const std::string pathForExtraction = "./../data/") { + if (serverName != "www.mlpack.org") + { + // NOTE : curl is supported for all windows after 2018. + // Update to new version of windows if an error occurs, + // Else try downloading files from mlpack server or + // downloading curl executable for earlier version of windows. 
+ std::string command = "curl "; + if (!silent) + command += "-# "; + + command += "-o "; + #ifdef _WIN32 + std::string downloadPathTemp = downloadPath; + std::replace(downloadPathTemp.begin(), downloadPathTemp.end(), '/', + '\\'); + command = command + downloadPathTemp; + #else + command = command + downloadPath; + #endif + + command = command + " " + serverName + url; + std::system(command.c_str()); + if (zipFile) + { + Utils::ExtractFiles(downloadPath, pathForExtraction); + } + return 0; + } + // IO functionality by boost core. boost::asio::io_service ioService; // Use TCP protocol by boost asio to make a connection to desired server. @@ -286,6 +315,13 @@ class Utils boost::filesystem::directory_iterator(), std::back_inserter(pathVector)); + // Remove hidden files. + pathVector.erase(std::remove_if(pathVector.begin(), pathVector.end(), + [](boost::filesystem::path curPath) + { + return curPath.filename().string()[0] == '.'; + }), pathVector.end()); + // Sort the path vector. std::sort(pathVector.begin(), pathVector.end()); }