mlpack · kartikdutt18 · Aug 14, 2020 · Aug 15, 2020 · Aug 15, 2020 · Aug 16, 2020
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -218,6 +218,7 @@ set(DIRS
   ensmallen_utils/
   dataloader/
   models/
+  loss_functions/
   tests/
 )
 

diff --git a/dataloader/preprocessor.hpp b/dataloader/preprocessor.hpp
@@ -1,5 +1,5 @@
 /**
- * @file dataloader.hpp
+ * @file preprocessor.hpp
  * @author Kartik Dutt
  * 
  * Definition of PreProcessor class for popular datasets.
@@ -93,6 +93,174 @@ class PreProcessor
           trainFeatures(i) = ((uint8_t)(trainFeatures(i)) / 255.0);
     }
   }
+
+  /**
+   * PreProcessor for YOLO model. Converts arma::field type annotations to
+   * arma::mat type for training YOLO model. Each column in target matrix has
+   * the size : gridWidth * gridHeight * (5 * numBoxes + classes) for version 1
+   * and gridWidth * gridHeight * numBoxes * (5 + classes) for later versions.
+   *
+   * Each annotation is read as consecutive groups of five values: the class
+   * label followed by the box corners (x1, y1, x2, y2) in pixels.
+   *
+   * @param annotations Field object created using model's dataloader containing
+   *                    annotation for images.
+   * @param output Output matrix where output will be stored.
+   * @param version Version of YOLO model (1 to 3) the targets are built for.
+   * @param imageWidth Width of image used for training YOLO model.
+   * @param imageHeight Height of image used for training YOLO model.
+   * @param gridWidth Width of output feature map of YOLO model.
+   * @param gridHeight Height of output feature map of YOLO model.
+   * @param numBoxes Number of bounding boxes per grid.
+   * @param numClasses Number of classes in training set.
+   * @param normalize If true, box centres are stored as offsets within their
+   *    grid cell (in cell units); otherwise they are stored relative to the
+   *    whole image. Defaults to true.
+   *
+   * Note : This function must be called manually before model is used.
+   */
+  template<typename eT>
+  static void YOLOPreProcessor(const DatasetY& annotations,
+                               arma::Mat<eT>& output,
+                               const size_t version = 1,
+                               const size_t imageWidth = 224,
+                               const size_t imageHeight = 224,
+                               const size_t gridWidth = 7,
+                               const size_t gridHeight = 7,
+                               const size_t numBoxes = 2,
+                               const size_t numClasses = 20,
+                               const bool normalize = true)
+  {
+    // See if we can change this to v4 / v5.
+    mlpack::Log::Assert(version >= 1 && version <= 3,
+        "Supported YOLO versions are version 1 to version 3.");
+
+    mlpack::Log::Assert(typeid(annotations) == typeid(arma::field<arma::vec>),
+        "Use Field type to represent annotations.");
+
+    const size_t batchSize = annotations.n_cols;
+    // YOLOv1 keeps one class vector per cell; later versions keep one per box.
+    const size_t numPredictions = (version > 1) ?
+        numBoxes * (5 + numClasses) : 5 * numBoxes + numClasses;
+
+    const double cellSizeHeight = 1.0 / gridHeight;
+    const double cellSizeWidth = 1.0 / gridWidth;
+
+    // Set size of output and use cubes convenience.
+    output.set_size(gridWidth * gridHeight * numPredictions, batchSize);
+    output.zeros();
+
+    for (size_t imageIdx = 0; imageIdx < batchSize; imageIdx++)
+    {
+      // View the current column as a cube without copying, so that grid cells
+      // can be addressed directly. Cube<eT> matches output's element type.
+      // NOTE(review): rows are gridHeight but are indexed with gridX below;
+      // that is only consistent for square grids - confirm intended layout.
+      arma::Cube<eT> outputTemp(output.colptr(imageIdx), gridHeight, gridWidth,
+          numPredictions, false, false);
+
+      // Get the bounding box and labels corresponding to current image.
+      const auto& annotation = annotations(0, imageIdx);
+      const size_t numObjects = annotation.n_elem / 5;
+      arma::mat labels(1, numObjects);
+      arma::mat boundingBoxes(4, numObjects);
+      for (size_t i = 0; i < numObjects; i++)
+      {
+        labels(0, i) = annotation(i * 5);
+        boundingBoxes.col(i) = annotation(arma::span(i * 5 + 1,
+            (i + 1) * 5 - 1));
+      }
+
+      // For YOLOv2 or higher, each bounding box can represent a class
+      //  so we don't repeat labels as done for YOLOv1. We will use map
+      //  to store last inserted bounding box.
+      std::map<std::pair<size_t, size_t>, size_t> boundingBoxOffset;
+
+      // Normalize the corner coordinates by the image size (in both modes).
+      boundingBoxes.row(0) /= imageWidth;
+      boundingBoxes.row(2) /= imageWidth;
+      boundingBoxes.row(1) /= imageHeight;
+      boundingBoxes.row(3) /= imageHeight;
+
+      // Get width and height as well as centres for the bounding box.
+      arma::mat widthAndHeight(2, numObjects);
+      widthAndHeight.row(0) = (boundingBoxes.row(2) - boundingBoxes.row(0));
+      widthAndHeight.row(1) = (boundingBoxes.row(3) - boundingBoxes.row(1));
+
+      arma::mat centres(2, numObjects);
+      centres.row(0) = (boundingBoxes.row(2) + boundingBoxes.row(0)) / 2.0;
+      centres.row(1) = (boundingBoxes.row(3) + boundingBoxes.row(1)) / 2.0;
+
+      // Assign bounding boxes to the grid.
+      for (size_t i = 0; i < numObjects; i++)
+      {
+        // Index of the cell containing the centre. Centres are image-relative
+        // at this point in both modes, so they are not divided by the image
+        // size again. A centre of exactly 0 is kept in the first cell.
+        const double cellX = std::ceil(centres(0, i) / cellSizeWidth) - 1;
+        const double cellY = std::ceil(centres(1, i) / cellSizeHeight) - 1;
+        const size_t gridX = (cellX > 0) ? static_cast<size_t>(cellX) : 0;
+        const size_t gridY = (cellY > 0) ? static_cast<size_t>(cellY) : 0;
+
+        arma::vec centreCoordinates = centres.col(i);
+        if (normalize)
+        {
+          // Offset of the centre inside its cell, normalized to 1.0.
+          centreCoordinates(0) = (centreCoordinates(0) -
+              gridX * cellSizeWidth) / cellSizeWidth;
+          centreCoordinates(1) = (centreCoordinates(1) -
+              gridY * cellSizeHeight) / cellSizeHeight;
+        }
+
+        // Convert once to the element type of the output, so the assignments
+        // below also work when eT is not double.
+        const arma::Col<eT> centre =
+            arma::conv_to<arma::Col<eT>>::from(centreCoordinates);
+        const arma::Col<eT> extent =
+            arma::conv_to<arma::Col<eT>>::from(widthAndHeight.col(i));
+        const size_t label = static_cast<size_t>(labels(0, i));
+
+        if (version == 1)
+        {
+          // Fill elements in the grid.
+          for (size_t k = 0; k < numBoxes; k++)
+          {
+            const size_t s = 5 * k;
+            outputTemp(arma::span(gridX), arma::span(gridY),
+                arma::span(s, s + 1)) = centre;
+            outputTemp(arma::span(gridX), arma::span(gridY),
+                arma::span(s + 2, s + 3)) = extent;
+            outputTemp(gridX, gridY, s + 4) = 1.0;
+          }
+          outputTemp(gridX, gridY, 5 * numBoxes + label) = 1;
+        }
+        else
+        {
+          // Next free box slot of this cell: 0 on first use, then 1, 2, ...
+          size_t s = 0;
+          const std::pair<size_t, size_t> cell(gridX, gridY);
+          auto slot = boundingBoxOffset.find(cell);
+          if (slot == boundingBoxOffset.end())
+            boundingBoxOffset.emplace(cell, s);
+          else
+            s = ++(slot->second);
+
+          // Slots are 0-based, so a cell holds at most numBoxes boxes. Further
+          // boxes are skipped rather than written past the cell's predictions.
+          if (s >= numBoxes)
+            continue;
+
+          const size_t bBoxOffset = (5 + numClasses) * s;
+          outputTemp(arma::span(gridX), arma::span(gridY),
+              arma::span(bBoxOffset, bBoxOffset + 1)) = centre;
+          outputTemp(arma::span(gridX), arma::span(gridY),
+              arma::span(bBoxOffset + 2, bBoxOffset + 3)) = extent;
+          outputTemp(gridX, gridY, bBoxOffset + 4) = 1.0;
+          outputTemp(gridX, gridY, bBoxOffset + 5 + label) = 1;
+        }
+      }
+    }
+  }
 };
 
 #endif
diff --git a/loss_functions/CMakeLists.txt b/loss_functions/CMakeLists.txt
@@ -0,0 +1,17 @@
+cmake_minimum_required(VERSION 3.1.0 FATAL_ERROR)
+project(loss_functions)
+
+set(DIR_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/)
+
+set(SOURCES
+  yolo_loss.hpp
+  yolo_loss_impl.hpp
+)
+
+# Qualify every listed header with the path of this directory.
+foreach(header IN LISTS SOURCES)
+  list(APPEND DIR_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/${header})
+endforeach()
+
+# Extend the DIRS list of the parent scope with the entries collected above.
+set(DIRS ${DIRS} ${DIR_SRCS} PARENT_SCOPE)
diff --git a/loss_functions/yolo_loss.hpp b/loss_functions/yolo_loss.hpp
@@ -0,0 +1,158 @@
+/**
+ * @file yolo_loss.hpp
+ * @author Kartik Dutt
+ *
+ * Loss function for training YOLO model.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+
+#ifndef MODELS_LOSS_FUNCTIONS_YOLO_LOSS_HPP
+#define MODELS_LOSS_FUNCTIONS_YOLO_LOSS_HPP
+
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/metrics/iou_metric.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * The YOLO loss function is used to decode output of YOLO model and train it.
+ * This header only declares the class; the member definitions are pulled in
+ * from yolo_loss_impl.hpp.
+ *
+ * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ */
+template <
+    typename InputDataType = arma::mat,
+    typename OutputDataType = arma::mat
+>
+class YOLOLoss
+{
+ public:
+  /**
+   * Create the YOLOLoss object.
+   *
+   * @param version Version of YOLO model used in training.
+   * @param gridWidth Width of output feature map of YOLO model.
+   * @param gridHeight Height of output feature map of YOLO model.
+   * @param numBoxes Number of bounding boxes per grid.
+   * @param numClasses Number of classes in training set.
+   * @param lambdaCoordinates Multiplicative factor for loss obtained from
+   *    coordinates.
+   * @param lambdaObjectness Multiplicative factor for loss obtained from
+   *    misclassification.
+   */
+  YOLOLoss(const size_t version = 1,
+           const size_t gridWidth = 7,
+           const size_t gridHeight = 7,
+           const size_t numBoxes = 2,
+           const size_t numClasses = 20,
+           const double lambdaCoordinates = 5.0,
+           const double lambdaObjectness = 0.5);
+
+  /**
+   * Computes the YOLO loss function.
+   *
+   * @param input Input data used for evaluating the specified function.
+   * @param target The target vector.
+   * @return Loss value, expressed in the element type of the input.
+   */
+  template<typename InputType, typename TargetType>
+  typename InputType::elem_type Forward(const InputType &input,
+                                        const TargetType &target);
+
+  /**
+   * Ordinary feed backward pass of a neural network.
+   *
+   * @param input The propagated input activation.
+   * @param target The target vector.
+   * @param output The calculated error.
+   */
+  template<typename InputType, typename TargetType, typename OutputType>
+  void Backward(const InputType& input,
+                const TargetType& target,
+                OutputType& output);
+
+  //! Get the output parameter (read-only access on a const object).
+  const OutputDataType& OutputParameter() const { return outputParameter; }
+  //! Modify the output parameter.
+  OutputDataType& OutputParameter() { return outputParameter; }
+
+  //! Get the version.
+  size_t Version() const { return version; }
+  //! Modify the version.
+  size_t& Version() { return version; }
+
+  //! Get the Grid Width.
+  size_t GridWidth() const { return gridWidth; }
+  //! Modify the Grid Width.
+  size_t& GridWidth() { return gridWidth; }
+
+  //! Get the Grid Height.
+  size_t GridHeight() const { return gridHeight; }
+  //! Modify the Grid Height.
+  size_t& GridHeight() { return gridHeight; }
+
+  //! Get the Number of boxes.
+  size_t NumBoxes() const { return numBoxes; }
+  //! Modify the Number of boxes.
+  size_t& NumBoxes() { return numBoxes; }
+
+  //! Get the Number of classes.
+  size_t NumClasses() const { return numClasses; }
+  //! Modify the Number of classes.
+  size_t& NumClasses() { return numClasses; }
+
+  //! Get the lambdaCoordinates.
+  double LambdaCoordinates() const { return lambdaCoordinates; }
+  //! Modify the lambdaCoordinates.
+  double& LambdaCoordinates() { return lambdaCoordinates; }
+
+  //! Get the lambdaObjectness.
+  double LambdaObjectness() const { return lambdaObjectness; }
+  //! Modify the lambdaObjectness.
+  double& LambdaObjectness() { return lambdaObjectness; }
+
+  //! Serialize the loss function.
+  template <typename Archive>
+  void serialize(Archive &ar, const unsigned int /* version */);
+
+ private:
+  //! Version of YOLO model used in training.
+  size_t version;
+
+  //! Width of output feature map of YOLO model.
+  size_t gridWidth;
+
+  //! Height of output feature map of YOLO model.
+  size_t gridHeight;
+
+  //! Number of bounding boxes per grid.
+  size_t numBoxes;
+
+  //! Number of classes in training set.
+  size_t numClasses;
+
+  //! Multiplicative factor for loss obtained from coordinates.
+  double lambdaCoordinates;
+
+  //! Multiplicative factor for loss obtained from misclassification.
+  double lambdaObjectness;
+
+  //! Locally-stored output parameter object.
+  OutputDataType outputParameter;
+};
+
+} // namespace ann
+} // namespace mlpack
+
+#include "yolo_loss_impl.hpp"
+
+#endif
-Original file line number
+Diff line change
@@ Expand Up / @@ -218,6 +218,7 @@ set(DIRS @@
       ensmallen_utils/
       dataloader/
       models/
+      loss_functions/
       tests/
     )
@@ Expand Down @@