Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

YOLO Loss Function / Detector. #29

Open
wants to merge 20 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ set(DIRS
ensmallen_utils/
dataloader/
models/
loss_functions/
tests/
)

Expand Down
170 changes: 169 additions & 1 deletion dataloader/preprocessor.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* @file dataloader.hpp
* @file preprocessor.hpp
* @author Kartik Dutt
*
* Definition of PreProcessor class for popular datasets.
Expand Down Expand Up @@ -93,6 +93,174 @@ class PreProcessor
trainFeatures(i) = ((uint8_t)(trainFeatures(i)) / 255.0);
}
}

/**
* PreProcessor for YOLO model. Converts arma::field type annotations to
* arma::mat type for training YOLO model. Each column in target matrix has
* the size : gridWidth * gridHeight * (5 * numBoxes + classes).
*
* @param annotations Field object created using model's dataloader containing
* annotation for images.
* @param output Output matrix where output will be stored.
* @param imageWidth Width of image used for training YOLO model.
* @param imageHeight Height of image used for training YOLO model.
* @param gridWidth Width of output feature map of YOLO model.
* @param gridHeight Height of output feature map of YOLO model.
* @param numBoxes Number of bounding boxes per grid.
* @param numClasses Number of classes in training set.
* @param normalize Boolean to determine whether coordinates are to
* to be normalized or not. Defaults to true.
*
* Note : This function must be called manually before model is used.
*/
template<typename eT>
static void YOLOPreProcessor(const DatasetY& annotations,
arma::Mat<eT>& output,
const size_t version = 1,
const size_t imageWidth = 224,
const size_t imageHeight = 224,
const size_t gridWidth = 7,
const size_t gridHeight = 7,
const size_t numBoxes = 2,
const size_t numClasses = 20,
const bool normalize = true)
{
// See if we can change this to v4 / v5.
mlpack::Log::Assert(version >= 1 && version <= 3, "Supported YOLO versions \
are version 1 to version 3.");

mlpack::Log::Assert(typeid(annotations) == typeid(arma::field<arma::vec>),
"Use Field type to represent annotations.");

size_t batchSize = annotations.n_cols;
size_t numPredictions = 5 * numBoxes + numClasses;
if (version > 1)
{
// Each bounding boxes has a corresponding class.
numPredictions = numBoxes * (5 + numClasses);
}

double cellSizeHeight = (double) 1.0 / gridHeight;
double cellSizeWidth = (double) 1.0 / gridWidth;

// Set size of output and use cubes convenience.
output.set_size(gridWidth * gridHeight * numPredictions, batchSize);
output.zeros();

// Use offset to create a cube for a particular column / batch.
size_t offset = 0;
for (size_t boxIdx = 0; boxIdx < batchSize; boxIdx++)
{
arma::cube outputTemp(const_cast<arma::Mat<eT> &>(output).memptr() +
offset, gridHeight, gridWidth, numPredictions, false, false);
offset += gridWidth * gridHeight * numPredictions;

// Get the bounding box and labels corresponding to current image.
arma::mat labels(1, annotations(0, boxIdx).n_elem / 5);
arma::mat boundingBoxes(4, annotations(0, boxIdx).n_elem / 5);
for (size_t i = 0; i < boundingBoxes.n_cols; i++)
{
labels.col(i)(0) = annotations(0, boxIdx)(i * 5);
boundingBoxes.col(i) = annotations(0, boxIdx)(arma::span(i * 5 + 1,
(i + 1) * 5 - 1));
}

// For YOLOv2 or higher, each bounding box can represent a class
// so we don't repeat labels as done for YOLOv1. We will use map
// to store last inserted bounding box.
std::map<std::pair<size_t, size_t>, size_t> boundingBoxOffset;

// Normalize the coordinates.
boundingBoxes.row(0) /= imageWidth;
boundingBoxes.row(2) /= imageWidth;
boundingBoxes.row(1) /= imageHeight;
boundingBoxes.row(3) /= imageHeight;

// Get width and height as well as centres for the bounding box.
arma::mat widthAndHeight(2, boundingBoxes.n_cols);
widthAndHeight.row(0) = (boundingBoxes.row(2) - boundingBoxes.row(0));
widthAndHeight.row(1) = (boundingBoxes.row(3) - boundingBoxes.row(1));

arma::mat centres(2, boundingBoxes.n_cols);
centres.row(0) = (boundingBoxes.row(2) + boundingBoxes.row(0)) / 2.0;
centres.row(1) = (boundingBoxes.row(3) + boundingBoxes.row(1)) / 2.0;

// Assign bounding boxes to the grid.
for (size_t i = 0; i < boundingBoxes.n_cols; i++)
{
// Index for representing bounding box on grid.
arma::vec gridCoordinates = centres.col(i);
arma::vec centreCoordinates = centres.col(i);

if (normalize)
{
gridCoordinates(0) = std::ceil(gridCoordinates(0) /
cellSizeWidth) - 1;
gridCoordinates(1) = std::ceil(gridCoordinates(1) /
cellSizeHeight) - 1;
}
else
{
gridCoordinates(0) = std::ceil((gridCoordinates(0) /
imageWidth) / cellSizeWidth) - 1;
gridCoordinates(1) = std::ceil((gridCoordinates(1) /
imageHeight) / cellSizeHeight) - 1;
}


size_t gridX = gridCoordinates(0);
size_t gridY = gridCoordinates(1);
gridCoordinates(0) = gridCoordinates(0) * cellSizeWidth;
gridCoordinates(1) = gridCoordinates(1) * cellSizeHeight;

// Normalize to 1.0.
gridCoordinates = centres.col(i) - gridCoordinates;
gridCoordinates(0) /= cellSizeWidth;
gridCoordinates(1) /= cellSizeHeight;

if (normalize)
centreCoordinates = gridCoordinates;

if (version == 1)
{
// Fill elements in the grid.
for (size_t k = 0; k < numBoxes; k++)
{
size_t s = 5 * k;
outputTemp(arma::span(gridX), arma::span(gridY),
arma::span(s, s + 1)) = centreCoordinates;
outputTemp(arma::span(gridX), arma::span(gridY),
arma::span(s + 2, s + 3)) = widthAndHeight.col(i);
outputTemp(gridX, gridY, s + 4) = 1.0;
}
outputTemp(gridX, gridY, 5 * numBoxes + labels.col(i)(0)) = 1;
}
else
{
size_t s = 0;
if (boundingBoxOffset.count({gridX, gridY}))
{
s = boundingBoxOffset[{gridX, gridY}] + 1;
boundingBoxOffset[{gridX, gridY}]++;
}
else
boundingBoxOffset.insert({{gridX, gridY}, s});

if (s > numBoxes)
continue;

size_t bBoxOffset = (5 + numClasses) * s;
outputTemp(arma::span(gridX), arma::span(gridY),
arma::span(bBoxOffset, bBoxOffset + 1)) = centreCoordinates;
outputTemp(arma::span(gridX), arma::span(gridY),
arma::span(bBoxOffset + 2,
bBoxOffset + 3)) = widthAndHeight.col(i);
outputTemp(gridX, gridY, bBoxOffset + 4) = 1.0;
outputTemp(gridX, gridY, bBoxOffset + 5 + labels.col(i)(0)) = 1;
}
}
}
}
};

#endif
17 changes: 17 additions & 0 deletions loss_functions/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
cmake_minimum_required(VERSION 3.1.0 FATAL_ERROR)
project(loss_functions)

# Headers that make up the loss_functions component.
set(SOURCES
  yolo_loss.hpp
  yolo_loss_impl.hpp
)

# Start with this directory itself, then add every header with its full path.
set(DIR_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/)
foreach(header ${SOURCES})
  list(APPEND DIR_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/${header})
endforeach()

# Append sources (with directory name) to list of all models sources (used at
# the parent scope).
set(DIRS ${DIRS} ${DIR_SRCS} PARENT_SCOPE)
158 changes: 158 additions & 0 deletions loss_functions/yolo_loss.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
/**
* @file yolo_loss.hpp
* @author Kartik Dutt
*
* Loss function for training YOLO model.
*
* mlpack is free software; you may redistribute it and/or modify it under the
* terms of the 3-clause BSD license. You should have received a copy of the
* 3-clause BSD license along with mlpack. If not, see
* http://www.opensource.org/licenses/BSD-3-Clause for more information.
*/

#ifndef MODELS_LOSS_FUNCTIONS_YOLO_LOSS_HPP
#define MODELS_LOSS_FUNCTIONS_YOLO_LOSS_HPP

#include <mlpack/prereqs.hpp>
#include <mlpack/core/metrics/iou_metric.hpp>

namespace mlpack {
namespace ann /** Artificial Neural Network. */ {

/**
 * The YOLO loss function is used to decode output of YOLO model and train
 * it.
 *
 * This header only declares the class; the constructor, Forward(), Backward()
 * and serialize() are defined in yolo_loss_impl.hpp.
 *
 * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
 *         arma::sp_mat or arma::cube).
 * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
 *         arma::sp_mat or arma::cube).
 */
template <
    typename InputDataType = arma::mat,
    typename OutputDataType = arma::mat
>
class YOLOLoss
{
 public:
  /**
   * Create the YOLOLoss object.
   *
   * @param version Version of YOLO model used in training.
   * @param gridWidth Width of output feature map of YOLO model.
   * @param gridHeight Height of output feature map of YOLO model.
   * @param numBoxes Number of bounding boxes per grid.
   * @param numClasses Number of classes in training set.
   * @param lambdaCoordinates Multiplicative factor for loss obtained from
   *        coordinates.
   * @param lambdaObjectness Multiplicative factor for loss obtained from
   *        misclassification.
   */
  YOLOLoss(const size_t version = 1,
           const size_t gridWidth = 7,
           const size_t gridHeight = 7,
           const size_t numBoxes = 2,
           const size_t numClasses = 20,
           const double lambdaCoordinates = 5.0,
           const double lambdaObjectness = 0.5);

  /**
   * Computes the YOLO loss function.
   *
   * @param input Input data used for evaluating the specified function.
   * @param target The target vector.
   * @return The value of the loss, in the element type of the input.
   */
  template<typename InputType, typename TargetType>
  typename InputType::elem_type Forward(const InputType &input,
                                        const TargetType &target);

  /**
   * Ordinary feed backward pass of a neural network.
   *
   * @param input The propagated input activation.
   * @param target The target vector.
   * @param output The calculated error.
   */
  template<typename InputType, typename TargetType, typename OutputType>
  void Backward(const InputType& input,
                const TargetType& target,
                OutputType& output);

  //! Get the output parameter. Returned by const reference: a const member
  //! function cannot hand out a mutable reference to a data member.
  const OutputDataType& OutputParameter() const { return outputParameter; }
  //! Modify the output parameter.
  OutputDataType& OutputParameter() { return outputParameter; }

  //! Get the version.
  size_t Version() const { return version; }
  //! Modify the version.
  size_t& Version() { return version; }

  //! Get the Grid Width.
  size_t GridWidth() const { return gridWidth; }
  //! Modify the Grid Width.
  size_t& GridWidth() { return gridWidth; }

  //! Get the Grid Height.
  size_t GridHeight() const { return gridHeight; }
  //! Modify the Grid Height.
  size_t& GridHeight() { return gridHeight; }

  //! Get the Number of boxes.
  size_t NumBoxes() const { return numBoxes; }
  //! Modify the Number of boxes.
  size_t& NumBoxes() { return numBoxes; }

  //! Get the Number of classes.
  size_t NumClasses() const { return numClasses; }
  //! Modify the Number of classes.
  size_t& NumClasses() { return numClasses; }

  //! Get the lambdaCoordinates.
  double LambdaCoordinates() const { return lambdaCoordinates; }
  //! Modify the lambdaCoordinates.
  double& LambdaCoordinates() { return lambdaCoordinates; }

  //! Get the lambdaObjectness.
  double LambdaObjectness() const { return lambdaObjectness; }
  //! Modify the lambdaObjectness.
  double& LambdaObjectness() { return lambdaObjectness; }

  /**
   * Serialize the loss function.
   */
  template <typename Archive>
  void serialize(Archive &ar, const unsigned int /* version */);

 private:
  //! Version of YOLO model used in training.
  size_t version;

  //! Width of output feature map of YOLO model.
  size_t gridWidth;

  //! Height of output feature map of YOLO model.
  size_t gridHeight;

  //! Number of bounding boxes per grid.
  size_t numBoxes;

  //! Number of classes in training set.
  size_t numClasses;

  //! Multiplicative factor for loss obtained from coordinates.
  double lambdaCoordinates;

  //! Multiplicative factor for loss obtained from misclassification.
  double lambdaObjectness;

  //! Locally-stored output parameter object.
  OutputDataType outputParameter;
};

} // namespace ann
} // namespace mlpack

#include "yolo_loss_impl.hpp"

#endif
Loading