Skip to content

Commit

Permalink
Merge pull request #25 from kartikdutt18/YOLOPreProcessor
Browse files Browse the repository at this point in the history
Add YOLO PreProcessor
  • Loading branch information
saksham189 authored Sep 11, 2020
2 parents 5bb3309 + 7275abf commit 27c4814
Show file tree
Hide file tree
Showing 3 changed files with 272 additions and 1 deletion.
169 changes: 168 additions & 1 deletion dataloader/preprocessor.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* @file dataloader.hpp
* @file preprocessor.hpp
* @author Kartik Dutt
*
* Definition of PreProcessor class for popular datasets.
Expand Down Expand Up @@ -93,6 +93,173 @@ class PreProcessor
trainFeatures(i) = ((uint8_t)(trainFeatures(i)) / 255.0);
}
}

/**
* PreProcessor for YOLO model. Converts arma::field type annotations to
* arma::mat type for training YOLO model. Each column in target matrix has
* the size : gridWidth * gridHeight * (5 * numBoxes + classes).
*
* @param annotations Field object created using model's dataloader containing
* annotation for images.
* @param output Output matrix where output will be stored.
* @param imageWidth Width of image used for training YOLO model.
* @param imageHeight Height of image used for training YOLO model.
* @param gridWidth Width of output feature map of YOLO model.
* @param gridHeight Height of output feature map of YOLO model.
* @param numBoxes Number of bounding boxes per grid.
* @param numClasses Number of classes in training set.
* @param normalize Boolean to determine whether coordinates are to
* to be normalized or not. Defaults to true.
*
* Note : This function must be called manually before model is used.
*/
template<typename eT>
static void YOLOPreProcessor(const DatasetY& annotations,
arma::Mat<eT>& output,
const size_t version = 1,
const size_t imageWidth = 224,
const size_t imageHeight = 224,
const size_t gridWidth = 7,
const size_t gridHeight = 7,
const size_t numBoxes = 2,
const size_t numClasses = 20,
const bool normalize = true)
{
// See if we can change this to v4 / v5.
mlpack::Log::Assert(version >= 1 && version <= 3, "Supported YOLO versions \
are version 1 to version 3.");

mlpack::Log::Assert(typeid(annotations) == typeid(arma::field<arma::vec>),
"Use Field type to represent annotations.");

size_t batchSize = annotations.n_cols;
size_t numPredictions = 5 * numBoxes + numClasses;
if (version > 1)
{
// Each bounding boxes has a corresponding class.
numPredictions = numBoxes * (5 + numClasses);
}

double cellSizeHeight = (double) 1.0 / gridHeight;
double cellSizeWidth = (double) 1.0 / gridWidth;

// Set size of output and use cubes convenience.
output.set_size(gridWidth * gridHeight * numPredictions, batchSize);
output.zeros();

// Use offset to create a cube for a particular column / batch.
size_t offset = 0;
for (size_t boxIdx = 0; boxIdx < batchSize; boxIdx++)
{
arma::cube outputTemp(const_cast<arma::Mat<eT> &>(output).memptr() +
offset, gridHeight, gridWidth, numPredictions, false, false);
offset += gridWidth * gridHeight * numPredictions;

// Get the bounding box and labels corresponding to current image.
arma::mat labels(1, annotations(0, boxIdx).n_elem / 5);
arma::mat boundingBoxes(4, annotations(0, boxIdx).n_elem / 5);
for (size_t i = 0; i < boundingBoxes.n_cols; i++)
{
labels.col(i)(0) = annotations(0, boxIdx)(i * 5);
boundingBoxes.col(i) = annotations(0, boxIdx)(arma::span(i * 5 + 1,
(i + 1) * 5 - 1));
}

// For YOLOv2 or higher, each bounding box can represent a class
// so we don't repeat labels as done for YOLOv1. We will use map
// to store last inserted bounding box.
std::map<std::pair<size_t, size_t>, size_t> boundingBoxOffset;

// Normalize the coordinates.
boundingBoxes.row(0) /= imageWidth;
boundingBoxes.row(2) /= imageWidth;
boundingBoxes.row(1) /= imageHeight;
boundingBoxes.row(3) /= imageHeight;

// Get width and height as well as centres for the bounding box.
arma::mat widthAndHeight(2, boundingBoxes.n_cols);
widthAndHeight.row(0) = (boundingBoxes.row(2) - boundingBoxes.row(0));
widthAndHeight.row(1) = (boundingBoxes.row(3) - boundingBoxes.row(1));

arma::mat centres(2, boundingBoxes.n_cols);
centres.row(0) = (boundingBoxes.row(2) + boundingBoxes.row(0)) / 2.0;
centres.row(1) = (boundingBoxes.row(3) + boundingBoxes.row(1)) / 2.0;

// Assign bounding boxes to the grid.
for (size_t i = 0; i < boundingBoxes.n_cols; i++)
{
// Index for representing bounding box on grid.
arma::vec gridCoordinates = centres.col(i);
arma::vec centreCoordinates = centres.col(i);

if (normalize)
{
gridCoordinates(0) = std::ceil(gridCoordinates(0) /
cellSizeWidth) - 1;
gridCoordinates(1) = std::ceil(gridCoordinates(1) /
cellSizeHeight) - 1;
}
else
{
gridCoordinates(0) = std::ceil((gridCoordinates(0) /
imageWidth) / cellSizeWidth) - 1;
gridCoordinates(1) = std::ceil((gridCoordinates(1) /
imageHeight) / cellSizeHeight) - 1;
}

size_t gridX = gridCoordinates(0);
size_t gridY = gridCoordinates(1);
gridCoordinates(0) = gridCoordinates(0) * cellSizeWidth;
gridCoordinates(1) = gridCoordinates(1) * cellSizeHeight;

// Normalize to 1.0.
gridCoordinates = centres.col(i) - gridCoordinates;
gridCoordinates(0) /= cellSizeWidth;
gridCoordinates(1) /= cellSizeHeight;

if (normalize)
centreCoordinates = gridCoordinates;

if (version == 1)
{
// Fill elements in the grid.
for (size_t k = 0; k < numBoxes; k++)
{
size_t s = 5 * k;
outputTemp(arma::span(gridX), arma::span(gridY),
arma::span(s, s + 1)) = centreCoordinates;
outputTemp(arma::span(gridX), arma::span(gridY),
arma::span(s + 2, s + 3)) = widthAndHeight.col(i);
outputTemp(gridX, gridY, s + 4) = 1.0;
}
outputTemp(gridX, gridY, 5 * numBoxes + labels.col(i)(0)) = 1;
}
else
{
size_t s = 0;
if (boundingBoxOffset.count({gridX, gridY}))
{
s = boundingBoxOffset[{gridX, gridY}] + 1;
boundingBoxOffset[{gridX, gridY}]++;
}
else
boundingBoxOffset.insert({{gridX, gridY}, s});

if (s > numBoxes)
continue;

size_t bBoxOffset = (5 + numClasses) * s;
outputTemp(arma::span(gridX), arma::span(gridY),
arma::span(bBoxOffset, bBoxOffset + 1)) = centreCoordinates;
outputTemp(arma::span(gridX), arma::span(gridY),
arma::span(bBoxOffset + 2,
bBoxOffset + 3)) = widthAndHeight.col(i);
outputTemp(gridX, gridY, bBoxOffset + 4) = 1.0;
outputTemp(gridX, gridY, bBoxOffset + 5 + labels.col(i)(0)) = 1;
}
}
}
}
};

#endif
1 change: 1 addition & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ add_executable(models_test
augmentation_tests.cpp
ffn_model_tests.cpp
dataloader_tests.cpp
preprocessor_tests.cpp
utils_tests.cpp
)

Expand Down
103 changes: 103 additions & 0 deletions tests/preprocessor_tests.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
/**
* @file preprocessor_tests.cpp
* @author Kartik Dutt
*
* Tests for various functionalities of PreProcessor class.
*
* mlpack is free software; you may redistribute it and/or modify it under the
* terms of the 3-clause BSD license. You should have received a copy of the
* 3-clause BSD license along with mlpack. If not, see
* http://www.opensource.org/licenses/BSD-3-Clause for more information.
*/

#define BOOST_TEST_DYN_LINK
#include <dataloader/preprocessor.hpp>
#include <dataloader/dataloader.hpp>
#include <boost/test/unit_test.hpp>

using namespace boost::unit_test;

BOOST_AUTO_TEST_SUITE(PreProcessorsTest);

BOOST_AUTO_TEST_CASE(YOLOPreProcessor)
{
  typedef PreProcessor<arma::mat, arma::field<arma::vec>> YOLOProcessor;

  arma::mat output;

  // One image with a single object, given as (label, x1, y1, x2, y2).
  const arma::vec firstImage = {2, 84, 48, 493, 387};

  arma::field<arma::vec> singleInput(1, 1);
  singleInput(0, 0) = firstImage;

  YOLOProcessor::YOLOPreProcessor(singleInput, output, 1, 500, 387);
  BOOST_REQUIRE_CLOSE(arma::accu(output), 8.3342, 1e-3);

  // A batch of three images; the second one holds three objects.
  arma::field<arma::vec> batchInput(1, 3);
  batchInput(0, 0) = firstImage;
  batchInput(0, 1) = arma::vec({8, 341, 217, 487, 375,
                                8, 114, 209, 183, 298,
                                19, 237, 110, 320, 176});
  batchInput(0, 2) = arma::vec({7, 157, 90, 486, 372});

  // Column sums expected for the YOLOv1 target layout.
  const arma::vec sumsV1 = {8.3342, 18.4093, 7.13195};
  YOLOProcessor::YOLOPreProcessor(batchInput, output, 1, 500, 387);
  for (size_t col = 0; col < output.n_cols; col++)
    BOOST_REQUIRE_CLOSE(arma::accu(output.col(col)), sumsV1(col), 1e-3);

  // Column sums expected for the YOLOv3 target layout.
  const arma::vec sumsV3 = {4.6671, 10.70465, 4.065975};
  YOLOProcessor::YOLOPreProcessor(batchInput, output, 3, 500, 387);
  for (size_t col = 0; col < output.n_cols; col++)
    BOOST_REQUIRE_CLOSE(arma::accu(output.col(col)), sumsV3(col), 1e-3);

  // Element-wise check on a tiny target: a 2 x 2 grid with numBoxes = 1 and
  // numClasses = 2 gives numBoxes * 5 + numClasses = 7 predictions per cell,
  // so the target of a single image is of size 1 x 2 x 2 x 7.
  arma::field<arma::vec> tinyInput(1, 1);
  tinyInput(0, 0) = arma::vec({0, 157, 90, 486, 300});

  YOLOProcessor::YOLOPreProcessor(tinyInput, output, 1, 500, 387, 2, 2, 1, 2);

  // To convert 4d Tensor to 1D array use tensor.numpy().ravel().
  arma::mat desiredOutput(2 * 2 * 7, 1, arma::fill::zeros);
  desiredOutput(3) = 0.2860;
  desiredOutput(7) = 0.0078;
  desiredOutput(11) = 0.6580;
  desiredOutput(15) = 0.5426;
  desiredOutput(19) = 1.0;
  desiredOutput(23) = 1.0;

  // Compare every element: values close to zero are checked absolutely,
  // all other values relatively.
  const double tolerance = 1e-1;
  for (size_t idx = 0; idx < output.n_elem; idx++)
  {
    const bool nearZero = std::abs(output(idx)) < tolerance / 2;
    if (!nearZero)
    {
      BOOST_REQUIRE_CLOSE(desiredOutput(idx), output(idx), 1e-2);
    }
    else
    {
      BOOST_REQUIRE_SMALL(desiredOutput(idx), tolerance / 2);
    }
  }
}

BOOST_AUTO_TEST_SUITE_END();

0 comments on commit 27c4814

Please sign in to comment.