Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add YOLO class. #24

Merged
merged 8 commits into from
Aug 31, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions models/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ cmake_minimum_required(VERSION 3.1.0 FATAL_ERROR)
project(models)

add_subdirectory(darknet)
add_subdirectory(yolo)

# Add directory name to sources.
set(DIR_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/)
Expand Down
16 changes: 16 additions & 0 deletions models/yolo/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
cmake_minimum_required(VERSION 3.1.0 FATAL_ERROR)
project(yolo)

set(DIR_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/)
include_directories("${CMAKE_CURRENT_SOURCE_DIR}/../../")

set(SOURCES
yolo.hpp
yolo_impl.hpp
)

foreach(file ${SOURCES})
set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file})
endforeach()

set(DIRS ${DIRS} ${DIR_SRCS} PARENT_SCOPE)
254 changes: 254 additions & 0 deletions models/yolo/yolo.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,254 @@
/**
* @file yolo.hpp
* @author Kartik Dutt
*
* Definition of Yolo models.
*
* For more information, kindly refer to the following paper.
*
* Paper for YOLOv1.
*
* @code
* @article{Redmon2016,
* author = {Joseph Redmon, Santosh Divvala, Ross Girshick, Ali Farhadi},
* title = {You Only Look Once : Unified, Real-Time Object Detection},
* year = {2016},
* url = {https://arxiv.org/pdf/1506.02640.pdf}
* }
* @endcode
*
* mlpack is free software; you may redistribute it and/or modify it under the
* terms of the 3-clause BSD license. You should have received a copy of the
* 3-clause BSD license along with mlpack. If not, see
* http://www.opensource.org/licenses/BSD-3-Clause for more information.
*/

#ifndef MODELS_YOLO_HPP
#define MODELS_YOLO_HPP

#include <mlpack/core.hpp>
#include <mlpack/methods/ann/layer/layer.hpp>
#include <mlpack/methods/ann/ffn.hpp>
#include <mlpack/methods/ann/layer/layer_types.hpp>
#include <mlpack/methods/ann/init_rules/random_init.hpp>


namespace mlpack {
namespace ann /** Artificial Neural Network. */{

/**
* Definition of a YOLO object detection models.
*
* @tparam OutputLayerType The output layer type used to evaluate the network.
* @tparam InitializationRuleType Rule used to initialize the weight matrix.
* @tparam YOLOVersion Version of YOLO model.
*/
template<
typename OutputLayerType = NegativeLogLikelihood<>,
typename InitializationRuleType = RandomInitialization
>
class YOLO
{
public:
//! Create the YOLO model.
YOLO();

/**
* YOLO constructor intializes input shape and number of classes.
*
* @param inputChannels Number of input channels of the input image.
* @param inputWidth Width of the input image.
* @param inputHeight Height of the input image.
* @param yoloVersion Version of YOLO model.
* @param numClasses Optional number of classes to classify images into,
* only to be specified if includeTop is true.
* @param numBoxes Number of bounding boxes per image.
* @param featureSizeWidth Width of output feature map.
* @param featureSizeHeight Height of output feature map.
* @param weights One of 'none', 'voc'(pre-training on VOC-2012) or path to weights.
* @param includeTop Must be set to true if weights are set.
*/
YOLO(const size_t inputChannel,
const size_t inputWidth,
const size_t inputHeight,
const std::string yoloVersion = "v1-tiny",
const size_t numClasses = 20,
const size_t numBoxes = 2,
const size_t featureSizeWidth = 7,
const size_t featureSizeHeight = 7,
const std::string& weights = "none",
const bool includeTop = true);

/**
* YOLO constructor intializes input shape and number of classes.
*
* @param inputShape A three-valued tuple indicating input shape.
* First value is number of Channels (Channels-First).
* Second value is input height. Third value is input width.
* @param yoloVersion Version of YOLO model.
* @param numClasses Optional number of classes to classify images into,
* only to be specified if includeTop is true.
* @param numBoxes Number of bounding boxes per image.
* @param featureShape A twp-valued tuple indicating width and height of output feature
* map.
* @param weights One of 'none', 'voc'(pre-training on VOC) or path to weights.
*/
YOLO(const std::tuple<size_t, size_t, size_t> inputShape,
const std::string yoloVersion = "v1-tiny",
const size_t numClasses = 1000,
const size_t numBoxes = 2,
const std::tuple<size_t, size_t> featureShape = {7, 7},
const std::string& weights = "none",
const bool includeTop = true);

//! Get Layers of the model.
FFN<OutputLayerType, InitializationRuleType>& GetModel() { return yolo; }

//! Load weights into the model.
void LoadModel(const std::string& filePath);

//! Save weights for the model.
void SaveModel(const std::string& filePath);

private:
/**
* Adds Convolution Block.
*
* @tparam SequentialType Layer type in which convolution block will
* be added.
*
* @param inSize Number of input maps.
* @param outSize Number of output maps.
* @param kernelWidth Width of the filter/kernel.
* @param kernelHeight Height of the filter/kernel.
* @param strideWidth Stride of filter application in the x direction.
* @param strideHeight Stride of filter application in the y direction.
* @param padW Padding width of the input.
* @param padH Padding height of the input.
* @param batchNorm Boolean to determine whether a batch normalization
* layer is added.
* @param baseLayer Layer in which Convolution block will be added, if
* NULL added to YOLO FFN.
*/
template<typename SequentialType = Sequential<>>
void ConvolutionBlock(const size_t inSize,
const size_t outSize,
const size_t kernelWidth,
const size_t kernelHeight,
const size_t strideWidth = 1,
const size_t strideHeight = 1,
const size_t padW = 0,
const size_t padH = 0,
const bool batchNorm = false,
SequentialType* baseLayer = NULL)
{
Sequential<>* bottleNeck = new Sequential<>();
bottleNeck->Add(new Convolution<>(inSize, outSize, kernelWidth,
kernelHeight, strideWidth, strideHeight, padW, padH, inputWidth,
inputHeight));

mlpack::Log::Info << "Conv Layer. ";
mlpack::Log::Info << "(" << inputWidth << ", " << inputHeight <<
", " << inSize << ") ----> ";

inputWidth = ConvOutSize(inputWidth, kernelWidth, strideWidth, padW);
inputHeight = ConvOutSize(inputHeight, kernelHeight, strideHeight, padH);
mlpack::Log::Info << "(" << inputWidth << ", " << inputHeight <<
", " << outSize << ")" << std::endl;

if (batchNorm)
bottleNeck->Add(new BatchNorm<>(outSize, 1e-8, false));

bottleNeck->Add(new LeakyReLU<>(0.01));

if (baseLayer != NULL)
baseLayer->Add(bottleNeck);
else
yolo.Add(bottleNeck);
}

/**
* Adds Pooling Block.
*
* @param factor The factor by which input dimensions will be divided.
* @param type One of "max" or "mean". Determines whether add mean pooling
* layer or max pooling layer.
*/
void PoolingBlock(const size_t factor = 2,
const std::string type = "max")
{
if (type == "max")
{
yolo.Add(new AdaptiveMaxPooling<>(std::ceil(inputWidth * 1.0 / factor),
std::ceil(inputHeight * 1.0 / factor)));
}
else
{
yolo.Add(new AdaptiveMeanPooling<>(std::ceil(inputWidth * 1.0 /
factor), std::ceil(inputHeight * 1.0 / factor)));
}

mlpack::Log::Info << "Pooling Layer. ";
mlpack::Log::Info << "(" << inputWidth << ", " << inputHeight <<
") ----> ";
// Update inputWidth and inputHeight.
inputWidth = std::ceil(inputWidth * 1.0 / factor);
inputHeight = std::ceil(inputHeight * 1.0 / factor);

mlpack::Log::Info << "(" << inputWidth << ", " << inputHeight << ")" << std::endl;
}

/**
* Return the convolution output size.
*
* @param size The size of the input (row or column).
* @param k The size of the filter (width or height).
* @param s The stride size (x or y direction).
* @param padding The size of the padding (width or height) on one side.
* @return The convolution output size.
*/
size_t ConvOutSize(const size_t size,
const size_t k,
const size_t s,
const size_t padding)
{
return std::floor(size + 2 * padding - k) / s + 1;
}

//! Locally stored YOLO Model.
FFN<OutputLayerType, InitializationRuleType> yolo;

//! Locally stored width of the image.
size_t inputWidth;

//! Locally stored height of the image.
size_t inputHeight;

//! Locally stored number of channels in the image.
size_t inputChannel;

//! Locally stored number of output classes.
size_t numClasses;

//! Locally stored number of output bounding boxes.
size_t numBoxes;

//! Locally stored width of output feature map.
size_t featureWidth;

//! Locally stored height of output feature map.
size_t featureHeight;

//! Locally stored type of pre-trained weights.
std::string weights;

//! Locally stored version of yolo model.
std::string yoloVersion;
}; // YOLO class.

} // namespace ann
} // namespace mlpack

# include "yolo_impl.hpp"

#endif
Loading