Skip to content

Commit

Permalink
Merge pull request #24 from kartikdutt18/Yolov1
Browse files Browse the repository at this point in the history
Add YOLO class.
  • Loading branch information
KimSangYeon-DGU authored Aug 31, 2020
2 parents cfa9d70 + f1c3bbf commit 49dcafb
Show file tree
Hide file tree
Showing 5 changed files with 454 additions and 0 deletions.
1 change: 1 addition & 0 deletions models/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ cmake_minimum_required(VERSION 3.1.0 FATAL_ERROR)
project(models)

add_subdirectory(darknet)
add_subdirectory(yolo)

# Add directory name to sources.
set(DIR_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/)
Expand Down
16 changes: 16 additions & 0 deletions models/yolo/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
cmake_minimum_required(VERSION 3.1.0 FATAL_ERROR)
project(yolo)

set(DIR_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/)
include_directories("${CMAKE_CURRENT_SOURCE_DIR}/../../")

set(SOURCES
yolo.hpp
yolo_impl.hpp
)

foreach(file ${SOURCES})
set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file})
endforeach()

set(DIRS ${DIRS} ${DIR_SRCS} PARENT_SCOPE)
254 changes: 254 additions & 0 deletions models/yolo/yolo.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,254 @@
/**
* @file yolo.hpp
* @author Kartik Dutt
*
* Definition of Yolo models.
*
* For more information, kindly refer to the following paper.
*
* Paper for YOLOv1.
*
* @code
* @article{Redmon2016,
* author = {Joseph Redmon, Santosh Divvala, Ross Girshick, Ali Farhadi},
* title = {You Only Look Once : Unified, Real-Time Object Detection},
* year = {2016},
* url = {https://arxiv.org/pdf/1506.02640.pdf}
* }
* @endcode
*
* mlpack is free software; you may redistribute it and/or modify it under the
* terms of the 3-clause BSD license. You should have received a copy of the
* 3-clause BSD license along with mlpack. If not, see
* http://www.opensource.org/licenses/BSD-3-Clause for more information.
*/

#ifndef MODELS_YOLO_HPP
#define MODELS_YOLO_HPP

#include <mlpack/core.hpp>
#include <mlpack/methods/ann/layer/layer.hpp>
#include <mlpack/methods/ann/ffn.hpp>
#include <mlpack/methods/ann/layer/layer_types.hpp>
#include <mlpack/methods/ann/init_rules/random_init.hpp>


namespace mlpack {
namespace ann /** Artificial Neural Network. */{

/**
* Definition of a YOLO object detection models.
*
* @tparam OutputLayerType The output layer type used to evaluate the network.
* @tparam InitializationRuleType Rule used to initialize the weight matrix.
* @tparam YOLOVersion Version of YOLO model.
*/
template<
typename OutputLayerType = NegativeLogLikelihood<>,
typename InitializationRuleType = RandomInitialization
>
class YOLO
{
public:
//! Create the YOLO model.
YOLO();

/**
* YOLO constructor intializes input shape and number of classes.
*
* @param inputChannels Number of input channels of the input image.
* @param inputWidth Width of the input image.
* @param inputHeight Height of the input image.
* @param yoloVersion Version of YOLO model.
* @param numClasses Optional number of classes to classify images into,
* only to be specified if includeTop is true.
* @param numBoxes Number of bounding boxes per image.
* @param featureSizeWidth Width of output feature map.
* @param featureSizeHeight Height of output feature map.
* @param weights One of 'none', 'voc'(pre-training on VOC-2012) or path to weights.
* @param includeTop Must be set to true if weights are set.
*/
YOLO(const size_t inputChannel,
const size_t inputWidth,
const size_t inputHeight,
const std::string yoloVersion = "v1-tiny",
const size_t numClasses = 20,
const size_t numBoxes = 2,
const size_t featureSizeWidth = 7,
const size_t featureSizeHeight = 7,
const std::string& weights = "none",
const bool includeTop = true);

/**
* YOLO constructor intializes input shape and number of classes.
*
* @param inputShape A three-valued tuple indicating input shape.
* First value is number of Channels (Channels-First).
* Second value is input height. Third value is input width.
* @param yoloVersion Version of YOLO model.
* @param numClasses Optional number of classes to classify images into,
* only to be specified if includeTop is true.
* @param numBoxes Number of bounding boxes per image.
* @param featureShape A twp-valued tuple indicating width and height of output feature
* map.
* @param weights One of 'none', 'voc'(pre-training on VOC) or path to weights.
*/
YOLO(const std::tuple<size_t, size_t, size_t> inputShape,
const std::string yoloVersion = "v1-tiny",
const size_t numClasses = 1000,
const size_t numBoxes = 2,
const std::tuple<size_t, size_t> featureShape = {7, 7},
const std::string& weights = "none",
const bool includeTop = true);

//! Get Layers of the model.
FFN<OutputLayerType, InitializationRuleType>& GetModel() { return yolo; }

//! Load weights into the model.
void LoadModel(const std::string& filePath);

//! Save weights for the model.
void SaveModel(const std::string& filePath);

private:
/**
* Adds Convolution Block.
*
* @tparam SequentialType Layer type in which convolution block will
* be added.
*
* @param inSize Number of input maps.
* @param outSize Number of output maps.
* @param kernelWidth Width of the filter/kernel.
* @param kernelHeight Height of the filter/kernel.
* @param strideWidth Stride of filter application in the x direction.
* @param strideHeight Stride of filter application in the y direction.
* @param padW Padding width of the input.
* @param padH Padding height of the input.
* @param batchNorm Boolean to determine whether a batch normalization
* layer is added.
* @param baseLayer Layer in which Convolution block will be added, if
* NULL added to YOLO FFN.
*/
template<typename SequentialType = Sequential<>>
void ConvolutionBlock(const size_t inSize,
const size_t outSize,
const size_t kernelWidth,
const size_t kernelHeight,
const size_t strideWidth = 1,
const size_t strideHeight = 1,
const size_t padW = 0,
const size_t padH = 0,
const bool batchNorm = false,
SequentialType* baseLayer = NULL)
{
Sequential<>* bottleNeck = new Sequential<>();
bottleNeck->Add(new Convolution<>(inSize, outSize, kernelWidth,
kernelHeight, strideWidth, strideHeight, padW, padH, inputWidth,
inputHeight));

mlpack::Log::Info << "Conv Layer. ";
mlpack::Log::Info << "(" << inputWidth << ", " << inputHeight <<
", " << inSize << ") ----> ";

inputWidth = ConvOutSize(inputWidth, kernelWidth, strideWidth, padW);
inputHeight = ConvOutSize(inputHeight, kernelHeight, strideHeight, padH);
mlpack::Log::Info << "(" << inputWidth << ", " << inputHeight <<
", " << outSize << ")" << std::endl;

if (batchNorm)
bottleNeck->Add(new BatchNorm<>(outSize, 1e-8, false));

bottleNeck->Add(new LeakyReLU<>(0.01));

if (baseLayer != NULL)
baseLayer->Add(bottleNeck);
else
yolo.Add(bottleNeck);
}

/**
* Adds Pooling Block.
*
* @param factor The factor by which input dimensions will be divided.
* @param type One of "max" or "mean". Determines whether add mean pooling
* layer or max pooling layer.
*/
void PoolingBlock(const size_t factor = 2,
const std::string type = "max")
{
if (type == "max")
{
yolo.Add(new AdaptiveMaxPooling<>(std::ceil(inputWidth * 1.0 / factor),
std::ceil(inputHeight * 1.0 / factor)));
}
else
{
yolo.Add(new AdaptiveMeanPooling<>(std::ceil(inputWidth * 1.0 /
factor), std::ceil(inputHeight * 1.0 / factor)));
}

mlpack::Log::Info << "Pooling Layer. ";
mlpack::Log::Info << "(" << inputWidth << ", " << inputHeight <<
") ----> ";
// Update inputWidth and inputHeight.
inputWidth = std::ceil(inputWidth * 1.0 / factor);
inputHeight = std::ceil(inputHeight * 1.0 / factor);

mlpack::Log::Info << "(" << inputWidth << ", " << inputHeight << ")" << std::endl;
}

/**
* Return the convolution output size.
*
* @param size The size of the input (row or column).
* @param k The size of the filter (width or height).
* @param s The stride size (x or y direction).
* @param padding The size of the padding (width or height) on one side.
* @return The convolution output size.
*/
size_t ConvOutSize(const size_t size,
const size_t k,
const size_t s,
const size_t padding)
{
return std::floor(size + 2 * padding - k) / s + 1;
}

//! Locally stored YOLO Model.
FFN<OutputLayerType, InitializationRuleType> yolo;

//! Locally stored width of the image.
size_t inputWidth;

//! Locally stored height of the image.
size_t inputHeight;

//! Locally stored number of channels in the image.
size_t inputChannel;

//! Locally stored number of output classes.
size_t numClasses;

//! Locally stored number of output bounding boxes.
size_t numBoxes;

//! Locally stored width of output feature map.
size_t featureWidth;

//! Locally stored height of output feature map.
size_t featureHeight;

//! Locally stored type of pre-trained weights.
std::string weights;

//! Locally stored version of yolo model.
std::string yoloVersion;
}; // YOLO class.

} // namespace ann
} // namespace mlpack

# include "yolo_impl.hpp"

#endif
Loading

0 comments on commit 49dcafb

Please sign in to comment.