Skip to content

Commit

Permalink
intermediate review
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrhm committed Dec 2, 2024
1 parent fbab60e commit 8bff333
Show file tree
Hide file tree
Showing 7 changed files with 46 additions and 219 deletions.
13 changes: 7 additions & 6 deletions perception/object_detector/object_detector.cpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
#include "object_detector.hpp"
#include <functional>

namespace mrover {

ObjectDetectorBase::ObjectDetectorBase(rclcpp::NodeOptions const& options) : rclcpp::Node(NODE_NAME, options), mLoopProfiler{get_logger()} {
std::string modelName;
float modelScoreThreshold{};
float modelNMSThreshold{};

std::vector<ParameterWrapper> params{
{"camera_frame", mCameraFrame, "zed_left_camera_frame"},
Expand All @@ -15,8 +14,8 @@ namespace mrover {
{"hitcount_threshold", mObjHitThreshold, 5},
{"hitcount_max", mObjMaxHitcount, 10},
{"model_name", modelName, "Large-Dataset"},
{"model_score_threshold", modelScoreThreshold, 0.75},
{"model_nms_threshold", modelNMSThreshold, 0.5}};
{"model_score_threshold", mModelScoreThreshold, 0.75},
{"model_nms_threshold", mModelNMSThreshold, 0.5}};

ParameterWrapper::declareParameters(this, params);

Expand All @@ -31,9 +30,11 @@ namespace mrover {
// Initialize TensorRT Inference Object and Get Important Output Information
mTensorRT = TensortRT{modelName, packagePath.string()};

mModel = Model(modelName, {0, 0}, {"bottle", "hammer"}, mTensorRT.getInputTensorSize(), mTensorRT.getOutputTensorSize(), {modelScoreThreshold, modelNMSThreshold}, ObjectDetectorBase::preprocessYOLOv8Input, ObjectDetectorBase::parseYOLOv8Output);
using namespace std::placeholders;

RCLCPP_INFO_STREAM(get_logger(), std::format("Object detector initialized with model: {} and thresholds: {} and {}", mModel.modelName, modelScoreThreshold, modelNMSThreshold));
mModel = Model(modelName, {0, 0}, {"bottle", "hammer"}, mTensorRT.getInputTensorSize(), mTensorRT.getOutputTensorSize(), [this](Model const& model, cv::Mat& rgbImage, cv::Mat& blobSizedImage, cv::Mat& blob){preprocessYOLOv8Input(model, rgbImage, blobSizedImage, blob);}, [this](Model const& model, cv::Mat& output, std::vector<Detection>& detections){parseYOLOv8Output(model, output, detections);});

RCLCPP_INFO_STREAM(get_logger(), std::format("Object detector initialized with model: {} and thresholds: {} and {}", mModel.modelName, mModelScoreThreshold, mModelNMSThreshold));
}

StereoObjectDetector::StereoObjectDetector(rclcpp::NodeOptions const& options) : ObjectDetectorBase(options) {
Expand Down
35 changes: 22 additions & 13 deletions perception/object_detector/object_detector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,16 @@
namespace mrover {

class ObjectDetectorBase : public rclcpp::Node {

protected:
struct Detection {
int classId{};
std::string className;
float confidence{};
cv::Rect box;

Detection(int _classId, std::string _className, float _confidence, cv::Rect _box) : classId{_classId}, className{_className}, confidence{_confidence}, box{_box}{}
};

struct Model {
std::string modelName;

Expand All @@ -18,20 +26,19 @@ namespace mrover {

std::vector<int64_t> outputTensorSize;

// Additional space for params that one algorithm might need and not others
std::vector<float> buffer;

// Converts an rgb image to a blob
std::function<void(Model const&, cv::Mat&, cv::Mat&, cv::Mat&)> rbgImageToBlob;
std::function<void(Model const&, cv::Mat&, cv::Mat&, cv::Mat&)> rgbImageToBlob;

// Converts an output tensor into a vector of detections
std::function<void(Model const&, cv::Mat&, std::vector<Detection>&)> outputTensorToDetections;

Model() = default;

Model(std::string _modelName, std::vector<int> _objectHitCounts, std::vector<std::string> _classes, std::vector<int64_t> _inputTensorSize, std::vector<int64_t> _outputTensorSize, std::vector<float> _buffer, std::function<void(Model const&, cv::Mat&, cv::Mat&, cv::Mat&)> _rbgImageToBlob, std::function<void(Model const&, cv::Mat&, std::vector<Detection>&)> _outputTensorToDetections) : modelName{std::move(_modelName)}, objectHitCounts(std::move(_objectHitCounts)), classes(std::move(_classes)), inputTensorSize(std::move(_inputTensorSize)), outputTensorSize(std::move(_outputTensorSize)), buffer(std::move(_buffer)), rbgImageToBlob{std::move(_rbgImageToBlob)}, outputTensorToDetections{std::move(_outputTensorToDetections)} {}
Model(std::string _modelName, std::vector<int> _objectHitCounts, std::vector<std::string> _classes, std::vector<int64_t> _inputTensorSize, std::vector<int64_t> _outputTensorSize, std::function<void(Model const&, cv::Mat&, cv::Mat&, cv::Mat&)> _rbgImageToBlob, std::function<void(Model const&, cv::Mat&, std::vector<Detection>&)> _outputTensorToDetections) : modelName{std::move(_modelName)}, objectHitCounts(std::move(_objectHitCounts)), classes(std::move(_classes)), inputTensorSize(std::move(_inputTensorSize)), outputTensorSize(std::move(_outputTensorSize)), rgbImageToBlob{std::move(_rbgImageToBlob)}, outputTensorToDetections{std::move(_outputTensorToDetections)} {}
};



static constexpr char const* NODE_NAME = "object_detector";

std::unique_ptr<tf2_ros::Buffer> mTfBuffer = std::make_unique<tf2_ros::Buffer>(get_clock());
Expand All @@ -57,6 +64,8 @@ namespace mrover {
int mObjDecrementWeight{};
int mObjHitThreshold{};
int mObjMaxHitcount{};
float mModelScoreThreshold{};
float mModelNMSThreshold{};

auto spiralSearchForValidPoint(sensor_msgs::msg::PointCloud2::UniquePtr const& cloudPtr,
std::size_t u, std::size_t v,
Expand All @@ -66,15 +75,15 @@ namespace mrover {
std::span<Detection const> detections,
cv::Size const& imageSize = {640, 640}) -> void;

auto publishDetectedObjects(cv::InputArray image) -> void;

static auto drawDetectionBoxes(cv::InputOutputArray image, std::span<Detection const> detections) -> void;
auto publishDetectedObjects(cv::InputArray const& image) -> void;

auto static parseYOLOv8Output(Model const& model,
static auto drawDetectionBoxes(cv::InputOutputArray& image, std::span<Detection const> detections) -> void;

auto parseYOLOv8Output(Model const& model,
cv::Mat& output,
std::vector<Detection>& detections) -> void;
std::vector<Detection>& detections) const -> void;

auto static preprocessYOLOv8Input(Model const& model, cv::Mat& rgbImage, cv::Mat& blobSizedImage, cv::Mat& blob) -> void;
static auto preprocessYOLOv8Input(Model const& model, cv::Mat const& rgbImage, cv::Mat& blobSizedImage, cv::Mat& blob) -> void;

public:
explicit ObjectDetectorBase(rclcpp::NodeOptions const& options = rclcpp::NodeOptions());
Expand Down Expand Up @@ -105,7 +114,7 @@ namespace mrover {
public:
explicit ImageObjectDetector(rclcpp::NodeOptions const& options = rclcpp::NodeOptions());

auto getTagBearing(cv::InputArray image, cv::Rect const& box) const -> float;
auto getTagBearing(cv::InputArray const& image, cv::Rect const& box) const -> float;

auto imageCallback(sensor_msgs::msg::Image::UniquePtr const& msg) -> void;
};
Expand Down
36 changes: 11 additions & 25 deletions perception/object_detector/object_detector.parsing.cpp
Original file line number Diff line number Diff line change
@@ -1,22 +1,18 @@
#include "object_detector.hpp"
namespace mrover {
auto ObjectDetectorBase::preprocessYOLOv8Input(Model const& model, cv::Mat& rgbImage, cv::Mat& blobSizedImage, cv::Mat& blob) -> void {
auto ObjectDetectorBase::preprocessYOLOv8Input(Model const& model, cv::Mat const& rgbImage, cv::Mat& blobSizedImage, cv::Mat& blob) -> void {
if (model.inputTensorSize.size() != 4) {
throw std::runtime_error("Expected Blob Size to be of size 4, are you using the correct model type?");
}

if (model.buffer.size() != 2) {
throw std::runtime_error("Expected 2 additional parameters!");
}

static constexpr double UCHAR_TO_DOUBLE = 1.0 / 255.0;

cv::Size blobSize{static_cast<int32_t>(model.inputTensorSize[2]), static_cast<int32_t>(model.inputTensorSize[3])};
cv::resize(rgbImage, blobSizedImage, blobSize);
cv::dnn::blobFromImage(blobSizedImage, blob, UCHAR_TO_DOUBLE, blobSize, cv::Scalar{}, false, false);
}

auto ObjectDetectorBase::parseYOLOv8Output(Model const& model, cv::Mat& output, std::vector<Detection>& detections) -> void {
auto ObjectDetectorBase::parseYOLOv8Output(Model const& model, cv::Mat& output, std::vector<Detection>& detections) const -> void {
// Parse model specific dimensioning from the output

// The input to this function is expecting a YOLOv8 style model, thus the dimensions should be > rows
Expand All @@ -26,12 +22,12 @@ namespace mrover {
throw std::runtime_error(message.data());
}

int rows = output.cols;
int dimensions = output.rows;
int numCandidates = output.cols;
int predictionDimension = output.rows;

// The output of the model is a batchSizex84x8400 matrix
// This converts the model to a TODO: Check this dimensioning
output = output.reshape(1, dimensions);
output = output.reshape(1, predictionDimension);
cv::transpose(output, output);

// This function expects the image to already be in the correct format thus no distrotion is needed
Expand All @@ -44,16 +40,16 @@ namespace mrover {
std::vector<cv::Rect> boxes;

// Each row of the output is a detection with a bounding box and associated class scores
for (int r = 0; r < rows; ++r) {
for (int r = 0; r < numCandidates; ++r) {
// Skip first four values as they are the box data
cv::Mat scores = output.row(r).colRange(4, dimensions);
cv::Mat scores = output.row(r).colRange(4, predictionDimension);

cv::Point classId;
double maxClassScore;
cv::minMaxLoc(scores, nullptr, &maxClassScore, nullptr, &classId);

// Determine if the class is an acceptable confidence level, otherwise disregard
if (maxClassScore <= model.buffer[0]) continue;
if (maxClassScore <= mModelScoreThreshold) continue;

confidences.push_back(static_cast<float>(maxClassScore));
classIds.push_back(classId.x);
Expand All @@ -77,22 +73,12 @@ namespace mrover {

//Coalesce the boxes into a smaller number of distinct boxes
std::vector<int> nmsResult;
cv::dnn::NMSBoxes(boxes, confidences, model.buffer[0], model.buffer[1], nmsResult);
cv::dnn::NMSBoxes(boxes, confidences, mModelScoreThreshold, mModelNMSThreshold, nmsResult);

// Fill in the output Detections Vector
for (int i: nmsResult) {
Detection result;

//Fill in the id and confidence for the class
result.classId = classIds[i];
result.confidence = confidences[i];

//Fill in the class name and box information
result.className = model.classes[result.classId];
result.box = boxes[i];

//Push back the detection into the for storagevector
detections.push_back(result);
//Push back the detection into the for storage vector
detections.emplace_back(classIds[i], model.classes[classIds[i]], confidences[i], boxes[i]);
}
}

Expand Down
10 changes: 5 additions & 5 deletions perception/object_detector/object_detector.processing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ namespace mrover {

// Convert the RGB Image into the blob Image format
cv::Mat blobSizedImage;
mModel.rbgImageToBlob(mModel, mRgbImage, blobSizedImage, mImageBlob);
mModel.rgbImageToBlob(mModel, mRgbImage, blobSizedImage, mImageBlob);

mLoopProfiler.measureEvent("Conversion");

Expand Down Expand Up @@ -151,7 +151,7 @@ namespace mrover {
}
}

auto ObjectDetectorBase::publishDetectedObjects(cv::InputArray image) -> void {
auto ObjectDetectorBase::publishDetectedObjects(cv::InputArray const& image) -> void {
mDetectionsImageMessage.header.stamp = get_clock()->now();
mDetectionsImageMessage.height = image.rows();
mDetectionsImageMessage.width = image.cols();
Expand Down Expand Up @@ -189,7 +189,7 @@ namespace mrover {

// Convert the RGB Image into the blob Image format
cv::Mat blobSizedImage;
mModel.rbgImageToBlob(mModel, mRgbImage, blobSizedImage, mImageBlob);
mModel.rgbImageToBlob(mModel, mRgbImage, blobSizedImage, mImageBlob);

mLoopProfiler.measureEvent("Conversion");

Expand All @@ -207,7 +207,7 @@ namespace mrover {
mrover::msg::ImageTarget target;
target.name = className;
target.bearing = getTagBearing(blobSizedImage, box);
targets.targets.push_back(target);
targets.targets.emplace_back(target);
}

mTargetsPub->publish(targets);
Expand All @@ -218,7 +218,7 @@ namespace mrover {
mLoopProfiler.measureEvent("Publication");
}

auto ImageObjectDetector::getTagBearing(cv::InputArray image, cv::Rect const& box) const -> float {
auto ImageObjectDetector::getTagBearing(cv::InputArray const& image, cv::Rect const& box) const -> float {
cv::Point2f center = cv::Point2f{box.tl()} + cv::Point2f{box.size()} / 2;
float xNormalized = center.x / static_cast<float>(image.cols());
float xRecentered = 0.5f - xNormalized;
Expand Down
Loading

0 comments on commit 8bff333

Please sign in to comment.