Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactoring TensorRT for future development #11

Closed
wants to merge 14 commits into from
21 changes: 15 additions & 6 deletions perception/object_detector/object_detector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,30 +4,39 @@ namespace mrover {

ObjectDetectorBase::ObjectDetectorBase() : rclcpp::Node(NODE_NAME), mLoopProfiler{get_logger()} {

    // Locals receiving the declared ROS parameters. They are consumed when
    // constructing mModel below and go out of scope when this constructor returns.
    std::string modelName;
    float modelScoreThreshold{};
    float modelNMSThreshold{};

    std::vector<ParameterWrapper> params{
            {"camera_frame", mCameraFrame, "zed_left_camera_frame"},
            {"world_frame", mWorldFrame, "map"},
            {"increment_weight", mObjIncrementWeight, 2},
            {"decrement_weight", mObjDecrementWeight, 1},
            {"hitcount_threshold", mObjHitThreshold, 5},
            {"hitcount_max", mObjMaxHitcount, 10},
            {"model_name", modelName, "Large-Dataset"},
            {"model_score_threshold", modelScoreThreshold, 0.75},
            {"model_nms_threshold", modelNMSThreshold, 0.5}};

    // Declares every parameter above on this node and fills in the bound variables.
    ParameterWrapper::declareParameters(this, params);

    // NOTE(review): assumes a colcon workspace layout where the install prefix sits
    // at <ws>/install/mrover next to <ws>/src/mrover — confirm for non-source installs.
    std::filesystem::path packagePath = std::filesystem::path{ament_index_cpp::get_package_prefix("mrover")} / ".." / ".." / "src" / "mrover";

    RCLCPP_INFO_STREAM(get_logger(), "Opening Model " << modelName);

    RCLCPP_INFO_STREAM(get_logger(), "Found package path " << packagePath);

    // Initialize the TensorRT inference object first: its input/output tensor
    // sizes parameterize the Model description built next.
    mTensorRT = TensortRT{modelName, packagePath.string()};

    // TODO(quintin): Do not hard code exactly two classes / the class names here.
    mModel = Model(modelName, {0, 0}, {"bottle", "hammer"}, mTensorRT.getInputTensorSize(), mTensorRT.getOutputTensorSize(), {modelScoreThreshold, modelNMSThreshold}, ObjectDetectorBase::preprocessYOLOv8Input, ObjectDetectorBase::parseYOLOv8Output);

    mDebugImgPub = create_publisher<sensor_msgs::msg::Image>("object_detector/debug_img", 1);

    RCLCPP_INFO_STREAM(get_logger(), std::format("Object detector initialized with model: {} and thresholds: {} and {}", mModel.modelName, modelScoreThreshold, modelNMSThreshold));
}

StereoObjectDetector::StereoObjectDetector() {
Expand Down
41 changes: 34 additions & 7 deletions perception/object_detector/object_detector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,36 +7,57 @@ namespace mrover {
class ObjectDetectorBase : public rclcpp::Node {

protected:
struct Model {
std::string modelName;

std::vector<int> objectHitCounts;

std::vector<std::string> classes;

std::vector<int64_t> inputTensorSize;

std::vector<int64_t> outputTensorSize;

// Additional space for params that one algorithm might need and not others
std::vector<float> buffer;

// Converts an rgb image to a blob
std::function<void(Model const&, cv::Mat&, cv::Mat&, cv::Mat&)> rbgImageToBlob;

// Converts an output tensor into a vector of detections
std::function<void(Model const&, cv::Mat&, std::vector<Detection>&)> outputTensorToDetections;

Model() = default;

Model(std::string _modelName, std::vector<int> _objectHitCounts, std::vector<std::string> _classes, std::vector<int64_t> _inputTensorSize, std::vector<int64_t> _outputTensorSize, std::vector<float> _buffer, std::function<void(Model const&, cv::Mat&, cv::Mat&, cv::Mat&)> _rbgImageToBlob, std::function<void(Model const&, cv::Mat&, std::vector<Detection>&)> _outputTensorToDetections) : modelName{std::move(_modelName)}, objectHitCounts(std::move(_objectHitCounts)), classes(std::move(_classes)), inputTensorSize(std::move(_inputTensorSize)), outputTensorSize(std::move(_outputTensorSize)), buffer(std::move(_buffer)), rbgImageToBlob{std::move(_rbgImageToBlob)}, outputTensorToDetections{std::move(_outputTensorToDetections)} {}
};

static constexpr char const* NODE_NAME = "object_detector";

std::unique_ptr<tf2_ros::Buffer> mTfBuffer = std::make_unique<tf2_ros::Buffer>(get_clock());
std::shared_ptr<tf2_ros::TransformBroadcaster> mTfBroadcaster = std::make_shared<tf2_ros::TransformBroadcaster>(this);
std::shared_ptr<tf2_ros::TransformListener> mTfListener = std::make_shared<tf2_ros::TransformListener>(*mTfBuffer);

Model mModel;

std::string mCameraFrame;
std::string mWorldFrame;

std::string mModelName;
cv::Mat mRgbImage, mImageBlob;

LoopProfiler mLoopProfiler;

TensortRT mTensorRT;

cv::Mat mRgbImage, mImageBlob;
sensor_msgs::msg::Image mDetectionsImageMessage;

rclcpp::Publisher<sensor_msgs::msg::Image>::SharedPtr mDebugImgPub;
rclcpp::Subscription<sensor_msgs::msg::PointCloud2>::SharedPtr mSensorSub;

// TODO(quintin): Do not hard code exactly two classes
std::vector<int> mObjectHitCounts{0, 0};

int mObjIncrementWeight{};
int mObjDecrementWeight{};
int mObjHitThreshold{};
int mObjMaxHitcount{};
float mModelScoreThreshold{};
float mModelNmsThreshold{};

auto spiralSearchForValidPoint(sensor_msgs::msg::PointCloud2::UniquePtr const& cloudPtr,
std::size_t u, std::size_t v,
Expand All @@ -50,6 +71,12 @@ namespace mrover {

static auto drawDetectionBoxes(cv::InputOutputArray image, std::span<Detection const> detections) -> void;

auto static parseYOLOv8Output(Model const& model,
cv::Mat& output,
std::vector<Detection>& detections) -> void;

auto static preprocessYOLOv8Input(Model const& model, cv::Mat& rgbImage, cv::Mat& blobSizedImage, cv::Mat& blob) -> void;

public:
explicit ObjectDetectorBase();

Expand Down
99 changes: 99 additions & 0 deletions perception/object_detector/object_detector.parsing.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#include "object_detector.hpp"
namespace mrover {
auto ObjectDetectorBase::preprocessYOLOv8Input(Model const& model, cv::Mat& rgbImage, cv::Mat& blobSizedImage, cv::Mat& blob) -> void {
    // A YOLOv8 model takes a rank-4 (NCHW) input tensor; anything else means the
    // wrong model description was paired with this preprocessor.
    if (model.inputTensorSize.size() != 4) {
        throw std::runtime_error("Expected Blob Size to be of size 4, are you using the correct model type?");
    }

    // This model variant carries exactly two extra parameters (score + NMS thresholds).
    if (model.buffer.size() != 2) {
        throw std::runtime_error("Expected 2 additional parameters!");
    }

    // Scale factor mapping 8-bit pixel values into the [0, 1] range.
    static constexpr double BYTE_TO_UNIT = 1.0 / 255.0;

    // NOTE(review): assumes inputTensorSize[2]/[3] are width/height — for the
    // square YOLOv8 input the order is immaterial, but confirm for non-square models.
    cv::Size inputSize{static_cast<int32_t>(model.inputTensorSize[2]), static_cast<int32_t>(model.inputTensorSize[3])};
    cv::resize(rgbImage, blobSizedImage, inputSize);
    cv::dnn::blobFromImage(blobSizedImage, blob, BYTE_TO_UNIT, inputSize, cv::Scalar{}, false, false);
}

auto ObjectDetectorBase::parseYOLOv8Output(Model const& model, cv::Mat& output, std::vector<Detection>& detections) -> void {
    // A YOLOv8 head emits a [dimensions x candidates] tensor (e.g. 84 x 8400),
    // where dimensions = 4 box values + one score per class, so cols > rows.
    if (output.cols <= output.rows) {
        // std::format avoids the fixed-size snprintf buffer the original used,
        // which could silently truncate the message.
        throw std::runtime_error(std::format("Something is wrong with the model interpretation: {} cols and {} rows", output.cols, output.rows));
    }

    int rows = output.cols;       // number of candidate detections
    int dimensions = output.rows; // 4 box values + per-class scores

    // Reshape to [dimensions x rows] and transpose so each row is one candidate:
    // [x, y, w, h, score_0, ..., score_{numClasses-1}].
    output = output.reshape(1, dimensions);
    cv::transpose(output, output);

    // The image was already resized to the network input during preprocessing,
    // so no distortion correction is needed here.
    float const xFactor = 1.0;
    float const yFactor = 1.0;

    // Intermediate storage for candidates that survive the score threshold.
    std::vector<int> classIds;
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;

    // Each row of the output is a detection with a bounding box and associated class scores.
    for (int r = 0; r < rows; ++r) {
        // Skip the first four values: they are the box data, not scores.
        cv::Mat scores = output.row(r).colRange(4, dimensions);

        cv::Point classId;
        double maxClassScore;
        cv::minMaxLoc(scores, nullptr, &maxClassScore, nullptr, &classId);

        // buffer[0] is the model score threshold; disregard weak candidates.
        if (maxClassScore <= model.buffer[0]) continue;

        confidences.push_back(static_cast<float>(maxClassScore));
        classIds.push_back(classId.x);

        // Columns [0, 4) hold the box as (center x, center y, width, height).
        cv::Mat box = output.row(r).colRange(0, 4);
        auto x = box.at<float>(0);
        auto y = box.at<float>(1);
        auto w = box.at<float>(2);
        auto h = box.at<float>(3);

        // Convert the center/size box into an integer top-left/size pixel rect.
        auto left = static_cast<int>((x - 0.5 * w) * xFactor);
        auto top = static_cast<int>((y - 0.5 * h) * yFactor);
        auto width = static_cast<int>(w * xFactor);
        auto height = static_cast<int>(h * yFactor);

        // Push back the box into storage.
        boxes.emplace_back(left, top, width, height);
    }

    // Coalesce overlapping boxes: buffer[0] = score threshold, buffer[1] = NMS threshold.
    std::vector<int> nmsResult;
    cv::dnn::NMSBoxes(boxes, confidences, model.buffer[0], model.buffer[1], nmsResult);

    // Fill in the output detections vector from the surviving candidates.
    for (int i: nmsResult) {
        Detection result;

        // Fill in the id and confidence for the class.
        result.classId = classIds[i];
        result.confidence = confidences[i];

        // Fill in the class name and box information.
        result.className = model.classes[result.classId];
        result.box = boxes[i];

        // Push the detection into storage.
        detections.push_back(result);
    }
}

}; // namespace mrover
23 changes: 15 additions & 8 deletions perception/object_detector/object_detector.processing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,26 @@ namespace mrover {
RCLCPP_INFO_STREAM(get_logger(), std::format("Image size changed from [{}, {}] to [{}, {}]", mRgbImage.cols, mRgbImage.rows, msg->width, msg->height));
mRgbImage = cv::Mat{static_cast<int>(msg->height), static_cast<int>(msg->width), CV_8UC3, cv::Scalar{0, 0, 0, 0}};
}

// if 0x0 image
if (!mRgbImage.total()) {
return;
}

convertPointCloudToRGB(msg, mRgbImage);

// TODO(quintin): Avoid hard coding blob size
cv::Size blobSize{640, 640};
// Convert the RGB Image into the blob Image format
cv::Mat blobSizedImage;
cv::resize(mRgbImage, blobSizedImage, blobSize);
cv::dnn::blobFromImage(blobSizedImage, mImageBlob, 1.0 / 255.0, blobSize, cv::Scalar{}, false, false);
mModel.rbgImageToBlob(mModel, mRgbImage, blobSizedImage, mImageBlob);

mLoopProfiler.measureEvent("Conversion");

// Run the blob through the model
std::vector<Detection> detections{};
mTensorRT.modelForwardPass(mImageBlob, detections, mModelScoreThreshold, mModelNmsThreshold);
cv::Mat outputTensor;
mTensorRT.modelForwardPass(mImageBlob, outputTensor);

mModel.outputTensorToDetections(mModel, outputTensor, detections);

mLoopProfiler.measureEvent("Execution");

Expand Down Expand Up @@ -66,10 +73,10 @@ namespace mrover {
// Push the immediate detections to the camera frame
SE3Conversions::pushToTfTree(*mTfBroadcaster, objectImmediateFrame, mCameraFrame, objectInCamera.value(), get_clock()->now());
// Since the object is seen we need to increment the hit counter
mObjectHitCounts[classId] = std::min(mObjMaxHitcount, mObjectHitCounts[classId] + mObjIncrementWeight);
mModel.objectHitCounts[classId] = std::min(mObjMaxHitcount, mModel.objectHitCounts[classId] + mObjIncrementWeight);

// Only publish to permament if we are confident in the object
if (mObjectHitCounts[classId] > mObjHitThreshold) {
if (mModel.objectHitCounts[classId] > mObjHitThreshold) {
std::string objectPermanentFrame = className;
// Grab the object inside of the camera frame and push it into the map frame
SE3d objectInMap = SE3Conversions::fromTfTree(*mTfBuffer, objectImmediateFrame, mWorldFrame);
Expand All @@ -90,7 +97,7 @@ namespace mrover {
if (seenObjects[i]) continue;

assert(i < mObjectHitCounts.size());
mObjectHitCounts[i] = std::max(0, mObjectHitCounts[i] - mObjDecrementWeight);
mModel.objectHitCounts[i] = std::max(0, mModel.objectHitCounts[i] - mObjDecrementWeight);
}
}

Expand Down
1 change: 1 addition & 0 deletions perception/object_detector/pch.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <filesystem>
#include <format>
#include <fstream>
#include <functional>
#include <iostream>
#include <limits>
#include <memory>
Expand Down
Loading