Long Range Object Detector #16
base: main
@@ -7,36 +7,56 @@ namespace mrover {

    class ObjectDetectorBase : public rclcpp::Node {

    protected:
        struct Model {
            std::string modelName;

            std::vector<int> objectHitCounts;

            std::vector<std::string> classes;

            std::vector<int64_t> inputTensorSize;

            std::vector<int64_t> outputTensorSize;

            // Additional space for params that one algorithm might need and not others
            std::vector<float> buffer;

            // Converts an rgb image to a blob
            std::function<void(Model const&, cv::Mat&, cv::Mat&, cv::Mat&)> rbgImageToBlob;

            // Converts an output tensor into a vector of detections
            std::function<void(Model const&, cv::Mat&, std::vector<Detection>&)> outputTensorToDetections;

            Model() = default;

            Model(std::string _modelName, std::vector<int> _objectHitCounts, std::vector<std::string> _classes, std::vector<int64_t> _inputTensorSize, std::vector<int64_t> _outputTensorSize, std::vector<float> _buffer, std::function<void(Model const&, cv::Mat&, cv::Mat&, cv::Mat&)> _rbgImageToBlob, std::function<void(Model const&, cv::Mat&, std::vector<Detection>&)> _outputTensorToDetections) : modelName{std::move(_modelName)}, objectHitCounts(std::move(_objectHitCounts)), classes(std::move(_classes)), inputTensorSize(std::move(_inputTensorSize)), outputTensorSize(std::move(_outputTensorSize)), buffer(std::move(_buffer)), rbgImageToBlob{std::move(_rbgImageToBlob)}, outputTensorToDetections{std::move(_outputTensorToDetections)} {}
        };

        static constexpr char const* NODE_NAME = "object_detector";

        std::unique_ptr<tf2_ros::Buffer> mTfBuffer = std::make_unique<tf2_ros::Buffer>(get_clock());
        std::shared_ptr<tf2_ros::TransformBroadcaster> mTfBroadcaster = std::make_shared<tf2_ros::TransformBroadcaster>(this);
        std::shared_ptr<tf2_ros::TransformListener> mTfListener = std::make_shared<tf2_ros::TransformListener>(*mTfBuffer);

        Model mModel;

        std::string mCameraFrame;
        std::string mWorldFrame;

        std::string mModelName;
        cv::Mat mRgbImage, mImageBlob;

        LoopProfiler mLoopProfiler;

        TensortRT mTensorRT;

        cv::Mat mRgbImage, mImageBlob;
        sensor_msgs::msg::Image mDetectionsImageMessage;

        rclcpp::Publisher<sensor_msgs::msg::Image>::SharedPtr mDebugImgPub;
        rclcpp::Subscription<sensor_msgs::msg::PointCloud2>::SharedPtr mSensorSub;

        // TODO(quintin): Do not hard code exactly two classes
        std::vector<int> mObjectHitCounts{0, 0};

        int mObjIncrementWeight{};
        int mObjDecrementWeight{};
        int mObjHitThreshold{};
        int mObjMaxHitcount{};
        float mModelScoreThreshold{};
        float mModelNmsThreshold{};

        auto spiralSearchForValidPoint(sensor_msgs::msg::PointCloud2::UniquePtr const& cloudPtr,
                                       std::size_t u, std::size_t v,
@@ -50,18 +70,43 @@ namespace mrover {

Review comment: image should be const ref.
Reply: Ok, I changed publishDetectedObjects to const ref, drawDetectionBoxes to just ref, and rgbImage in preprocessYOLOv8Input to const ref; let me know if I missed any.
        static auto drawDetectionBoxes(cv::InputOutputArray image, std::span<Detection const> detections) -> void;

        auto static parseYOLOv8Output(Model const& model,
                                      cv::Mat& output,
                                      std::vector<Detection>& detections) -> void;

        auto static preprocessYOLOv8Input(Model const& model, cv::Mat& rgbImage, cv::Mat& blobSizedImage, cv::Mat& blob) -> void;

    public:
        explicit ObjectDetectorBase();

        ~ObjectDetectorBase() override = default;
    };

    class StereoObjectDetector final : public ObjectDetectorBase {
    private:
        rclcpp::Subscription<sensor_msgs::msg::PointCloud2>::SharedPtr mSensorSub;

    public:
        explicit StereoObjectDetector();

        static auto convertPointCloudToRGB(sensor_msgs::msg::PointCloud2::UniquePtr const& msg, cv::Mat const& image) -> void;

        auto pointCloudCallback(sensor_msgs::msg::PointCloud2::UniquePtr const& msg) -> void;
    };

    class ImageObjectDetector final : public ObjectDetectorBase {
    private:
        rclcpp::Publisher<mrover::msg::ImageTargets>::SharedPtr mTargetsPub;

        rclcpp::Subscription<sensor_msgs::msg::Image>::SharedPtr mSensorSub;

        float mCameraHorizontalFov{};

    public:
        explicit ImageObjectDetector();

        auto getTagBearing(cv::InputArray image, cv::Rect const& box) const -> float;
Review comment: image const ref.
Reply: adjusted.
        auto imageCallback(sensor_msgs::msg::Image::UniquePtr const& msg) -> void;
    };
} // namespace mrover
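For orientation, here is one way the Model struct above could be filled in for a YOLOv8-style network inside the ObjectDetectorBase constructor. This is only a sketch under assumptions: the class labels and tensor sizes are illustrative, and it presumes buffer[0] carries the score threshold and buffer[1] the NMS threshold, which is how parseYOLOv8Output in the source file below reads them. It is not the PR's actual construction code.

    // Hypothetical wiring inside ObjectDetectorBase's constructor (sketch, not from this PR).
    mModel = Model{mModelName,
                   {0, 0},                                     // objectHitCounts, one counter per class
                   {"bottle", "hammer"},                       // example class labels only
                   {1, 3, 640, 640},                           // input tensor: batch x channels x height x width
                   {1, 6, 8400},                               // output tensor: batch x (4 box values + class scores) x candidates
                   {mModelScoreThreshold, mModelNmsThreshold}, // buffer[0] = score threshold, buffer[1] = NMS threshold
                   preprocessYOLOv8Input,                      // rbgImageToBlob
                   parseYOLOv8Output};                         // outputTensorToDetections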
@@ -0,0 +1,99 @@

#include "object_detector.hpp"

namespace mrover {

    auto ObjectDetectorBase::preprocessYOLOv8Input(Model const& model, cv::Mat& rgbImage, cv::Mat& blobSizedImage, cv::Mat& blob) -> void {
        if (model.inputTensorSize.size() != 4) {
            throw std::runtime_error("Expected Blob Size to be of size 4, are you using the correct model type?");
        }

        if (model.buffer.size() != 2) {
            throw std::runtime_error("Expected 2 additional parameters!");
        }

        static constexpr double UCHAR_TO_DOUBLE = 1.0 / 255.0;

        cv::Size blobSize{static_cast<int32_t>(model.inputTensorSize[2]), static_cast<int32_t>(model.inputTensorSize[3])};
        cv::resize(rgbImage, blobSizedImage, blobSize);
        cv::dnn::blobFromImage(blobSizedImage, blob, UCHAR_TO_DOUBLE, blobSize, cv::Scalar{}, false, false);
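        // Note: with these arguments, cv::dnn::blobFromImage produces a 4-D float blob in
        // NCHW order (1 x 3 x blobSize.height x blobSize.width). Channel values are scaled
        // from [0, 255] to [0.0, 1.0] by UCHAR_TO_DOUBLE, and with swapRB and crop both
        // false the image is assumed to already be in the channel order the network expects.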
    }

    auto ObjectDetectorBase::parseYOLOv8Output(Model const& model, cv::Mat& output, std::vector<Detection>& detections) -> void {
        // Parse model-specific dimensioning from the output

        // This function expects a YOLOv8-style model output, so there should be more columns
        // (candidate detections) than rows (values per detection)
        if (output.cols <= output.rows) {
            std::array<char, 75> message{};
            snprintf(message.data(), message.size(), "Something is wrong model interpretation, %d cols and %d rows", output.cols, output.rows);
            throw std::runtime_error(message.data());
        }

        int rows = output.cols;
        int dimensions = output.rows;
Review comment: can you name this more verbosely?
Reply: Changed.
        // The output of the model is a batchSize x 84 x 8400 matrix
        // Reshape and transpose so that each row becomes one candidate detection. TODO: Check this dimensioning
        output = output.reshape(1, dimensions);
        cv::transpose(output, output);

        // This function expects the image to already be in the correct format, thus no distortion is needed
        float const xFactor = 1.0;
        float const yFactor = 1.0;

        // Intermediate storage containers
        std::vector<int> classIds;
        std::vector<float> confidences;
        std::vector<cv::Rect> boxes;

        // Each row of the output is a detection with a bounding box and associated class scores
        for (int r = 0; r < rows; ++r) {
            // Skip the first four values as they are the box data
            cv::Mat scores = output.row(r).colRange(4, dimensions);

            cv::Point classId;
            double maxClassScore;
            cv::minMaxLoc(scores, nullptr, &maxClassScore, nullptr, &classId);

            // Determine if the class score meets the confidence threshold, otherwise disregard this detection
            if (maxClassScore <= model.buffer[0]) continue;

            confidences.push_back(static_cast<float>(maxClassScore));
            classIds.push_back(classId.x);

            // Get the bounding box data
            cv::Mat box = output.row(r).colRange(0, 4);
            auto x = box.at<float>(0);
            auto y = box.at<float>(1);
            auto w = box.at<float>(2);
            auto h = box.at<float>(3);

            // Cast the corners into integers to be used on pixels
            auto left = static_cast<int>((x - 0.5 * w) * xFactor);
            auto top = static_cast<int>((y - 0.5 * h) * yFactor);
            auto width = static_cast<int>(w * xFactor);
            auto height = static_cast<int>(h * yFactor);

            // Push back the box into storage
            boxes.emplace_back(left, top, width, height);
        }

        // Coalesce the boxes into a smaller number of distinct boxes
        std::vector<int> nmsResult;
        cv::dnn::NMSBoxes(boxes, confidences, model.buffer[0], model.buffer[1], nmsResult);

        // Fill in the output detections vector
        for (int i: nmsResult) {
            Detection result;

            // Fill in the id and confidence for the class
            result.classId = classIds[i];
            result.confidence = confidences[i];

            // Fill in the class name and box information
            result.className = model.classes[result.classId];
            result.box = boxes[i];

            // Push back the detection into the storage vector
            detections.push_back(result);
Review comment: emplace_back.
Reply: done.

        }

Review comment on lines +82 to +96: Can this be made more concise?

    }

}; // namespace mrover
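On the conciseness question above, the final loop could plausibly be tightened along these lines. This is a sketch only, not code from the PR, and it assumes Detection is an aggregate whose members are declared in the order classId, className, confidence, box, matching the assignments above.

        // Hypothetical condensed version of the loop over nmsResult (assumes the
        // Detection member order classId, className, confidence, box).
        for (int i: nmsResult) {
            detections.emplace_back(Detection{classIds[i],
                                              model.classes[classIds[i]],
                                              confidences[i],
                                              boxes[i]});
        }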
Review comment: rethink the buffer, unspecified size is not ideal.
Reply: Yes, this was a bad feature; making non-static calls is definitely the move.
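To illustrate the direction hinted at in the reply, a buffer replacement could look roughly like the sketch below: named threshold members instead of an untyped std::vector<float>, and virtual member functions (the non-static calls mentioned above) instead of std::function fields. This is an assumed design, not code from this PR, and it reuses the project's existing cv::Mat and Detection types.

    // Hypothetical redesign of Model without the untyped buffer (sketch, not from this PR).
    struct ModelBase {
        std::string modelName;
        std::vector<std::string> classes;
        std::vector<int64_t> inputTensorSize;
        std::vector<int64_t> outputTensorSize;

        virtual ~ModelBase() = default;

        // Convert an RGB image into the network's input blob.
        virtual void rgbImageToBlob(cv::Mat const& rgbImage, cv::Mat& blobSizedImage, cv::Mat& blob) const = 0;

        // Convert the raw output tensor into detections.
        virtual void outputTensorToDetections(cv::Mat& outputTensor, std::vector<Detection>& detections) const = 0;
    };

    struct YOLOv8Model final : ModelBase {
        float scoreThreshold{0.5F}; // previously buffer[0]
        float nmsThreshold{0.5F};   // previously buffer[1]

        void rgbImageToBlob(cv::Mat const& rgbImage, cv::Mat& blobSizedImage, cv::Mat& blob) const override;
        void outputTensorToDetections(cv::Mat& outputTensor, std::vector<Detection>& detections) const override;
    };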