Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Long Range Object Detector #16

Open
wants to merge 26 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ ament_target_dependencies(arm_controller rclcpp)

if (Gst_FOUND AND GstApp_FOUND)
mrover_add_node(usb_camera esw/usb_camera/*.cpp esw/usb_camera/pch.hpp)
target_link_libraries(usb_camera PkgConfig::Gst PkgConfig::GstApp opencv_core opencv_imgcodecs)
target_link_libraries(usb_camera PkgConfig::Gst PkgConfig::GstApp opencv_core opencv_imgcodecs parameter_utils)
ament_target_dependencies(usb_camera rclcpp)

if (LibUdev_FOUND)
Expand Down
2 changes: 2 additions & 0 deletions esw/usb_camera/pch.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,5 @@
#include <sensor_msgs/image_encodings.hpp>
#include <sensor_msgs/msg/camera_info.hpp>
#include <sensor_msgs/msg/image.hpp>

#include <parameter.hpp>
45 changes: 20 additions & 25 deletions esw/usb_camera/usb_camera.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,33 +13,28 @@ namespace mrover {
UsbCamera::UsbCamera() : Node{"usb_camera", rclcpp::NodeOptions{}.use_intra_process_comms(true)} {
try {
/* Parameters */
declare_parameter("width", rclcpp::ParameterType::PARAMETER_INTEGER);
declare_parameter("height", rclcpp::ParameterType::PARAMETER_INTEGER);
declare_parameter("framerate", rclcpp::ParameterType::PARAMETER_INTEGER);
declare_parameter("device", rclcpp::ParameterType::PARAMETER_STRING, [] {
rcl_interfaces::msg::ParameterDescriptor d;
d.read_only = true;
d.description = "The device path to the USB camera, e.g. /dev/video0";
return d;
}());
declare_parameter("decode_jpeg_from_device", rclcpp::ParameterType::PARAMETER_BOOL, [] {
rcl_interfaces::msg::ParameterDescriptor d;
d.read_only = true;
d.description = "If true, decode JPEG directly from the USB device instead of YUY2. "
"This decreases image quality but increases performance and reduces USB bus load. "
"It also reduces the 'jelly' effect caused by large YUY2 formats.";
return d;
}());

mWidth = get_parameter("width").as_int();
mHeight = get_parameter("height").as_int();
std::int64_t framerate = get_parameter("framerate").as_int();
std::string device = get_parameter("device").as_string();
bool decodeJpegFromDevice = get_parameter("decode_jpeg_from_device").as_bool();
int framerate{};
std::string device{};
std::string imageTopicName{};
std::string cameraInfoTopicName{};
double watchdogTimeout{};
bool decodeJpegFromDevice{};

std::vector<ParameterWrapper> params{
{"width", mWidth, 640},
{"height", mHeight, 480},
{"framerate", framerate, 30},
{"device", device, "/dev/video2"},
{"image_topic", imageTopicName, "/usb_camera/image"},
{"camera_info_topic", cameraInfoTopicName, "/usb_camera/camera_info"},
{"watchdog_timeout", watchdogTimeout, 1.0},
{"decode_jpeg_from_device", decodeJpegFromDevice, false}};

ParameterWrapper::declareParameters(this, params);

/* Interfaces */
mImgPub = create_publisher<sensor_msgs::msg::Image>("image", 1);
mCamInfoPub = create_publisher<sensor_msgs::msg::CameraInfo>("camera_info", 1);
mImgPub = create_publisher<sensor_msgs::msg::Image>(imageTopicName, 1);
mCamInfoPub = create_publisher<sensor_msgs::msg::CameraInfo>(cameraInfoTopicName, 1);

/* Pipeline */
gst_init(nullptr, nullptr);
Expand Down
2 changes: 1 addition & 1 deletion esw/usb_camera/usb_camera.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ namespace mrover {
rclcpp::Publisher<sensor_msgs::msg::CameraInfo>::SharedPtr mCamInfoPub;
rclcpp::Publisher<sensor_msgs::msg::Image>::SharedPtr mImgPub;

std::uint16_t mWidth{}, mHeight{};
int mWidth{}, mHeight{};

GstElement *mStreamSink{}, *mPipeline{};
GMainLoop* mMainLoop{};
Expand Down
54 changes: 45 additions & 9 deletions perception/object_detector/object_detector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
namespace mrover {

ObjectDetectorBase::ObjectDetectorBase() : rclcpp::Node(NODE_NAME), mLoopProfiler{get_logger()} {
std::string modelName;
float modelScoreThreshold{};
float modelNMSThreshold{};

std::vector<ParameterWrapper> params{
{"camera_frame", mCameraFrame, "zed_left_camera_frame"},
Expand All @@ -11,36 +14,69 @@ namespace mrover {
{"decrement_weight", mObjDecrementWeight, 1},
{"hitcount_threshold", mObjHitThreshold, 5},
{"hitcount_max", mObjMaxHitcount, 10},
{"model_name", mModelName, "Large-Dataset"},
{"model_score_threshold", mModelScoreThreshold, 0.75},
{"model_nms_threshold", mModelNmsThreshold, 0.5}};
{"model_name", modelName, "Large-Dataset"},
{"model_score_threshold", modelScoreThreshold, 0.75},
{"model_nms_threshold", modelNMSThreshold, 0.5}};

ParameterWrapper::declareParameters(this, params);

// All of these variables will be invalidated after calling this function

std::filesystem::path packagePath = std::filesystem::path{ament_index_cpp::get_package_prefix("mrover")} / ".." / ".." / "src" / "mrover";

RCLCPP_INFO_STREAM(get_logger(), "Opening Model " << mModelName);
RCLCPP_INFO_STREAM(get_logger(), "Opening Model " << modelName);

RCLCPP_INFO_STREAM(get_logger(), "Found package path " << packagePath);

mTensorRT = TensortRT{mModelName, packagePath.string()};
// Initialize TensorRT Inference Object and Get Important Output Information
mTensorRT = TensortRT{modelName, packagePath.string()};

mDebugImgPub = create_publisher<sensor_msgs::msg::Image>("object_detector/debug_img", 1);
mModel = Model(modelName, {0, 0}, {"bottle", "hammer"}, mTensorRT.getInputTensorSize(), mTensorRT.getOutputTensorSize(), {modelScoreThreshold, modelNMSThreshold}, ObjectDetectorBase::preprocessYOLOv8Input, ObjectDetectorBase::parseYOLOv8Output);

RCLCPP_INFO_STREAM(get_logger(), std::format("Object detector initialized with model: {} and thresholds: {} and {}", mModelName, mModelScoreThreshold, mModelNmsThreshold));
RCLCPP_INFO_STREAM(get_logger(), std::format("Object detector initialized with model: {} and thresholds: {} and {}", mModel.modelName, modelScoreThreshold, modelNMSThreshold));
}

StereoObjectDetector::StereoObjectDetector() {
mSensorSub = create_subscription<sensor_msgs::msg::PointCloud2>("/camera/left/points", 1, [this](sensor_msgs::msg::PointCloud2::UniquePtr const& msg) {
RCLCPP_INFO_STREAM(get_logger(), "Creating Stereo Object Detector...");

mDebugImgPub = create_publisher<sensor_msgs::msg::Image>("/stereo_object_detector/debug_img", 1);

mSensorSub = create_subscription<sensor_msgs::msg::PointCloud2>("/zed/left/points", 1, [this](sensor_msgs::msg::PointCloud2::UniquePtr const& msg) {
StereoObjectDetector::pointCloudCallback(msg);
});
}

ImageObjectDetector::ImageObjectDetector() {
RCLCPP_INFO_STREAM(get_logger(), "Creating Image Object Detector...");

std::vector<ParameterWrapper> params{
{"long_range_camera/fov", mCameraHorizontalFov, 80.0}};

ParameterWrapper::declareParameters(this, params);

mDebugImgPub = create_publisher<sensor_msgs::msg::Image>("/long_range_object_detector/debug_img", 1);

mSensorSub = create_subscription<sensor_msgs::msg::Image>("/usb_camera/image", 1, [this](sensor_msgs::msg::Image::UniquePtr const& msg) {
ImageObjectDetector::imageCallback(msg);
});

mTargetsPub = create_publisher<mrover::msg::ImageTargets>("/long_range_camera/objects", 1);
}
} // namespace mrover


auto main(int argc, char** argv) -> int {
rclcpp::init(argc, argv);
rclcpp::spin(std::make_shared<mrover::StereoObjectDetector>());

// DO NOT REMOVE OR ELSE REF COUNT WILL GO TO ZERO
auto imgOD = std::make_shared<mrover::ImageObjectDetector>();
auto stereoOD = std::make_shared<mrover::StereoObjectDetector>();

rclcpp::executors::SingleThreadedExecutor executor;
executor.add_node(imgOD);
executor.add_node(stereoOD);
executor.spin();

rclcpp::shutdown();
return EXIT_SUCCESS;
}
61 changes: 53 additions & 8 deletions perception/object_detector/object_detector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,36 +7,56 @@ namespace mrover {
class ObjectDetectorBase : public rclcpp::Node {

protected:
struct Model {
std::string modelName;

std::vector<int> objectHitCounts;

std::vector<std::string> classes;

std::vector<int64_t> inputTensorSize;

std::vector<int64_t> outputTensorSize;

// Additional space for params that one algorithm might need and not others
std::vector<float> buffer;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rethink the buffer, unspecified size is not ideal

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this was a bad feature making non static calls is def the move


// Converts an rgb image to a blob
std::function<void(Model const&, cv::Mat&, cv::Mat&, cv::Mat&)> rbgImageToBlob;

// Converts an output tensor into a vector of detections
std::function<void(Model const&, cv::Mat&, std::vector<Detection>&)> outputTensorToDetections;

Model() = default;

Model(std::string _modelName, std::vector<int> _objectHitCounts, std::vector<std::string> _classes, std::vector<int64_t> _inputTensorSize, std::vector<int64_t> _outputTensorSize, std::vector<float> _buffer, std::function<void(Model const&, cv::Mat&, cv::Mat&, cv::Mat&)> _rbgImageToBlob, std::function<void(Model const&, cv::Mat&, std::vector<Detection>&)> _outputTensorToDetections) : modelName{std::move(_modelName)}, objectHitCounts(std::move(_objectHitCounts)), classes(std::move(_classes)), inputTensorSize(std::move(_inputTensorSize)), outputTensorSize(std::move(_outputTensorSize)), buffer(std::move(_buffer)), rbgImageToBlob{std::move(_rbgImageToBlob)}, outputTensorToDetections{std::move(_outputTensorToDetections)} {}
alisonryckman marked this conversation as resolved.
Show resolved Hide resolved
};

static constexpr char const* NODE_NAME = "object_detector";

std::unique_ptr<tf2_ros::Buffer> mTfBuffer = std::make_unique<tf2_ros::Buffer>(get_clock());
std::shared_ptr<tf2_ros::TransformBroadcaster> mTfBroadcaster = std::make_shared<tf2_ros::TransformBroadcaster>(this);
std::shared_ptr<tf2_ros::TransformListener> mTfListener = std::make_shared<tf2_ros::TransformListener>(*mTfBuffer);

Model mModel;

std::string mCameraFrame;
std::string mWorldFrame;

std::string mModelName;
cv::Mat mRgbImage, mImageBlob;

LoopProfiler mLoopProfiler;

TensortRT mTensorRT;

cv::Mat mRgbImage, mImageBlob;
sensor_msgs::msg::Image mDetectionsImageMessage;

rclcpp::Publisher<sensor_msgs::msg::Image>::SharedPtr mDebugImgPub;
rclcpp::Subscription<sensor_msgs::msg::PointCloud2>::SharedPtr mSensorSub;

// TODO(quintin): Do not hard code exactly two classes
std::vector<int> mObjectHitCounts{0, 0};

int mObjIncrementWeight{};
int mObjDecrementWeight{};
int mObjHitThreshold{};
int mObjMaxHitcount{};
float mModelScoreThreshold{};
float mModelNmsThreshold{};

auto spiralSearchForValidPoint(sensor_msgs::msg::PointCloud2::UniquePtr const& cloudPtr,
std::size_t u, std::size_t v,
Expand All @@ -50,18 +70,43 @@ namespace mrover {

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

image should be const ref

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, I changed publishDetectedObjects to const ref, drawDetectionBoxes to just ref and rgbImage in preprocessYOLOv8Input to const ref, lmk if i missed any

static auto drawDetectionBoxes(cv::InputOutputArray image, std::span<Detection const> detections) -> void;

auto static parseYOLOv8Output(Model const& model,
cv::Mat& output,
std::vector<Detection>& detections) -> void;

auto static preprocessYOLOv8Input(Model const& model, cv::Mat& rgbImage, cv::Mat& blobSizedImage, cv::Mat& blob) -> void;

public:
explicit ObjectDetectorBase();

~ObjectDetectorBase() override = default;
};

class StereoObjectDetector final : public ObjectDetectorBase {
private:
rclcpp::Subscription<sensor_msgs::msg::PointCloud2>::SharedPtr mSensorSub;

public:
explicit StereoObjectDetector();

static auto convertPointCloudToRGB(sensor_msgs::msg::PointCloud2::UniquePtr const& msg, cv::Mat const& image) -> void;

auto pointCloudCallback(sensor_msgs::msg::PointCloud2::UniquePtr const& msg) -> void;
};

class ImageObjectDetector final : public ObjectDetectorBase {
private:
rclcpp::Publisher<mrover::msg::ImageTargets>::SharedPtr mTargetsPub;

rclcpp::Subscription<sensor_msgs::msg::Image>::SharedPtr mSensorSub;

float mCameraHorizontalFov{};

public:
explicit ImageObjectDetector();

auto getTagBearing(cv::InputArray image, cv::Rect const& box) const -> float;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

image const ref

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

adjusted


auto imageCallback(sensor_msgs::msg::Image::UniquePtr const& msg) -> void;
};
} // namespace mrover
99 changes: 99 additions & 0 deletions perception/object_detector/object_detector.parsing.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#include "object_detector.hpp"
namespace mrover {
auto ObjectDetectorBase::preprocessYOLOv8Input(Model const& model, cv::Mat& rgbImage, cv::Mat& blobSizedImage, cv::Mat& blob) -> void {
if (model.inputTensorSize.size() != 4) {
throw std::runtime_error("Expected Blob Size to be of size 4, are you using the correct model type?");
}

if (model.buffer.size() != 2) {
throw std::runtime_error("Expected 2 additional parameters!");
}

static constexpr double UCHAR_TO_DOUBLE = 1.0 / 255.0;

cv::Size blobSize{static_cast<int32_t>(model.inputTensorSize[2]), static_cast<int32_t>(model.inputTensorSize[3])};
cv::resize(rgbImage, blobSizedImage, blobSize);
cv::dnn::blobFromImage(blobSizedImage, blob, UCHAR_TO_DOUBLE, blobSize, cv::Scalar{}, false, false);
}

auto ObjectDetectorBase::parseYOLOv8Output(Model const& model, cv::Mat& output, std::vector<Detection>& detections) -> void {
// Parse model specific dimensioning from the output

// The input to this function is expecting a YOLOv8 style model, thus the dimensions should be > rows
if (output.cols <= output.rows) {
std::array<char, 75> message{};
snprintf(message.data(), message.size(), "Something is wrong model interpretation, %d cols and %d rows", output.cols, output.rows);
throw std::runtime_error(message.data());
}

int rows = output.cols;
int dimensions = output.rows;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you name this more verbosely

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed


// The output of the model is a batchSizex84x8400 matrix
// This converts the model to a TODO: Check this dimensioning
output = output.reshape(1, dimensions);
cv::transpose(output, output);

// This function expects the image to already be in the correct format thus no distrotion is needed
float const xFactor = 1.0;
float const yFactor = 1.0;

// Intermediate Storage Containers
std::vector<int> classIds;
std::vector<float> confidences;
std::vector<cv::Rect> boxes;

// Each row of the output is a detection with a bounding box and associated class scores
for (int r = 0; r < rows; ++r) {
// Skip first four values as they are the box data
cv::Mat scores = output.row(r).colRange(4, dimensions);

cv::Point classId;
double maxClassScore;
cv::minMaxLoc(scores, nullptr, &maxClassScore, nullptr, &classId);

// Determine if the class is an acceptable confidence level, otherwise disregard
if (maxClassScore <= model.buffer[0]) continue;

confidences.push_back(static_cast<float>(maxClassScore));
classIds.push_back(classId.x);

// Get the bounding box data
cv::Mat box = output.row(r).colRange(0, 4);
auto x = box.at<float>(0);
auto y = box.at<float>(1);
auto w = box.at<float>(2);
auto h = box.at<float>(3);

// Cast the corners into integers to be used on pixels
auto left = static_cast<int>((x - 0.5 * w) * xFactor);
auto top = static_cast<int>((y - 0.5 * h) * yFactor);
auto width = static_cast<int>(w * xFactor);
auto height = static_cast<int>(h * yFactor);

// Push abck the box into storage
boxes.emplace_back(left, top, width, height);
}

//Coalesce the boxes into a smaller number of distinct boxes
std::vector<int> nmsResult;
cv::dnn::NMSBoxes(boxes, confidences, model.buffer[0], model.buffer[1], nmsResult);

// Fill in the output Detections Vector
for (int i: nmsResult) {
Detection result;

//Fill in the id and confidence for the class
result.classId = classIds[i];
result.confidence = confidences[i];

//Fill in the class name and box information
result.className = model.classes[result.classId];
result.box = boxes[i];

//Push back the detection into the for storagevector
detections.push_back(result);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

emplace_back

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

}
Comment on lines +82 to +96
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this be made more concise?

}

}; // namespace mrover
Loading