umrover · jbrhm · Sep 23, 2024 · Sep 23, 2024 · Sep 23, 2024 · Sep 23, 2024
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -284,7 +284,7 @@ ament_target_dependencies(arm_controller rclcpp)
 
 if (Gst_FOUND AND GstApp_FOUND)
     mrover_add_node(usb_camera esw/usb_camera/*.cpp esw/usb_camera/pch.hpp)
-    target_link_libraries(usb_camera PkgConfig::Gst PkgConfig::GstApp opencv_core opencv_imgcodecs)
+    target_link_libraries(usb_camera PkgConfig::Gst PkgConfig::GstApp opencv_core opencv_imgcodecs parameter_utils)
     ament_target_dependencies(usb_camera rclcpp)
 
     if (LibUdev_FOUND)

diff --git a/esw/usb_camera/pch.hpp b/esw/usb_camera/pch.hpp
@@ -28,3 +28,5 @@
 #include <sensor_msgs/image_encodings.hpp>
 #include <sensor_msgs/msg/camera_info.hpp>
 #include <sensor_msgs/msg/image.hpp>
+
+#include <parameter.hpp>
diff --git a/esw/usb_camera/usb_camera.cpp b/esw/usb_camera/usb_camera.cpp
@@ -13,33 +13,28 @@ namespace mrover {
     UsbCamera::UsbCamera() : Node{"usb_camera", rclcpp::NodeOptions{}.use_intra_process_comms(true)} {
         try {
             /* Parameters */
-            declare_parameter("width", rclcpp::ParameterType::PARAMETER_INTEGER);
-            declare_parameter("height", rclcpp::ParameterType::PARAMETER_INTEGER);
-            declare_parameter("framerate", rclcpp::ParameterType::PARAMETER_INTEGER);
-            declare_parameter("device", rclcpp::ParameterType::PARAMETER_STRING, [] {
-                rcl_interfaces::msg::ParameterDescriptor d;
-                d.read_only = true;
-                d.description = "The device path to the USB camera, e.g. /dev/video0";
-                return d;
-            }());
-            declare_parameter("decode_jpeg_from_device", rclcpp::ParameterType::PARAMETER_BOOL, [] {
-                rcl_interfaces::msg::ParameterDescriptor d;
-                d.read_only = true;
-                d.description = "If true, decode JPEG directly from the USB device instead of YUY2. "
-                                "This decreases image quality but increases performance and reduces USB bus load. "
-                                "It also reduces the 'jelly' effect caused by large YUY2 formats.";
-                return d;
-            }());
-
-            mWidth = get_parameter("width").as_int();
-            mHeight = get_parameter("height").as_int();
-            std::int64_t framerate = get_parameter("framerate").as_int();
-            std::string device = get_parameter("device").as_string();
-            bool decodeJpegFromDevice = get_parameter("decode_jpeg_from_device").as_bool();
+            int framerate{};
+            std::string device{};
+            std::string imageTopicName{};
+            std::string cameraInfoTopicName{};
+            double watchdogTimeout{};
+            bool decodeJpegFromDevice{};
+
+            std::vector<ParameterWrapper> params{
+                    {"width", mWidth, 640},
+                    {"height", mHeight, 480},
+                    {"framerate", framerate, 30},
+                    {"device", device, "/dev/video2"},
+                    {"image_topic", imageTopicName, "/usb_camera/image"},
+                    {"camera_info_topic", cameraInfoTopicName, "/usb_camera/camera_info"},
+                    {"watchdog_timeout", watchdogTimeout, 1.0},
+                    {"decode_jpeg_from_device", decodeJpegFromDevice, false}};
+
+            ParameterWrapper::declareParameters(this, params);
 
             /* Interfaces */
-            mImgPub = create_publisher<sensor_msgs::msg::Image>("image", 1);
-            mCamInfoPub = create_publisher<sensor_msgs::msg::CameraInfo>("camera_info", 1);
+            mImgPub = create_publisher<sensor_msgs::msg::Image>(imageTopicName, 1);
+            mCamInfoPub = create_publisher<sensor_msgs::msg::CameraInfo>(cameraInfoTopicName, 1);
 
             /* Pipeline */
             gst_init(nullptr, nullptr);

diff --git a/esw/usb_camera/usb_camera.hpp b/esw/usb_camera/usb_camera.hpp
@@ -9,7 +9,7 @@ namespace mrover {
         rclcpp::Publisher<sensor_msgs::msg::CameraInfo>::SharedPtr mCamInfoPub;
         rclcpp::Publisher<sensor_msgs::msg::Image>::SharedPtr mImgPub;
 
-        std::uint16_t mWidth{}, mHeight{};
+        int mWidth{}, mHeight{};
 
         GstElement *mStreamSink{}, *mPipeline{};
         GMainLoop* mMainLoop{};

diff --git a/perception/object_detector/object_detector.cpp b/perception/object_detector/object_detector.cpp
@@ -3,6 +3,9 @@
 namespace mrover {
 
     ObjectDetectorBase::ObjectDetectorBase() : rclcpp::Node(NODE_NAME), mLoopProfiler{get_logger()} {
+        std::string modelName;
+        float modelScoreThreshold{};
+        float modelNMSThreshold{};
 
         std::vector<ParameterWrapper> params{
                 {"camera_frame", mCameraFrame, "zed_left_camera_frame"},
@@ -11,36 +14,69 @@ namespace mrover {
                 {"decrement_weight", mObjDecrementWeight, 1},
                 {"hitcount_threshold", mObjHitThreshold, 5},
                 {"hitcount_max", mObjMaxHitcount, 10},
-                {"model_name", mModelName, "Large-Dataset"},
-                {"model_score_threshold", mModelScoreThreshold, 0.75},
-                {"model_nms_threshold", mModelNmsThreshold, 0.5}};
+                {"model_name", modelName, "Large-Dataset"},
+                {"model_score_threshold", modelScoreThreshold, 0.75},
+                {"model_nms_threshold", modelNMSThreshold, 0.5}};
 
         ParameterWrapper::declareParameters(this, params);
 
+        // All of these variables will be invalidated after calling this function
+
         std::filesystem::path packagePath = std::filesystem::path{ament_index_cpp::get_package_prefix("mrover")} / ".." / ".." / "src" / "mrover";
 
-        RCLCPP_INFO_STREAM(get_logger(), "Opening Model " << mModelName);
+        RCLCPP_INFO_STREAM(get_logger(), "Opening Model " << modelName);
 
         RCLCPP_INFO_STREAM(get_logger(), "Found package path " << packagePath);
 
-        mTensorRT = TensortRT{mModelName, packagePath.string()};
+        // Initialize TensorRT Inference Object and Get Important Output Information
+        mTensorRT = TensortRT{modelName, packagePath.string()};
 
-        mDebugImgPub = create_publisher<sensor_msgs::msg::Image>("object_detector/debug_img", 1);
+        mModel = Model(modelName, {0, 0}, {"bottle", "hammer"}, mTensorRT.getInputTensorSize(), mTensorRT.getOutputTensorSize(), {modelScoreThreshold, modelNMSThreshold}, ObjectDetectorBase::preprocessYOLOv8Input, ObjectDetectorBase::parseYOLOv8Output);
 
-        RCLCPP_INFO_STREAM(get_logger(), std::format("Object detector initialized with model: {} and thresholds: {} and {}", mModelName, mModelScoreThreshold, mModelNmsThreshold));
+        RCLCPP_INFO_STREAM(get_logger(), std::format("Object detector initialized with model: {} and thresholds: {} and {}", mModel.modelName, modelScoreThreshold, modelNMSThreshold));
     }
 
     StereoObjectDetector::StereoObjectDetector() {
-        mSensorSub = create_subscription<sensor_msgs::msg::PointCloud2>("/camera/left/points", 1, [this](sensor_msgs::msg::PointCloud2::UniquePtr const& msg) {
+        RCLCPP_INFO_STREAM(get_logger(), "Creating Stereo Object Detector...");
+
+        mDebugImgPub = create_publisher<sensor_msgs::msg::Image>("/stereo_object_detector/debug_img", 1);
+
+        mSensorSub = create_subscription<sensor_msgs::msg::PointCloud2>("/zed/left/points", 1, [this](sensor_msgs::msg::PointCloud2::UniquePtr const& msg) {
             StereoObjectDetector::pointCloudCallback(msg);
         });
     }
+
+    ImageObjectDetector::ImageObjectDetector() {
+        RCLCPP_INFO_STREAM(get_logger(), "Creating Image Object Detector...");
+
+        std::vector<ParameterWrapper> params{
+                {"long_range_camera/fov", mCameraHorizontalFov, 80.0}};
+
+        ParameterWrapper::declareParameters(this, params);
+
+        mDebugImgPub = create_publisher<sensor_msgs::msg::Image>("/long_range_object_detector/debug_img", 1);
+
+        mSensorSub = create_subscription<sensor_msgs::msg::Image>("/usb_camera/image", 1, [this](sensor_msgs::msg::Image::UniquePtr const& msg) {
+            ImageObjectDetector::imageCallback(msg);
+        });
+
+        mTargetsPub = create_publisher<mrover::msg::ImageTargets>("/long_range_camera/objects", 1);
+    }
 } // namespace mrover
 
 
 auto main(int argc, char** argv) -> int {
     rclcpp::init(argc, argv);
-    rclcpp::spin(std::make_shared<mrover::StereoObjectDetector>());
+
+    // DO NOT REMOVE OR ELSE REF COUNT WILL GO TO ZERO
+    auto imgOD = std::make_shared<mrover::ImageObjectDetector>();
+    auto stereoOD = std::make_shared<mrover::StereoObjectDetector>();
+
+    rclcpp::executors::SingleThreadedExecutor executor;
+    executor.add_node(imgOD);
+    executor.add_node(stereoOD);
+    executor.spin();
+
     rclcpp::shutdown();
     return EXIT_SUCCESS;
 }
diff --git a/perception/object_detector/object_detector.hpp b/perception/object_detector/object_detector.hpp
@@ -7,36 +7,56 @@ namespace mrover {
     class ObjectDetectorBase : public rclcpp::Node {
 
     protected:
+        struct Model {
+            std::string modelName;
+
+            std::vector<int> objectHitCounts;
+
+            std::vector<std::string> classes;
+
+            std::vector<int64_t> inputTensorSize;
+
+            std::vector<int64_t> outputTensorSize;
+
+            // Additional space for params that one algorithm might need and not others
+            std::vector<float> buffer;
+
+            // Converts an rgb image to a blob
+            std::function<void(Model const&, cv::Mat&, cv::Mat&, cv::Mat&)> rbgImageToBlob;
+
+            // Converts an output tensor into a vector of detections
+            std::function<void(Model const&, cv::Mat&, std::vector<Detection>&)> outputTensorToDetections;
+
+            Model() = default;
+
+            Model(std::string _modelName, std::vector<int> _objectHitCounts, std::vector<std::string> _classes, std::vector<int64_t> _inputTensorSize, std::vector<int64_t> _outputTensorSize, std::vector<float> _buffer, std::function<void(Model const&, cv::Mat&, cv::Mat&, cv::Mat&)> _rbgImageToBlob, std::function<void(Model const&, cv::Mat&, std::vector<Detection>&)> _outputTensorToDetections) : modelName{std::move(_modelName)}, objectHitCounts(std::move(_objectHitCounts)), classes(std::move(_classes)), inputTensorSize(std::move(_inputTensorSize)), outputTensorSize(std::move(_outputTensorSize)), buffer(std::move(_buffer)), rbgImageToBlob{std::move(_rbgImageToBlob)}, outputTensorToDetections{std::move(_outputTensorToDetections)} {}
+        };
+
         static constexpr char const* NODE_NAME = "object_detector";
 
         std::unique_ptr<tf2_ros::Buffer> mTfBuffer = std::make_unique<tf2_ros::Buffer>(get_clock());
         std::shared_ptr<tf2_ros::TransformBroadcaster> mTfBroadcaster = std::make_shared<tf2_ros::TransformBroadcaster>(this);
         std::shared_ptr<tf2_ros::TransformListener> mTfListener = std::make_shared<tf2_ros::TransformListener>(*mTfBuffer);
 
+        Model mModel;
+
         std::string mCameraFrame;
         std::string mWorldFrame;
 
-        std::string mModelName;
+        cv::Mat mRgbImage, mImageBlob;
 
         LoopProfiler mLoopProfiler;
 
         TensortRT mTensorRT;
 
-        cv::Mat mRgbImage, mImageBlob;
         sensor_msgs::msg::Image mDetectionsImageMessage;
 
         rclcpp::Publisher<sensor_msgs::msg::Image>::SharedPtr mDebugImgPub;
-        rclcpp::Subscription<sensor_msgs::msg::PointCloud2>::SharedPtr mSensorSub;
-
-        // TODO(quintin): Do not hard code exactly two classes
-        std::vector<int> mObjectHitCounts{0, 0};
 
         int mObjIncrementWeight{};
         int mObjDecrementWeight{};
         int mObjHitThreshold{};
         int mObjMaxHitcount{};
-        float mModelScoreThreshold{};
-        float mModelNmsThreshold{};
 
         auto spiralSearchForValidPoint(sensor_msgs::msg::PointCloud2::UniquePtr const& cloudPtr,
                                        std::size_t u, std::size_t v,
@@ -50,18 +70,43 @@ namespace mrover {
 
         static auto drawDetectionBoxes(cv::InputOutputArray image, std::span<Detection const> detections) -> void;
 
+        auto static parseYOLOv8Output(Model const& model,
+                                      cv::Mat& output,
+                                      std::vector<Detection>& detections) -> void;
+
+        auto static preprocessYOLOv8Input(Model const& model, cv::Mat& rgbImage, cv::Mat& blobSizedImage, cv::Mat& blob) -> void;
+
     public:
         explicit ObjectDetectorBase();
 
         ~ObjectDetectorBase() override = default;
     };
 
     class StereoObjectDetector final : public ObjectDetectorBase {
+    private:
+        rclcpp::Subscription<sensor_msgs::msg::PointCloud2>::SharedPtr mSensorSub;
+
     public:
         explicit StereoObjectDetector();
 
         static auto convertPointCloudToRGB(sensor_msgs::msg::PointCloud2::UniquePtr const& msg, cv::Mat const& image) -> void;
 
         auto pointCloudCallback(sensor_msgs::msg::PointCloud2::UniquePtr const& msg) -> void;
     };
+
+    class ImageObjectDetector final : public ObjectDetectorBase {
+    private:
+        rclcpp::Publisher<mrover::msg::ImageTargets>::SharedPtr mTargetsPub;
+
+        rclcpp::Subscription<sensor_msgs::msg::Image>::SharedPtr mSensorSub;
+
+        float mCameraHorizontalFov{};
+
+    public:
+        explicit ImageObjectDetector();
+
+        auto getTagBearing(cv::InputArray image, cv::Rect const& box) const -> float;
+
+        auto imageCallback(sensor_msgs::msg::Image::UniquePtr const& msg) -> void;
+    };
 } // namespace mrover
diff --git a/perception/object_detector/object_detector.parsing.cpp b/perception/object_detector/object_detector.parsing.cpp
@@ -0,0 +1,99 @@
+#include "object_detector.hpp"
+namespace mrover {
+    auto ObjectDetectorBase::preprocessYOLOv8Input(Model const& model, cv::Mat& rgbImage, cv::Mat& blobSizedImage, cv::Mat& blob) -> void {
+        if (model.inputTensorSize.size() != 4) {
+            throw std::runtime_error("Expected Blob Size to be of size 4, are you using the correct model type?");
+        }
+
+        if (model.buffer.size() != 2) {
+            throw std::runtime_error("Expected 2 additional parameters!");
+        }
+
+        static constexpr double UCHAR_TO_DOUBLE = 1.0 / 255.0;
+
+        cv::Size blobSize{static_cast<int32_t>(model.inputTensorSize[2]), static_cast<int32_t>(model.inputTensorSize[3])};
+        cv::resize(rgbImage, blobSizedImage, blobSize);
+        cv::dnn::blobFromImage(blobSizedImage, blob, UCHAR_TO_DOUBLE, blobSize, cv::Scalar{}, false, false);
+    }
+
+    auto ObjectDetectorBase::parseYOLOv8Output(Model const& model, cv::Mat& output, std::vector<Detection>& detections) -> void {
+        // Parse model specific dimensioning from the output
+
+        // The input to this function is expecting a YOLOv8 style model, thus the dimensions should be > rows
+        if (output.cols <= output.rows) {
+            std::array<char, 75> message{};
+            snprintf(message.data(), message.size(), "Something is wrong model interpretation, %d cols and %d rows", output.cols, output.rows);
+            throw std::runtime_error(message.data());
+        }
+
+        int rows = output.cols;
+        int dimensions = output.rows;
+
+        // The output of the model is a batchSizex84x8400 matrix
+        // This converts the model to a TODO: Check this dimensioning
+        output = output.reshape(1, dimensions);
+        cv::transpose(output, output);
+
+        // This function expects the image to already be in the correct format thus no distrotion is needed
+        float const xFactor = 1.0;
+        float const yFactor = 1.0;
+
+        // Intermediate Storage Containers
+        std::vector<int> classIds;
+        std::vector<float> confidences;
+        std::vector<cv::Rect> boxes;
+
+        // Each row of the output is a detection with a bounding box and associated class scores
+        for (int r = 0; r < rows; ++r) {
+            // Skip first four values as they are the box data
+            cv::Mat scores = output.row(r).colRange(4, dimensions);
+
+            cv::Point classId;
+            double maxClassScore;
+            cv::minMaxLoc(scores, nullptr, &maxClassScore, nullptr, &classId);
+
+            // Determine if the class is an acceptable confidence level, otherwise disregard
+            if (maxClassScore <= model.buffer[0]) continue;
+
+            confidences.push_back(static_cast<float>(maxClassScore));
+            classIds.push_back(classId.x);
+
+            // Get the bounding box data
+            cv::Mat box = output.row(r).colRange(0, 4);
+            auto x = box.at<float>(0);
+            auto y = box.at<float>(1);
+            auto w = box.at<float>(2);
+            auto h = box.at<float>(3);
+
+            // Cast the corners into integers to be used on pixels
+            auto left = static_cast<int>((x - 0.5 * w) * xFactor);
+            auto top = static_cast<int>((y - 0.5 * h) * yFactor);
+            auto width = static_cast<int>(w * xFactor);
+            auto height = static_cast<int>(h * yFactor);
+
+            // Push abck the box into storage
+            boxes.emplace_back(left, top, width, height);
+        }
+
+        //Coalesce the boxes into a smaller number of distinct boxes
+        std::vector<int> nmsResult;
+        cv::dnn::NMSBoxes(boxes, confidences, model.buffer[0], model.buffer[1], nmsResult);
+
+        // Fill in the output Detections Vector
+        for (int i: nmsResult) {
+            Detection result;
+
+            //Fill in the id and confidence for the class
+            result.classId = classIds[i];
+            result.confidence = confidences[i];
+
+            //Fill in the class name and box information
+            result.className = model.classes[result.classId];
+            result.box = boxes[i];
+
+            //Push back the detection into the for storagevector
+            detections.push_back(result);
+        }
+    }
+
+}; // namespace mrover