From 8bff333b91945b3a5eced31bf5a7f5235794fb80 Mon Sep 17 00:00:00 2001
From: jbrhm
Date: Sun, 1 Dec 2024 23:29:45 -0500
Subject: [PATCH] intermediate review

---
 .../object_detector/object_detector.cpp      |  13 +-
 .../object_detector/object_detector.hpp      |  35 ++--
 .../object_detector.parsing.cpp              |  36 ++--
 .../object_detector.processing.cpp           |  10 +-
 scripts/visualizer.py                        | 161 ------------------
 tensorrt/inference.cu                        |   3 +-
 tensorrt/tensorrt.hpp                        |   7 -
 7 files changed, 46 insertions(+), 219 deletions(-)
 delete mode 100755 scripts/visualizer.py

diff --git a/perception/object_detector/object_detector.cpp b/perception/object_detector/object_detector.cpp
index 1da154c6..7f53f883 100644
--- a/perception/object_detector/object_detector.cpp
+++ b/perception/object_detector/object_detector.cpp
@@ -1,11 +1,10 @@
 #include "object_detector.hpp"
+#include <functional>
 
 namespace mrover {
 
     ObjectDetectorBase::ObjectDetectorBase(rclcpp::NodeOptions const& options) : rclcpp::Node(NODE_NAME, options), mLoopProfiler{get_logger()} {
         std::string modelName;
-        float modelScoreThreshold{};
-        float modelNMSThreshold{};
 
         std::vector<ParameterWrapper> params{
                 {"camera_frame", mCameraFrame, "zed_left_camera_frame"},
@@ -15,8 +14,8 @@ namespace mrover {
                 {"hitcount_threshold", mObjHitThreshold, 5},
                 {"hitcount_max", mObjMaxHitcount, 10},
                 {"model_name", modelName, "Large-Dataset"},
-                {"model_score_threshold", modelScoreThreshold, 0.75},
-                {"model_nms_threshold", modelNMSThreshold, 0.5}};
+                {"model_score_threshold", mModelScoreThreshold, 0.75},
+                {"model_nms_threshold", mModelNMSThreshold, 0.5}};
 
         ParameterWrapper::declareParameters(this, params);
@@ -31,9 +30,11 @@ namespace mrover {
         // Initialize TensorRT Inference Object and Get Important Output Information
         mTensorRT = TensortRT{modelName, packagePath.string()};
 
-        mModel = Model(modelName, {0, 0}, {"bottle", "hammer"}, mTensorRT.getInputTensorSize(), mTensorRT.getOutputTensorSize(), {modelScoreThreshold, modelNMSThreshold}, ObjectDetectorBase::preprocessYOLOv8Input, ObjectDetectorBase::parseYOLOv8Output);
+        using namespace std::placeholders;
 
-        RCLCPP_INFO_STREAM(get_logger(), std::format("Object detector initialized with model: {} and thresholds: {} and {}", mModel.modelName, modelScoreThreshold, modelNMSThreshold));
+        mModel = Model(modelName, {0, 0}, {"bottle", "hammer"}, mTensorRT.getInputTensorSize(), mTensorRT.getOutputTensorSize(), [this](Model const& model, cv::Mat& rgbImage, cv::Mat& blobSizedImage, cv::Mat& blob) { preprocessYOLOv8Input(model, rgbImage, blobSizedImage, blob); }, [this](Model const& model, cv::Mat& output, std::vector<Detection>& detections) { parseYOLOv8Output(model, output, detections); });
+
+        RCLCPP_INFO_STREAM(get_logger(), std::format("Object detector initialized with model: {} and thresholds: {} and {}", mModel.modelName, mModelScoreThreshold, mModelNMSThreshold));
     }
 
     StereoObjectDetector::StereoObjectDetector(rclcpp::NodeOptions const& options) : ObjectDetectorBase(options) {
diff --git a/perception/object_detector/object_detector.hpp b/perception/object_detector/object_detector.hpp
index ae0beda3..e8e92f63 100644
--- a/perception/object_detector/object_detector.hpp
+++ b/perception/object_detector/object_detector.hpp
@@ -5,8 +5,16 @@ namespace mrover {
 
     class ObjectDetectorBase : public rclcpp::Node {
-    protected:
+        struct Detection {
+            int classId{};
+            std::string className;
+            float confidence{};
+            cv::Rect box;
+
+            Detection(int _classId, std::string _className, float _confidence, cv::Rect _box) : classId{_classId}, className{std::move(_className)}, confidence{_confidence}, box{_box} {}
+        };
+
         struct Model {
             std::string modelName;
@@ -18,20 +26,19 @@ namespace mrover {
 
             std::vector<int64_t> outputTensorSize;
 
-            // Additional space for params that one algorithm might need and not others
-            std::vector<float> buffer;
-
             // Converts an rgb image to a blob
-            std::function<void(Model const&, cv::Mat&, cv::Mat&, cv::Mat&)> rbgImageToBlob;
+            std::function<void(Model const&, cv::Mat&, cv::Mat&, cv::Mat&)> rgbImageToBlob;
 
             // Converts an output tensor into a vector of detections
             std::function<void(Model const&, cv::Mat&, std::vector<Detection>&)> outputTensorToDetections;
 
             Model() = default;
 
-            Model(std::string _modelName, std::vector<int> _objectHitCounts, std::vector<std::string> _classes, std::vector<int64_t> _inputTensorSize, std::vector<int64_t> _outputTensorSize, std::vector<float> _buffer, std::function<void(Model const&, cv::Mat&, cv::Mat&, cv::Mat&)> _rbgImageToBlob, std::function<void(Model const&, cv::Mat&, std::vector<Detection>&)> _outputTensorToDetections) : modelName{std::move(_modelName)}, objectHitCounts(std::move(_objectHitCounts)), classes(std::move(_classes)), inputTensorSize(std::move(_inputTensorSize)), outputTensorSize(std::move(_outputTensorSize)), buffer(std::move(_buffer)), rbgImageToBlob{std::move(_rbgImageToBlob)}, outputTensorToDetections{std::move(_outputTensorToDetections)} {}
+            Model(std::string _modelName, std::vector<int> _objectHitCounts, std::vector<std::string> _classes, std::vector<int64_t> _inputTensorSize, std::vector<int64_t> _outputTensorSize, std::function<void(Model const&, cv::Mat&, cv::Mat&, cv::Mat&)> _rgbImageToBlob, std::function<void(Model const&, cv::Mat&, std::vector<Detection>&)> _outputTensorToDetections) : modelName{std::move(_modelName)}, objectHitCounts(std::move(_objectHitCounts)), classes(std::move(_classes)), inputTensorSize(std::move(_inputTensorSize)), outputTensorSize(std::move(_outputTensorSize)), rgbImageToBlob{std::move(_rgbImageToBlob)}, outputTensorToDetections{std::move(_outputTensorToDetections)} {}
         };
+
+    protected:
         static constexpr char const* NODE_NAME = "object_detector";
 
         std::unique_ptr<tf2_ros::Buffer> mTfBuffer = std::make_unique<tf2_ros::Buffer>(get_clock());
@@ -57,6 +64,8 @@ namespace mrover {
         int mObjDecrementWeight{};
         int mObjHitThreshold{};
         int mObjMaxHitcount{};
+        float mModelScoreThreshold{};
+        float mModelNMSThreshold{};
 
         auto spiralSearchForValidPoint(sensor_msgs::msg::PointCloud2::UniquePtr const& cloudPtr,
                                        std::size_t u, std::size_t v,
@@ -66,15 +75,15 @@
                                        std::span<Detection const> detections,
                                        cv::Size const& imageSize = {640, 640}) -> void;
 
-        auto publishDetectedObjects(cv::InputArray image) -> void;
-
-        static auto drawDetectionBoxes(cv::InputOutputArray image, std::span<Detection const> detections) -> void;
+        auto publishDetectedObjects(cv::InputArray const& image) -> void;
 
-        auto static parseYOLOv8Output(Model const& model,
+        static auto drawDetectionBoxes(cv::InputOutputArray& image, std::span<Detection const> detections) -> void;
+
+        auto parseYOLOv8Output(Model const& model,
                                       cv::Mat& output,
-                                      std::vector<Detection>& detections) -> void;
+                                      std::vector<Detection>& detections) const -> void;
 
-        auto static preprocessYOLOv8Input(Model const& model, cv::Mat& rgbImage, cv::Mat& blobSizedImage, cv::Mat& blob) -> void;
+        static auto preprocessYOLOv8Input(Model const& model, cv::Mat const& rgbImage, cv::Mat& blobSizedImage, cv::Mat& blob) -> void;
 
     public:
         explicit ObjectDetectorBase(rclcpp::NodeOptions const& options = rclcpp::NodeOptions());
@@ -105,7 +114,7 @@ namespace mrover {
     public:
         explicit ImageObjectDetector(rclcpp::NodeOptions const& options = rclcpp::NodeOptions());
 
-        auto getTagBearing(cv::InputArray image, cv::Rect const& box) const -> float;
+        auto getTagBearing(cv::InputArray const& image, cv::Rect const& box) const -> float;
 
         auto imageCallback(sensor_msgs::msg::Image::UniquePtr const& msg) -> void;
     };
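Review note: the constructor change above swaps static member-function pointers for capturing
lambdas so that parseYOLOv8Output, now a const member function, can read mModelScoreThreshold and
mModelNMSThreshold. The added "using namespace std::placeholders;" is only needed for a
std::bind-based alternative and is unused with lambdas. Below is a minimal, self-contained sketch
of the pattern; Detector, parse, and makeCallback are hypothetical stand-ins, not the real classes:

    #include <functional>
    #include <iostream>

    // Hypothetical stand-in for ObjectDetectorBase: a per-instance threshold
    // that the parsing callback must be able to read.
    struct Detector {
        float scoreThreshold{0.75f};

        void parse(float score) const {
            std::cout << (score > scoreThreshold ? "keep" : "drop") << '\n';
        }

        // Mirrors Model::outputTensorToDetections: capturing `this` lets the
        // stored std::function reach instance members, which a static
        // member-function pointer cannot.
        std::function<void(float)> makeCallback() {
            return [this](float score) { parse(score); };
        }
    };

    int main() {
        Detector d;
        std::function<void(float)> cb = d.makeCallback();
        cb(0.9f); // prints "keep"
        cb(0.5f); // prints "drop"
    }

The std::bind equivalent would be std::bind(&Detector::parse, &d, std::placeholders::_1), which is
presumably what the placeholders using-directive anticipates; if the lambdas stay, it can be dropped.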
diff --git a/perception/object_detector/object_detector.parsing.cpp b/perception/object_detector/object_detector.parsing.cpp
index dc7d7167..0aa12974 100644
--- a/perception/object_detector/object_detector.parsing.cpp
+++ b/perception/object_detector/object_detector.parsing.cpp
@@ -1,14 +1,10 @@
 #include "object_detector.hpp"
 
 namespace mrover {
-    auto ObjectDetectorBase::preprocessYOLOv8Input(Model const& model, cv::Mat& rgbImage, cv::Mat& blobSizedImage, cv::Mat& blob) -> void {
+    auto ObjectDetectorBase::preprocessYOLOv8Input(Model const& model, cv::Mat const& rgbImage, cv::Mat& blobSizedImage, cv::Mat& blob) -> void {
         if (model.inputTensorSize.size() != 4) {
             throw std::runtime_error("Expected Blob Size to be of size 4, are you using the correct model type?");
         }
 
-        if (model.buffer.size() != 2) {
-            throw std::runtime_error("Expected 2 additional parameters!");
-        }
-
         static constexpr double UCHAR_TO_DOUBLE = 1.0 / 255.0;
 
         cv::Size blobSize{static_cast<int>(model.inputTensorSize[2]), static_cast<int>(model.inputTensorSize[3])};
@@ -16,7 +12,7 @@ namespace mrover {
         cv::dnn::blobFromImage(blobSizedImage, blob, UCHAR_TO_DOUBLE, blobSize, cv::Scalar{}, false, false);
     }
 
-    auto ObjectDetectorBase::parseYOLOv8Output(Model const& model, cv::Mat& output, std::vector<Detection>& detections) -> void {
+    auto ObjectDetectorBase::parseYOLOv8Output(Model const& model, cv::Mat& output, std::vector<Detection>& detections) const -> void {
         // Parse model specific dimensioning from the output
 
         // The input to this function is expecting a YOLOv8 style model, thus the dimensions should be cols > rows
@@ -26,12 +22,12 @@ namespace mrover {
             throw std::runtime_error(message.data());
         }
 
-        int rows = output.cols;
-        int dimensions = output.rows;
+        int numCandidates = output.cols;
+        int predictionDimension = output.rows;
 
         // The output of the model is a batchSizex84x8400 matrix
         // This converts the model to a TODO: Check this dimensioning
-        output = output.reshape(1, dimensions);
+        output = output.reshape(1, predictionDimension);
         cv::transpose(output, output);
 
         // This function expects the image to already be in the correct format thus no distortion is needed
@@ -44,16 +40,16 @@ namespace mrover {
         std::vector<cv::Rect> boxes;
 
         // Each row of the output is a detection with a bounding box and associated class scores
-        for (int r = 0; r < rows; ++r) {
+        for (int r = 0; r < numCandidates; ++r) {
             // Skip first four values as they are the box data
-            cv::Mat scores = output.row(r).colRange(4, dimensions);
+            cv::Mat scores = output.row(r).colRange(4, predictionDimension);
 
             cv::Point classId;
             double maxClassScore;
             cv::minMaxLoc(scores, nullptr, &maxClassScore, nullptr, &classId);
 
             // Determine if the class is an acceptable confidence level, otherwise disregard
-            if (maxClassScore <= model.buffer[0]) continue;
+            if (maxClassScore <= mModelScoreThreshold) continue;
 
             confidences.push_back(static_cast<float>(maxClassScore));
             classIds.push_back(classId.x);
@@ -77,22 +73,12 @@ namespace mrover {
 
         //Coalesce the boxes into a smaller number of distinct boxes
         std::vector<int> nmsResult;
-        cv::dnn::NMSBoxes(boxes, confidences, model.buffer[0], model.buffer[1], nmsResult);
+        cv::dnn::NMSBoxes(boxes, confidences, mModelScoreThreshold, mModelNMSThreshold, nmsResult);
 
         // Fill in the output Detections Vector
         for (int i: nmsResult) {
-            Detection result;
-
-            //Fill in the id and confidence for the class
-            result.classId = classIds[i];
-            result.confidence = confidences[i];
-
-            //Fill in the class name and box information
-            result.className = model.classes[result.classId];
-            result.box = boxes[i];
-
-            //Push back the detection into the for storagevector
-            detections.push_back(result);
+            //Push back the detection into the detections vector for storage
+            detections.emplace_back(classIds[i], model.classes[classIds[i]], confidences[i], boxes[i]);
         }
     }
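Review note: with the buffer vector gone, parseYOLOv8Output reads the thresholds directly from the
node. For reference, here is a compilable sketch of the same parse-then-NMS flow over a dummy
84x8400 YOLOv8-style tensor (4 box values plus 80 class scores per candidate); the shapes and
thresholds are illustrative, not taken from the real model:

    #include <opencv2/core.hpp>
    #include <opencv2/dnn.hpp>
    #include <vector>

    int main() {
        // One column per candidate, one row per prediction dimension
        cv::Mat output(84, 8400, CV_32F, cv::Scalar{0});
        output = output.reshape(1, 84); // single channel, 84 rows
        cv::transpose(output, output);  // 8400x84: one candidate per row

        float const scoreThreshold = 0.75f, nmsThreshold = 0.5f;
        std::vector<int> classIds;
        std::vector<float> confidences;
        std::vector<cv::Rect> boxes;

        for (int r = 0; r < output.rows; ++r) {
            // Skip the first four values: they are the box, not class scores
            cv::Mat scores = output.row(r).colRange(4, output.cols);
            cv::Point classId;
            double maxClassScore;
            cv::minMaxLoc(scores, nullptr, &maxClassScore, nullptr, &classId);
            if (maxClassScore <= scoreThreshold) continue;

            // Boxes come center-encoded: (cx, cy, w, h) -> top-left + size
            auto const* p = output.row(r).ptr<float>();
            boxes.emplace_back(static_cast<int>(p[0] - 0.5f * p[2]),
                               static_cast<int>(p[1] - 0.5f * p[3]),
                               static_cast<int>(p[2]), static_cast<int>(p[3]));
            confidences.push_back(static_cast<float>(maxClassScore));
            classIds.push_back(classId.x);
        }

        // Coalesce overlapping candidates; survivors' indices land in nmsResult
        std::vector<int> nmsResult;
        cv::dnn::NMSBoxes(boxes, confidences, scoreThreshold, nmsThreshold, nmsResult);
    }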
diff --git a/perception/object_detector/object_detector.processing.cpp b/perception/object_detector/object_detector.processing.cpp
index 9ef29662..4034d245 100644
--- a/perception/object_detector/object_detector.processing.cpp
+++ b/perception/object_detector/object_detector.processing.cpp
@@ -24,7 +24,7 @@ namespace mrover {
 
         // Convert the RGB Image into the blob Image format
         cv::Mat blobSizedImage;
-        mModel.rbgImageToBlob(mModel, mRgbImage, blobSizedImage, mImageBlob);
+        mModel.rgbImageToBlob(mModel, mRgbImage, blobSizedImage, mImageBlob);
 
         mLoopProfiler.measureEvent("Conversion");
 
@@ -151,7 +151,7 @@ namespace mrover {
         }
     }
 
-    auto ObjectDetectorBase::publishDetectedObjects(cv::InputArray image) -> void {
+    auto ObjectDetectorBase::publishDetectedObjects(cv::InputArray const& image) -> void {
         mDetectionsImageMessage.header.stamp = get_clock()->now();
         mDetectionsImageMessage.height = image.rows();
         mDetectionsImageMessage.width = image.cols();
@@ -189,7 +189,7 @@ namespace mrover {
 
         // Convert the RGB Image into the blob Image format
         cv::Mat blobSizedImage;
-        mModel.rbgImageToBlob(mModel, mRgbImage, blobSizedImage, mImageBlob);
+        mModel.rgbImageToBlob(mModel, mRgbImage, blobSizedImage, mImageBlob);
 
         mLoopProfiler.measureEvent("Conversion");
 
@@ -207,7 +207,7 @@ namespace mrover {
             mrover::msg::ImageTarget target;
             target.name = className;
             target.bearing = getTagBearing(blobSizedImage, box);
-            targets.targets.push_back(target);
+            targets.targets.emplace_back(target);
         }
 
         mTargetsPub->publish(targets);
@@ -218,7 +218,7 @@ namespace mrover {
         mLoopProfiler.measureEvent("Publication");
     }
 
-    auto ImageObjectDetector::getTagBearing(cv::InputArray image, cv::Rect const& box) const -> float {
+    auto ImageObjectDetector::getTagBearing(cv::InputArray const& image, cv::Rect const& box) const -> float {
         cv::Point2f center = cv::Point2f{box.tl()} + cv::Point2f{box.size()} / 2;
         float xNormalized = center.x / static_cast<float>(image.cols());
         float xRecentered = 0.5f - xNormalized;
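Review note: getTagBearing (last hunk above) recenters the box around the image center before
converting to an angle. The lines after the hunk are not shown, so the final scaling is an
assumption here; a worked sketch with an assumed 80-degree horizontal field of view:

    #include <cstdio>

    int main() {
        float const imageWidth = 640.0f; // blob-sized image width
        float const fovDegrees = 80.0f;  // assumed horizontal FOV, not from the patch

        float boxCenterX = 480.0f;                       // detection box center, pixels
        float xNormalized = boxCenterX / imageWidth;     // 0.75, range [0, 1]
        float xRecentered = 0.5f - xNormalized;          // -0.25, range [-0.5, 0.5]
        float bearingDegrees = xRecentered * fovDegrees; // -20: target right of center

        std::printf("bearing: %.1f degrees\n", bearingDegrees);
    }

Under this sign convention a target left of the image center yields a positive bearing.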
diff --git a/scripts/visualizer.py b/scripts/visualizer.py
deleted file mode 100755
index 336f9e67..00000000
--- a/scripts/visualizer.py
+++ /dev/null
@@ -1,161 +0,0 @@
-#!/usr/bin/env python3
-
-from __future__ import annotations
-import signal
-import graphviz  # type: ignore
-import time
-from PyQt5.QtWidgets import *  # type: ignore
-from PyQt5.QtCore import *  # type: ignore
-from PyQt5.QtGui import QPainter  # type: ignore
-from PyQt5.QtSvg import QSvgRenderer  # type:ignore
-
-import rclpy
-import rclpy.time
-import rclpy.logging
-from rclpy.node import Node
-from rclpy.executors import ExternalShutdownException
-import sys
-from mrover.msg import StateMachineStructure, StateMachineStateUpdate
-from threading import Lock
-from dataclasses import dataclass
-from typing import Optional, List, Dict
-import threading
-
-
-STRUCTURE_TOPIC = "nav_structure"
-STATUS_TOPIC = "nav_state"
-
-
-@dataclass
-class State:
-    name: str
-    children: List[State]
-
-
-class StateMachine:
-    def __init__(self):
-        self.states: Dict[str, State] = {}
-        self.structure: Optional[StateMachineStructure] = None
-        self.mutex: Lock = Lock()
-        self.cur_active: str = ""
-        self.previous_state: str = ""
-        self.needs_redraw: bool = True
-
-    def set_active_state(self, active_state):
-        """
-        sets the state specified to be active (thread safe)
-        """
-        with self.mutex:
-            if active_state != self.cur_active and active_state in self.states:
-                self.previous_state = self.cur_active
-                self.cur_active = active_state
-                self.needs_redraw = True
-                now = rclpy.time.Time()
-                rclpy.logging.get_logger("Visualizer").info(
-                    f"Current time: {now} Previous state: {self.previous_state} Current State: { self.cur_active}"
-                )
-
-    def _rebuild(self, structure: StateMachineStructure):
-        """
-        rebuilds the state dictionary with a new structure message
-        """
-        self.states = {child.origin: State(child.origin, []) for child in structure.transitions}
-        for transition in structure.transitions:
-            origin = transition.origin
-            for to in transition.destinations:
-                self.states[origin].children.append(self.states[to])
-        self.needs_redraw = True
-
-    def check_rebuild(self, structure: StateMachineStructure):
-        """
-        checks if the structure passed as input matches the structure already represented (thread safe)
-        """
-        with self.mutex:
-            if structure == self.structure:
-                return False
-            else:
-                self._rebuild(structure)
-                self.structure = structure
-
-    def container_status_callback(self, status: StateMachineStateUpdate):
-        self.set_active_state(status.state)
-
-    def container_structure_callback(self, structure: StateMachineStructure):
-        self.check_rebuild(structure)
-
-
-class GUI(QWidget):  # type: ignore
-    def __init__(self, state_machine_instance, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.label: QLabel = QLabel()  # type: ignore
-        self.timer: QTimer = QTimer()  # type: ignore
-        self.renderer: QSvgRenderer = QSvgRenderer()
-        self.timer.timeout.connect(self.update)
-        self.timer.start(1)
-        self.graph: Optional[graphviz.Digraph] = None
-        self.img = None
-        self.state_machine: StateMachine = state_machine_instance
-        self.viz = Node("Visualizer")
-
-        self.viz.create_subscription(
-            StateMachineStructure, STRUCTURE_TOPIC, self.state_machine.container_structure_callback, 1
-        )
-
-        self.viz.create_subscription(
-            StateMachineStateUpdate, STATUS_TOPIC, self.state_machine.container_status_callback, 1
-        )
-
-    def paintEvent(self, event):
-        painter = QPainter(self)
-        if self.img is not None:
-            self.renderer.load(self.img)
-            self.resize(self.renderer.defaultSize())
-            self.renderer.render(painter)
-
-    def update(self):  # type: ignore[override]
-        rclpy.spin_once(self.viz)
-        with self.state_machine.mutex:
-            if self.graph is None or self.state_machine.needs_redraw:
-                self.graph = graphviz.Digraph(comment="State Machine Diagram", format="svg")
-                for state_name in self.state_machine.states.keys():
-                    color = "red" if self.state_machine.cur_active == state_name else "black"
-                    self.graph.node(state_name, color=color)
-
-                for state in self.state_machine.states.values():
-                    for child in state.children:
-                        self.graph.edge(state.name, child.name)
-
-                self.state_machine.needs_redraw = False
-
-        self.img = self.graph.pipe()
-        self.repaint()
-
-
-def main():
-    try:
-        rclpy.init()
-
-        signal.signal(signal.SIGINT, signal.SIG_DFL)
-        state_machine = StateMachine()
-
-        print("Node Created...")
-
-        print("Subscriptions Created...")
-
-        print("Node Created...")
-
-        print("Subscriptions Created...")
-
-        app = QApplication([])  # type: ignore
-        g = GUI(state_machine)
-        g.show()
-        app.exec_()
-
-    except KeyboardInterrupt:
-        pass
-    except ExternalShutdownException:
-        sys.exit(1)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/tensorrt/inference.cu b/tensorrt/inference.cu
index eb2673ba..f8c83198 100644
--- a/tensorrt/inference.cu
+++ b/tensorrt/inference.cu
@@ -182,10 +182,9 @@ auto Inference::prepTensors() -> void {
     assert(mContext);
 
     // Create an appropriately sized output tensor
-    // TODO: Fix this
    auto const [nbDims, d] = mEngine->getTensorShape(mOutputTensorName.c_str());
    for (int i = 0; i < nbDims; i++) {
-        std::array<char, 512> message;
+        std::array<char, 512> message{};
        std::snprintf(message.data(), message.size(), "Size %d %d", i, d[i]);
        mLogger.log(ILogger::Severity::kINFO, message.data());
    }
diff --git a/tensorrt/tensorrt.hpp b/tensorrt/tensorrt.hpp
index dd45596b..2f4aea86 100644
--- a/tensorrt/tensorrt.hpp
+++ b/tensorrt/tensorrt.hpp
@@ -2,13 +2,6 @@
 
 #include "pch.hpp"
 
-struct Detection {
-    int classId{};
-    std::string className;
-    float confidence{};
-    cv::Rect box;
-};
-
 class TensortRT {
     std::string mModelName;
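Review note on the inference.cu hunk: value-initializing the message buffer
(std::array<char, N> message{}) zeroes every byte up front, so message.data() is a valid
NUL-terminated string even if the snprintf call is later removed or fails. A minimal sketch,
with an arbitrary buffer size:

    #include <array>
    #include <cstdio>

    int main() {
        std::array<char, 64> message{}; // all 64 bytes zeroed by value-initialization
        std::snprintf(message.data(), message.size(), "Size %d %d", 1, 640);
        std::puts(message.data()); // prints "Size 1 640"
    }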