diff --git a/perception/object_detector/object_detector.cpp b/perception/object_detector/object_detector.cpp index 1da154c6..7f53f883 100644 --- a/perception/object_detector/object_detector.cpp +++ b/perception/object_detector/object_detector.cpp @@ -1,11 +1,10 @@ #include "object_detector.hpp" +#include namespace mrover { ObjectDetectorBase::ObjectDetectorBase(rclcpp::NodeOptions const& options) : rclcpp::Node(NODE_NAME, options), mLoopProfiler{get_logger()} { std::string modelName; - float modelScoreThreshold{}; - float modelNMSThreshold{}; std::vector params{ {"camera_frame", mCameraFrame, "zed_left_camera_frame"}, @@ -15,8 +14,8 @@ namespace mrover { {"hitcount_threshold", mObjHitThreshold, 5}, {"hitcount_max", mObjMaxHitcount, 10}, {"model_name", modelName, "Large-Dataset"}, - {"model_score_threshold", modelScoreThreshold, 0.75}, - {"model_nms_threshold", modelNMSThreshold, 0.5}}; + {"model_score_threshold", mModelScoreThreshold, 0.75}, + {"model_nms_threshold", mModelNMSThreshold, 0.5}}; ParameterWrapper::declareParameters(this, params); @@ -31,9 +30,11 @@ namespace mrover { // Initialize TensorRT Inference Object and Get Important Output Information mTensorRT = TensortRT{modelName, packagePath.string()}; - mModel = Model(modelName, {0, 0}, {"bottle", "hammer"}, mTensorRT.getInputTensorSize(), mTensorRT.getOutputTensorSize(), {modelScoreThreshold, modelNMSThreshold}, ObjectDetectorBase::preprocessYOLOv8Input, ObjectDetectorBase::parseYOLOv8Output); + using namespace std::placeholders; - RCLCPP_INFO_STREAM(get_logger(), std::format("Object detector initialized with model: {} and thresholds: {} and {}", mModel.modelName, modelScoreThreshold, modelNMSThreshold)); + mModel = Model(modelName, {0, 0}, {"bottle", "hammer"}, mTensorRT.getInputTensorSize(), mTensorRT.getOutputTensorSize(), [this](Model const& model, cv::Mat& rgbImage, cv::Mat& blobSizedImage, cv::Mat& blob){preprocessYOLOv8Input(model, rgbImage, blobSizedImage, blob);}, [this](Model const& model, cv::Mat& output, std::vector& detections){parseYOLOv8Output(model, output, detections);}); + + RCLCPP_INFO_STREAM(get_logger(), std::format("Object detector initialized with model: {} and thresholds: {} and {}", mModel.modelName, mModelScoreThreshold, mModelNMSThreshold)); } StereoObjectDetector::StereoObjectDetector(rclcpp::NodeOptions const& options) : ObjectDetectorBase(options) { diff --git a/perception/object_detector/object_detector.hpp b/perception/object_detector/object_detector.hpp index ae0beda3..e8e92f63 100644 --- a/perception/object_detector/object_detector.hpp +++ b/perception/object_detector/object_detector.hpp @@ -5,8 +5,16 @@ namespace mrover { class ObjectDetectorBase : public rclcpp::Node { - protected: + struct Detection { + int classId{}; + std::string className; + float confidence{}; + cv::Rect box; + + Detection(int _classId, std::string _className, float _confidence, cv::Rect _box) : classId{_classId}, className{_className}, confidence{_confidence}, box{_box}{} + }; + struct Model { std::string modelName; @@ -18,20 +26,19 @@ namespace mrover { std::vector outputTensorSize; - // Additional space for params that one algorithm might need and not others - std::vector buffer; - // Converts an rgb image to a blob - std::function rbgImageToBlob; + std::function rgbImageToBlob; // Converts an output tensor into a vector of detections std::function&)> outputTensorToDetections; Model() = default; - Model(std::string _modelName, std::vector _objectHitCounts, std::vector _classes, std::vector _inputTensorSize, std::vector _outputTensorSize, std::vector _buffer, std::function _rbgImageToBlob, std::function&)> _outputTensorToDetections) : modelName{std::move(_modelName)}, objectHitCounts(std::move(_objectHitCounts)), classes(std::move(_classes)), inputTensorSize(std::move(_inputTensorSize)), outputTensorSize(std::move(_outputTensorSize)), buffer(std::move(_buffer)), rbgImageToBlob{std::move(_rbgImageToBlob)}, outputTensorToDetections{std::move(_outputTensorToDetections)} {} + Model(std::string _modelName, std::vector _objectHitCounts, std::vector _classes, std::vector _inputTensorSize, std::vector _outputTensorSize, std::function _rbgImageToBlob, std::function&)> _outputTensorToDetections) : modelName{std::move(_modelName)}, objectHitCounts(std::move(_objectHitCounts)), classes(std::move(_classes)), inputTensorSize(std::move(_inputTensorSize)), outputTensorSize(std::move(_outputTensorSize)), rgbImageToBlob{std::move(_rbgImageToBlob)}, outputTensorToDetections{std::move(_outputTensorToDetections)} {} }; + + static constexpr char const* NODE_NAME = "object_detector"; std::unique_ptr mTfBuffer = std::make_unique(get_clock()); @@ -57,6 +64,8 @@ namespace mrover { int mObjDecrementWeight{}; int mObjHitThreshold{}; int mObjMaxHitcount{}; + float mModelScoreThreshold{}; + float mModelNMSThreshold{}; auto spiralSearchForValidPoint(sensor_msgs::msg::PointCloud2::UniquePtr const& cloudPtr, std::size_t u, std::size_t v, @@ -66,15 +75,15 @@ namespace mrover { std::span detections, cv::Size const& imageSize = {640, 640}) -> void; - auto publishDetectedObjects(cv::InputArray image) -> void; - - static auto drawDetectionBoxes(cv::InputOutputArray image, std::span detections) -> void; + auto publishDetectedObjects(cv::InputArray const& image) -> void; - auto static parseYOLOv8Output(Model const& model, + static auto drawDetectionBoxes(cv::InputOutputArray& image, std::span detections) -> void; + + auto parseYOLOv8Output(Model const& model, cv::Mat& output, - std::vector& detections) -> void; + std::vector& detections) const -> void; - auto static preprocessYOLOv8Input(Model const& model, cv::Mat& rgbImage, cv::Mat& blobSizedImage, cv::Mat& blob) -> void; + static auto preprocessYOLOv8Input(Model const& model, cv::Mat const& rgbImage, cv::Mat& blobSizedImage, cv::Mat& blob) -> void; public: explicit ObjectDetectorBase(rclcpp::NodeOptions const& options = rclcpp::NodeOptions()); @@ -105,7 +114,7 @@ namespace mrover { public: explicit ImageObjectDetector(rclcpp::NodeOptions const& options = rclcpp::NodeOptions()); - auto getTagBearing(cv::InputArray image, cv::Rect const& box) const -> float; + auto getTagBearing(cv::InputArray const& image, cv::Rect const& box) const -> float; auto imageCallback(sensor_msgs::msg::Image::UniquePtr const& msg) -> void; }; diff --git a/perception/object_detector/object_detector.parsing.cpp b/perception/object_detector/object_detector.parsing.cpp index dc7d7167..0aa12974 100644 --- a/perception/object_detector/object_detector.parsing.cpp +++ b/perception/object_detector/object_detector.parsing.cpp @@ -1,14 +1,10 @@ #include "object_detector.hpp" namespace mrover { - auto ObjectDetectorBase::preprocessYOLOv8Input(Model const& model, cv::Mat& rgbImage, cv::Mat& blobSizedImage, cv::Mat& blob) -> void { + auto ObjectDetectorBase::preprocessYOLOv8Input(Model const& model, cv::Mat const& rgbImage, cv::Mat& blobSizedImage, cv::Mat& blob) -> void { if (model.inputTensorSize.size() != 4) { throw std::runtime_error("Expected Blob Size to be of size 4, are you using the correct model type?"); } - if (model.buffer.size() != 2) { - throw std::runtime_error("Expected 2 additional parameters!"); - } - static constexpr double UCHAR_TO_DOUBLE = 1.0 / 255.0; cv::Size blobSize{static_cast(model.inputTensorSize[2]), static_cast(model.inputTensorSize[3])}; @@ -16,7 +12,7 @@ namespace mrover { cv::dnn::blobFromImage(blobSizedImage, blob, UCHAR_TO_DOUBLE, blobSize, cv::Scalar{}, false, false); } - auto ObjectDetectorBase::parseYOLOv8Output(Model const& model, cv::Mat& output, std::vector& detections) -> void { + auto ObjectDetectorBase::parseYOLOv8Output(Model const& model, cv::Mat& output, std::vector& detections) const -> void { // Parse model specific dimensioning from the output // The input to this function is expecting a YOLOv8 style model, thus the dimensions should be > rows @@ -26,12 +22,12 @@ namespace mrover { throw std::runtime_error(message.data()); } - int rows = output.cols; - int dimensions = output.rows; + int numCandidates = output.cols; + int predictionDimension = output.rows; // The output of the model is a batchSizex84x8400 matrix // This converts the model to a TODO: Check this dimensioning - output = output.reshape(1, dimensions); + output = output.reshape(1, predictionDimension); cv::transpose(output, output); // This function expects the image to already be in the correct format thus no distrotion is needed @@ -44,16 +40,16 @@ namespace mrover { std::vector boxes; // Each row of the output is a detection with a bounding box and associated class scores - for (int r = 0; r < rows; ++r) { + for (int r = 0; r < numCandidates; ++r) { // Skip first four values as they are the box data - cv::Mat scores = output.row(r).colRange(4, dimensions); + cv::Mat scores = output.row(r).colRange(4, predictionDimension); cv::Point classId; double maxClassScore; cv::minMaxLoc(scores, nullptr, &maxClassScore, nullptr, &classId); // Determine if the class is an acceptable confidence level, otherwise disregard - if (maxClassScore <= model.buffer[0]) continue; + if (maxClassScore <= mModelScoreThreshold) continue; confidences.push_back(static_cast(maxClassScore)); classIds.push_back(classId.x); @@ -77,22 +73,12 @@ namespace mrover { //Coalesce the boxes into a smaller number of distinct boxes std::vector nmsResult; - cv::dnn::NMSBoxes(boxes, confidences, model.buffer[0], model.buffer[1], nmsResult); + cv::dnn::NMSBoxes(boxes, confidences, mModelScoreThreshold, mModelNMSThreshold, nmsResult); // Fill in the output Detections Vector for (int i: nmsResult) { - Detection result; - - //Fill in the id and confidence for the class - result.classId = classIds[i]; - result.confidence = confidences[i]; - - //Fill in the class name and box information - result.className = model.classes[result.classId]; - result.box = boxes[i]; - - //Push back the detection into the for storagevector - detections.push_back(result); + //Push back the detection into the for storage vector + detections.emplace_back(classIds[i], model.classes[classIds[i]], confidences[i], boxes[i]); } } diff --git a/perception/object_detector/object_detector.processing.cpp b/perception/object_detector/object_detector.processing.cpp index 9ef29662..4034d245 100644 --- a/perception/object_detector/object_detector.processing.cpp +++ b/perception/object_detector/object_detector.processing.cpp @@ -24,7 +24,7 @@ namespace mrover { // Convert the RGB Image into the blob Image format cv::Mat blobSizedImage; - mModel.rbgImageToBlob(mModel, mRgbImage, blobSizedImage, mImageBlob); + mModel.rgbImageToBlob(mModel, mRgbImage, blobSizedImage, mImageBlob); mLoopProfiler.measureEvent("Conversion"); @@ -151,7 +151,7 @@ namespace mrover { } } - auto ObjectDetectorBase::publishDetectedObjects(cv::InputArray image) -> void { + auto ObjectDetectorBase::publishDetectedObjects(cv::InputArray const& image) -> void { mDetectionsImageMessage.header.stamp = get_clock()->now(); mDetectionsImageMessage.height = image.rows(); mDetectionsImageMessage.width = image.cols(); @@ -189,7 +189,7 @@ namespace mrover { // Convert the RGB Image into the blob Image format cv::Mat blobSizedImage; - mModel.rbgImageToBlob(mModel, mRgbImage, blobSizedImage, mImageBlob); + mModel.rgbImageToBlob(mModel, mRgbImage, blobSizedImage, mImageBlob); mLoopProfiler.measureEvent("Conversion"); @@ -207,7 +207,7 @@ namespace mrover { mrover::msg::ImageTarget target; target.name = className; target.bearing = getTagBearing(blobSizedImage, box); - targets.targets.push_back(target); + targets.targets.emplace_back(target); } mTargetsPub->publish(targets); @@ -218,7 +218,7 @@ namespace mrover { mLoopProfiler.measureEvent("Publication"); } - auto ImageObjectDetector::getTagBearing(cv::InputArray image, cv::Rect const& box) const -> float { + auto ImageObjectDetector::getTagBearing(cv::InputArray const& image, cv::Rect const& box) const -> float { cv::Point2f center = cv::Point2f{box.tl()} + cv::Point2f{box.size()} / 2; float xNormalized = center.x / static_cast(image.cols()); float xRecentered = 0.5f - xNormalized; diff --git a/scripts/visualizer.py b/scripts/visualizer.py deleted file mode 100755 index 336f9e67..00000000 --- a/scripts/visualizer.py +++ /dev/null @@ -1,161 +0,0 @@ -#!/usr/bin/env python3 - -from __future__ import annotations -import signal -import graphviz # type: ignore -import time -from PyQt5.QtWidgets import * # type: ignore -from PyQt5.QtCore import * # type: ignore -from PyQt5.QtGui import QPainter # type: ignore -from PyQt5.QtSvg import QSvgRenderer # type:ignore - -import rclpy -import rclpy.time -import rclpy.logging -from rclpy.node import Node -from rclpy.executors import ExternalShutdownException -import sys -from mrover.msg import StateMachineStructure, StateMachineStateUpdate -from threading import Lock -from dataclasses import dataclass -from typing import Optional, List, Dict -import threading - - -STRUCTURE_TOPIC = "nav_structure" -STATUS_TOPIC = "nav_state" - - -@dataclass -class State: - name: str - children: List[State] - - -class StateMachine: - def __init__(self): - self.states: Dict[str, State] = {} - self.structure: Optional[StateMachineStructure] = None - self.mutex: Lock = Lock() - self.cur_active: str = "" - self.previous_state: str = "" - self.needs_redraw: bool = True - - def set_active_state(self, active_state): - """ - sets the state specified to be active (thread safe) - """ - with self.mutex: - if active_state != self.cur_active and active_state in self.states: - self.previous_state = self.cur_active - self.cur_active = active_state - self.needs_redraw = True - now = rclpy.time.Time() - rclpy.logging.get_logger("Visualizer").info( - f"Current time: {now} Previous state: {self.previous_state} Current State: { self.cur_active}" - ) - - def _rebuild(self, structure: StateMachineStructure): - """ - rebuilds the state dictionary with a new structure message - """ - self.states = {child.origin: State(child.origin, []) for child in structure.transitions} - for transition in structure.transitions: - origin = transition.origin - for to in transition.destinations: - self.states[origin].children.append(self.states[to]) - self.needs_redraw = True - - def check_rebuild(self, structure: StateMachineStructure): - """ - checks if the structure passed as input matches the structure already represented (thread safe) - """ - with self.mutex: - if structure == self.structure: - return False - else: - self._rebuild(structure) - self.structure = structure - - def container_status_callback(self, status: StateMachineStateUpdate): - self.set_active_state(status.state) - - def container_structure_callback(self, structure: StateMachineStructure): - self.check_rebuild(structure) - - -class GUI(QWidget): # type: ignore - def __init__(self, state_machine_instance, *args, **kwargs): - super().__init__(*args, **kwargs) - self.label: QLabel = QLabel() # type: ignore - self.timer: QTimer = QTimer() # type: ignore - self.renderer: QSvgRenderer = QSvgRenderer() - self.timer.timeout.connect(self.update) - self.timer.start(1) - self.graph: Optional[graphviz.Digraph] = None - self.img = None - self.state_machine: StateMachine = state_machine_instance - self.viz = Node("Visualizer") - - self.viz.create_subscription( - StateMachineStructure, STRUCTURE_TOPIC, self.state_machine.container_structure_callback, 1 - ) - - self.viz.create_subscription( - StateMachineStateUpdate, STATUS_TOPIC, self.state_machine.container_status_callback, 1 - ) - - def paintEvent(self, event): - painter = QPainter(self) - if self.img is not None: - self.renderer.load(self.img) - self.resize(self.renderer.defaultSize()) - self.renderer.render(painter) - - def update(self): # type: ignore[override] - rclpy.spin_once(self.viz) - with self.state_machine.mutex: - if self.graph is None or self.state_machine.needs_redraw: - self.graph = graphviz.Digraph(comment="State Machine Diagram", format="svg") - for state_name in self.state_machine.states.keys(): - color = "red" if self.state_machine.cur_active == state_name else "black" - self.graph.node(state_name, color=color) - - for state in self.state_machine.states.values(): - for child in state.children: - self.graph.edge(state.name, child.name) - - self.state_machine.needs_redraw = False - - self.img = self.graph.pipe() - self.repaint() - - -def main(): - try: - rclpy.init() - - signal.signal(signal.SIGINT, signal.SIG_DFL) - state_machine = StateMachine() - - print("Node Created...") - - print("Subscriptions Created...") - - print("Node Created...") - - print("Subscriptions Created...") - - app = QApplication([]) # type: ignore - g = GUI(state_machine) - g.show() - app.exec_() - - except KeyboardInterrupt: - pass - except ExternalShutdownException: - sys.exit(1) - - -if __name__ == "__main__": - main() diff --git a/tensorrt/inference.cu b/tensorrt/inference.cu index eb2673ba..f8c83198 100644 --- a/tensorrt/inference.cu +++ b/tensorrt/inference.cu @@ -182,10 +182,9 @@ auto Inference::prepTensors() -> void { assert(mContext); // Create an appropriately sized output tensor - // TODO: Fix this auto const [nbDims, d] = mEngine->getTensorShape(mOutputTensorName.c_str()); for (int i = 0; i < nbDims; i++) { - std::array message; + std::array message{}; std::snprintf(message.data(), message.size(), "Size %d %d", i, d[i]); mLogger.log(ILogger::Severity::kINFO, message.data()); } diff --git a/tensorrt/tensorrt.hpp b/tensorrt/tensorrt.hpp index dd45596b..2f4aea86 100644 --- a/tensorrt/tensorrt.hpp +++ b/tensorrt/tensorrt.hpp @@ -2,13 +2,6 @@ #include "pch.hpp" -struct Detection { - int classId{}; - std::string className; - float confidence{}; - cv::Rect box; -}; - class TensortRT { std::string mModelName;