diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8c9e02fe..6ae5d5d9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -284,7 +284,7 @@ ament_target_dependencies(arm_controller rclcpp)
 
 if (Gst_FOUND AND GstApp_FOUND)
     mrover_add_node(usb_camera esw/usb_camera/*.cpp esw/usb_camera/pch.hpp)
-    target_link_libraries(usb_camera PkgConfig::Gst PkgConfig::GstApp opencv_core opencv_imgcodecs)
+    target_link_libraries(usb_camera PkgConfig::Gst PkgConfig::GstApp opencv_core opencv_imgcodecs parameter_utils)
     ament_target_dependencies(usb_camera rclcpp)
 
     if (LibUdev_FOUND)
diff --git a/esw/usb_camera/pch.hpp b/esw/usb_camera/pch.hpp
index 76b73552..da4a1ba0 100644
--- a/esw/usb_camera/pch.hpp
+++ b/esw/usb_camera/pch.hpp
@@ -28,3 +28,5 @@
 #include
 #include
 #include
+
+#include <parameter.hpp>
diff --git a/esw/usb_camera/usb_camera.cpp b/esw/usb_camera/usb_camera.cpp
index 20d83109..46677ba6 100644
--- a/esw/usb_camera/usb_camera.cpp
+++ b/esw/usb_camera/usb_camera.cpp
@@ -13,33 +13,28 @@ namespace mrover {
 
     UsbCamera::UsbCamera() : Node{"usb_camera", rclcpp::NodeOptions{}.use_intra_process_comms(true)} {
         try {
             /* Parameters */
-            declare_parameter("width", rclcpp::ParameterType::PARAMETER_INTEGER);
-            declare_parameter("height", rclcpp::ParameterType::PARAMETER_INTEGER);
-            declare_parameter("framerate", rclcpp::ParameterType::PARAMETER_INTEGER);
-            declare_parameter("device", rclcpp::ParameterType::PARAMETER_STRING, [] {
-                rcl_interfaces::msg::ParameterDescriptor d;
-                d.read_only = true;
-                d.description = "The device path to the USB camera, e.g. /dev/video0";
-                return d;
-            }());
-            declare_parameter("decode_jpeg_from_device", rclcpp::ParameterType::PARAMETER_BOOL, [] {
-                rcl_interfaces::msg::ParameterDescriptor d;
-                d.read_only = true;
-                d.description = "If true, decode JPEG directly from the USB device instead of YUY2. "
-                                "This decreases image quality but increases performance and reduces USB bus load. "
-                                "It also reduces the 'jelly' effect caused by large YUY2 formats.";
-                return d;
-            }());
-
-            mWidth = get_parameter("width").as_int();
-            mHeight = get_parameter("height").as_int();
-            std::int64_t framerate = get_parameter("framerate").as_int();
-            std::string device = get_parameter("device").as_string();
-            bool decodeJpegFromDevice = get_parameter("decode_jpeg_from_device").as_bool();
+            int framerate{};
+            std::string device{};
+            std::string imageTopicName{};
+            std::string cameraInfoTopicName{};
+            double watchdogTimeout{};
+            bool decodeJpegFromDevice{};
+
+            std::vector<ParameterWrapper> params{
+                    {"width", mWidth, 640},
+                    {"height", mHeight, 480},
+                    {"framerate", framerate, 30},
+                    {"device", device, "/dev/video2"},
+                    {"image_topic", imageTopicName, "/usb_camera/image"},
+                    {"camera_info_topic", cameraInfoTopicName, "/usb_camera/camera_info"},
+                    {"watchdog_timeout", watchdogTimeout, 1.0},
+                    {"decode_jpeg_from_device", decodeJpegFromDevice, false}};
+
+            ParameterWrapper::declareParameters(this, params);
 
             /* Interfaces */
-            mImgPub = create_publisher<sensor_msgs::msg::Image>("image", 1);
-            mCamInfoPub = create_publisher<sensor_msgs::msg::CameraInfo>("camera_info", 1);
+            mImgPub = create_publisher<sensor_msgs::msg::Image>(imageTopicName, 1);
+            mCamInfoPub = create_publisher<sensor_msgs::msg::CameraInfo>(cameraInfoTopicName, 1);
 
             /* Pipeline */
             gst_init(nullptr, nullptr);
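The usb_camera node now declares its parameters through `ParameterWrapper::declareParameters`, but the helper itself is not part of this diff (it comes from the newly linked `parameter_utils` library). A minimal sketch of the shape such a wrapper could take — this is an assumption for illustration, not the actual mrover implementation:

```cpp
// Hypothetical sketch of the ParameterWrapper pattern used above: each entry binds
// a parameter name to a caller-owned variable plus a default, and declareParameters()
// declares every parameter on the node and writes the resolved values back.
#include <functional>
#include <string>
#include <utility>
#include <variant>
#include <vector>

#include <rclcpp/rclcpp.hpp>

struct ParameterWrapper {
    std::string name;
    std::variant<std::reference_wrapper<int>, std::reference_wrapper<double>,
                 std::reference_wrapper<std::string>, std::reference_wrapper<bool>>
            ref;
    rclcpp::ParameterValue defaultValue;

    ParameterWrapper(std::string n, int& v, int d) : name{std::move(n)}, ref{v}, defaultValue{d} {}
    ParameterWrapper(std::string n, double& v, double d) : name{std::move(n)}, ref{v}, defaultValue{d} {}
    ParameterWrapper(std::string n, bool& v, bool d) : name{std::move(n)}, ref{v}, defaultValue{d} {}
    ParameterWrapper(std::string n, std::string& v, char const* d) : name{std::move(n)}, ref{v}, defaultValue{std::string{d}} {}

    static void declareParameters(rclcpp::Node* node, std::vector<ParameterWrapper>& params) {
        for (auto& p: params) {
            // Declare on the node, then copy the resolved value into the bound variable
            rclcpp::ParameterValue resolved = node->declare_parameter(p.name, p.defaultValue);
            std::visit([&](auto ref) {
                using T = std::decay_t<decltype(ref.get())>;
                ref.get() = rclcpp::Parameter{p.name, resolved}.get_value<T>();
            }, p.ref);
        }
    }
};
```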
" - "It also reduces the 'jelly' effect caused by large YUY2 formats."; - return d; - }()); - - mWidth = get_parameter("width").as_int(); - mHeight = get_parameter("height").as_int(); - std::int64_t framerate = get_parameter("framerate").as_int(); - std::string device = get_parameter("device").as_string(); - bool decodeJpegFromDevice = get_parameter("decode_jpeg_from_device").as_bool(); + int framerate{}; + std::string device{}; + std::string imageTopicName{}; + std::string cameraInfoTopicName{}; + double watchdogTimeout{}; + bool decodeJpegFromDevice{}; + + std::vector params{ + {"width", mWidth, 640}, + {"height", mHeight, 480}, + {"framerate", framerate, 30}, + {"device", device, "/dev/video2"}, + {"image_topic", imageTopicName, "/usb_camera/image"}, + {"camera_info_topic", cameraInfoTopicName, "/usb_camera/camera_info"}, + {"watchdog_timeout", watchdogTimeout, 1.0}, + {"decode_jpeg_from_device", decodeJpegFromDevice, false}}; + + ParameterWrapper::declareParameters(this, params); /* Interfaces */ - mImgPub = create_publisher("image", 1); - mCamInfoPub = create_publisher("camera_info", 1); + mImgPub = create_publisher(imageTopicName, 1); + mCamInfoPub = create_publisher(cameraInfoTopicName, 1); /* Pipeline */ gst_init(nullptr, nullptr); diff --git a/esw/usb_camera/usb_camera.hpp b/esw/usb_camera/usb_camera.hpp index 27212a90..3c9eecff 100644 --- a/esw/usb_camera/usb_camera.hpp +++ b/esw/usb_camera/usb_camera.hpp @@ -9,7 +9,7 @@ namespace mrover { rclcpp::Publisher::SharedPtr mCamInfoPub; rclcpp::Publisher::SharedPtr mImgPub; - std::uint16_t mWidth{}, mHeight{}; + int mWidth{}, mHeight{}; GstElement *mStreamSink{}, *mPipeline{}; GMainLoop* mMainLoop{}; diff --git a/perception/object_detector/object_detector.cpp b/perception/object_detector/object_detector.cpp index dc7f91e6..8ed72b0d 100644 --- a/perception/object_detector/object_detector.cpp +++ b/perception/object_detector/object_detector.cpp @@ -3,6 +3,9 @@ namespace mrover { ObjectDetectorBase::ObjectDetectorBase() : rclcpp::Node(NODE_NAME), mLoopProfiler{get_logger()} { + std::string modelName; + float modelScoreThreshold{}; + float modelNMSThreshold{}; std::vector params{ {"camera_frame", mCameraFrame, "zed_left_camera_frame"}, @@ -11,36 +14,69 @@ namespace mrover { {"decrement_weight", mObjDecrementWeight, 1}, {"hitcount_threshold", mObjHitThreshold, 5}, {"hitcount_max", mObjMaxHitcount, 10}, - {"model_name", mModelName, "Large-Dataset"}, - {"model_score_threshold", mModelScoreThreshold, 0.75}, - {"model_nms_threshold", mModelNmsThreshold, 0.5}}; + {"model_name", modelName, "Large-Dataset"}, + {"model_score_threshold", modelScoreThreshold, 0.75}, + {"model_nms_threshold", modelNMSThreshold, 0.5}}; ParameterWrapper::declareParameters(this, params); + // All of these variables will be invalidated after calling this function + std::filesystem::path packagePath = std::filesystem::path{ament_index_cpp::get_package_prefix("mrover")} / ".." / ".." 
/ "src" / "mrover"; - RCLCPP_INFO_STREAM(get_logger(), "Opening Model " << mModelName); + RCLCPP_INFO_STREAM(get_logger(), "Opening Model " << modelName); RCLCPP_INFO_STREAM(get_logger(), "Found package path " << packagePath); - mTensorRT = TensortRT{mModelName, packagePath.string()}; + // Initialize TensorRT Inference Object and Get Important Output Information + mTensorRT = TensortRT{modelName, packagePath.string()}; - mDebugImgPub = create_publisher("object_detector/debug_img", 1); + mModel = Model(modelName, {0, 0}, {"bottle", "hammer"}, mTensorRT.getInputTensorSize(), mTensorRT.getOutputTensorSize(), {modelScoreThreshold, modelNMSThreshold}, ObjectDetectorBase::preprocessYOLOv8Input, ObjectDetectorBase::parseYOLOv8Output); - RCLCPP_INFO_STREAM(get_logger(), std::format("Object detector initialized with model: {} and thresholds: {} and {}", mModelName, mModelScoreThreshold, mModelNmsThreshold)); + RCLCPP_INFO_STREAM(get_logger(), std::format("Object detector initialized with model: {} and thresholds: {} and {}", mModel.modelName, modelScoreThreshold, modelNMSThreshold)); } StereoObjectDetector::StereoObjectDetector() { - mSensorSub = create_subscription("/camera/left/points", 1, [this](sensor_msgs::msg::PointCloud2::UniquePtr const& msg) { + RCLCPP_INFO_STREAM(get_logger(), "Creating Stereo Object Detector..."); + + mDebugImgPub = create_publisher("/stereo_object_detector/debug_img", 1); + + mSensorSub = create_subscription("/zed/left/points", 1, [this](sensor_msgs::msg::PointCloud2::UniquePtr const& msg) { StereoObjectDetector::pointCloudCallback(msg); }); } + + ImageObjectDetector::ImageObjectDetector() { + RCLCPP_INFO_STREAM(get_logger(), "Creating Image Object Detector..."); + + std::vector params{ + {"long_range_camera/fov", mCameraHorizontalFov, 80.0}}; + + ParameterWrapper::declareParameters(this, params); + + mDebugImgPub = create_publisher("/long_range_object_detector/debug_img", 1); + + mSensorSub = create_subscription("/usb_camera/image", 1, [this](sensor_msgs::msg::Image::UniquePtr const& msg) { + ImageObjectDetector::imageCallback(msg); + }); + + mTargetsPub = create_publisher("/long_range_camera/objects", 1); + } } // namespace mrover auto main(int argc, char** argv) -> int { rclcpp::init(argc, argv); - rclcpp::spin(std::make_shared()); + + // DO NOT REMOVE OR ELSE REF COUNT WILL GO TO ZERO + auto imgOD = std::make_shared(); + auto stereoOD = std::make_shared(); + + rclcpp::executors::SingleThreadedExecutor executor; + executor.add_node(imgOD); + executor.add_node(stereoOD); + executor.spin(); + rclcpp::shutdown(); return EXIT_SUCCESS; } diff --git a/perception/object_detector/object_detector.hpp b/perception/object_detector/object_detector.hpp index 28ebe502..1e266d74 100644 --- a/perception/object_detector/object_detector.hpp +++ b/perception/object_detector/object_detector.hpp @@ -7,36 +7,56 @@ namespace mrover { class ObjectDetectorBase : public rclcpp::Node { protected: + struct Model { + std::string modelName; + + std::vector objectHitCounts; + + std::vector classes; + + std::vector inputTensorSize; + + std::vector outputTensorSize; + + // Additional space for params that one algorithm might need and not others + std::vector buffer; + + // Converts an rgb image to a blob + std::function rbgImageToBlob; + + // Converts an output tensor into a vector of detections + std::function&)> outputTensorToDetections; + + Model() = default; + + Model(std::string _modelName, std::vector _objectHitCounts, std::vector _classes, std::vector _inputTensorSize, std::vector 
+
         static constexpr char const* NODE_NAME = "object_detector";
 
         std::unique_ptr<tf2_ros::Buffer> mTfBuffer = std::make_unique<tf2_ros::Buffer>(get_clock());
         std::shared_ptr<tf2_ros::TransformBroadcaster> mTfBroadcaster = std::make_shared<tf2_ros::TransformBroadcaster>(this);
         std::shared_ptr<tf2_ros::TransformListener> mTfListener = std::make_shared<tf2_ros::TransformListener>(*mTfBuffer);
 
+        Model mModel;
+
         std::string mCameraFrame;
         std::string mWorldFrame;
 
-        std::string mModelName;
+        cv::Mat mRgbImage, mImageBlob;
 
         LoopProfiler mLoopProfiler;
 
         TensortRT mTensorRT;
 
-        cv::Mat mRgbImage, mImageBlob;
         sensor_msgs::msg::Image mDetectionsImageMessage;
 
         rclcpp::Publisher<sensor_msgs::msg::Image>::SharedPtr mDebugImgPub;
-        rclcpp::Subscription<sensor_msgs::msg::PointCloud2>::SharedPtr mSensorSub;
-
-        // TODO(quintin): Do not hard code exactly two classes
-        std::vector<int> mObjectHitCounts{0, 0};
 
         int mObjIncrementWeight{};
         int mObjDecrementWeight{};
         int mObjHitThreshold{};
         int mObjMaxHitcount{};
-        float mModelScoreThreshold{};
-        float mModelNmsThreshold{};
 
         auto spiralSearchForValidPoint(sensor_msgs::msg::PointCloud2::UniquePtr const& cloudPtr, std::size_t u, std::size_t v,
@@ -50,6 +70,12 @@
 
         static auto drawDetectionBoxes(cv::InputOutputArray image, std::span<Detection const> detections) -> void;
 
+        auto static parseYOLOv8Output(Model const& model,
+                                      cv::Mat& output,
+                                      std::vector<Detection>& detections) -> void;
+
+        auto static preprocessYOLOv8Input(Model const& model, cv::Mat& rgbImage, cv::Mat& blobSizedImage, cv::Mat& blob) -> void;
+
     public:
         explicit ObjectDetectorBase();
 
@@ -57,6 +83,9 @@
     };
 
     class StereoObjectDetector final : public ObjectDetectorBase {
+    private:
+        rclcpp::Subscription<sensor_msgs::msg::PointCloud2>::SharedPtr mSensorSub;
+
     public:
         explicit StereoObjectDetector();
 
@@ -64,4 +93,20 @@
 
         auto pointCloudCallback(sensor_msgs::msg::PointCloud2::UniquePtr const& msg) -> void;
     };
+
+    class ImageObjectDetector final : public ObjectDetectorBase {
+    private:
+        rclcpp::Publisher<mrover::msg::ImageTargets>::SharedPtr mTargetsPub;
+
+        rclcpp::Subscription<sensor_msgs::msg::Image>::SharedPtr mSensorSub;
+
+        float mCameraHorizontalFov{};
+
+    public:
+        explicit ImageObjectDetector();
+
+        auto getTagBearing(cv::InputArray image, cv::Rect const& box) const -> float;
+
+        auto imageCallback(sensor_msgs::msg::Image::UniquePtr const& msg) -> void;
+    };
 } // namespace mrover
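`Model` is effectively a strategy object: everything network-specific (class names, tensor extents, thresholds, pre/post-processing) lives in its members, so a different detector head could be wired in without touching the node plumbing. A hypothetical sketch of that (the `MyNet` names are invented for illustration, and access control on the nested `Model` type is ignored for brevity — in practice this would live inside a class derived from `ObjectDetectorBase`):

```cpp
// Hypothetical second network plugged into the same node. Only the two strategy
// functions and the tensor metadata change; the callbacks stay untouched.
static auto preprocessMyNetInput(Model const& model, cv::Mat& rgb, cv::Mat& sized, cv::Mat& blob) -> void {
    cv::Size inputSize{static_cast<int>(model.inputTensorSize[2]), static_cast<int>(model.inputTensorSize[3])};
    cv::resize(rgb, sized, inputSize);
    cv::dnn::blobFromImage(sized, blob, 1.0 / 255.0, inputSize, cv::Scalar{}, false, false);
}

static auto parseMyNetOutput(Model const& model, cv::Mat& output, std::vector<Detection>& detections) -> void {
    detections.clear(); // network-specific decoding would go here
}

// In the constructor of a hypothetical MyNetObjectDetector:
mModel = Model("MyNet", {0, 0}, {"bottle", "hammer"},
               mTensorRT.getInputTensorSize(), mTensorRT.getOutputTensorSize(),
               {0.5f, 0.5f}, // score and NMS thresholds, carried in Model::buffer
               preprocessMyNetInput, parseMyNetOutput);
```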
diff --git a/perception/object_detector/object_detector.parsing.cpp b/perception/object_detector/object_detector.parsing.cpp
new file mode 100644
index 00000000..dc7d7167
--- /dev/null
+++ b/perception/object_detector/object_detector.parsing.cpp
@@ -0,0 +1,99 @@
+#include "object_detector.hpp"
+
+namespace mrover {
+
+    auto ObjectDetectorBase::preprocessYOLOv8Input(Model const& model, cv::Mat& rgbImage, cv::Mat& blobSizedImage, cv::Mat& blob) -> void {
+        if (model.inputTensorSize.size() != 4) {
+            throw std::runtime_error("Expected the input tensor to have 4 dimensions, are you using the correct model type?");
+        }
+
+        if (model.buffer.size() != 2) {
+            throw std::runtime_error("Expected 2 additional parameters!");
+        }
+
+        static constexpr double UCHAR_TO_DOUBLE = 1.0 / 255.0;
+
+        cv::Size blobSize{static_cast<int>(model.inputTensorSize[2]), static_cast<int>(model.inputTensorSize[3])};
+        cv::resize(rgbImage, blobSizedImage, blobSize);
+        cv::dnn::blobFromImage(blobSizedImage, blob, UCHAR_TO_DOUBLE, blobSize, cv::Scalar{}, false, false);
+    }
+
+    auto ObjectDetectorBase::parseYOLOv8Output(Model const& model, cv::Mat& output, std::vector<Detection>& detections) -> void {
+        // Parse model-specific dimensioning from the output
+
+        // The input to this function is expected to be a YOLOv8-style output, thus the number of columns should exceed the number of rows
+        if (output.cols <= output.rows) {
+            std::array<char, 256> message{};
+            snprintf(message.data(), message.size(), "Something is wrong with the model interpretation: %d cols and %d rows", output.cols, output.rows);
+            throw std::runtime_error(message.data());
+        }
+
+        int rows = output.cols;
+        int dimensions = output.rows;
+
+        // The output of the model is a [batchSize x 84 x 8400] matrix for the 80-class COCO variant,
+        // i.e. [batchSize x (4 + numClasses) x numCandidates] in general
+        // This reshapes and transposes it so that each row is one candidate detection  TODO: Check this dimensioning
+        output = output.reshape(1, dimensions);
+        cv::transpose(output, output);
+
+        // This function expects the image to already be in the correct format, thus no distortion correction is needed
+        float const xFactor = 1.0;
+        float const yFactor = 1.0;
+
+        // Intermediate storage containers
+        std::vector<int> classIds;
+        std::vector<float> confidences;
+        std::vector<cv::Rect> boxes;
+
+        // Each row of the output is a detection with a bounding box and associated class scores
+        for (int r = 0; r < rows; ++r) {
+            // Skip the first four values as they are the box data
+            cv::Mat scores = output.row(r).colRange(4, dimensions);
+
+            cv::Point classId;
+            double maxClassScore;
+            cv::minMaxLoc(scores, nullptr, &maxClassScore, nullptr, &classId);
+
+            // Determine if the class is at an acceptable confidence level, otherwise disregard it
+            if (maxClassScore <= model.buffer[0]) continue;
+
+            confidences.push_back(static_cast<float>(maxClassScore));
+            classIds.push_back(classId.x);
+
+            // Get the bounding box data
+            cv::Mat box = output.row(r).colRange(0, 4);
+            auto x = box.at<float>(0);
+            auto y = box.at<float>(1);
+            auto w = box.at<float>(2);
+            auto h = box.at<float>(3);
+
+            // Cast the corners into integers to be used on pixels
+            auto left = static_cast<int>((x - 0.5 * w) * xFactor);
+            auto top = static_cast<int>((y - 0.5 * h) * yFactor);
+            auto width = static_cast<int>(w * xFactor);
+            auto height = static_cast<int>(h * yFactor);
+
+            // Push the box into storage
+            boxes.emplace_back(left, top, width, height);
+        }
+
+        // Coalesce the boxes into a smaller number of distinct boxes
+        std::vector<int> nmsResult;
+        cv::dnn::NMSBoxes(boxes, confidences, model.buffer[0], model.buffer[1], nmsResult);
+
+        // Fill in the output detections vector
+        for (int i: nmsResult) {
+            Detection result;
+
+            // Fill in the id and confidence for the class
+            result.classId = classIds[i];
+            result.confidence = confidences[i];
+
+            // Fill in the class name and box information
+            result.className = model.classes[result.classId];
+            result.box = boxes[i];
+
+            // Push the detection into the storage vector
+            detections.push_back(result);
+        }
+    }
+
+} // namespace mrover
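A standalone illustration of the reshape/transpose step above may help. For this two-class model the raw output is `[(4 + 2) x numCandidates]` rather than the COCO `[84 x 8400]`; the dummy values below are invented so the arithmetic can be checked by hand:

```cpp
// Decode one candidate from a dummy 2-class YOLOv8-style output of shape
// [6 x 8400]: rows are (cx, cy, w, h, score_bottle, score_hammer).
#include <cstdio>
#include <opencv2/core.hpp>

int main() {
    cv::Mat output{6, 8400, CV_32F, cv::Scalar{0}};
    output.at<float>(0, 0) = 320.f; // cx
    output.at<float>(1, 0) = 240.f; // cy
    output.at<float>(2, 0) = 100.f; // w
    output.at<float>(3, 0) = 50.f;  // h
    output.at<float>(4, 0) = 0.9f;  // "bottle" score

    int dimensions = output.rows;              // 6
    output = output.reshape(1, dimensions);    // no-op here, mirrors the real code
    cv::transpose(output, output);             // now [8400 x 6]: one candidate per row

    cv::Mat scores = output.row(0).colRange(4, dimensions);
    cv::Point classId;
    double maxScore;
    cv::minMaxLoc(scores, nullptr, &maxScore, nullptr, &classId);

    float cx = output.at<float>(0, 0), cy = output.at<float>(0, 1);
    float w = output.at<float>(0, 2), h = output.at<float>(0, 3);
    auto left = static_cast<int>(cx - 0.5f * w); // center box -> corner box
    auto top = static_cast<int>(cy - 0.5f * h);
    // Prints: class 0 score 0.90 box [270, 215, 100, 50]
    std::printf("class %d score %.2f box [%d, %d, %d, %d]\n",
                classId.x, maxScore, left, top, static_cast<int>(w), static_cast<int>(h));
}
```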
diff --git a/perception/object_detector/object_detector.processing.cpp b/perception/object_detector/object_detector.processing.cpp
index 4be823b4..92a628bf 100644
--- a/perception/object_detector/object_detector.processing.cpp
+++ b/perception/object_detector/object_detector.processing.cpp
@@ -14,19 +14,26 @@ namespace mrover {
             RCLCPP_INFO_STREAM(get_logger(), std::format("Image size changed from [{}, {}] to [{}, {}]", mRgbImage.cols, mRgbImage.rows, msg->width, msg->height));
             mRgbImage = cv::Mat{static_cast<int>(msg->height), static_cast<int>(msg->width), CV_8UC3, cv::Scalar{0, 0, 0, 0}};
         }
+
+        // Skip empty (0x0) images
+        if (!mRgbImage.total()) {
+            return;
+        }
+
         convertPointCloudToRGB(msg, mRgbImage);
 
-        // TODO(quintin): Avoid hard coding blob size
-        cv::Size blobSize{640, 640};
+        // Convert the RGB image into the blob image format
         cv::Mat blobSizedImage;
-        cv::resize(mRgbImage, blobSizedImage, blobSize);
-        cv::dnn::blobFromImage(blobSizedImage, mImageBlob, 1.0 / 255.0, blobSize, cv::Scalar{}, false, false);
+        mModel.rbgImageToBlob(mModel, mRgbImage, blobSizedImage, mImageBlob);
 
         mLoopProfiler.measureEvent("Conversion");
 
         // Run the blob through the model
         std::vector<Detection> detections{};
-        mTensorRT.modelForwardPass(mImageBlob, detections, mModelScoreThreshold, mModelNmsThreshold);
+        cv::Mat outputTensor;
+        mTensorRT.modelForwardPass(mImageBlob, outputTensor);
+
+        mModel.outputTensorToDetections(mModel, outputTensor, detections);
 
         mLoopProfiler.measureEvent("Execution");
 
@@ -66,10 +73,10 @@
                 // Push the immediate detections to the camera frame
                 SE3Conversions::pushToTfTree(*mTfBroadcaster, objectImmediateFrame, mCameraFrame, objectInCamera.value(), get_clock()->now());
                 // Since the object is seen we need to increment the hit counter
-                mObjectHitCounts[classId] = std::min(mObjMaxHitcount, mObjectHitCounts[classId] + mObjIncrementWeight);
+                mModel.objectHitCounts[classId] = std::min(mObjMaxHitcount, mModel.objectHitCounts[classId] + mObjIncrementWeight);
 
                 // Only publish to the permanent frame if we are confident in the object
-                if (mObjectHitCounts[classId] > mObjHitThreshold) {
+                if (mModel.objectHitCounts[classId] > mObjHitThreshold) {
                     std::string objectPermanentFrame = className;
                     // Grab the object inside of the camera frame and push it into the map frame
                     SE3d objectInMap = SE3Conversions::fromTfTree(*mTfBuffer, objectImmediateFrame, mWorldFrame);
@@ -90,7 +97,7 @@
             if (seenObjects[i]) continue;
 
-            assert(i < mObjectHitCounts.size());
-            mObjectHitCounts[i] = std::max(0, mObjectHitCounts[i] - mObjDecrementWeight);
+            assert(i < mModel.objectHitCounts.size());
+            mModel.objectHitCounts[i] = std::max(0, mModel.objectHitCounts[i] - mObjDecrementWeight);
         }
     }
 
@@ -170,4 +177,54 @@
         });
     }
 
+    auto ImageObjectDetector::imageCallback(sensor_msgs::msg::Image::UniquePtr const& msg) -> void {
+        assert(msg);
+        assert(msg->height > 0);
+        assert(msg->width > 0);
+        assert(msg->encoding == sensor_msgs::image_encodings::BGRA8);
+
+        mLoopProfiler.beginLoop();
+
+        cv::Mat bgraImage{static_cast<int>(msg->height), static_cast<int>(msg->width), CV_8UC4, const_cast<std::uint8_t*>(msg->data.data())};
+        cv::cvtColor(bgraImage, mRgbImage, cv::COLOR_BGRA2RGB);
+
+        // Convert the RGB image into the blob image format
+        cv::Mat blobSizedImage;
+        mModel.rbgImageToBlob(mModel, mRgbImage, blobSizedImage, mImageBlob);
+
+        mLoopProfiler.measureEvent("Conversion");
+
+        // Run the blob through the model
+        std::vector<Detection> detections{};
+        cv::Mat outputTensor;
+        mTensorRT.modelForwardPass(mImageBlob, outputTensor);
+
+        mModel.outputTensorToDetections(mModel, outputTensor, detections);
+
+        mLoopProfiler.measureEvent("Execution");
+
+        mrover::msg::ImageTargets targets{};
+        for (auto const& [classId, className, confidence, box]: detections) {
+            mrover::msg::ImageTarget target;
+            target.name = className;
+            target.bearing = getTagBearing(blobSizedImage, box);
+            targets.targets.push_back(target);
+        }
+
+        mTargetsPub->publish(targets);
+
+        drawDetectionBoxes(blobSizedImage, detections);
+        publishDetectedObjects(blobSizedImage);
+
+        mLoopProfiler.measureEvent("Publication");
+    }
+
+    auto ImageObjectDetector::getTagBearing(cv::InputArray image, cv::Rect const& box) const -> float {
+        cv::Point2f center = cv::Point2f{box.tl()} + cv::Point2f{box.size()} / 2;
+        float xNormalized = center.x / static_cast<float>(image.cols());
+        float xRecentered = 0.5f - xNormalized;
+        float bearingDegrees = xRecentered * mCameraHorizontalFov;
+        return bearingDegrees * std::numbers::pi_v<float> / 180.0f;
+    }
+
 } // namespace mrover
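`getTagBearing` maps a box center to a bearing in radians: normalize x to [0, 1], recenter to [-0.5, 0.5] (detections left of center get a positive bearing), scale by the horizontal FOV, and convert degrees to radians. Worked through with the default 80 deg FOV and a 640 px wide blob-sized image:

```cpp
// Worked example of the bearing mapping above for a detection centered at x = 480.
#include <cstdio>
#include <numbers>

int main() {
    float xNormalized = 480.0f / 640.0f;   // 0.75
    float xRecentered = 0.5f - xNormalized; // -0.25 (right of center -> negative bearing)
    float bearingDegrees = xRecentered * 80.0f; // -20 deg
    std::printf("%f rad\n", bearingDegrees * std::numbers::pi_v<float> / 180.0f); // ~ -0.349 rad
}
```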
diff --git a/perception/object_detector/pch.hpp b/perception/object_detector/pch.hpp
index 7ab5af34..1ed375b0 100644
--- a/perception/object_detector/pch.hpp
+++ b/perception/object_detector/pch.hpp
@@ -11,6 +11,7 @@
 #include
 #include
 #include
+#include <numbers>
 #include
 #include
 #include
@@ -36,6 +37,8 @@
 // Messages
 #include "sensor_msgs/image_encodings.hpp"
 #include "sensor_msgs/msg/image.hpp"
+#include <mrover/msg/image_target.hpp>
+#include <mrover/msg/image_targets.hpp>
 #include
 #include
 #include
diff --git a/perception/zed_wrapper/zed_wrapper.cpp b/perception/zed_wrapper/zed_wrapper.cpp
index cf35ca6b..3309cbcf 100644
--- a/perception/zed_wrapper/zed_wrapper.cpp
+++ b/perception/zed_wrapper/zed_wrapper.cpp
@@ -18,13 +18,13 @@ namespace mrover {
         RCLCPP_INFO(this->get_logger(), "Created Zed Wrapper Node, %s", NODE_NAME);
 
         // Publishers
-        mRightImgPub = create_publisher<sensor_msgs::msg::Image>("right/image", 1);
-        mLeftImgPub = create_publisher<sensor_msgs::msg::Image>("left/image", 1);
+        mRightImgPub = create_publisher<sensor_msgs::msg::Image>("zed/right/image", 1);
+        mLeftImgPub = create_publisher<sensor_msgs::msg::Image>("zed/left/image", 1);
         mImuPub = create_publisher<sensor_msgs::msg::Imu>("zed_imu/data_raw", 1);
         mMagPub = create_publisher<sensor_msgs::msg::MagneticField>("zed_imu/mag", 1);
-        mPcPub = create_publisher<sensor_msgs::msg::PointCloud2>("camera/left/points", 1);
-        mRightCamInfoPub = create_publisher<sensor_msgs::msg::CameraInfo>("camera/right/camera_info", 1);
-        mLeftCamInfoPub = create_publisher<sensor_msgs::msg::CameraInfo>("camera/left/camera_info", 1);
+        mPcPub = create_publisher<sensor_msgs::msg::PointCloud2>("zed/left/points", 1);
+        mRightCamInfoPub = create_publisher<sensor_msgs::msg::CameraInfo>("zed/right/camera_info", 1);
+        mLeftCamInfoPub = create_publisher<sensor_msgs::msg::CameraInfo>("zed/left/camera_info", 1);
 
         // Declare and set Params
         int imageWidth{};
diff --git a/scripts/visualizer.py b/scripts/visualizer.py
index 5aae7a5d..336f9e67 100755
--- a/scripts/visualizer.py
+++ b/scripts/visualizer.py
@@ -112,7 +112,7 @@ def paintEvent(self, event):
             self.resize(self.renderer.defaultSize())
         self.renderer.render(painter)
 
-    def update(self):
+    def update(self):  # type: ignore[override]
         rclpy.spin_once(self.viz)
         with self.state_machine.mutex:
             if self.graph is None or self.state_machine.needs_redraw:
diff --git a/tensorrt/inference.cu b/tensorrt/inference.cu
index 17bda4c3..0e288c79 100644
--- a/tensorrt/inference.cu
+++ b/tensorrt/inference.cu
@@ -11,32 +11,64 @@ using namespace nvinfer1;
 
 /**
  * Requires bindings, inputTensor, stream
  * Modifies stream, outputTensor
  */
-constexpr static auto INPUT_BINDING_NAME = "images";
-constexpr static auto OUTPUT_BINDING_NAME = "output0";
 
-Inference::Inference(std::filesystem::path const& onnxModelPath, std::string const& modelName, std::string packagePathString) : mPackagePath{std::move(packagePathString)} {
-    mModelPath = onnxModelPath.string();
+Inference::Inference(std::string modelName, std::string packagePathString) : mModelName{std::move(modelName)}, mPackagePath{std::move(packagePathString)} {
+    // Create the ONNX and engine file paths
+    mLogger.log(ILogger::Severity::kINFO, mModelName.c_str());
+    mLogger.log(ILogger::Severity::kINFO, mPackagePath.c_str());
+    mONNXModelPath = std::filesystem::path{mPackagePath} / "data" / std::string(mModelName + ".onnx");
+    mEngineModelPath = std::filesystem::path{mPackagePath} / "data" / std::string("tensorrt-engine-" + mModelName + ".engine");
+
+    std::array<char, 512> message{};
+    std::snprintf(message.data(), message.size(), "Reading from ONNX model at %s and creating TensorRT engine at %s", mONNXModelPath.c_str(), mEngineModelPath.c_str());
+    mLogger.log(ILogger::Severity::kINFO, message.data());
 
     // Create the engine object from either the engine file or the onnx file
-    mEngine = std::unique_ptr<ICudaEngine>{createCudaEngine(onnxModelPath, modelName)};
+    mEngine = std::unique_ptr<ICudaEngine>{createCudaEngine()};
     if (!mEngine) throw std::runtime_error("Failed to create CUDA engine");
 
     mLogger.log(ILogger::Severity::kINFO, "Created CUDA Engine");
 
     // Check some assumptions about the model
     if (mEngine->getNbIOTensors() != 2) throw std::runtime_error("Invalid Binding Count");
-    if (mEngine->getTensorIOMode(INPUT_BINDING_NAME) != TensorIOMode::kINPUT) throw std::runtime_error("Expected Input Binding 0 Is An Input");
-    if (mEngine->getTensorIOMode(OUTPUT_BINDING_NAME) != TensorIOMode::kOUTPUT) throw std::runtime_error("Expected Input Binding Input To Be 1");
+
+    // Store the IO tensor names
+    mInputTensorName = mEngine->getIOTensorName(0);
+    mOutputTensorName = mEngine->getIOTensorName(1);
+
+    if (mEngine->getTensorIOMode(mInputTensorName.c_str()) != TensorIOMode::kINPUT) throw std::runtime_error("Expected Binding 0 To Be An Input");
+    if (mEngine->getTensorIOMode(mOutputTensorName.c_str()) != TensorIOMode::kOUTPUT) throw std::runtime_error("Expected Binding 1 To Be An Output");
+
+    // Be verbose about the input tensor size
+    auto inputTensorSize = getInputTensorSize();
+    std::snprintf(message.data(), message.size(), "%s Tensor's Dimensions:", mInputTensorName.c_str());
+    mLogger.log(ILogger::Severity::kINFO, message.data());
+    for (size_t i = 0; i < inputTensorSize.size(); ++i) {
+        std::snprintf(message.data(), message.size(), "Dimension: %zu Size: %zu", i, inputTensorSize[i]);
+        mLogger.log(ILogger::Severity::kINFO, message.data());
+    }
+
+    // Be verbose about the output tensor size
+    auto outputTensorSize = getOutputTensorSize();
+    std::snprintf(message.data(), message.size(), "%s Tensor's Dimensions:", mOutputTensorName.c_str());
+    mLogger.log(ILogger::Severity::kINFO, message.data());
+    for (size_t i = 0; i < outputTensorSize.size(); ++i) {
+        std::snprintf(message.data(), message.size(), "Dimension: %zu Size: %zu", i, outputTensorSize[i]);
+        mLogger.log(ILogger::Severity::kINFO, message.data());
+    }
 
     createExecutionContext();
 
     prepTensors();
 }
 
-auto Inference::createCudaEngine(std::filesystem::path const& onnxModelPath, std::string const& modelName) -> ICudaEngine* {
+auto Inference::createCudaEngine() -> ICudaEngine* {
+    mLogger.log(ILogger::Severity::kINFO, "Creating engine building tools...");
     constexpr auto explicitBatch = 1U << static_cast<std::uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
 
-    std::unique_ptr<IBuilder> builder{createInferBuilder(mLogger)};
+    IBuilder* builder = createInferBuilder(mLogger);
     if (!builder) throw std::runtime_error("Failed to create Infer Builder");
     mLogger.log(ILogger::Severity::kINFO, "Created Infer Builder");
 
@@ -52,17 +84,15 @@
     if (!config) throw std::runtime_error("Failed to create Builder Config");
     mLogger.log(ILogger::Severity::kINFO, "Created Builder Config");
 
-    if (!parser->parseFromFile(onnxModelPath.c_str(), static_cast<int>(ILogger::Severity::kINFO))) {
+    if (!parser->parseFromFile(mONNXModelPath.c_str(), static_cast<int>(ILogger::Severity::kINFO))) {
         throw std::runtime_error("Failed to parse ONNX file");
     }
 
     IRuntime* runtime = createInferRuntime(mLogger);
 
-    // Define the engine file location relative to the mrover package
-    std::filesystem::path packagePath{mPackagePath};
-    std::filesystem::path enginePath = packagePath / "data" / std::string("tensorrt-engine-").append(modelName).append(".engine");
-
     // Check if engine file exists
-    if (!exists(enginePath)) {
+    if (!exists(mEngineModelPath)) {
         std::cout << "Optimizing ONNX model for TensorRT. This may take a long time..." << std::endl;
 
         // Create the Engine from the onnx file
@@ -75,15 +105,15 @@
 
         // Save Engine to File
         auto trtModelStream = tempEng->serialize();
-        std::ofstream outputFileStream{enginePath, std::ios::binary};
-        outputFileStream.write(static_cast<char const*>(trtModelStream->data()), trtModelStream->size());
+        std::ofstream outputFileStream{mEngineModelPath, std::ios::binary};
+        outputFileStream.write(static_cast<char const*>(trtModelStream->data()), static_cast<std::streamsize>(trtModelStream->size()));
         outputFileStream.close();
 
         return tempEng;
     }
 
     // Load engine from file
-    std::ifstream inputFileStream{enginePath, std::ios::binary};
+    std::ifstream inputFileStream{mEngineModelPath, std::ios::binary};
     std::stringstream engineBuffer;
 
     // Stream in the engine file to the buffer
std::string("tensorrt-engine-").append(modelName).append(".engine"); // Check if engine file exists - if (!exists(enginePath)) { + if (!exists(mEngineModelPath)) { std::cout << "Optimizing ONXX model for TensorRT. This make take a long time..." << std::endl; // Create the Engine from onnx file @@ -75,15 +105,15 @@ auto Inference::createCudaEngine(std::filesystem::path const& onnxModelPath, std // Save Engine to File auto trtModelStream = tempEng->serialize(); - std::ofstream outputFileStream{enginePath, std::ios::binary}; - outputFileStream.write(static_cast(trtModelStream->data()), trtModelStream->size()); + std::ofstream outputFileStream{mEngineModelPath, std::ios::binary}; + outputFileStream.write(static_cast(trtModelStream->data()), static_cast(trtModelStream->size())); outputFileStream.close(); return tempEng; } // Load engine from file - std::ifstream inputFileStream{enginePath, std::ios::binary}; + std::ifstream inputFileStream{mEngineModelPath, std::ios::binary}; std::stringstream engineBuffer; // Stream in the engine file to the buffer @@ -99,7 +129,7 @@ auto Inference::createExecutionContext() -> void { if (!mContext) throw std::runtime_error("Failed to create execution context"); // Set up the input tensor sizing - mContext->setInputShape(INPUT_BINDING_NAME, mEngine->getTensorShape(INPUT_BINDING_NAME)); + mContext->setInputShape(mInputTensorName.c_str(), mEngine->getTensorShape(mInputTensorName.c_str())); } auto Inference::doDetections(cv::Mat const& img) const -> void { @@ -144,7 +174,6 @@ auto Inference::prepTensors() -> void { for(int32_t i = 0; i < rank; ++i){ size *= extents[i]; } - std::cout << tensorName << " is getting allocated to size " << size << std::endl; // Create GPU memory for TensorRT to operate on if (cudaError_t result = cudaMalloc(mBindings.data() + i, size * sizeof(float)); result != cudaSuccess) @@ -153,7 +182,8 @@ auto Inference::prepTensors() -> void { assert(mContext); // Create an appropriately sized output tensor - auto const [nbDims, d] = mEngine->getTensorShape(OUTPUT_BINDING_NAME); + // TODO: Fix this + auto const [nbDims, d] = mEngine->getTensorShape(mOutputTensorName.c_str()); for (int i = 0; i < nbDims; i++) { std::array message; std::snprintf(message.data(), message.size(), "Size %d %d", i, d[i]); @@ -170,3 +200,28 @@ auto Inference::getBindingInputIndex(IExecutionContext const* context) -> int { // Returns the id for the input tensor return context->getEngine().getTensorIOMode(context->getEngine().getIOTensorName(0)) != TensorIOMode::kINPUT; // 0 (false) if bindingIsInput(0), 1 (true) otherwise } + + +auto Inference::getInputTensorSize() -> std::vector{ + auto dims = mEngine->getTensorShape(mInputTensorName.c_str()); + std::vector inputBlobSize; + inputBlobSize.reserve(dims.nbDims); + + for(int32_t i = 0; i < dims.nbDims; ++i){ + inputBlobSize.push_back(dims.d[i]); + } + + return inputBlobSize; +} + +auto Inference::getOutputTensorSize() -> std::vector{ + auto dims = mEngine->getTensorShape(mOutputTensorName.c_str()); + std::vector inputBlobSize; + inputBlobSize.reserve(dims.nbDims); + + for(int32_t i = 0; i < dims.nbDims; ++i){ + inputBlobSize.push_back(dims.d[i]); + } + + return inputBlobSize; +} diff --git a/tensorrt/inference.cuh b/tensorrt/inference.cuh index d0b65469..0df14ef0 100644 --- a/tensorrt/inference.cuh +++ b/tensorrt/inference.cuh @@ -11,9 +11,12 @@ using nvinfer1::IExecutionContext; class Inference { - std::string mModelPath; + std::string mModelName; std::string mPackagePath; + std::filesystem::path mONNXModelPath; + 
diff --git a/tensorrt/inference.cuh b/tensorrt/inference.cuh
index d0b65469..0df14ef0 100644
--- a/tensorrt/inference.cuh
+++ b/tensorrt/inference.cuh
@@ -11,9 +11,12 @@ using nvinfer1::IExecutionContext;
 
 class Inference {
-    std::string mModelPath;
+    std::string mModelName;
     std::string mPackagePath;
 
+    std::filesystem::path mONNXModelPath;
+    std::filesystem::path mEngineModelPath;
+
     nvinfer1::Logger mLogger;
 
     std::unique_ptr<ICudaEngine> mEngine{};
@@ -21,7 +24,9 @@
     std::unique_ptr<IExecutionContext> mContext{};
 
     cv::Mat mInputTensor;
+    std::string mInputTensorName;
     cv::Mat mOutputTensor;
+    std::string mOutputTensorName;
 
     std::array<void*, 2> mBindings{};
 
@@ -35,7 +40,7 @@
      *
      * Uses the file path to locate the onnx model and the modelName to locate the engine file
      */
-    auto createCudaEngine(std::filesystem::path const& onnxModelPath, std::string const& modelName) -> ICudaEngine*;
+    auto createCudaEngine() -> ICudaEngine*;
 
     /**
      * @brief Performs the model forward pass and allocates the results in the output tensor
@@ -61,7 +66,7 @@
     auto createExecutionContext() -> void;
 
 public:
-    explicit Inference(std::filesystem::path const& onnxModelPath, std::string const& modelName, std::string packagePathString);
+    explicit Inference(std::string modelName, std::string packagePathString);
 
     /**
      * @brief Runs the forward pass on the given input image in CNN format
@@ -72,4 +77,14 @@
     /**
      * @brief Retrieves the mat where the output from the forward pass was stored
      */
     auto getOutputTensor() -> cv::Mat;
+
+    /**
+     * @brief Retrieves the expected input tensor size
+     */
+    auto getInputTensorSize() -> std::vector<int64_t>;
+
+    /**
+     * @brief Retrieves the expected output tensor size
+     */
+    auto getOutputTensorSize() -> std::vector<int64_t>;
 };
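`InferenceWrapper` (next file) exists so that translation units compiled by the regular C++ compiler never see TensorRT or CUDA headers: `inference.cu`/`inference.cuh` are compiled by nvcc, and the wrapper forwards through an opaque pointer. The pattern in miniature, as this editor reads it from the diff:

```cpp
// pimpl in miniature: the public header only forward-declares Inference, so
// including it pulls in no TensorRT or CUDA headers.
class Inference; // defined in inference.cuh, compiled by nvcc

class InferenceWrapper {
    std::shared_ptr<Inference> mInference; // opaque handle to the CUDA-side object
public:
    explicit InferenceWrapper(std::string const& modelName, std::string const& packagePath);
    auto doDetections(cv::Mat const& img) const -> void;
};
```

The switch from `mInference.reset(new Inference(...))` to `std::make_shared<Inference>(...)` also fits this pattern: `shared_ptr`'s type-erased deleter is captured inside the .cu translation unit, so the header can keep `Inference` incomplete.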
diff --git a/tensorrt/inference_wrapper.cpp b/tensorrt/inference_wrapper.cpp
index d5849095..54f5c87d 100644
--- a/tensorrt/inference_wrapper.cpp
+++ b/tensorrt/inference_wrapper.cpp
@@ -12,8 +12,8 @@ using namespace nvinfer1;
  * Modifies stream, outputTensor
  */
 
-InferenceWrapper::InferenceWrapper(std::string onnxModelPath, std::string const& modelName, std::string const& packagePath) {
-    mInference.reset(new Inference(std::move(onnxModelPath), modelName, packagePath));
+InferenceWrapper::InferenceWrapper(std::string const& modelName, std::string const& packagePath) {
+    mInference = std::make_shared<Inference>(modelName, packagePath);
 }
 
 auto InferenceWrapper::doDetections(cv::Mat const& img) const -> void {
@@ -24,3 +24,11 @@ auto InferenceWrapper::getOutputTensor() const -> cv::Mat {
     return mInference->getOutputTensor();
 }
+
+auto InferenceWrapper::getInputTensorSize() -> std::vector<int64_t> {
+    return mInference->getInputTensorSize();
+}
+
+auto InferenceWrapper::getOutputTensorSize() -> std::vector<int64_t> {
+    return mInference->getOutputTensorSize();
+}
diff --git a/tensorrt/inference_wrapper.hpp b/tensorrt/inference_wrapper.hpp
index f9d83b99..d6e15a36 100644
--- a/tensorrt/inference_wrapper.hpp
+++ b/tensorrt/inference_wrapper.hpp
@@ -12,10 +12,14 @@ class InferenceWrapper {
 
     ~InferenceWrapper() = default;
 
-    explicit InferenceWrapper(std::string onnxModelPath, std::string const& modelName, std::string const& packagePath);
+    explicit InferenceWrapper(std::string const& modelName, std::string const& packagePath);
 
     auto doDetections(cv::Mat const& img) const -> void;
 
     // Retrieve the output tensor from the previous forward pass
     [[nodiscard]] auto getOutputTensor() const -> cv::Mat;
+
+    [[nodiscard]] auto getInputTensorSize() -> std::vector<int64_t>;
+
+    [[nodiscard]] auto getOutputTensorSize() -> std::vector<int64_t>;
 };
diff --git a/tensorrt/tensorrt.cpp b/tensorrt/tensorrt.cpp
index 287b3f2a..2c4dc3ff 100644
--- a/tensorrt/tensorrt.cpp
+++ b/tensorrt/tensorrt.cpp
@@ -5,96 +5,22 @@ using namespace std;
 
 TensortRT::TensortRT() = default;
 
-TensortRT::TensortRT(string modelName, string packagePathString) : mModelName{std::move(modelName)} {
-    std::filesystem::path packagePath{std::move(packagePathString)};
-    std::filesystem::path modelFileName = mModelName.append(".onnx");
-    std::filesystem::path modelPath = packagePath / "data" / modelFileName;
-
-    mInferenceWrapper = InferenceWrapper{modelPath, mModelName, packagePath};
+TensortRT::TensortRT(string modelName, string packagePathString) : mModelName{std::move(modelName)} {
+    mInferenceWrapper = InferenceWrapper{mModelName, packagePathString};
 }
 
 TensortRT::~TensortRT() = default;
 
-auto TensortRT::modelForwardPass(cv::Mat const& blob, std::vector<Detection>& detections, float modelScoreThreshold, float modelNMSThreshold) const -> void {
-    mInferenceWrapper.doDetections(blob);
-    cv::Mat output = mInferenceWrapper.getOutputTensor();
-    parseModelOutput(output, detections, modelScoreThreshold, modelNMSThreshold);
+auto TensortRT::modelForwardPass(cv::Mat const& inputBlobTensor, cv::Mat& outputTensor) const -> void {
+    mInferenceWrapper.doDetections(inputBlobTensor);
+    outputTensor = mInferenceWrapper.getOutputTensor();
 }
 
-auto TensortRT::parseModelOutput(cv::Mat& output, std::vector<Detection>& detections, float modelScoreThreshold, float modelNMSThreshold) const -> void {
-    // Parse model specific dimensioning from the output
-
-    // The input to this function is expecting a YOLOv8 style model, thus the dimensions should be > rows
-    if (output.cols <= output.rows) {
-        throw std::runtime_error("Something is wrong Model with interpretation");
-    }
-
-    int rows = output.cols;
-    int dimensions = output.rows;
-
-    // The output of the model is a batchSizex84x8400 matrix
-    // This converts the model to a TODO: Check this dimensioning
-    output = output.reshape(1, dimensions);
-    cv::transpose(output, output);
-
-    // This function expects the image to already be in the correct format thus no distrotion is needed
-    float const xFactor = 1.0;
-    float const yFactor = 1.0;
-
-    // Intermediate Storage Containers
-    std::vector<int> classIds;
-    std::vector<float> confidences;
-    std::vector<cv::Rect> boxes;
-
-    // Each row of the output is a detection with a bounding box and associated class scores
-    for (int r = 0; r < rows; ++r) {
-        // Skip first four values as they are the box data
-        cv::Mat scores = output.row(r).colRange(4, dimensions);
-
-        cv::Point classId;
-        double maxClassScore;
-        cv::minMaxLoc(scores, nullptr, &maxClassScore, nullptr, &classId);
-
-        // Determine if the class is an acceptable confidence level, otherwise disregard
-        if (maxClassScore <= modelScoreThreshold) continue;
-
-        confidences.push_back(static_cast<float>(maxClassScore));
-        classIds.push_back(classId.x);
-
-        // Get the bounding box data
-        cv::Mat box = output.row(r).colRange(0, 4);
-        auto x = box.at<float>(0);
-        auto y = box.at<float>(1);
-        auto w = box.at<float>(2);
-        auto h = box.at<float>(3);
-
-        // Cast the corners into integers to be used on pixels
-        auto left = static_cast<int>((x - 0.5 * w) * xFactor);
-        auto top = static_cast<int>((y - 0.5 * h) * yFactor);
-        auto width = static_cast<int>(w * xFactor);
-        auto height = static_cast<int>(h * yFactor);
-
-        // Push abck the box into storage
-        boxes.emplace_back(left, top, width, height);
-    }
-
-    //Coalesce the boxes into a smaller number of distinct boxes
-    std::vector<int> nmsResult;
-    cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nmsResult);
-
-    // Fill in the output Detections Vector
-    for (int i: nmsResult) {
-        Detection result;
-
-        //Fill in the id and confidence for the class
-        result.classId = classIds[i];
-        result.confidence = confidences[i];
-
-        //Fill in the class name and box information
-        result.className = classes[result.classId];
-        result.box = boxes[i];
-
-        //Push back the detection into the for storagevector
-        detections.push_back(result);
-    }
-}
+
+auto TensortRT::getInputTensorSize() -> std::vector<int64_t> {
+    return mInferenceWrapper.getInputTensorSize();
+}
+
+auto TensortRT::getOutputTensorSize() -> std::vector<int64_t> {
+    return mInferenceWrapper.getOutputTensorSize();
+}
diff --git a/tensorrt/tensorrt.hpp b/tensorrt/tensorrt.hpp
index 6d695636..dd45596b 100644
--- a/tensorrt/tensorrt.hpp
+++ b/tensorrt/tensorrt.hpp
@@ -12,15 +12,8 @@ struct Detection {
 
 class TensortRT {
     std::string mModelName;
-    std::vector<std::string> classes{"bottle", "hammer"};
-
     InferenceWrapper mInferenceWrapper;
 
-    auto parseModelOutput(cv::Mat& output,
-                          std::vector<Detection>& detections,
-                          float modelScoreThreshold = 0.75,
-                          float modelNMSThreshold = 0.5) const -> void;
-
 public:
     TensortRT();
 
@@ -28,8 +21,9 @@
 
     ~TensortRT();
 
-    auto modelForwardPass(cv::Mat const& blob,
-                          std::vector<Detection>& detections,
-                          float modelScoreThreshold = 0.75,
-                          float modelNMSThreshold = 0.5) const -> void;
+    auto modelForwardPass(cv::Mat const& inputBlobTensor, cv::Mat& outputTensor) const -> void;
+
+    [[nodiscard]] auto getInputTensorSize() -> std::vector<int64_t>;
+
+    [[nodiscard]] auto getOutputTensorSize() -> std::vector<int64_t>;
 };
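Taken together, the refactor moves all detection-specific parsing out of the TensorRT layer: `TensortRT` now only runs the forward pass and reports tensor shapes, while the `Model` strategies own pre/post-processing. An end-to-end sketch mirroring how the detector node composes the pieces (assuming `rgbImage` holds an RGB frame, `packagePath` points at the mrover package, and the protected `Model` type is accessible from the calling context):

```cpp
// End-to-end flow after this refactor, as composed in ObjectDetectorBase.
TensortRT trt{"Large-Dataset", packagePath.string()};

Model model{"Large-Dataset", {0, 0}, {"bottle", "hammer"},
            trt.getInputTensorSize(), trt.getOutputTensorSize(),
            {0.75f, 0.5f}, // score and NMS thresholds, stored in Model::buffer
            ObjectDetectorBase::preprocessYOLOv8Input,
            ObjectDetectorBase::parseYOLOv8Output};

cv::Mat blobSizedImage, blob, outputTensor;
model.rbgImageToBlob(model, rgbImage, blobSizedImage, blob); // preprocess
trt.modelForwardPass(blob, outputTensor);                    // inference only
std::vector<Detection> detections;
model.outputTensorToDetections(model, outputTensor, detections); // postprocess
```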