From 05833a69bf4aedc1c7dbb90f322078951e65b18f Mon Sep 17 00:00:00 2001
From: John <134429827+jbrhm@users.noreply.github.com>
Date: Thu, 19 Sep 2024 14:58:03 -0400
Subject: [PATCH] Add the ZED Wrapper and Object Detector (#3)

* starter files
* starter code
* finding zed sdk
* building zed sdk
* y no build
* gets paramaters
* gets more aprameters
* why no callback
* variants :)))))
* logic error
* more params
* most init done
* no build :(
* almost builds
* builds :)))))
* wat svo?
* builds
* AHHHHHHHH POINTERS AHHHHHHHHH
* STD MOVE FTW
* undefined symbols :(
* undefined symbols :(
* Builds
* Runs
* Better includes
* Works
* 62 Hz
* begin porting voer obj detect
* more errors
* learning lib builds
* brah
* bruhhhh
* works
* better type safety
* got rid of compiler warnings
* Update zed_wrapper.bridge.cpp
* Updated CUDA
* Updated CUDA
* Update zed_wrapper.bridge.cpp
* CLANG FORMAT IS CRACCCCCCKKKKKKED bro :))))))))))))
* CLANG FORMAT IS CRACCCCCCKKKKKKED bro :))))))))))))
* CLANG FORMAT IS CRACCCCCCKKKKKKED bro :))))))))))))
* CLANG FORMAT IS CRACCCCCCKKKKKKED bro :))))))))))))
* BLACK
* builds
* no engine
* redundant
* Ashwin Suggestions
* No more engine
* Updating compiler flags
* Final Refactoring
* od still works
* zed default params
* od default params
* Works with new parameters
* Better pathing on zed launch
* Better launch file configuration
* works
---
 CMakeLists.txt | 43 ++-
 build.sh | 6 +-
 config/object_detector.yaml | 18 +
 config/zed.yaml | 22 ++
 data/.gitignore | 1 +
 data/Large-Dataset.onnx | 3 +
 launch/perception.launch.py | 28 ++
 parameter_utils/parameter.hpp | 104 ++++++
 .../object_detector/object_detector.cpp | 47 +++
 .../object_detector/object_detector.hpp | 67 ++++
 .../object_detector.processing.cpp | 173 ++++++++++
 perception/object_detector/pch.hpp | 53 +++
 perception/point.hpp | 14 +-
 perception/zed_wrapper/pch.hpp | 31 ++
 perception/zed_wrapper/zed_wrapper.bridge.cpp | 116 +++++++
 perception/zed_wrapper/zed_wrapper.bridge.cu | 81 +++++
 perception/zed_wrapper/zed_wrapper.cpp | 324 ++++++++++++++++++
 perception/zed_wrapper/zed_wrapper.hpp | 103 ++++++
 tensorrt/inference.cu | 172 ++++++++++
 tensorrt/inference.cuh | 75 ++++
 tensorrt/inference_wrapper.cpp | 26 ++
 tensorrt/inference_wrapper.hpp | 21 ++
 tensorrt/logger.cu | 25 ++
 tensorrt/logger.cuh | 21 ++
 tensorrt/pch.hpp | 17 +
 tensorrt/tensorrt.cpp | 100 ++++++
 tensorrt/tensorrt.hpp | 35 ++
 27 files changed, 1723 insertions(+), 3 deletions(-)
 create mode 100644 config/object_detector.yaml
 create mode 100644 config/zed.yaml
 create mode 100644 data/.gitignore
 create mode 100644 data/Large-Dataset.onnx
 create mode 100644 launch/perception.launch.py
 create mode 100644 parameter_utils/parameter.hpp
 create mode 100644 perception/object_detector/object_detector.cpp
 create mode 100644 perception/object_detector/object_detector.hpp
 create mode 100644 perception/object_detector/object_detector.processing.cpp
 create mode 100644 perception/object_detector/pch.hpp
 create mode 100644
perception/zed_wrapper/pch.hpp create mode 100644 perception/zed_wrapper/zed_wrapper.bridge.cpp create mode 100644 perception/zed_wrapper/zed_wrapper.bridge.cu create mode 100644 perception/zed_wrapper/zed_wrapper.cpp create mode 100644 perception/zed_wrapper/zed_wrapper.hpp create mode 100644 tensorrt/inference.cu create mode 100644 tensorrt/inference.cuh create mode 100644 tensorrt/inference_wrapper.cpp create mode 100644 tensorrt/inference_wrapper.hpp create mode 100644 tensorrt/logger.cu create mode 100644 tensorrt/logger.cuh create mode 100644 tensorrt/pch.hpp create mode 100644 tensorrt/tensorrt.cpp create mode 100644 tensorrt/tensorrt.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index a4776866..c0ad9825 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,7 +5,7 @@ project(mrover VERSION 2025.0.0 LANGUAGES C CXX) cmake_policy(SET CMP0148 OLD) set(CMAKE_CXX_STANDARD 23) -set(CMAKE_CXX_STANDARD_REQUIRED ON) + # Generate compile_commands.json for clangd language server. # Can be used by VSCode, CLion, VIM, etc. set(CMAKE_EXPORT_COMPILE_COMMANDS ON) @@ -210,6 +210,7 @@ ament_target_dependencies(lie rclcpp geometry_msgs tf2 tf2_ros) mrover_add_header_only_library(units units) mrover_add_header_only_library(loop_profiler loop_profiler) +mrover_add_header_only_library(parameter_utils parameter_utils) # Simulator @@ -226,6 +227,46 @@ target_include_directories(simulator SYSTEM PRIVATE ${BULLET_INCLUDE_DIRS} ${OPE # Perception +find_package(ZED QUIET) +find_package(CUDA QUIET) +find_package(OpenCV REQUIRED) +if(ZED_FOUND AND CUDA_FOUND) + enable_language(CUDA) + # CUDA Compile Options + add_library(cuda_compiler_flags INTERFACE) + target_compile_options(cuda_compiler_flags INTERFACE + -Wno-pedantic + -Wno-deprecated + -Wno-unused-parameter + -diag-suppress=815 + -diag-suppress=780 + -Wno-deprecated-copy + -Wno-unused-command-line-argument + -Wno-ignored-qualifiers + -Wno-sometimes-uninitialized + ) + + # ZED Wrapper + mrover_add_node(zed perception/zed_wrapper/*.c* perception/zed_wrapper/pch.hpp) + target_compile_options(zed PRIVATE $<$:-std=c++20>) + target_link_libraries(zed parameter_utils lie MANIF::manif ${CUDA_LIBRARIES} loop_profiler cuda_compiler_flags) + ament_target_dependencies(zed rclcpp sensor_msgs ZED CUDA tf2 tf2_ros) + + # Learning Library + # TODO(john): Update to use the new API + mrover_add_library(tensorrt tensorrt/*.c* tensorrt) + target_compile_options(tensorrt PRIVATE -Wno-deprecated-declarations $<$:-std=c++17>) + target_include_directories(tensorrt PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) + target_link_libraries(tensorrt PRIVATE opencv_core opencv_dnn opencv_imgproc lie nvinfer nvonnxparser tbb cuda_compiler_flags) + + # Object Detector + mrover_add_node(object_detector perception/object_detector/*.c* src/perception/object_detector/pch.hpp) + target_link_libraries(object_detector opencv_core opencv_dnn opencv_imgproc lie tbb tensorrt opencv_imgcodecs opencv_highgui loop_profiler parameter_utils cuda_compiler_flags) + ament_target_dependencies(object_detector rclcpp sensor_msgs CUDA tf2 tf2_ros) +else() + message("ZED not found...") +endif() + mrover_add_node(tag_detector perception/tag_detector/*.cpp perception/tag_detector/pch.hpp) ament_target_dependencies(tag_detector rclcpp tf2 tf2_ros) target_link_libraries(tag_detector lie opencv_core opencv_aruco opencv_imgproc loop_profiler) diff --git a/build.sh b/build.sh index 1f640485..ca62847e 100755 --- a/build.sh +++ b/build.sh @@ -5,10 +5,14 @@ set -euxo pipefail # Build in the colcon workspace, not 
the package pushd ../.. -# set C/CXX compilers +# Set C/C++ compilers export CC=clang export CXX=clang++ +# Set CUDA compilers +export CUDAHOSTCXX=g++-9 +export CUDACXX=/usr/local/cuda-12.3/bin/nvcc + # TODO (ali): add build configs for debug vs release colcon build \ --cmake-args -G Ninja -W no-dev -DCMAKE_BUILD_TYPE=RelWithDebInfo \ diff --git a/config/object_detector.yaml b/config/object_detector.yaml new file mode 100644 index 00000000..c0619cb1 --- /dev/null +++ b/config/object_detector.yaml @@ -0,0 +1,18 @@ + +# All units are in SI +# =================== +# Time: second, hz +# Angle: radian +# Distance: meter + +/object_detector: + ros__parameters: + camera_frame: "zed_left_camera_frame" + world_frame: "map" + increment_weight: 2 + decrement_weight: 1 + hitcount_threshold: 5 + hitcount_max: 10 + model_name: "Large-Dataset" + model_score_threshold: 0.75 + model_nms_threshold: 0.5 diff --git a/config/zed.yaml b/config/zed.yaml new file mode 100644 index 00000000..93994021 --- /dev/null +++ b/config/zed.yaml @@ -0,0 +1,22 @@ +# All units are in SI +# =================== +# Time: second, hz +# Angle: radian +# Distance: meter + +/zed_wrapper: + ros__parameters: + depth_confidence: 70 + serial_number: -1 + grab_target_fps: 60 + texture_confidence: 100 + image_width: 1280 + image_height: 720 + svo_file: "" + use_depth_stabilization: false + grab_resolution: "HD720" + depth_mode: "PERFORMANCE" + depth_maximum_distance: 12.0 + use_builtin_visual_odom: false + use_pose_smoothing: true + use_area_memory: true diff --git a/data/.gitignore b/data/.gitignore new file mode 100644 index 00000000..f8f1090c --- /dev/null +++ b/data/.gitignore @@ -0,0 +1 @@ +*.engine diff --git a/data/Large-Dataset.onnx b/data/Large-Dataset.onnx new file mode 100644 index 00000000..2f92fce2 --- /dev/null +++ b/data/Large-Dataset.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:364a9d50bf4da8fead466af3f28a35192fd0ca0f306d9b3c99de24351465420b +size 44720392 diff --git a/launch/perception.launch.py b/launch/perception.launch.py new file mode 100644 index 00000000..81f87e9c --- /dev/null +++ b/launch/perception.launch.py @@ -0,0 +1,28 @@ +from pathlib import Path + +from ament_index_python import get_package_share_directory + +from launch import LaunchDescription +from launch.actions import DeclareLaunchArgument +from launch.substitutions import LaunchConfiguration +from launch_ros.actions import Node, ComposableNodeContainer +from launch_ros.descriptions import ComposableNode +from launch.conditions import LaunchConfigurationEquals + + +def generate_launch_description(): + zed_node = Node( + package="mrover", + executable="zed", + name="zed_wrapper", + parameters=[Path(get_package_share_directory("mrover"), "config", "zed.yaml")], + ) + + object_detector_node = Node( + package="mrover", + executable="object_detector", + name="object_detector", + parameters=[Path(get_package_share_directory("mrover"), "config", "object_detector.yaml")], + ) + + return LaunchDescription([zed_node, object_detector_node]) diff --git a/parameter_utils/parameter.hpp b/parameter_utils/parameter.hpp new file mode 100644 index 00000000..f12b29cd --- /dev/null +++ b/parameter_utils/parameter.hpp @@ -0,0 +1,104 @@ +#include +#include +#include +#include +#include + +// Overloads pattern for visit +template struct overload : Ts... { using Ts::operator()...; }; +template overload(Ts...) 
-> overload; + + +namespace mrover { + class ParameterWrapper { + private: + static inline std::shared_ptr cbHande; + + public: + rclcpp::ParameterType mType; + + std::string mParamDescriptor; + + std::variant mData; + + std::variant mDefaultValue; + + ParameterWrapper(std::string paramDescriptor, int& variable, int defaultValue = 0) : mType{rclcpp::ParameterType::PARAMETER_INTEGER}, mParamDescriptor{std::move(paramDescriptor)}, mData{&variable}, mDefaultValue{defaultValue}{} + + ParameterWrapper(std::string paramDescriptor, std::string& variable, std::string defaultValue = "") : mType{rclcpp::ParameterType::PARAMETER_STRING}, mParamDescriptor{std::move(paramDescriptor)}, mData{&variable}, mDefaultValue{std::move(defaultValue)}{} + + ParameterWrapper(std::string paramDescriptor, bool& variable, bool defaultValue = false) : mType{rclcpp::ParameterType::PARAMETER_BOOL}, mParamDescriptor{std::move(paramDescriptor)}, mData{&variable}, mDefaultValue{defaultValue}{} + + ParameterWrapper(std::string paramDescriptor, double& variable, double defaultValue = 0.0) : mType{rclcpp::ParameterType::PARAMETER_DOUBLE}, mParamDescriptor{std::move(paramDescriptor)}, mData{&variable}, mDefaultValue{defaultValue}{} + + ParameterWrapper(std::string paramDescriptor, float& variable, float defaultValue = 0.0) : mType{rclcpp::ParameterType::PARAMETER_DOUBLE}, mParamDescriptor{std::move(paramDescriptor)}, mData{&variable}, mDefaultValue{defaultValue}{} + + void visit(rclcpp::Node* node){ + std::visit(overload{ + [&](int* arg){ + try{ + *arg = static_cast(node->get_parameter(mParamDescriptor).as_int()); + }catch(rclcpp::exceptions::ParameterUninitializedException& e){ + try{ + *arg = std::get(mDefaultValue); + }catch (std::bad_variant_access const& ex){ + throw std::runtime_error("Bad Variant Access: Type not int"); + } + } + }, + [&](std::string* arg){ + try{ + *arg = node->get_parameter(mParamDescriptor).as_string(); + }catch(rclcpp::exceptions::ParameterUninitializedException& e){ + try{ + *arg = std::get(mDefaultValue); + }catch (std::bad_variant_access const& ex){ + throw std::runtime_error("Bad Variant Access: Type not std::string"); + } + } + }, + [&](bool* arg){ + try{ + *arg = node->get_parameter(mParamDescriptor).as_bool(); + }catch(rclcpp::exceptions::ParameterUninitializedException& e){ + try{ + *arg = std::get(mDefaultValue); + }catch (std::bad_variant_access const& ex){ + throw std::runtime_error("Bad Variant Access: Type not bool"); + } + } + }, + [&](double* arg){ + try{ + *arg = node->get_parameter(mParamDescriptor).as_double(); + }catch(rclcpp::exceptions::ParameterUninitializedException& e){ + try{ + *arg = std::get(mDefaultValue); + }catch (std::bad_variant_access const& ex){ + throw std::runtime_error("Bad Variant Access: Type not double"); + } + } + }, + [&](float* arg){ + try{ + *arg = static_cast(node->get_parameter(mParamDescriptor).as_double()); + }catch(rclcpp::exceptions::ParameterUninitializedException& e){ + try{ + *arg = std::get(mDefaultValue); + }catch (std::bad_variant_access const& ex){ + throw std::runtime_error("Bad Variant Access: Type not float"); + } + } + } + }, mData); + } + + static inline auto declareParameters(rclcpp::Node* node, std::vector& params) -> void{ + RCLCPP_INFO(rclcpp::get_logger("param_logger"), "Declaring %zu parameters...", params.size()); + for(auto& param : params){ + node->declare_parameter(param.mParamDescriptor, param.mType); + param.visit(node); + } + } + }; +}; diff --git a/perception/object_detector/object_detector.cpp 
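For reference, a minimal sketch of how ParameterWrapper is consumed by a node (illustrative only, not part of this diff; the node name and parameter names here are hypothetical, and it assumes parameter.hpp is on the include path). Each entry pairs a parameter name with a member variable and a default; declareParameters() declares every entry on the node, then visit() writes the resolved value, YAML override or default, back through the stored pointer:

#include <cstdlib>
#include <memory>
#include <string>
#include <vector>

#include <rclcpp/rclcpp.hpp>

#include "parameter.hpp"

class ExampleNode final : public rclcpp::Node {
    int mGrabTargetFps{};
    std::string mWorldFrame;

public:
    ExampleNode() : Node("example_node") {
        // Declare both parameters and bind them to members in one pass
        std::vector<mrover::ParameterWrapper> params{
                {"grab_target_fps", mGrabTargetFps, 60},
                {"world_frame", mWorldFrame, "map"}};
        mrover::ParameterWrapper::declareParameters(this, params);
        RCLCPP_INFO(get_logger(), "fps=%d frame=%s", mGrabTargetFps, mWorldFrame.c_str());
    }
};

auto main(int argc, char** argv) -> int {
    rclcpp::init(argc, argv);
    rclcpp::spin(std::make_shared<ExampleNode>());
    rclcpp::shutdown();
    return EXIT_SUCCESS;
}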
b/perception/object_detector/object_detector.cpp new file mode 100644 index 00000000..f6511b79 --- /dev/null +++ b/perception/object_detector/object_detector.cpp @@ -0,0 +1,47 @@ +#include "object_detector.hpp" + +namespace mrover { + + ObjectDetectorBase::ObjectDetectorBase() : rclcpp::Node(NODE_NAME), mLoopProfiler{get_logger()} { + + std::vector params{ + {"camera_frame", mCameraFrame, "zed_left_camera_frame"}, + {"world_frame", mWorldFrame, "map"}, + {"increment_weight", mObjIncrementWeight, 2}, + {"decrement_weight", mObjDecrementWeight, 1}, + {"hitcount_threshold", mObjHitThreshold, 5}, + {"hitcount_max", mObjMaxHitcount, 10}, + {"model_name", mModelName, "Large-Dataset"}, + {"model_score_threshold", mModelScoreThreshold, 0.75}, + {"model_nms_threshold", mModelNmsThreshold, 0.5} + }; + + ParameterWrapper::declareParameters(this, params); + + std::filesystem::path packagePath = std::filesystem::path{ament_index_cpp::get_package_prefix("mrover")} / ".." / ".." / "src" / "mrover"; + + RCLCPP_INFO_STREAM(get_logger(), "Opening Model " << mModelName); + + RCLCPP_INFO_STREAM(get_logger(), "Found package path " << packagePath); + + mTensorRT = TensortRT{mModelName, packagePath.string()}; + + mDebugImgPub = create_publisher("object_detector/debug_img", 1); + + RCLCPP_INFO_STREAM(get_logger(), std::format("Object detector initialized with model: {} and thresholds: {} and {}", mModelName, mModelScoreThreshold, mModelNmsThreshold)); + } + + StereoObjectDetector::StereoObjectDetector() { + mSensorSub = create_subscription("/camera/left/points", 1, [this](sensor_msgs::msg::PointCloud2::UniquePtr const& msg) { + StereoObjectDetector::pointCloudCallback(msg); + }); + } +} // namespace mrover + + +auto main(int argc, char** argv) -> int { + rclcpp::init(argc, argv); + rclcpp::spin(std::make_shared()); + rclcpp::shutdown(); + return EXIT_SUCCESS; +} diff --git a/perception/object_detector/object_detector.hpp b/perception/object_detector/object_detector.hpp new file mode 100644 index 00000000..28ebe502 --- /dev/null +++ b/perception/object_detector/object_detector.hpp @@ -0,0 +1,67 @@ +#pragma once + +#include "pch.hpp" + +namespace mrover { + + class ObjectDetectorBase : public rclcpp::Node { + + protected: + static constexpr char const* NODE_NAME = "object_detector"; + + std::unique_ptr mTfBuffer = std::make_unique(get_clock()); + std::shared_ptr mTfBroadcaster = std::make_shared(this); + std::shared_ptr mTfListener = std::make_shared(*mTfBuffer); + + std::string mCameraFrame; + std::string mWorldFrame; + + std::string mModelName; + + LoopProfiler mLoopProfiler; + + TensortRT mTensorRT; + + cv::Mat mRgbImage, mImageBlob; + sensor_msgs::msg::Image mDetectionsImageMessage; + + rclcpp::Publisher::SharedPtr mDebugImgPub; + rclcpp::Subscription::SharedPtr mSensorSub; + + // TODO(quintin): Do not hard code exactly two classes + std::vector mObjectHitCounts{0, 0}; + + int mObjIncrementWeight{}; + int mObjDecrementWeight{}; + int mObjHitThreshold{}; + int mObjMaxHitcount{}; + float mModelScoreThreshold{}; + float mModelNmsThreshold{}; + + auto spiralSearchForValidPoint(sensor_msgs::msg::PointCloud2::UniquePtr const& cloudPtr, + std::size_t u, std::size_t v, + std::size_t width, std::size_t height) const -> std::optional; + + auto updateHitsObject(sensor_msgs::msg::PointCloud2::UniquePtr const& msg, + std::span detections, + cv::Size const& imageSize = {640, 640}) -> void; + + auto publishDetectedObjects(cv::InputArray image) -> void; + + static auto drawDetectionBoxes(cv::InputOutputArray image, std::span 
detections) -> void; + + public: + explicit ObjectDetectorBase(); + + ~ObjectDetectorBase() override = default; + }; + + class StereoObjectDetector final : public ObjectDetectorBase { + public: + explicit StereoObjectDetector(); + + static auto convertPointCloudToRGB(sensor_msgs::msg::PointCloud2::UniquePtr const& msg, cv::Mat const& image) -> void; + + auto pointCloudCallback(sensor_msgs::msg::PointCloud2::UniquePtr const& msg) -> void; + }; +} // namespace mrover diff --git a/perception/object_detector/object_detector.processing.cpp b/perception/object_detector/object_detector.processing.cpp new file mode 100644 index 00000000..4be823b4 --- /dev/null +++ b/perception/object_detector/object_detector.processing.cpp @@ -0,0 +1,173 @@ +#include "object_detector.hpp" + +namespace mrover { + + auto StereoObjectDetector::pointCloudCallback(sensor_msgs::msg::PointCloud2::UniquePtr const& msg) -> void { + assert(msg); + assert(msg->height > 0); + assert(msg->width > 0); + + mLoopProfiler.beginLoop(); + + // Adjust the picture size to be in line with the expected img size from the Point Cloud + if (static_cast(msg->height) != mRgbImage.rows || static_cast(msg->width) != mRgbImage.cols) { + RCLCPP_INFO_STREAM(get_logger(), std::format("Image size changed from [{}, {}] to [{}, {}]", mRgbImage.cols, mRgbImage.rows, msg->width, msg->height)); + mRgbImage = cv::Mat{static_cast(msg->height), static_cast(msg->width), CV_8UC3, cv::Scalar{0, 0, 0, 0}}; + } + convertPointCloudToRGB(msg, mRgbImage); + + // TODO(quintin): Avoid hard coding blob size + cv::Size blobSize{640, 640}; + cv::Mat blobSizedImage; + cv::resize(mRgbImage, blobSizedImage, blobSize); + cv::dnn::blobFromImage(blobSizedImage, mImageBlob, 1.0 / 255.0, blobSize, cv::Scalar{}, false, false); + + mLoopProfiler.measureEvent("Conversion"); + + // Run the blob through the model + std::vector detections{}; + mTensorRT.modelForwardPass(mImageBlob, detections, mModelScoreThreshold, mModelNmsThreshold); + + mLoopProfiler.measureEvent("Execution"); + + // Increment Object hit counts if theyre seen + // Decrement Object hit counts if they're not seen + updateHitsObject(msg, detections); + + // Draw the bounding boxes on the image + drawDetectionBoxes(blobSizedImage, detections); + publishDetectedObjects(blobSizedImage); + + mLoopProfiler.measureEvent("Publication"); + } + + auto ObjectDetectorBase::updateHitsObject(sensor_msgs::msg::PointCloud2::UniquePtr const& msg, std::span detections, cv::Size const& imageSize) -> void { + // Set of flags indicating if the given object has been seen + // TODO(quintin): Do not hard code exactly two classes + std::bitset<2> seenObjects{0b00}; + for (auto const& [classId, className, confidence, box]: detections) { + // Resize from blob space to image space + cv::Point2f centerInBlob = cv::Point2f{box.tl()} + cv::Point2f{box.size()} / 2; + float xRatio = static_cast(msg->width) / static_cast(imageSize.width); + float yRatio = static_cast(msg->height) / static_cast(imageSize.height); + std::size_t centerXInImage = std::lround(centerInBlob.x * xRatio); + std::size_t centerYInImage = std::lround(centerInBlob.y * yRatio); + + assert(static_cast(classId) < mObjectHitCounts.size()); + + if (seenObjects[classId]) return; + + seenObjects[classId] = true; + + // Get the object's position in 3D from the point cloud and run this statement if the optional has a value + if (std::optional objectInCamera = spiralSearchForValidPoint(msg, centerXInImage, centerYInImage, box.width, box.height)) { + try { + std::string 
objectImmediateFrame = std::format("immediate{}", className); + // Push the immediate detections to the camera frame + SE3Conversions::pushToTfTree(*mTfBroadcaster, objectImmediateFrame, mCameraFrame, objectInCamera.value(), get_clock()->now()); + // Since the object is seen we need to increment the hit counter + mObjectHitCounts[classId] = std::min(mObjMaxHitcount, mObjectHitCounts[classId] + mObjIncrementWeight); + + // Only publish to permament if we are confident in the object + if (mObjectHitCounts[classId] > mObjHitThreshold) { + std::string objectPermanentFrame = className; + // Grab the object inside of the camera frame and push it into the map frame + SE3d objectInMap = SE3Conversions::fromTfTree(*mTfBuffer, objectImmediateFrame, mWorldFrame); + SE3Conversions::pushToTfTree(*mTfBroadcaster, objectPermanentFrame, mWorldFrame, objectInMap, get_clock()->now()); + } + + } catch (tf2::ExtrapolationException const&) { + RCLCPP_INFO_STREAM(get_logger(), "Old data for immediate objects"); + } catch (tf2::LookupException const&) { + RCLCPP_INFO_STREAM(get_logger(), "Expected transform for immediate objects"); + } catch (tf2::ConnectivityException const&) { + RCLCPP_INFO_STREAM(get_logger(), "Expected connection to odom frame. Is visual odometry running?"); + } + } + } + + for (std::size_t i = 0; i < seenObjects.size(); i++) { + if (seenObjects[i]) continue; + + assert(i < mObjectHitCounts.size()); + mObjectHitCounts[i] = std::max(0, mObjectHitCounts[i] - mObjDecrementWeight); + } + } + + auto ObjectDetectorBase::spiralSearchForValidPoint(sensor_msgs::msg::PointCloud2::UniquePtr const& cloudPtr, std::size_t u, std::size_t v, std::size_t width, std::size_t height) const -> std::optional { + // See: https://stackoverflow.com/a/398302 + auto xc = static_cast(u), yc = static_cast(v); + auto sw = static_cast(width), sh = static_cast(height); + auto ih = static_cast(cloudPtr->height), iw = static_cast(cloudPtr->width); + int sx = 0, sy = 0; // Spiral coordinates starting at (0, 0) + int dx = 0, dy = -1; + std::size_t bigger = std::max(width, height); + std::size_t maxIterations = bigger * bigger; + for (std::size_t i = 0; i < maxIterations; i++) { + if (-sw / 2 < sx && sx <= sw / 2 && -sh / 2 < sy && sy <= sh / 2) { + int ix = xc + sx, iy = yc + sy; // Image coordinates + + if (ix < 0 || ix >= iw || iy < 0 || iy >= ih) { + RCLCPP_INFO_STREAM(get_logger(), std::format("Spiral query is outside the image: [{}, {}]", ix, iy)); + continue; + } + + Point const& point = reinterpret_cast(cloudPtr->data.data())[ix + iy * cloudPtr->width]; + if (!std::isfinite(point.x) || !std::isfinite(point.y) || !std::isfinite(point.z)) continue; + + return std::make_optional(R3d{point.x, point.y, point.z}, SO3d::Identity()); + } + + if (sx == sy || (sx < 0 && sx == -sy) || (sx > 0 && sx == 1 - sy)) { + dy = -dy; + std::swap(dx, dy); + } + + sx += dx; + sy += dy; + } + return std::nullopt; + } + + auto ObjectDetectorBase::drawDetectionBoxes(cv::InputOutputArray image, std::span detections) -> void { + // Draw the detected object's bounding boxes on the image for each of the objects detected + std::array const fontColors{cv::Scalar{0, 4, 227}, cv::Scalar{232, 115, 5}}; + for (std::size_t i = 0; i < detections.size(); i++) { + // Font color will change for each different detection + cv::Scalar const& fontColor = fontColors.at(detections[i].classId); + cv::rectangle(image, detections[i].box, fontColor, 1, cv::LINE_8, 0); + + // Put the text on the image + cv::Point textPosition(80, static_cast(80 * (i + 1))); + constexpr 
int fontSize = 1; + constexpr int fontWeight = 2; + putText(image, detections[i].className, textPosition, cv::FONT_HERSHEY_COMPLEX, fontSize, fontColor, fontWeight); // Putting the text in the matrix + } + } + + auto ObjectDetectorBase::publishDetectedObjects(cv::InputArray image) -> void { + mDetectionsImageMessage.header.stamp = get_clock()->now(); + mDetectionsImageMessage.height = image.rows(); + mDetectionsImageMessage.width = image.cols(); + mDetectionsImageMessage.encoding = sensor_msgs::image_encodings::BGRA8; + mDetectionsImageMessage.is_bigendian = __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__; + mDetectionsImageMessage.step = 4 * mDetectionsImageMessage.width; + mDetectionsImageMessage.data.resize(mDetectionsImageMessage.step * mDetectionsImageMessage.height); + cv::Mat debugImageWrapper{image.size(), CV_8UC4, mDetectionsImageMessage.data.data()}; + cv::cvtColor(image, debugImageWrapper, cv::COLOR_RGB2BGRA); + + mDebugImgPub->publish(mDetectionsImageMessage); + } + + auto StereoObjectDetector::convertPointCloudToRGB(sensor_msgs::msg::PointCloud2::UniquePtr const& msg, cv::Mat const& image) -> void { + auto* pixelPtr = reinterpret_cast(image.data); + auto* pointPtr = reinterpret_cast(msg->data.data()); + std::for_each(std::execution::par_unseq, pixelPtr, pixelPtr + image.total(), [&](cv::Vec3b& pixel) { + std::size_t const i = &pixel - pixelPtr; + pixel[0] = pointPtr[i].r; + pixel[1] = pointPtr[i].g; + pixel[2] = pointPtr[i].b; + }); + } + +} // namespace mrover diff --git a/perception/object_detector/pch.hpp b/perception/object_detector/pch.hpp new file mode 100644 index 00000000..81eda570 --- /dev/null +++ b/perception/object_detector/pch.hpp @@ -0,0 +1,53 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +// Ros Client Library +#include +#include +#include +#include + +// Messages +#include "sensor_msgs/image_encodings.hpp" +#include "sensor_msgs/msg/image.hpp" +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "parameter.hpp" +#include "point.hpp" +#include diff --git a/perception/point.hpp b/perception/point.hpp index 0dc9b14c..ffec6f6d 100644 --- a/perception/point.hpp +++ b/perception/point.hpp @@ -1,5 +1,17 @@ #pragma once +#include "sensor_msgs/image_encodings.hpp" +#include "sensor_msgs/msg/image.hpp" +#include +#include +#include +#include +#include +#include +#include +#include +#include + namespace mrover { /** @@ -9,7 +21,7 @@ namespace mrover { */ struct Point { float x, y, z; - uint8_t b, g, r, a; + std::uint8_t b, g, r, a; float normal_x, normal_y, normal_z; float curvature; } __attribute__((packed)); diff --git a/perception/zed_wrapper/pch.hpp b/perception/zed_wrapper/pch.hpp new file mode 100644 index 00000000..6f072f76 --- /dev/null +++ b/perception/zed_wrapper/pch.hpp @@ -0,0 +1,31 @@ +// Ros Client Library +#include +#include +#include + +// Messages +#include +#include +#include +#include +#include +#include +#include +#include + +// ZED +#include + +// CUDA +#include + +// STD +#include +#include +#include + +// Utils +#include "lie.hpp" +#include "loop_profiler.hpp" +#include "parameter.hpp" +#include "point.hpp" diff --git a/perception/zed_wrapper/zed_wrapper.bridge.cpp b/perception/zed_wrapper/zed_wrapper.bridge.cpp new file mode 100644 index 00000000..9f7e2348 --- /dev/null +++ 
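As a reference for the traversal used by spiralSearchForValidPoint() above (see stackoverflow.com/a/398302), here is a self-contained sketch of the same square-spiral walk on a toy grid. It is illustrative only: the grid and predicate are hypothetical, and the bounds check is simplified to plain image bounds rather than the search-window check used in the patch.

#include <algorithm>
#include <cstdio>
#include <optional>
#include <utility>

auto spiralFind(int const* grid, int width, int height, int xc, int yc)
        -> std::optional<std::pair<int, int>> {
    int sx = 0, sy = 0; // Spiral coordinates starting at (0, 0)
    int dx = 0, dy = -1;
    int const bigger = std::max(width, height);
    for (int i = 0; i < bigger * bigger; ++i) {
        int ix = xc + sx, iy = yc + sy; // Grid coordinates
        if (ix >= 0 && ix < width && iy >= 0 && iy < height &&
            grid[iy * width + ix] != 0)
            return std::pair{ix, iy};
        // Turn the spiral at the corner of each square ring:
        // (dx, dy) -> (-dy, dx), exactly as in the patch
        if (sx == sy || (sx < 0 && sx == -sy) || (sx > 0 && sx == 1 - sy)) {
            dy = -dy;
            std::swap(dx, dy);
        }
        sx += dx;
        sy += dy;
    }
    return std::nullopt; // No valid cell in range
}

int main() {
    int grid[5 * 5] = {};
    grid[1 * 5 + 3] = 1; // Lone "valid" cell at (3, 1)
    if (auto hit = spiralFind(grid, 5, 5, 2, 2))
        std::printf("found at (%d, %d)\n", hit->first, hit->second);
    return 0;
}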
b/perception/zed_wrapper/zed_wrapper.bridge.cpp @@ -0,0 +1,116 @@ +#include "zed_wrapper.hpp" + +namespace mrover { + + constexpr static float DEG_TO_RAD = std::numbers::pi_v / 180.0f; + constexpr static std::uint64_t NS_PER_S = 1000000000; + + auto slTime2Ros(sl::Timestamp t) -> rclcpp::Time { + return {static_cast(t.getNanoseconds() / NS_PER_S), + static_cast(t.getNanoseconds() % NS_PER_S)}; + } + + auto fillImuMessage(rclcpp::Node* node, sl::SensorsData::IMUData& imuData, sensor_msgs::msg::Imu& msg) -> void { + msg.header.stamp = node->now(); + msg.orientation.x = imuData.pose.getOrientation().x; + msg.orientation.y = imuData.pose.getOrientation().y; + msg.orientation.z = imuData.pose.getOrientation().z; + msg.orientation.w = imuData.pose.getOrientation().w; + for (int i = 0; i < 3; ++i) + for (int j = 0; j < 3; ++j) + msg.orientation_covariance[i * 3 + j] = imuData.pose_covariance(i, j) * DEG_TO_RAD * DEG_TO_RAD; + msg.angular_velocity.x = imuData.angular_velocity.x * DEG_TO_RAD; + msg.angular_velocity.y = imuData.angular_velocity.y * DEG_TO_RAD; + msg.angular_velocity.z = imuData.angular_velocity.z * DEG_TO_RAD; + for (int i = 0; i < 3; ++i) + for (int j = 0; j < 3; ++j) + msg.angular_velocity_covariance[i * 3 + j] = imuData.angular_velocity_covariance(i, j) * DEG_TO_RAD * DEG_TO_RAD; + msg.linear_acceleration.x = imuData.linear_acceleration.x; + msg.linear_acceleration.y = imuData.linear_acceleration.y; + msg.linear_acceleration.z = imuData.linear_acceleration.z; + for (int i = 0; i < 3; ++i) + for (int j = 0; j < 3; ++j) + msg.linear_acceleration_covariance[i * 3 + j] = imuData.linear_acceleration_covariance(i, j); + } + + auto fillMagMessage(sl::SensorsData::MagnetometerData const& magData, sensor_msgs::msg::MagneticField& msg) -> void { + msg.magnetic_field.x = magData.magnetic_field_calibrated.x; + msg.magnetic_field.y = magData.magnetic_field_calibrated.y; + msg.magnetic_field.z = magData.magnetic_field_calibrated.z; + + msg.magnetic_field_covariance.fill(0.0f); + msg.magnetic_field_covariance[0] = 0.039e-6; + msg.magnetic_field_covariance[4] = 0.037e-6; + msg.magnetic_field_covariance[8] = 0.047e-6; + } + + auto fillImageMessage(sl::Mat const& bgra, sensor_msgs::msg::Image::UniquePtr const& msg) -> void { + assert(bgra.getChannels() == 4); + assert(msg); + + msg->height = bgra.getHeight(); + msg->width = bgra.getWidth(); + msg->encoding = sensor_msgs::image_encodings::BGRA8; + msg->step = bgra.getStepBytes(); + msg->is_bigendian = __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__; + auto* bgrGpuPtr = bgra.getPtr(sl::MEM::GPU); + size_t size = msg->step * msg->height; + msg->data.resize(size); + checkCudaError(cudaMemcpy(msg->data.data(), bgrGpuPtr, size, cudaMemcpyDeviceToHost)); + } + + auto fillCameraInfoMessages(sl::CalibrationParameters& calibration, sl::Resolution const& resolution, + sensor_msgs::msg::CameraInfo& leftInfoMsg, sensor_msgs::msg::CameraInfo& rightInfoMsg) -> void { + + + leftInfoMsg.width = resolution.width; + leftInfoMsg.height = resolution.height; + rightInfoMsg.width = resolution.width; + rightInfoMsg.height = resolution.height; + leftInfoMsg.distortion_model = sensor_msgs::distortion_models::PLUMB_BOB; + rightInfoMsg.distortion_model = sensor_msgs::distortion_models::PLUMB_BOB; + leftInfoMsg.d.resize(5); + rightInfoMsg.d.resize(5); + leftInfoMsg.d[0] = calibration.left_cam.disto[0]; + leftInfoMsg.d[1] = calibration.left_cam.disto[1]; + leftInfoMsg.d[2] = calibration.left_cam.disto[4]; + leftInfoMsg.d[3] = calibration.left_cam.disto[2]; + leftInfoMsg.d[4] = 
calibration.left_cam.disto[3]; + rightInfoMsg.d[0] = calibration.right_cam.disto[0]; + rightInfoMsg.d[1] = calibration.right_cam.disto[1]; + rightInfoMsg.d[2] = calibration.right_cam.disto[4]; + rightInfoMsg.d[3] = calibration.right_cam.disto[2]; + rightInfoMsg.d[4] = calibration.right_cam.disto[3]; + leftInfoMsg.k.fill(0.0); + rightInfoMsg.k.fill(0.0); + leftInfoMsg.k[0] = calibration.left_cam.fx; + leftInfoMsg.k[2] = calibration.left_cam.cx; + leftInfoMsg.k[4] = calibration.left_cam.fy; + leftInfoMsg.k[5] = calibration.left_cam.cy; + leftInfoMsg.k[8] = 1.0; + rightInfoMsg.k[0] = calibration.right_cam.fx; + rightInfoMsg.k[2] = calibration.right_cam.cx; + rightInfoMsg.k[4] = calibration.right_cam.fy; + rightInfoMsg.k[5] = calibration.right_cam.cy; + rightInfoMsg.k[8] = 1; + leftInfoMsg.r.fill(0.0); + rightInfoMsg.r.fill(0.0); + for (size_t i = 0; i < 3; ++i) { + leftInfoMsg.r[i * 3 + i] = 1.0; + rightInfoMsg.r[i * 3 + i] = 1.0; + } + leftInfoMsg.p.fill(0.0); + rightInfoMsg.p.fill(0.0); + leftInfoMsg.p[0] = calibration.left_cam.fx; + leftInfoMsg.p[2] = calibration.left_cam.cx; + leftInfoMsg.p[5] = calibration.left_cam.fy; + leftInfoMsg.p[6] = calibration.left_cam.cy; + leftInfoMsg.p[10] = 1.0; + rightInfoMsg.p[3] = -1.0 * calibration.left_cam.fx * calibration.getCameraBaseline(); + rightInfoMsg.p[0] = calibration.right_cam.fx; + rightInfoMsg.p[2] = calibration.right_cam.cx; + rightInfoMsg.p[5] = calibration.right_cam.fy; + rightInfoMsg.p[6] = calibration.right_cam.cy; + rightInfoMsg.p[10] = 1.0; + } +}; // namespace mrover diff --git a/perception/zed_wrapper/zed_wrapper.bridge.cu b/perception/zed_wrapper/zed_wrapper.bridge.cu new file mode 100644 index 00000000..709fc51b --- /dev/null +++ b/perception/zed_wrapper/zed_wrapper.bridge.cu @@ -0,0 +1,81 @@ +// Be careful what you include in this file, it is compiled with nvcc (NVIDIA CUDA compiler) + +#include "point.hpp" + +#include +#include +#include + +#include +#include + +namespace mrover { + + using PointCloudGpu = thrust::device_vector; + + // Optimal for the Jetson Xavier NX - this is max threads per block and each block has a max of 2048 threads + constexpr uint BLOCK_SIZE = 1024; + + /** + * @brief Runs on the GPU, interleaving the XYZ and BGRA buffers into a single buffer of #Point structs. + */ + __global__ void fillPointCloudMessageKernel(sl::float4* xyzGpuPtr, sl::uchar4* bgraGpuPtr, sl::float4* normalsGpuPtr, Point* pcGpuPtr, size_t size) { + // This function is invoked once per element at index #i in the point cloud + size_t const i = blockIdx.x * blockDim.x + threadIdx.x; + if (i >= size) return; + + pcGpuPtr[i].x = xyzGpuPtr[i].x; + pcGpuPtr[i].y = xyzGpuPtr[i].y; + pcGpuPtr[i].z = xyzGpuPtr[i].z; + pcGpuPtr[i].b = bgraGpuPtr[i].r; + pcGpuPtr[i].g = bgraGpuPtr[i].g; + pcGpuPtr[i].r = bgraGpuPtr[i].b; + pcGpuPtr[i].a = bgraGpuPtr[i].a; + // pcGpuPtr[i].normal_x = normalsGpuPtr[i].x; + // pcGpuPtr[i].normal_y = normalsGpuPtr[i].y; + // pcGpuPtr[i].normal_z = normalsGpuPtr[i].z; + } + + void checkCudaError(cudaError_t err) { + if (err == cudaSuccess) return; + + //RCLCPP_ERROR_STREAM(rclcpp::get_logger("cuda_error"), "CUDA error: " << cudaGetErrorString(err)); + throw std::runtime_error("CUDA error"); + } + + /** + * Fills a PointCloud2 message residing on the CPU from two GPU buffers (one for XYZ and one for BGRA). 
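The kernel above follows the standard one-thread-per-element launch pattern. A self-contained sketch of that pattern (illustrative, not from this patch): the grid size is rounded up via integer arithmetic, equivalent to the std::ceil used in the wrapper, so a partially filled final block still covers the tail; this is why the kernel needs its early-return guard and why the launch is followed by a cudaPeekAtLastError() check.

#include <cstddef>
#include <cuda_runtime.h>

constexpr unsigned int BLOCK_SIZE = 1024; // Matches the wrapper's block size

// One thread per element; the guard discards the excess threads in the
// final, partially filled block.
__global__ void scaleKernel(float* data, std::size_t size, float factor) {
    std::size_t const i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i >= size) return;
    data[i] *= factor;
}

int main() {
    std::size_t const size = 1280 * 720; // e.g. one value per HD720 pixel
    float* data = nullptr;
    if (cudaMalloc(&data, size * sizeof(float)) != cudaSuccess) return 1;

    // Round the grid up so the last partial block is still covered
    auto numBlocks = static_cast<unsigned int>((size + BLOCK_SIZE - 1) / BLOCK_SIZE);
    scaleKernel<<<numBlocks, BLOCK_SIZE>>>(data, size, 2.0f);

    if (cudaPeekAtLastError() != cudaSuccess) return 1;  // Launch failed
    if (cudaDeviceSynchronize() != cudaSuccess) return 1; // Kernel failed
    return cudaFree(data) != cudaSuccess;
}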
+ * + * @param xyzGpu XYZ buffer on the GPU + * @param bgraGpu BGRA buffer on the GPU + * @param pcGpu Point cloud buffer on the GPU (@see Point) + * @param msg Point cloud message with buffer on the CPU + */ + void fillPointCloudMessageFromGpu(sl::Mat& xyzGpu, sl::Mat& bgraGpu, sl::Mat& normalsGpu, PointCloudGpu& pcGpu, sensor_msgs::msg::PointCloud2::UniquePtr const& msg) { + assert(bgraGpu.getWidth() >= xyzGpu.getWidth()); + assert(bgraGpu.getHeight() >= xyzGpu.getHeight()); + assert(bgraGpu.getChannels() == 4); + assert(xyzGpu.getChannels() == 4); // Last channel is unused + assert(msg); + + auto* bgraGpuPtr = bgraGpu.getPtr(sl::MEM::GPU); + auto* xyzGpuPtr = xyzGpu.getPtr(sl::MEM::GPU); + auto* normalsGpuPtr = normalsGpu.getPtr(sl::MEM::GPU); + msg->is_bigendian = __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__; + msg->is_dense = true; + msg->height = bgraGpu.getHeight(); + msg->width = bgraGpu.getWidth(); + fillPointCloudMessageHeader(msg); + std::size_t size = msg->width * msg->height; + + pcGpu.resize(size); + Point* pcGpuPtr = pcGpu.data().get(); + dim3 threadsPerBlock{BLOCK_SIZE}; + dim3 numBlocks{static_cast(std::ceil(static_cast(size) / BLOCK_SIZE))}; + fillPointCloudMessageKernel<<>>(xyzGpuPtr, bgraGpuPtr, normalsGpuPtr, pcGpuPtr, size); + checkCudaError(cudaPeekAtLastError()); + checkCudaError(cudaMemcpy(msg->data.data(), pcGpuPtr, size * sizeof(Point), cudaMemcpyDeviceToHost)); + } + + +} // namespace mrover diff --git a/perception/zed_wrapper/zed_wrapper.cpp b/perception/zed_wrapper/zed_wrapper.cpp new file mode 100644 index 00000000..806efecb --- /dev/null +++ b/perception/zed_wrapper/zed_wrapper.cpp @@ -0,0 +1,324 @@ +#include "zed_wrapper.hpp" + +namespace mrover { + template + [[nodiscard]] auto stringToZedEnum(std::string_view string) -> TEnum { + using int_t = std::underlying_type_t; + for (int_t i = 0; i < static_cast(TEnum::LAST); ++i) { + if (sl::String{string.data()} == sl::toString(static_cast(i))) { + return static_cast(i); + } + } + throw std::invalid_argument("Invalid enum string"); + } + + + ZedWrapper::ZedWrapper() : Node(NODE_NAME, rclcpp::NodeOptions().use_intra_process_comms(true)), mLoopProfilerGrab{get_logger()}, mLoopProfilerUpdate{get_logger()} { + try { + RCLCPP_INFO(this->get_logger(), "Created Zed Wrapper Node, %s", NODE_NAME); + + // Publishers + mRightImgPub = create_publisher("right/image", 1); + mLeftImgPub = create_publisher("left/image", 1); + mImuPub = create_publisher("zed_imu/data_raw", 1); + mMagPub = create_publisher("zed_imu/mag", 1); + mPcPub = create_publisher("camera/left/points", 1); + mRightCamInfoPub = create_publisher("camera/right/camera_info", 1); + mLeftCamInfoPub = create_publisher("camera/left/camera_info", 1); + + // Declare and set Params + int imageWidth{}; + int imageHeight{}; + + std::string svoFile{}; + + std::string grabResolutionString, depthModeString; + + std::vector params{ + {"depth_confidence", mDepthConfidence, 70}, + {"serial_number", mSerialNumber, -1}, + {"grab_target_fps", mGrabTargetFps, 60}, + {"texture_confidence", mTextureConfidence, 100}, + {"image_width", imageWidth, 1280}, + {"image_height", imageHeight, 720}, + {"svo_file", svoFile, ""}, + {"use_depth_stabilization", mUseDepthStabilization, false}, + {"grab_resolution", grabResolutionString, std::string{sl::toString(sl::RESOLUTION::HD720)}}, + {"depth_mode", depthModeString, std::string{sl::toString(sl::DEPTH_MODE::PERFORMANCE)}}, + {"depth_maximum_distance", mDepthMaximumDistance, 12.0}, + {"use_builtin_visual_odom", mUseBuiltinPosTracking, false}, + 
{"use_pose_smoothing", mUsePoseSmoothing, true}, + {"use_area_memory", mUseAreaMemory, true} + }; + + ParameterWrapper::declareParameters(this, params); + + mSvoPath = svoFile.c_str(); + + if (imageWidth < 0 || imageHeight < 0) { + throw std::invalid_argument("Invalid image dimensions"); + } + if (mGrabTargetFps < 0) { + throw std::invalid_argument("Invalid grab target framerate"); + } + + mImageResolution = sl::Resolution(imageWidth, imageHeight); + mPointResolution = sl::Resolution(imageWidth, imageHeight); + mNormalsResolution = sl::Resolution(imageWidth, imageHeight); + + RCLCPP_INFO_STREAM(get_logger(), std::format("Resolution: {} image: {}x{} points: {}x{}", + grabResolutionString, mImageResolution.width, mImageResolution.height, mPointResolution.width, mPointResolution.height) + .c_str()); + RCLCPP_INFO_STREAM(get_logger(), std::format("Use builtin visual odometry: {}", mUseBuiltinPosTracking ? "true" : "false")); + + sl::InitParameters initParameters; + + if (mSvoPath) { + initParameters.input.setFromSVOFile(mSvoPath); + } else { + if (mSerialNumber == -1) { + initParameters.input.setFromCameraID(-1, sl::BUS_TYPE::USB); + } else { + initParameters.input.setFromSerialNumber(mSerialNumber, sl::BUS_TYPE::USB); + } + } + + RCLCPP_INFO_STREAM(get_logger(), grabResolutionString); + + initParameters.depth_stabilization = mUseDepthStabilization; + initParameters.camera_resolution = stringToZedEnum(grabResolutionString); + initParameters.depth_mode = stringToZedEnum(depthModeString); + initParameters.coordinate_units = sl::UNIT::METER; + initParameters.sdk_verbose = true; // Log useful information + initParameters.camera_fps = mGrabTargetFps; + initParameters.coordinate_system = sl::COORDINATE_SYSTEM::RIGHT_HANDED_Z_UP_X_FWD; // Match ROS + initParameters.depth_maximum_distance = static_cast(mDepthMaximumDistance); + RCLCPP_INFO_STREAM(get_logger(), depthModeString); + + mDepthEnabled = initParameters.depth_mode != sl::DEPTH_MODE::NONE; + + if (mZed.open(initParameters) != sl::ERROR_CODE::SUCCESS) { + throw std::runtime_error("ZED failed to open"); + } + + mZedInfo = mZed.getCameraInformation(); + + if (mUseBuiltinPosTracking) { + sl::PositionalTrackingParameters positionalTrackingParameters; + positionalTrackingParameters.enable_pose_smoothing = mUsePoseSmoothing; + positionalTrackingParameters.enable_area_memory = mUseAreaMemory; + mZed.enablePositionalTracking(positionalTrackingParameters); + } + + cudaDeviceProp prop{}; + cudaGetDeviceProperties(&prop, 0); + RCLCPP_INFO_STREAM(get_logger(), std::format("MP count: {}, Max threads/MP: {}, Max blocks/MP: {}, max threads/block: {}", + prop.multiProcessorCount, prop.maxThreadsPerMultiProcessor, prop.maxBlocksPerMultiProcessor, prop.maxThreadsPerBlock)); + + mGrabThread = std::thread(&ZedWrapper::grabThread, this); + mPointCloudThread = std::thread(&ZedWrapper::pointCloudUpdateThread, this); + } catch (std::exception const& e) { + RCLCPP_FATAL_STREAM(get_logger(), std::format("Exception while starting: {}", e.what())); + rclcpp::shutdown(); + } + } + + + auto ZedWrapper::grabThread() -> void { + try { + RCLCPP_INFO(this->get_logger(), "Starting grab thread"); + while (rclcpp::ok()) { + //mLoopProfilerGrab.beginLoop(); + + sl::RuntimeParameters runtimeParameters; + runtimeParameters.confidence_threshold = mDepthConfidence; + runtimeParameters.texture_confidence_threshold = mTextureConfidence; + + + if (sl::ERROR_CODE error = mZed.grab(runtimeParameters); error != sl::ERROR_CODE::SUCCESS) + throw std::runtime_error(std::format("ZED failed to 
grab {}", sl::toString(error).c_str())); + + mLoopProfilerGrab.measureEvent("zed_grab"); + + // Retrieval has to happen on the same thread as grab so that the image and point cloud are synced + if (mZed.retrieveImage(mGrabMeasures.rightImage, sl::VIEW::RIGHT, sl::MEM::GPU, mImageResolution) != sl::ERROR_CODE::SUCCESS) + throw std::runtime_error("ZED failed to retrieve right image"); + + mLoopProfilerGrab.measureEvent("zed_retrieve_right_image"); + + // Only left set is used for processing + if (mDepthEnabled) { + if (mZed.retrieveImage(mGrabMeasures.leftImage, sl::VIEW::LEFT, sl::MEM::GPU, mImageResolution) != sl::ERROR_CODE::SUCCESS) + throw std::runtime_error("ZED failed to retrieve left image"); + if (mZed.retrieveMeasure(mGrabMeasures.leftPoints, sl::MEASURE::XYZ, sl::MEM::GPU, mPointResolution) != sl::ERROR_CODE::SUCCESS) + throw std::runtime_error("ZED failed to retrieve point cloud"); + } + + mLoopProfilerGrab.measureEvent("zed_retrieve_left_image"); + + // if (mZed.retrieveMeasure(mGrabMeasures.leftNormals, sl::MEASURE::NORMALS, sl::MEM::GPU, mNormalsResolution) != sl::ERROR_CODE::SUCCESS) + // throw std::runtime_error("ZED failed to retrieve point cloud normals"); + + assert(mGrabMeasures.leftImage.timestamp == mGrabMeasures.leftPoints.timestamp); + + mGrabMeasures.time = mSvoPath.c_str() ? now() : slTime2Ros(mZed.getTimestamp(sl::TIME_REFERENCE::IMAGE)); + + // If the processing thread is busy skip + // We want this thread to run as fast as possible for grab and positional tracking + if (mSwapMutex.try_lock()) { + std::swap(mGrabMeasures, mPcMeasures); + mIsSwapReady = true; + mSwapMutex.unlock(); + mSwapCv.notify_one(); + } + + // Positional tracking module publishing + if (mUseBuiltinPosTracking) { + sl::Pose pose; + sl::POSITIONAL_TRACKING_STATE status = mZed.getPosition(pose); + if (status == sl::POSITIONAL_TRACKING_STATE::OK) { + sl::Translation const& translation = pose.getTranslation(); + sl::Orientation const& orientation = pose.getOrientation(); + try { + SE3d leftCameraInOdom{{translation.x, translation.y, translation.z}, + Eigen::Quaterniond{orientation.w, orientation.x, orientation.y, orientation.z}.normalized()}; + SE3d baseLinkToLeftCamera = SE3Conversions::fromTfTree(*mTfBuffer, "base_link", "zed_left_camera_frame"); + SE3d baseLinkInOdom = leftCameraInOdom * baseLinkToLeftCamera; + SE3Conversions::pushToTfTree(*mTfBroadcaster, "base_link", "odom", baseLinkInOdom, now()); + } catch (tf2::TransformException& e) { + RCLCPP_INFO_STREAM(get_logger(), "Failed to get transform: " << e.what()); + } + } else { + RCLCPP_INFO_STREAM(get_logger(), "Positional tracking failed: " << status); + } + } + + mLoopProfilerGrab.measureEvent("pose_tracking"); + + if (mZedInfo.camera_model != sl::MODEL::ZED) { + sl::SensorsData sensorData; + mZed.getSensorsData(sensorData, sl::TIME_REFERENCE::CURRENT); + + sensor_msgs::msg::Imu imuMsg; + fillImuMessage(this, sensorData.imu, imuMsg); + imuMsg.header.frame_id = "zed_mag_frame"; + imuMsg.header.stamp = now(); + mImuPub->publish(imuMsg); + + if (mZedInfo.camera_model != sl::MODEL::ZED_M) { + sensor_msgs::msg::MagneticField magMsg; + fillMagMessage(sensorData.magnetometer, magMsg); + magMsg.header.frame_id = "zed_mag_frame"; + magMsg.header.stamp = now(); + mMagPub->publish(magMsg); + } + } + + mLoopProfilerGrab.measureEvent("publish_imu_and_mag"); + } + + mZed.close(); + RCLCPP_INFO(get_logger(), "Grab thread finished"); + + } catch (std::exception const& e) { + RCLCPP_FATAL_STREAM(get_logger(), std::format("Exception while running grab 
thread: {}", e.what())); + mZed.close(); + rclcpp::shutdown(); + std::exit(EXIT_FAILURE); + } + } + + auto ZedWrapper::pointCloudUpdateThread() -> void { + try { + RCLCPP_INFO(get_logger(), "Starting point cloud thread"); + + while (rclcpp::ok()) { + //mLoopProfilerUpdate.beginLoop(); + + // TODO(quintin): May be bad to allocate every update, best case optimized by tcache + // Needed because publish directly shares the pointer to other nodelets running in this process + auto pointCloudMsg = std::make_unique(); + + // Swap critical section + { + std::unique_lock lock{mSwapMutex}; + // Waiting on the condition variable will drop the lock and reacquire it when the condition is met + mSwapCv.wait(lock, [this] { return mIsSwapReady; }); + mIsSwapReady = false; + mLoopProfilerUpdate.measureEvent("wait_and_alloc"); + + if (mDepthEnabled) { + fillPointCloudMessageFromGpu(mPcMeasures.leftPoints, mPcMeasures.leftImage, mPcMeasures.leftNormals, mPointCloudGpu, pointCloudMsg); + pointCloudMsg->header.stamp = mPcMeasures.time; + pointCloudMsg->header.frame_id = "zed_left_camera_frame"; + mLoopProfilerUpdate.measureEvent("fill_pc"); + } + + + auto leftImgMsg = std::make_unique(); + fillImageMessage(mPcMeasures.leftImage, leftImgMsg); + leftImgMsg->header.frame_id = "zed_left_camera_optical_frame"; + leftImgMsg->header.stamp = mPcMeasures.time; + mLeftImgPub->publish(std::move(leftImgMsg)); + mLoopProfilerUpdate.measureEvent("pub_left"); + + auto rightImgMsg = std::make_unique(); + fillImageMessage(mPcMeasures.rightImage, rightImgMsg); + rightImgMsg->header.frame_id = "zed_right_camera_optical_frame"; + rightImgMsg->header.stamp = mPcMeasures.time; + mRightImgPub->publish(std::move(rightImgMsg)); + mLoopProfilerUpdate.measureEvent("pub_right"); + } + + if (mDepthEnabled) { + mPcPub->publish(std::move(pointCloudMsg)); + } + mLoopProfilerUpdate.measureEvent("pub_pc"); + + sl::CalibrationParameters calibration = mZedInfo.camera_configuration.calibration_parameters; + auto leftCamInfoMsg = sensor_msgs::msg::CameraInfo(); + auto rightCamInfoMsg = sensor_msgs::msg::CameraInfo(); + fillCameraInfoMessages(calibration, mImageResolution, leftCamInfoMsg, rightCamInfoMsg); + leftCamInfoMsg.header.frame_id = "zed_left_camera_optical_frame"; + leftCamInfoMsg.header.stamp = mPcMeasures.time; + rightCamInfoMsg.header.frame_id = "zed_right_camera_optical_frame"; + rightCamInfoMsg.header.stamp = mPcMeasures.time; + mLeftCamInfoPub->publish(leftCamInfoMsg); + mRightCamInfoPub->publish(rightCamInfoMsg); + mLoopProfilerUpdate.measureEvent("pub_camera_info"); + } + + RCLCPP_INFO(get_logger(), "Tag thread finished"); + } catch (std::exception const& e) { + RCLCPP_FATAL_STREAM(get_logger(), std::format("Exception while running point cloud thread: {}", e.what())); + rclcpp::shutdown(); + std::exit(EXIT_FAILURE); + } + } + + ZedWrapper::~ZedWrapper() { + RCLCPP_INFO(get_logger(), "ZED node shutting down"); + mPointCloudThread.join(); + mGrabThread.join(); + } + + ZedWrapper::Measures::Measures(Measures&& other) noexcept { + *this = std::move(other); + } + + auto ZedWrapper::Measures::operator=(Measures&& other) noexcept -> Measures& { + sl::Mat::swap(other.leftImage, leftImage); + sl::Mat::swap(other.rightImage, rightImage); + sl::Mat::swap(other.leftPoints, leftPoints); + sl::Mat::swap(other.leftNormals, leftNormals); + std::swap(time, other.time); + return *this; + } +}; // namespace mrover + +auto main(int argc, char* argv[]) -> int { + rclcpp::init(argc, argv); + rclcpp::spin(std::make_shared()); + rclcpp::shutdown(); + 
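For reference, the grab/point-cloud handoff above is a classic double-buffered producer-consumer: the grab thread swaps buffers only when it can take the mutex without blocking (try_lock), so grabbing never stalls on a slow consumer, while the processing thread sleeps on a condition variable until a swap lands. A self-contained sketch of the same pattern with plain std::vector buffers standing in for the sl::Mat measures (illustrative only, not part of this diff):

#include <condition_variable>
#include <iostream>
#include <mutex>
#include <thread>
#include <utility>
#include <vector>

int main() {
    std::vector<int> produced, consumed; // Stand-ins for the Measures structs
    std::mutex swapMutex;
    std::condition_variable swapCv;
    bool isSwapReady = false;
    bool done = false;

    std::thread producer{[&] {
        for (int i = 0; i < 100; ++i) {
            produced.push_back(i);      // "Grab" new data
            if (swapMutex.try_lock()) { // Skip the swap if the consumer is busy
                std::swap(produced, consumed);
                produced.clear();
                isSwapReady = true;
                swapMutex.unlock();
                swapCv.notify_one();
            }
        }
        { std::scoped_lock lock{swapMutex}; done = true; isSwapReady = true; }
        swapCv.notify_one();
    }};

    std::thread consumer{[&] {
        while (true) {
            std::unique_lock lock{swapMutex};
            // wait() drops the lock and reacquires it when the predicate holds
            swapCv.wait(lock, [&] { return isSwapReady; });
            isSwapReady = false;
            if (done) break;
            std::cout << "processing " << consumed.size() << " items\n";
        }
    }};

    producer.join();
    consumer.join();
    return 0;
}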
return 0; +} diff --git a/perception/zed_wrapper/zed_wrapper.hpp b/perception/zed_wrapper/zed_wrapper.hpp new file mode 100644 index 00000000..f9b1f6d0 --- /dev/null +++ b/perception/zed_wrapper/zed_wrapper.hpp @@ -0,0 +1,103 @@ +#pragma once + +#include "pch.hpp" + +namespace mrover { + + using PointCloudGpu = thrust::device_vector; + + class ZedWrapper : public rclcpp::Node { + private: + static constexpr char const* NODE_NAME = "zed_wrapper"; + + std::unique_ptr mTfBuffer = std::make_unique(get_clock()); + std::shared_ptr mTfBroadcaster = std::make_shared(this); + std::shared_ptr mTfListener = std::make_shared(*mTfBuffer); + + struct Measures { + rclcpp::Time time; + sl::Mat leftImage; + sl::Mat rightImage; + sl::Mat leftPoints; + sl::Mat leftNormals; + + Measures() = default; + + Measures(Measures&) = delete; + auto operator=(Measures&) -> Measures& = delete; + + Measures(Measures&&) noexcept; + auto operator=(Measures&&) noexcept -> Measures&; + }; + + LoopProfiler mLoopProfilerGrab; + LoopProfiler mLoopProfilerUpdate; + + // Params + int mSerialNumber{}; + int mGrabTargetFps{}; + int mDepthConfidence{}; + int mTextureConfidence{}; + + bool mUseDepthStabilization{}; + bool mDepthEnabled{}; + bool mUseBuiltinPosTracking{}; + bool mUsePoseSmoothing{}; + bool mUseAreaMemory{}; + + double mDepthMaximumDistance{}; + + // CUDA + PointCloudGpu mPointCloudGpu; + + // ZED + sl::Camera mZed; + sl::CameraInformation mZedInfo; + + Measures mGrabMeasures, mPcMeasures; + + sl::Resolution mImageResolution, mPointResolution, mNormalsResolution; + + sl::String mSvoPath; + + // Publishers + rclcpp::Publisher::SharedPtr mRightImgPub; + rclcpp::Publisher::SharedPtr mLeftImgPub; + rclcpp::Publisher::SharedPtr mImuPub; + rclcpp::Publisher::SharedPtr mMagPub; + rclcpp::Publisher::SharedPtr mPcPub; + rclcpp::Publisher::SharedPtr mLeftCamInfoPub; + rclcpp::Publisher::SharedPtr mRightCamInfoPub; + + // Thread + + std::thread mPointCloudThread, mGrabThread; + std::mutex mSwapMutex; + std::condition_variable mSwapCv; + bool mIsSwapReady = false; + + auto grabThread() -> void; + + auto pointCloudUpdateThread() -> void; + + public: + ZedWrapper(); + + ~ZedWrapper() override; + }; + + auto slTime2Ros(sl::Timestamp t) -> rclcpp::Time; + + auto fillImuMessage(rclcpp::Node* node, sl::SensorsData::IMUData& imuData, sensor_msgs::msg::Imu& msg) -> void; + + auto fillMagMessage(sl::SensorsData::MagnetometerData const& magData, sensor_msgs::msg::MagneticField& msg) -> void; + + auto checkCudaError(cudaError_t error) -> void; + + void fillPointCloudMessageFromGpu(sl::Mat& xyzGpu, sl::Mat& bgraGpu, sl::Mat& normalsGpu, PointCloudGpu& pcGpu, sensor_msgs::msg::PointCloud2::UniquePtr const& msg); + + auto fillCameraInfoMessages(sl::CalibrationParameters& calibration, sl::Resolution const& resolution, + sensor_msgs::msg::CameraInfo& leftInfoMsg, sensor_msgs::msg::CameraInfo& rightInfoMsg) -> void; + + auto fillImageMessage(sl::Mat const& bgra, sensor_msgs::msg::Image::UniquePtr const& msg) -> void; +}; // namespace mrover diff --git a/tensorrt/inference.cu b/tensorrt/inference.cu new file mode 100644 index 00000000..17bda4c3 --- /dev/null +++ b/tensorrt/inference.cu @@ -0,0 +1,172 @@ +#include "inference.cuh" + +using namespace nvinfer1; + +#include + +/** +* cudaMemcpys CPU memory in inputTensor to GPU based on bindings +* Queues that tensor to be passed through model +* cudaMemcpys the result back to CPU memory +* Requires bindings, inputTensor, stream +* Modifies stream, outputTensor +*/ +constexpr static auto 
INPUT_BINDING_NAME = "images"; +constexpr static auto OUTPUT_BINDING_NAME = "output0"; + +Inference::Inference(std::filesystem::path const& onnxModelPath, std::string const& modelName, std::string packagePathString) : mPackagePath{std::move(packagePathString)} { + mModelPath = onnxModelPath.string(); + + // Create the engine object from either the file or from onnx file + mEngine = std::unique_ptr{createCudaEngine(onnxModelPath, modelName)}; + if (!mEngine) throw std::runtime_error("Failed to create CUDA engine"); + + mLogger.log(ILogger::Severity::kINFO, "Created CUDA Engine"); + + // Check some assumptions about the model + if (mEngine->getNbIOTensors() != 2) throw std::runtime_error("Invalid Binding Count"); + if (mEngine->getTensorIOMode(INPUT_BINDING_NAME) != TensorIOMode::kINPUT) throw std::runtime_error("Expected Input Binding 0 Is An Input"); + if (mEngine->getTensorIOMode(OUTPUT_BINDING_NAME) != TensorIOMode::kOUTPUT) throw std::runtime_error("Expected Input Binding Input To Be 1"); + + createExecutionContext(); + + prepTensors(); +} + +auto Inference::createCudaEngine(std::filesystem::path const& onnxModelPath, std::string const& modelName) -> ICudaEngine* { + constexpr auto explicitBatch = 1U << static_cast(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); + + std::unique_ptr builder{createInferBuilder(mLogger)}; + if (!builder) throw std::runtime_error("Failed to create Infer Builder"); + mLogger.log(ILogger::Severity::kINFO, "Created Infer Builder"); + + std::unique_ptr network{builder->createNetworkV2(explicitBatch)}; + if (!network) throw std::runtime_error("Failed to create Network Definition"); + mLogger.log(ILogger::Severity::kINFO, "Created Network Definition"); + + std::unique_ptr parser{nvonnxparser::createParser(*network, mLogger)}; + if (!parser) throw std::runtime_error("Failed to create ONNX Parser"); + mLogger.log(ILogger::Severity::kINFO, "Created ONNX Parser"); + + std::unique_ptr config{builder->createBuilderConfig()}; + if (!config) throw std::runtime_error("Failed to create Builder Config"); + mLogger.log(ILogger::Severity::kINFO, "Created Builder Config"); + + if (!parser->parseFromFile(onnxModelPath.c_str(), static_cast(ILogger::Severity::kINFO))) { + throw std::runtime_error("Failed to parse ONNX file"); + } + + IRuntime* runtime = createInferRuntime(mLogger); + + // Define the engine file location relative to the mrover package + std::filesystem::path packagePath{mPackagePath}; + std::filesystem::path enginePath = packagePath / "data" / std::string("tensorrt-engine-").append(modelName).append(".engine"); + // Check if engine file exists + if (!exists(enginePath)) { + std::cout << "Optimizing ONXX model for TensorRT. This make take a long time..." 
<< std::endl; + + // Create the Engine from onnx file + IHostMemory* serializedEngine = builder->buildSerializedNetwork(*network, *config); + if (!serializedEngine) throw std::runtime_error("Failed to serialize engine"); + + // Create temporary engine for serializing + ICudaEngine* tempEng = runtime->deserializeCudaEngine(serializedEngine->data(), serializedEngine->size()); + if (!tempEng) throw std::runtime_error("Failed to create temporary engine"); + + // Save Engine to File + auto trtModelStream = tempEng->serialize(); + std::ofstream outputFileStream{enginePath, std::ios::binary}; + outputFileStream.write(static_cast(trtModelStream->data()), trtModelStream->size()); + outputFileStream.close(); + + return tempEng; + } + + // Load engine from file + std::ifstream inputFileStream{enginePath, std::ios::binary}; + std::stringstream engineBuffer; + + // Stream in the engine file to the buffer + engineBuffer << inputFileStream.rdbuf(); + std::string enginePlan = engineBuffer.str(); + // Deserialize the Cuda Engine file from the buffer + return runtime->deserializeCudaEngine(enginePlan.data(), enginePlan.size()); +} + +auto Inference::createExecutionContext() -> void { + // Create Execution Context + mContext.reset(mEngine->createExecutionContext()); + if (!mContext) throw std::runtime_error("Failed to create execution context"); + + // Set up the input tensor sizing + mContext->setInputShape(INPUT_BINDING_NAME, mEngine->getTensorShape(INPUT_BINDING_NAME)); +} + +auto Inference::doDetections(cv::Mat const& img) const -> void { + // Do the forward pass on the network + launchInference(img, mOutputTensor); +} + +auto Inference::getOutputTensor() -> cv::Mat { + return mOutputTensor; +} + +auto Inference::launchInference(cv::Mat const& input, cv::Mat const& output) const -> void { + //Assert these items have been initialized + assert(!input.empty()); + assert(!output.empty()); + assert(input.isContinuous()); + assert(output.isContinuous()); + assert(mContext); + + // Get the binding id for the input tensor + int inputId = getBindingInputIndex(mContext.get()); + + // Memcpy the input tensor from the host to the gpu + cudaMemcpy(mBindings[inputId], input.data, input.total() * input.elemSize(), cudaMemcpyHostToDevice); + + // Execute the model on the gpu + mContext->executeV2(mBindings.data()); + + // Memcpy the output tensor from the gpu to the host + cudaMemcpy(output.data, mBindings[1 - inputId], output.total() * output.elemSize(), cudaMemcpyDeviceToHost); +} + +auto Inference::prepTensors() -> void { + // Assign the properties to the input and output tensors + for (int i = 0; i < mEngine->getNbIOTensors(); i++) { + char const* tensorName = mEngine->getIOTensorName(i); + auto [rank, extents] = mEngine->getTensorShape(tensorName); + + // Multiply sizeof(float) by the product of the extents + // This is essentially: element count * size of each element + std::size_t size = 1; + for(int32_t i = 0; i < rank; ++i){ + size *= extents[i]; + } + std::cout << tensorName << " is getting allocated to size " << size << std::endl; + + // Create GPU memory for TensorRT to operate on + if (cudaError_t result = cudaMalloc(mBindings.data() + i, size * sizeof(float)); result != cudaSuccess) + throw std::runtime_error{"Failed to allocate GPU memory: " + std::string{cudaGetErrorString(result)}}; + } + + assert(mContext); + // Create an appropriately sized output tensor + auto const [nbDims, d] = mEngine->getTensorShape(OUTPUT_BINDING_NAME); + for (int i = 0; i < nbDims; i++) { + std::array message; + 
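createCudaEngine() above implements a build-once cache: the first run pays the slow TensorRT optimization (buildSerializedNetwork) and writes the serialized engine next to the model, while later runs just deserialize the file. A reduced sketch of that caching shape, with a hypothetical loadOrBuild() helper standing in for the TensorRT build and deserialize calls (illustrative only, not part of this diff):

#include <filesystem>
#include <fstream>
#include <sstream>
#include <string>

auto loadOrBuild(std::filesystem::path const& cachePath, auto&& build) -> std::string {
    if (!std::filesystem::exists(cachePath)) {
        // Slow path: build the artifact once and persist it for next time
        std::string blob = build();
        std::ofstream out{cachePath, std::ios::binary};
        out.write(blob.data(), static_cast<std::streamsize>(blob.size()));
        return blob;
    }
    // Fast path: stream the cached artifact back into memory
    std::ifstream in{cachePath, std::ios::binary};
    std::stringstream buffer;
    buffer << in.rdbuf();
    return buffer.str();
}

int main() {
    std::string blob = loadOrBuild("tensorrt-engine-example.engine",
                                   [] { return std::string(1024, 'x'); });
    return blob.empty();
}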
+auto Inference::prepTensors() -> void {
+    // Assign the properties to the input and output tensors
+    for (int i = 0; i < mEngine->getNbIOTensors(); i++) {
+        char const* tensorName = mEngine->getIOTensorName(i);
+        auto [rank, extents] = mEngine->getTensorShape(tensorName);
+
+        // Element count is the product of the extents; multiplied by sizeof(float) below for the byte size
+        std::size_t size = 1;
+        for (int32_t j = 0; j < rank; ++j) {
+            size *= extents[j];
+        }
+        std::cout << tensorName << " is getting allocated to size " << size << std::endl;
+
+        // Allocate GPU memory for TensorRT to operate on
+        if (cudaError_t result = cudaMalloc(mBindings.data() + i, size * sizeof(float)); result != cudaSuccess)
+            throw std::runtime_error{"Failed to allocate GPU memory: " + std::string{cudaGetErrorString(result)}};
+    }
+
+    assert(mContext);
+
+    // Create an appropriately sized output tensor
+    auto const [nbDims, d] = mEngine->getTensorShape(OUTPUT_BINDING_NAME);
+    for (int i = 0; i < nbDims; i++) {
+        std::array<char, 64> message{};
+        std::snprintf(message.data(), message.size(), "Size %d %d", i, d[i]);
+        mLogger.log(ILogger::Severity::kINFO, message.data());
+    }
+
+    // Wrap the output matrix in a Mat for ease of use
+    assert(nbDims == 3);
+    assert(d[0] == 1);
+    mOutputTensor = cv::Mat::zeros(d[1], d[2], CV_32FC1);
+}
+
+auto Inference::getBindingInputIndex(IExecutionContext const* context) -> int {
+    // Returns the index of the input tensor: 0 (false) if binding 0 is the input, 1 (true) otherwise
+    return context->getEngine().getTensorIOMode(context->getEngine().getIOTensorName(0)) != TensorIOMode::kINPUT;
+}
diff --git a/tensorrt/inference.cuh b/tensorrt/inference.cuh
new file mode 100644
index 00000000..d0b65469
--- /dev/null
+++ b/tensorrt/inference.cuh
@@ -0,0 +1,75 @@
+#pragma once
+
+#include "pch.hpp"
+
+#include "logger.cuh"
+
+#include <NvInfer.h>
+
+using nvinfer1::ICudaEngine;
+using nvinfer1::IExecutionContext;
+
+
+class Inference {
+    std::string mModelPath;
+    std::string mPackagePath;
+
+    nvinfer1::Logger mLogger;
+
+    std::unique_ptr<ICudaEngine> mEngine{};
+
+    std::unique_ptr<IExecutionContext> mContext{};
+
+    cv::Mat mInputTensor;
+    cv::Mat mOutputTensor;
+
+    std::array<void*, 2> mBindings{};
+
+    cv::Size mModelInputShape;
+    cv::Size mModelOutputShape;
+
+    static auto getBindingInputIndex(IExecutionContext const* context) -> int;
+
+    /**
+     * @brief Creates the CUDA engine, which is machine specific
+     *
+     * Uses the file path to locate the ONNX model and the modelName to locate the engine file
+     */
+    auto createCudaEngine(std::filesystem::path const& onnxModelPath, std::string const& modelName) -> ICudaEngine*;
+
+    /**
+     * @brief Performs the model forward pass and stores the results in the output tensor
+     *
+     * Takes the input Mat in CNN format and requires a correctly sized empty Mat to store the output
+     */
+    auto launchInference(cv::Mat const& input, cv::Mat const& output) const -> void;
+
+    /**
+     * @brief Prepares the tensors for inference
+     *
+     * Allocates GPU memory for the input and output tensor bindings
+     *
+     * Requires mEngine and mContext; modifies mBindings and mOutputTensor
+     */
+    auto prepTensors() -> void;
+
+    /**
+     * @brief Creates the execution context for the model
+     */
+    auto createExecutionContext() -> void;
+
+public:
+    explicit Inference(std::filesystem::path const& onnxModelPath, std::string const& modelName, std::string packagePathString);
+
+    /**
+     * @brief Runs the forward pass on the given input image in CNN format
+     */
+    auto doDetections(cv::Mat const& img) const -> void;
+
+    /**
+     * @brief Retrieves the Mat where the output from the forward pass was stored
+     */
+    auto getOutputTensor() -> cv::Mat;
+};
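The next two files form a pimpl facade: `InferenceWrapper` exposes only OpenCV types, so translation units compiled by a plain C++ compiler never need TensorRT or CUDA headers. A minimal sketch of why `std::shared_ptr` tolerates the incomplete `Inference` type (illustrative standard-library behavior, not project code):

    #include <memory>

    class Inference; // incomplete type: no CUDA headers needed in this TU

    struct Wrapper {
        std::shared_ptr<Inference> mImpl; // shared_ptr type-erases its deleter into the
                                          // control block, so ~Wrapper() compiles against
                                          // the incomplete type; the deleter is captured
                                          // where reset(new Inference{...}) runs, i.e. in
                                          // a .cpp that does see the full definition.
    };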
diff --git a/tensorrt/inference_wrapper.cpp b/tensorrt/inference_wrapper.cpp
new file mode 100644
index 00000000..d5849095
--- /dev/null
+++ b/tensorrt/inference_wrapper.cpp
@@ -0,0 +1,26 @@
+#include "inference_wrapper.hpp"
+
+#include "inference.cuh"
+
+using namespace nvinfer1;
+
+/**
+* Copies CPU memory in inputTensor to the GPU based on the bindings,
+* queues that tensor to be passed through the model,
+* and copies the result back to CPU memory
+*
+* Requires bindings and inputTensor; modifies outputTensor
+*/
+
+InferenceWrapper::InferenceWrapper(std::string onnxModelPath, std::string const& modelName, std::string const& packagePath) {
+    mInference.reset(new Inference(std::move(onnxModelPath), modelName, packagePath));
+}
+
+auto InferenceWrapper::doDetections(cv::Mat const& img) const -> void {
+    // Execute the forward pass on the inference object
+    mInference->doDetections(img);
+}
+
+auto InferenceWrapper::getOutputTensor() const -> cv::Mat {
+    return mInference->getOutputTensor();
+}
diff --git a/tensorrt/inference_wrapper.hpp b/tensorrt/inference_wrapper.hpp
new file mode 100644
index 00000000..f9d83b99
--- /dev/null
+++ b/tensorrt/inference_wrapper.hpp
@@ -0,0 +1,21 @@
+#pragma once
+
+#include "pch.hpp"
+
+class Inference;
+
+class InferenceWrapper {
+    std::shared_ptr<Inference> mInference;
+
+public:
+    InferenceWrapper() = default;
+
+    ~InferenceWrapper() = default;
+
+    explicit InferenceWrapper(std::string onnxModelPath, std::string const& modelName, std::string const& packagePath);
+
+    auto doDetections(cv::Mat const& img) const -> void;
+
+    // Retrieve the output tensor from the previous forward pass
+    [[nodiscard]] auto getOutputTensor() const -> cv::Mat;
+};
diff --git a/tensorrt/logger.cu b/tensorrt/logger.cu
new file mode 100644
index 00000000..d5b7bd63
--- /dev/null
+++ b/tensorrt/logger.cu
@@ -0,0 +1,25 @@
+#include "logger.cuh"
+
+namespace nvinfer1 {
+
+    auto Logger::log(Severity severity, char const* msg) noexcept -> void {
+        switch (severity) {
+            case Severity::kINTERNAL_ERROR:
+                std::cout << "[FATAL] " << msg << "\n";
+                break;
+            case Severity::kERROR:
+                std::cout << "[ERROR] " << msg << "\n";
+                break;
+            case Severity::kWARNING:
+                std::cout << "[WARN] " << msg << "\n";
+                break;
+            case Severity::kINFO:
+                std::cout << "[INFO] " << msg << "\n";
+                break;
+            case Severity::kVERBOSE:
+                std::cout << "[VERBOSE] " << msg << "\n";
+                break;
+        }
+    }
+
+} // namespace nvinfer1
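Every severity here, including internal errors, goes to std::cout. If errors should survive stdout redirection, a compact alternative is sketched below; it assumes the TensorRT Severity enum ordering (kINTERNAL_ERROR < kERROR < kWARNING < kINFO < kVERBOSE) and is not part of this PR:

    // Sketch: route the two error severities to stderr, everything else to stdout.
    auto Logger::log(Severity severity, char const* msg) noexcept -> void {
        std::ostream& out = severity <= Severity::kERROR ? std::cerr : std::cout;
        constexpr std::array labels{"[FATAL] ", "[ERROR] ", "[WARN] ", "[INFO] ", "[VERBOSE] "};
        out << labels[static_cast<std::size_t>(severity)] << msg << "\n";
    }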
diff --git a/tensorrt/logger.cuh b/tensorrt/logger.cuh
new file mode 100644
index 00000000..b973ddbc
--- /dev/null
+++ b/tensorrt/logger.cuh
@@ -0,0 +1,21 @@
+#pragma once
+
+#include "pch.hpp"
+
+#include <NvInfer.h>
+
+namespace nvinfer1 {
+
+    class Logger final : public ILogger {
+    public:
+        auto log(Severity severity, char const* msg) noexcept -> void override;
+    };
+
+    template<typename T>
+    struct Destroy {
+        auto operator()(T* t) const -> void {
+            t->destroy();
+        }
+    };
+
+} // namespace nvinfer1
diff --git a/tensorrt/pch.hpp b/tensorrt/pch.hpp
new file mode 100644
index 00000000..77fbea9f
--- /dev/null
+++ b/tensorrt/pch.hpp
@@ -0,0 +1,17 @@
+#pragma once
+
+#include <array>
+#include <filesystem>
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#include <opencv2/core.hpp>
+#include <opencv2/dnn.hpp>
+#include <opencv2/imgproc.hpp>
+
+#include "inference_wrapper.hpp"
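The next file decodes raw YOLOv8-style output. As a worked example of the reshape/transpose it performs (shapes assumed for this two-class model, not stated in the PR): the network emits a batchSize x (4 + numClasses) x 8400 tensor, one column per candidate box, so transposing yields one candidate per row:

    #include <opencv2/core.hpp>

    int main() {
        // Hypothetical raw output for 2 classes, batch dimension dropped: 6 x 8400
        cv::Mat raw = cv::Mat::zeros(6, 8400, CV_32FC1);
        cv::Mat rows = raw.t(); // 8400 x 6: [cx, cy, w, h, score0, score1] per row
        CV_Assert(rows.rows == 8400 && rows.cols == 6);
    }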
diff --git a/tensorrt/tensorrt.cpp b/tensorrt/tensorrt.cpp
new file mode 100644
index 00000000..287b3f2a
--- /dev/null
+++ b/tensorrt/tensorrt.cpp
@@ -0,0 +1,100 @@
+#include "tensorrt.hpp"
+
+using namespace std;
+
+TensortRT::TensortRT() = default;
+
+TensortRT::TensortRT(string modelName, string packagePathString) : mModelName{std::move(modelName)} {
+
+    std::filesystem::path packagePath{std::move(packagePathString)};
+    std::filesystem::path modelFileName = mModelName.append(".onnx");
+    std::filesystem::path modelPath = packagePath / "data" / modelFileName;
+
+    mInferenceWrapper = InferenceWrapper{modelPath, mModelName, packagePath};
+}
+
+TensortRT::~TensortRT() = default;
+
+auto TensortRT::modelForwardPass(cv::Mat const& blob, std::vector<Detection>& detections, float modelScoreThreshold, float modelNMSThreshold) const -> void {
+    mInferenceWrapper.doDetections(blob);
+    cv::Mat output = mInferenceWrapper.getOutputTensor();
+    parseModelOutput(output, detections, modelScoreThreshold, modelNMSThreshold);
+}
+
+auto TensortRT::parseModelOutput(cv::Mat& output, std::vector<Detection>& detections, float modelScoreThreshold, float modelNMSThreshold) const -> void {
+    // Parse model specific dimensioning from the output
+
+    // This function expects a YOLOv8-style output, so there should be more columns (candidates) than rows (channels)
+    if (output.cols <= output.rows) {
+        throw std::runtime_error("Expected YOLOv8-style output with more columns than rows");
+    }
+
+    int rows = output.cols;
+    int dimensions = output.rows;
+
+    // The output of the model is a batchSize x (4 + numClasses) x 8400 matrix (e.g. 1 x 84 x 8400 for 80 classes)
+    // Transpose it so that each row is one candidate detection (TODO: check this dimensioning)
+    output = output.reshape(1, dimensions);
+    cv::transpose(output, output);
+
+    // This function expects the image to already be in the correct format, so no scale correction is needed
+    float const xFactor = 1.0;
+    float const yFactor = 1.0;
+
+    // Intermediate storage containers
+    std::vector<int> classIds;
+    std::vector<float> confidences;
+    std::vector<cv::Rect> boxes;
+
+    // Each row of the output is a detection with a bounding box and associated class scores
+    for (int r = 0; r < rows; ++r) {
+        // Skip the first four values as they are the box data
+        cv::Mat scores = output.row(r).colRange(4, dimensions);
+
+        cv::Point classId;
+        double maxClassScore;
+        cv::minMaxLoc(scores, nullptr, &maxClassScore, nullptr, &classId);
+
+        // Disregard the candidate unless its best class score clears the confidence threshold
+        if (maxClassScore <= modelScoreThreshold) continue;
+
+        confidences.push_back(static_cast<float>(maxClassScore));
+        classIds.push_back(classId.x);
+
+        // Get the bounding box data
+        cv::Mat box = output.row(r).colRange(0, 4);
+        auto x = box.at<float>(0);
+        auto y = box.at<float>(1);
+        auto w = box.at<float>(2);
+        auto h = box.at<float>(3);
+
+        // Cast the corners to integers so they can be used as pixel coordinates
+        auto left = static_cast<int>((x - 0.5 * w) * xFactor);
+        auto top = static_cast<int>((y - 0.5 * h) * yFactor);
+        auto width = static_cast<int>(w * xFactor);
+        auto height = static_cast<int>(h * yFactor);
+
+        // Push the box into storage
+        boxes.emplace_back(left, top, width, height);
+    }
+
+    // Coalesce the overlapping boxes into a smaller number of distinct boxes
+    std::vector<int> nmsResult;
+    cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nmsResult);
+
+    // Fill in the output detections vector
+    for (int i: nmsResult) {
+        Detection result;
+
+        // Fill in the id and confidence for the class
+        result.classId = classIds[i];
+        result.confidence = confidences[i];
+
+        // Fill in the class name and box information
+        result.className = classes[result.classId];
+        result.box = boxes[i];
+
+        // Push the detection into the storage vector
+        detections.push_back(result);
+    }
+}
diff --git a/tensorrt/tensorrt.hpp b/tensorrt/tensorrt.hpp
new file mode 100644
index 00000000..6d695636
--- /dev/null
+++ b/tensorrt/tensorrt.hpp
@@ -0,0 +1,35 @@
+#pragma once
+
+#include "pch.hpp"
+
+struct Detection {
+    int classId{};
+    std::string className;
+    float confidence{};
+    cv::Rect box;
+};
+
+class TensortRT {
+    std::string mModelName;
+
+    std::vector<std::string> classes{"bottle", "hammer"};
+
+    InferenceWrapper mInferenceWrapper;
+
+    auto parseModelOutput(cv::Mat& output,
+                          std::vector<Detection>& detections,
+                          float modelScoreThreshold = 0.75,
+                          float modelNMSThreshold = 0.5) const -> void;
+
+public:
+    TensortRT();
+
+    explicit TensortRT(std::string modelName, std::string packagePathString);
+
+    ~TensortRT();
+
+    auto modelForwardPass(cv::Mat const& blob,
+                          std::vector<Detection>& detections,
+                          float modelScoreThreshold = 0.75,
+                          float modelNMSThreshold = 0.5) const -> void;
+};
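For reference, a hypothetical caller of this API; the model name, package path, image file, and 640x640 input size are illustrative assumptions, not values from this PR:

    #include <cstdio>
    #include <vector>

    #include <opencv2/dnn.hpp>
    #include <opencv2/imgcodecs.hpp>

    #include "tensorrt.hpp"

    int main() {
        cv::Mat image = cv::imread("test.jpg");
        // Convert to the NCHW float blob the network expects (640x640 assumed here)
        cv::Mat blob = cv::dnn::blobFromImage(image, 1.0 / 255.0, {640, 640}, cv::Scalar{}, true, false, CV_32F);

        TensortRT detector{"my-model", "/path/to/mrover"}; // loads data/my-model.onnx
        std::vector<Detection> detections;
        detector.modelForwardPass(blob, detections);

        for (auto const& [classId, className, confidence, box]: detections)
            std::printf("%s (%.2f) at [%d %d %d %d]\n", className.c_str(), confidence, box.x, box.y, box.width, box.height);
    }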