From c7e7a48cf9bc5679dc3cf71675c7bc7ffddf2958 Mon Sep 17 00:00:00 2001 From: jbrhm Date: Mon, 23 Sep 2024 11:34:45 -0400 Subject: [PATCH 01/12] Get blob dimensions from model --- .../object_detector/object_detector.cpp | 3 +++ .../object_detector/object_detector.hpp | 10 +++++++++- .../object_detector.processing.cpp | 18 ++++++++++++++---- tensorrt/inference.cu | 19 +++++++++++++++++++ tensorrt/inference.cuh | 5 +++++ tensorrt/inference_wrapper.cpp | 4 ++++ tensorrt/inference_wrapper.hpp | 2 ++ tensorrt/tensorrt.cpp | 4 ++++ tensorrt/tensorrt.hpp | 2 ++ 9 files changed, 62 insertions(+), 5 deletions(-) diff --git a/perception/object_detector/object_detector.cpp b/perception/object_detector/object_detector.cpp index dc7f91e6..ad96761f 100644 --- a/perception/object_detector/object_detector.cpp +++ b/perception/object_detector/object_detector.cpp @@ -23,7 +23,10 @@ namespace mrover { RCLCPP_INFO_STREAM(get_logger(), "Found package path " << packagePath); + // Initialize TensorRT Inference Object and Get Important Output Information + mModelType = MODEL_TYPE::YOLOv8; mTensorRT = TensortRT{mModelName, packagePath.string()}; + mBlobSize = mTensorRT.getInputBlobSize(); mDebugImgPub = create_publisher("object_detector/debug_img", 1); diff --git a/perception/object_detector/object_detector.hpp b/perception/object_detector/object_detector.hpp index 28ebe502..cc4056e6 100644 --- a/perception/object_detector/object_detector.hpp +++ b/perception/object_detector/object_detector.hpp @@ -7,22 +7,30 @@ namespace mrover { class ObjectDetectorBase : public rclcpp::Node { protected: + enum MODEL_TYPE { + YOLOv8 = 0, + }; + static constexpr char const* NODE_NAME = "object_detector"; std::unique_ptr mTfBuffer = std::make_unique(get_clock()); std::shared_ptr mTfBroadcaster = std::make_shared(this); std::shared_ptr mTfListener = std::make_shared(*mTfBuffer); + // TF Frames std::string mCameraFrame; std::string mWorldFrame; + // Model Member Variables + MODEL_TYPE mModelType; std::string mModelName; + cv::Mat mRgbImage, mImageBlob; + std::vector mBlobSize; LoopProfiler mLoopProfiler; TensortRT mTensorRT; - cv::Mat mRgbImage, mImageBlob; sensor_msgs::msg::Image mDetectionsImageMessage; rclcpp::Publisher::SharedPtr mDebugImgPub; diff --git a/perception/object_detector/object_detector.processing.cpp b/perception/object_detector/object_detector.processing.cpp index 4be823b4..f7e48776 100644 --- a/perception/object_detector/object_detector.processing.cpp +++ b/perception/object_detector/object_detector.processing.cpp @@ -16,11 +16,21 @@ namespace mrover { } convertPointCloudToRGB(msg, mRgbImage); - // TODO(quintin): Avoid hard coding blob size - cv::Size blobSize{640, 640}; + + // Convert the RGB Image into the blob Image format cv::Mat blobSizedImage; - cv::resize(mRgbImage, blobSizedImage, blobSize); - cv::dnn::blobFromImage(blobSizedImage, mImageBlob, 1.0 / 255.0, blobSize, cv::Scalar{}, false, false); + switch(mModelType){ + case MODEL_TYPE::YOLOv8: + { + if(mBlobSize.size() != 4){ + throw std::runtime_error("Expected Blob Size to be of size 4, are you using the correct model type?"); + } + cv::Size blobSize{static_cast(mBlobSize[2]), static_cast(mBlobSize[3])}; + cv::resize(mRgbImage, blobSizedImage, blobSize); + cv::dnn::blobFromImage(blobSizedImage, mImageBlob, 1.0 / 255.0, blobSize, cv::Scalar{}, false, false); + break; + } + } mLoopProfiler.measureEvent("Conversion"); diff --git a/tensorrt/inference.cu b/tensorrt/inference.cu index 17bda4c3..b0582bd6 100644 --- a/tensorrt/inference.cu +++ 
b/tensorrt/inference.cu @@ -170,3 +170,22 @@ auto Inference::getBindingInputIndex(IExecutionContext const* context) -> int { // Returns the id for the input tensor return context->getEngine().getTensorIOMode(context->getEngine().getIOTensorName(0)) != TensorIOMode::kINPUT; // 0 (false) if bindingIsInput(0), 1 (true) otherwise } + + +auto Inference::getInputBlobSize() -> std::vector{ + auto dims = mEngine->getTensorShape(INPUT_BINDING_NAME); + std::vector inputBlobSize; + inputBlobSize.reserve(dims.nbDims); + + for(int32_t i = 0; i < dims.nbDims; ++i){ + inputBlobSize.push_back(dims.d[i]); + } + + for(int i = 0; i < dims.nbDims; ++i){ + std::array message; + std::snprintf(message.data(), message.size(), "Blob Size %d %d", i, dims.d[i]); + mLogger.log(ILogger::Severity::kINFO, message.data()); + } + + return inputBlobSize; +} diff --git a/tensorrt/inference.cuh b/tensorrt/inference.cuh index d0b65469..736d770e 100644 --- a/tensorrt/inference.cuh +++ b/tensorrt/inference.cuh @@ -72,4 +72,9 @@ public: * @brief Retrieves the mat where the output from the forward pass was stored */ auto getOutputTensor() -> cv::Mat; + + /** + * @brief Retrives the expected input matrix size + */ + auto getInputBlobSize() -> std::vector; }; diff --git a/tensorrt/inference_wrapper.cpp b/tensorrt/inference_wrapper.cpp index d5849095..eeb504f0 100644 --- a/tensorrt/inference_wrapper.cpp +++ b/tensorrt/inference_wrapper.cpp @@ -24,3 +24,7 @@ auto InferenceWrapper::doDetections(cv::Mat const& img) const -> void { auto InferenceWrapper::getOutputTensor() const -> cv::Mat { return mInference->getOutputTensor(); } + +auto InferenceWrapper::getInputBlobSize() -> std::vector{ + return mInference->getInputBlobSize(); +} diff --git a/tensorrt/inference_wrapper.hpp b/tensorrt/inference_wrapper.hpp index f9d83b99..be9b1eb8 100644 --- a/tensorrt/inference_wrapper.hpp +++ b/tensorrt/inference_wrapper.hpp @@ -18,4 +18,6 @@ class InferenceWrapper { // Retrieve the output tensor from the previous forward pass [[nodiscard]] auto getOutputTensor() const -> cv::Mat; + + auto getInputBlobSize() -> std::vector; }; diff --git a/tensorrt/tensorrt.cpp b/tensorrt/tensorrt.cpp index 287b3f2a..5319c328 100644 --- a/tensorrt/tensorrt.cpp +++ b/tensorrt/tensorrt.cpp @@ -98,3 +98,7 @@ auto TensortRT::parseModelOutput(cv::Mat& output, std::vector& detect detections.push_back(result); } } + +auto TensortRT::getInputBlobSize() -> std::vector{ + return mInferenceWrapper.getInputBlobSize(); +} diff --git a/tensorrt/tensorrt.hpp b/tensorrt/tensorrt.hpp index 6d695636..70073a97 100644 --- a/tensorrt/tensorrt.hpp +++ b/tensorrt/tensorrt.hpp @@ -32,4 +32,6 @@ class TensortRT { std::vector& detections, float modelScoreThreshold = 0.75, float modelNMSThreshold = 0.5) const -> void; + + auto getInputBlobSize() -> std::vector; }; From 9eab3dfab5c5613b91b2d8803ef2be56467a3910 Mon Sep 17 00:00:00 2001 From: jbrhm Date: Mon, 23 Sep 2024 11:37:06 -0400 Subject: [PATCH 02/12] More Verbose --- perception/object_detector/object_detector.processing.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/perception/object_detector/object_detector.processing.cpp b/perception/object_detector/object_detector.processing.cpp index f7e48776..5564a853 100644 --- a/perception/object_detector/object_detector.processing.cpp +++ b/perception/object_detector/object_detector.processing.cpp @@ -25,9 +25,12 @@ namespace mrover { if(mBlobSize.size() != 4){ throw std::runtime_error("Expected Blob Size to be of size 4, are you using the correct model type?"); } + + 
static constexpr double UCHAR_TO_DOUBLE = 1.0 / 255.0; + cv::Size blobSize{static_cast(mBlobSize[2]), static_cast(mBlobSize[3])}; cv::resize(mRgbImage, blobSizedImage, blobSize); - cv::dnn::blobFromImage(blobSizedImage, mImageBlob, 1.0 / 255.0, blobSize, cv::Scalar{}, false, false); + cv::dnn::blobFromImage(blobSizedImage, mImageBlob, UCHAR_TO_DOUBLE, blobSize, cv::Scalar{}, false, false); break; } } From 071bce0ac60d2b1d4b57d704c95b2f0af2e530a0 Mon Sep 17 00:00:00 2001 From: jbrhm Date: Mon, 23 Sep 2024 12:36:25 -0400 Subject: [PATCH 03/12] Better Naming --- .../object_detector/object_detector.cpp | 2 +- .../object_detector/object_detector.hpp | 2 +- .../object_detector.processing.cpp | 4 +- tensorrt/inference.cu | 53 ++++++++++++++----- tensorrt/inference.cuh | 11 +++- tensorrt/inference_wrapper.cpp | 4 +- tensorrt/inference_wrapper.hpp | 2 +- tensorrt/tensorrt.cpp | 4 +- tensorrt/tensorrt.hpp | 2 +- 9 files changed, 59 insertions(+), 25 deletions(-) diff --git a/perception/object_detector/object_detector.cpp b/perception/object_detector/object_detector.cpp index ad96761f..f59e33dd 100644 --- a/perception/object_detector/object_detector.cpp +++ b/perception/object_detector/object_detector.cpp @@ -26,7 +26,7 @@ namespace mrover { // Initialize TensorRT Inference Object and Get Important Output Information mModelType = MODEL_TYPE::YOLOv8; mTensorRT = TensortRT{mModelName, packagePath.string()}; - mBlobSize = mTensorRT.getInputBlobSize(); + mInputTensorSize = mTensorRT.getInputTensorSize(); mDebugImgPub = create_publisher("object_detector/debug_img", 1); diff --git a/perception/object_detector/object_detector.hpp b/perception/object_detector/object_detector.hpp index cc4056e6..29bcd264 100644 --- a/perception/object_detector/object_detector.hpp +++ b/perception/object_detector/object_detector.hpp @@ -25,7 +25,7 @@ namespace mrover { MODEL_TYPE mModelType; std::string mModelName; cv::Mat mRgbImage, mImageBlob; - std::vector mBlobSize; + std::vector mInputTensorSize; LoopProfiler mLoopProfiler; diff --git a/perception/object_detector/object_detector.processing.cpp b/perception/object_detector/object_detector.processing.cpp index 5564a853..881b6be1 100644 --- a/perception/object_detector/object_detector.processing.cpp +++ b/perception/object_detector/object_detector.processing.cpp @@ -22,13 +22,13 @@ namespace mrover { switch(mModelType){ case MODEL_TYPE::YOLOv8: { - if(mBlobSize.size() != 4){ + if(mInputTensorSize.size() != 4){ throw std::runtime_error("Expected Blob Size to be of size 4, are you using the correct model type?"); } static constexpr double UCHAR_TO_DOUBLE = 1.0 / 255.0; - cv::Size blobSize{static_cast(mBlobSize[2]), static_cast(mBlobSize[3])}; + cv::Size blobSize{static_cast(mInputTensorSize[2]), static_cast(mInputTensorSize[3])}; cv::resize(mRgbImage, blobSizedImage, blobSize); cv::dnn::blobFromImage(blobSizedImage, mImageBlob, UCHAR_TO_DOUBLE, blobSize, cv::Scalar{}, false, false); break; diff --git a/tensorrt/inference.cu b/tensorrt/inference.cu index b0582bd6..cc5a8eb3 100644 --- a/tensorrt/inference.cu +++ b/tensorrt/inference.cu @@ -11,8 +11,6 @@ using namespace nvinfer1; * Requires bindings, inputTensor, stream * Modifies stream, outputTensor */ -constexpr static auto INPUT_BINDING_NAME = "images"; -constexpr static auto OUTPUT_BINDING_NAME = "output0"; Inference::Inference(std::filesystem::path const& onnxModelPath, std::string const& modelName, std::string packagePathString) : mPackagePath{std::move(packagePathString)} { mModelPath = onnxModelPath.string(); @@ -25,8 
+23,32 @@ Inference::Inference(std::filesystem::path const& onnxModelPath, std::string con
 
     // Check some assumptions about the model
     if (mEngine->getNbIOTensors() != 2) throw std::runtime_error("Invalid Binding Count");
-    if (mEngine->getTensorIOMode(INPUT_BINDING_NAME) != TensorIOMode::kINPUT) throw std::runtime_error("Expected Input Binding 0 Is An Input");
-    if (mEngine->getTensorIOMode(OUTPUT_BINDING_NAME) != TensorIOMode::kOUTPUT) throw std::runtime_error("Expected Input Binding Input To Be 1");
+
+    // Store the IO Tensor Names
+    mInputTensorName = mEngine->getIOTensorName(0);
+    mOutputTensorName = mEngine->getIOTensorName(1);
+
+    if (mEngine->getTensorIOMode(mInputTensorName.c_str()) != TensorIOMode::kINPUT) throw std::runtime_error("Expected Binding 0 To Be An Input");
+    if (mEngine->getTensorIOMode(mOutputTensorName.c_str()) != TensorIOMode::kOUTPUT) throw std::runtime_error("Expected Binding 1 To Be An Output");
+
+    // Be verbose about the input tensor size
+    auto inputTensorSize = getInputTensorSize();
+    std::array message;
+    std::snprintf(message.data(), message.size(), "%s Tensor's Dimensions:", mInputTensorName.c_str());
+    mLogger.log(ILogger::Severity::kINFO, message.data());
+    for(size_t i = 0; i < inputTensorSize.size(); ++i){
+        std::snprintf(message.data(), message.size(), "Dimension: %zu Size: %zu", i, inputTensorSize[i]);
+        mLogger.log(ILogger::Severity::kINFO, message.data());
+    }
+
+    // Be verbose about the output tensor size
+    auto outputTensorSize = getOutputTensorSize();
+    std::snprintf(message.data(), message.size(), "%s Tensor's Dimensions:", mOutputTensorName.c_str());
+    mLogger.log(ILogger::Severity::kINFO, message.data());
+    for(size_t i = 0; i < outputTensorSize.size(); ++i){
+        std::snprintf(message.data(), message.size(), "Dimension: %zu Size: %zu", i, outputTensorSize[i]);
+        mLogger.log(ILogger::Severity::kINFO, message.data());
+    }
 
     createExecutionContext();
 
@@ -99,7 +121,7 @@ auto Inference::createExecutionContext() -> void {
     if (!mContext) throw std::runtime_error("Failed to create execution context");
 
     // Set up the input tensor sizing
-    mContext->setInputShape(INPUT_BINDING_NAME, mEngine->getTensorShape(INPUT_BINDING_NAME));
+    mContext->setInputShape(mInputTensorName.c_str(), mEngine->getTensorShape(mInputTensorName.c_str()));
 }
 
 auto Inference::doDetections(cv::Mat const& img) const -> void {
@@ -144,7 +166,6 @@ auto Inference::prepTensors() -> void {
         for(int32_t i = 0; i < rank; ++i){
             size *= extents[i];
         }
-        std::cout << tensorName << " is getting allocated to size " << size << std::endl;
 
         // Create GPU memory for TensorRT to operate on
         if (cudaError_t result = cudaMalloc(mBindings.data() + i, size * sizeof(float)); result != cudaSuccess)
@@ -153,7 +174,7 @@ auto Inference::prepTensors() -> void {
     assert(mContext);
 
     // Create an appropriately sized output tensor
-    auto const [nbDims, d] = mEngine->getTensorShape(OUTPUT_BINDING_NAME);
+    auto const [nbDims, d] = mEngine->getTensorShape(mOutputTensorName.c_str());
     for (int i = 0; i < nbDims; i++) {
         std::array message;
         std::snprintf(message.data(), message.size(), "Size %d %d", i, d[i]);
@@ -172,8 +193,8 @@ auto Inference::getBindingInputIndex(IExecutionContext const* context) -> int {
 }
 
 
-auto Inference::getInputBlobSize() -> std::vector{
-    auto dims = mEngine->getTensorShape(INPUT_BINDING_NAME);
+auto Inference::getInputTensorSize() -> std::vector{
+    auto dims = mEngine->getTensorShape(mInputTensorName.c_str());
     std::vector inputBlobSize;
     inputBlobSize.reserve(dims.nbDims);
 
@@ -181,10 +202,16 @@ auto Inference::getInputBlobSize() -> std::vector{
         inputBlobSize.push_back(dims.d[i]);
     }
 
-    for(int i = 0; i < dims.nbDims; ++i){
-        std::array message;
-        std::snprintf(message.data(), message.size(), "Blob Size %d %d", i, dims.d[i]);
-        mLogger.log(ILogger::Severity::kINFO, message.data());
-    }
+    return inputBlobSize;
+}
+
+auto Inference::getOutputTensorSize() -> std::vector{
+    auto dims = mEngine->getTensorShape(mOutputTensorName.c_str());
+    std::vector inputBlobSize;
+    inputBlobSize.reserve(dims.nbDims);
+
+    for(int32_t i = 0; i < dims.nbDims; ++i){
+        inputBlobSize.push_back(dims.d[i]);
    }
 
    return inputBlobSize;
 }
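A quick reference for the engine-introspection API this commit switches to. This is a standalone sketch, not part of the patch; it assumes a built nvinfer1::ICudaEngine and TensorRT 8.5 or newer, where the named IO-tensor calls replaced binding indices:

    #include <NvInfer.h>
    #include <cstdio>

    // Enumerate every IO tensor by name, direction, and shape -- the same
    // introspection getIOTensorName/getTensorShape perform above, instead of
    // hard-coding the "images"/"output0" binding names.
    auto printIoTensors(nvinfer1::ICudaEngine const& engine) -> void {
        for (int32_t i = 0; i < engine.getNbIOTensors(); ++i) {
            char const* name = engine.getIOTensorName(i);
            bool isInput = engine.getTensorIOMode(name) == nvinfer1::TensorIOMode::kINPUT;
            nvinfer1::Dims dims = engine.getTensorShape(name);
            std::printf("%s (%s):", name, isInput ? "input" : "output");
            for (int32_t d = 0; d < dims.nbDims; ++d) {
                std::printf(" %lld", static_cast<long long>(dims.d[d]));
            }
            std::printf("\n");
        }
    }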
diff --git a/tensorrt/inference.cuh b/tensorrt/inference.cuh
index 736d770e..29c6d4a8 100644
--- a/tensorrt/inference.cuh
+++ b/tensorrt/inference.cuh
@@ -21,7 +21,9 @@ class Inference {
     std::unique_ptr mContext{};
 
     cv::Mat mInputTensor;
+    std::string mInputTensorName;
     cv::Mat mOutputTensor;
+    std::string mOutputTensorName;
 
     std::array mBindings{};
 
@@ -74,7 +76,12 @@ public:
      * @brief Retrieves the mat where the output from the forward pass was stored
      */
     auto getOutputTensor() -> cv::Mat;
+
     /**
-     * @brief Retrives the expected input matrix size
+     * @brief Retrieves the expected input tensor size
      */
-    auto getInputBlobSize() -> std::vector;
+    auto getInputTensorSize() -> std::vector;
+
+    /**
+     * @brief Retrieves the expected output tensor size
+     */
+    auto getOutputTensorSize() -> std::vector;
 };
diff --git a/tensorrt/inference_wrapper.cpp b/tensorrt/inference_wrapper.cpp
index eeb504f0..0a52944c 100644
--- a/tensorrt/inference_wrapper.cpp
+++ b/tensorrt/inference_wrapper.cpp
@@ -25,6 +25,6 @@ auto InferenceWrapper::getOutputTensor() const -> cv::Mat {
     return mInference->getOutputTensor();
 }
 
-auto InferenceWrapper::getInputBlobSize() -> std::vector{
-    return mInference->getInputBlobSize();
+auto InferenceWrapper::getInputTensorSize() -> std::vector{
+    return mInference->getInputTensorSize();
 }
diff --git a/tensorrt/inference_wrapper.hpp b/tensorrt/inference_wrapper.hpp
index be9b1eb8..3d9c3aeb 100644
--- a/tensorrt/inference_wrapper.hpp
+++ b/tensorrt/inference_wrapper.hpp
@@ -19,5 +19,5 @@ class InferenceWrapper {
     // Retrieve the output tensor from the previous forward pass
     [[nodiscard]] auto getOutputTensor() const -> cv::Mat;
 
-    auto getInputBlobSize() -> std::vector;
+    auto getInputTensorSize() -> std::vector;
 };
diff --git a/tensorrt/tensorrt.cpp b/tensorrt/tensorrt.cpp
index 5319c328..5e122edd 100644
--- a/tensorrt/tensorrt.cpp
+++ b/tensorrt/tensorrt.cpp
@@ -99,6 +99,6 @@ auto TensortRT::parseModelOutput(cv::Mat& output, std::vector& detect
     }
 }
 
-auto TensortRT::getInputBlobSize() -> std::vector{
-    return mInferenceWrapper.getInputBlobSize();
+auto TensortRT::getInputTensorSize() -> std::vector{
+    return mInferenceWrapper.getInputTensorSize();
 }
diff --git a/tensorrt/tensorrt.hpp b/tensorrt/tensorrt.hpp
index 70073a97..d7874ba7 100644
--- a/tensorrt/tensorrt.hpp
+++ b/tensorrt/tensorrt.hpp
@@ -33,5 +33,5 @@ class TensortRT {
                           float modelNMSThreshold = 0.5) const -> void;
 
-    auto getInputBlobSize() -> std::vector;
+    auto getInputTensorSize() -> std::vector;
 };

From 1f7bcd817b525bb22ad4b25f282ad6f7cf5870b9 Mon Sep 17 00:00:00 2001
From: jbrhm
Date: Mon, 23 Sep 2024 12:38:19 -0400
Subject: [PATCH 04/12] Better Naming

---
 tensorrt/inference.cu | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorrt/inference.cu b/tensorrt/inference.cu
index cc5a8eb3..d74457aa 100644
--- a/tensorrt/inference.cu
+++ b/tensorrt/inference.cu
@@ -174,6 +174,7 @@ auto Inference::prepTensors() -> void {
     assert(mContext);
 
     // Create an appropriately sized output tensor
+    // TODO: Fix this
     auto const [nbDims, d] = mEngine->getTensorShape(mOutputTensorName.c_str());
     for (int i = 0; i < nbDims; i++) {
         std::array message;

From 5024098b38e74312a6dd6ec1f0b7da90f4fa0b2e Mon Sep 17 00:00:00 2001
From: jbrhm
Date: Tue, 24 Sep 2024 13:58:45 -0400
Subject: [PATCH 05/12] Improved

---
 tensorrt/inference.cu          | 31 +++++++++++++++++++------------
 tensorrt/inference.cuh         |  9 ++++++---
 tensorrt/inference_wrapper.cpp |  4 ++--
 tensorrt/inference_wrapper.hpp |  4 +++-
 tensorrt/tensorrt.cpp          |  8 +++++---
 tensorrt/tensorrt.hpp          |  2 ++
 6 files changed, 37 insertions(+), 21 deletions(-)

diff --git a/tensorrt/inference.cu b/tensorrt/inference.cu
index d74457aa..6b4463d5 100644
--- a/tensorrt/inference.cu
+++ b/tensorrt/inference.cu
@@ -12,11 +12,21 @@ using namespace nvinfer1;
 * Modifies stream, outputTensor
 */
 
-Inference::Inference(std::filesystem::path const& onnxModelPath, std::string const& modelName, std::string packagePathString) : mPackagePath{std::move(packagePathString)} {
-    mModelPath = onnxModelPath.string();
+Inference::Inference(std::string modelName, std::string packagePathString) : mModelName{std::move(modelName)}, mPackagePath{std::move(packagePathString)}{
+
+    // Create ONNX and engine file paths
+    mLogger.log(ILogger::Severity::kINFO, mModelName.c_str());
+    mLogger.log(ILogger::Severity::kINFO, mPackagePath.c_str());
+    mONNXModelPath = std::filesystem::path{mPackagePath} / "data" / std::string(mModelName + ".onnx");
+    mEngineModelPath = std::filesystem::path{mPackagePath} / "data" / std::string("tensorrt-engine-" + mModelName + ".engine");
+
+    std::array message{};
+    std::snprintf(message.data(), message.size(), "Reading from ONNX model at %s and creating TensorRT engine at %s", mONNXModelPath.c_str(), mEngineModelPath.c_str());
+    mLogger.log(ILogger::Severity::kINFO, message.data());
 
     // Create the engine object from either the file or from onnx file
-    mEngine = std::unique_ptr{createCudaEngine(onnxModelPath, modelName)};
+    mEngine = std::unique_ptr{createCudaEngine()};
     if (!mEngine) throw std::runtime_error("Failed to create CUDA engine");
 
     mLogger.log(ILogger::Severity::kINFO, "Created CUDA Engine");
@@ -33,7 +43,6 @@ Inference::Inference(std::filesystem::path const& onnxModelPath, std::string con
     // Be verbose about the input tensor size
     auto inputTensorSize = getInputTensorSize();
-    std::array message;
     std::snprintf(message.data(), message.size(), "%s Tensor's Dimensions:", mInputTensorName.c_str());
     mLogger.log(ILogger::Severity::kINFO, message.data());
@@ -55,7 +64,7 @@ Inference::Inference(std::filesystem::path const& onnxModelPath, std::string con
 
     prepTensors();
 }
 
-auto Inference::createCudaEngine(std::filesystem::path const& onnxModelPath, std::string const& modelName) -> ICudaEngine* {
+auto Inference::createCudaEngine() -> ICudaEngine* {
     constexpr auto explicitBatch = 1U << static_cast(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
 
     std::unique_ptr builder{createInferBuilder(mLogger)};
@@ -72,17 +83,15 @@ auto Inference::createCudaEngine(std::filesystem::path const& onnxModelPath, std
     if (!config) throw std::runtime_error("Failed to create Builder Config");
     mLogger.log(ILogger::Severity::kINFO, "Created Builder Config");
 
-    if (!parser->parseFromFile(onnxModelPath.c_str(), static_cast(ILogger::Severity::kINFO))) {
+    if (!parser->parseFromFile(mONNXModelPath.c_str(), static_cast(ILogger::Severity::kINFO))) {
         throw std::runtime_error("Failed to parse ONNX file");
     }
 
    IRuntime* runtime = createInferRuntime(mLogger);
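    // The code below implements a build-once engine cache: if no serialized
    // engine exists at mEngineModelPath, the parsed ONNX network is built and
    // written to disk; on later runs the cached engine is deserialized instead,
    // skipping the slow optimization step.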
 
    // Define the engine file location relative to the mrover package
-    std::filesystem::path packagePath{mPackagePath};
-    std::filesystem::path enginePath = packagePath / "data" / std::string("tensorrt-engine-").append(modelName).append(".engine");
 
     // Check if engine file exists
-    if (!exists(enginePath)) {
+    if (!exists(mEngineModelPath)) {
         std::cout << "Optimizing ONNX model for TensorRT. This may take a long time..." << std::endl;
 
         // Create the Engine from onnx file
@@ -97,15 +104,15 @@ auto Inference::createCudaEngine(std::filesystem::path const& onnxModelPath, std
 
         // Save Engine to File
         auto trtModelStream = tempEng->serialize();
-        std::ofstream outputFileStream{enginePath, std::ios::binary};
-        outputFileStream.write(static_cast(trtModelStream->data()), trtModelStream->size());
+        std::ofstream outputFileStream{mEngineModelPath, std::ios::binary};
+        outputFileStream.write(static_cast(trtModelStream->data()), static_cast(trtModelStream->size()));
         outputFileStream.close();
 
         return tempEng;
     }
 
     // Load engine from file
-    std::ifstream inputFileStream{enginePath, std::ios::binary};
+    std::ifstream inputFileStream{mEngineModelPath, std::ios::binary};
     std::stringstream engineBuffer;
 
     // Stream in the engine file to the buffer
diff --git a/tensorrt/inference.cuh b/tensorrt/inference.cuh
index 29c6d4a8..0df14ef0 100644
--- a/tensorrt/inference.cuh
+++ b/tensorrt/inference.cuh
@@ -11,9 +11,12 @@ using nvinfer1::IExecutionContext;
 
 class Inference {
-    std::string mModelPath;
+    std::string mModelName;
     std::string mPackagePath;
 
+    std::filesystem::path mONNXModelPath;
+    std::filesystem::path mEngineModelPath;
+
     nvinfer1::Logger mLogger;
 
     std::unique_ptr mEngine{};
@@ -37,7 +40,7 @@ class Inference {
      *
      * Uses the file path to locate the onnx model and the modelName to locate the engine file
      */
-    auto createCudaEngine(std::filesystem::path const& onnxModelPath, std::string const& modelName) -> ICudaEngine*;
+    auto createCudaEngine() -> ICudaEngine*;
 
     /**
      * @brief Performs the Model forward pass and allocates the results in the output tensor
@@ -63,7 +66,7 @@ class Inference {
     auto createExecutionContext() -> void;
 
 public:
-    explicit Inference(std::filesystem::path const& onnxModelPath, std::string const& modelName, std::string packagePathString);
+    explicit Inference(std::string modelName, std::string packagePathString);
 
     /**
      * @brief Runs the forward pass on the given input image in CNN format
diff --git a/tensorrt/inference_wrapper.cpp b/tensorrt/inference_wrapper.cpp
index 0a52944c..d5f9e2f0 100644
--- a/tensorrt/inference_wrapper.cpp
+++ b/tensorrt/inference_wrapper.cpp
@@ -12,8 +12,8 @@ using namespace nvinfer1;
 * Modifies stream, outputTensor
 */
 
-InferenceWrapper::InferenceWrapper(std::string onnxModelPath, std::string const& modelName, std::string const& packagePath) {
-    mInference.reset(new Inference(std::move(onnxModelPath), modelName, packagePath));
+InferenceWrapper::InferenceWrapper(std::string const& modelName, std::string const& packagePath) {
+    mInference = std::make_shared(modelName, packagePath);
 }
 
 auto InferenceWrapper::doDetections(cv::Mat const& img) const -> void {
diff --git a/tensorrt/inference_wrapper.hpp b/tensorrt/inference_wrapper.hpp
index 3d9c3aeb..954a9851 100644
--- a/tensorrt/inference_wrapper.hpp
+++ b/tensorrt/inference_wrapper.hpp
@@ -12,7 +12,7 @@ class InferenceWrapper {
 
     ~InferenceWrapper() = default;
 
-    explicit InferenceWrapper(std::string onnxModelPath, std::string const& modelName, std::string const& packagePath);
+    explicit
InferenceWrapper(std::string const& modelName, std::string const& packagePath); auto doDetections(cv::Mat const& img) const -> void; @@ -20,4 +20,6 @@ class InferenceWrapper { [[nodiscard]] auto getOutputTensor() const -> cv::Mat; auto getInputTensorSize() -> std::vector; + + auto getOutputTensorSize() -> std::vector; }; diff --git a/tensorrt/tensorrt.cpp b/tensorrt/tensorrt.cpp index 5e122edd..f1d67415 100644 --- a/tensorrt/tensorrt.cpp +++ b/tensorrt/tensorrt.cpp @@ -7,10 +7,8 @@ TensortRT::TensortRT() = default; TensortRT::TensortRT(string modelName, string packagePathString) : mModelName{std::move(modelName)} { std::filesystem::path packagePath{std::move(packagePathString)}; - std::filesystem::path modelFileName = mModelName.append(".onnx"); - std::filesystem::path modelPath = packagePath / "data" / modelFileName; - mInferenceWrapper = InferenceWrapper{modelPath, mModelName, packagePath}; + mInferenceWrapper = InferenceWrapper{mModelName, packagePath}; } TensortRT::~TensortRT() = default; @@ -102,3 +100,7 @@ auto TensortRT::parseModelOutput(cv::Mat& output, std::vector& detect auto TensortRT::getInputTensorSize() -> std::vector{ return mInferenceWrapper.getInputTensorSize(); } + +auto TensortRT::getOutputTensorSize() -> std::vector{ + return mInferenceWrapper.getOutputTensorSize(); +} diff --git a/tensorrt/tensorrt.hpp b/tensorrt/tensorrt.hpp index d7874ba7..72be9dbf 100644 --- a/tensorrt/tensorrt.hpp +++ b/tensorrt/tensorrt.hpp @@ -34,4 +34,6 @@ class TensortRT { float modelNMSThreshold = 0.5) const -> void; auto getInputTensorSize() -> std::vector; + + auto getOutputTensorSize() -> std::vector; }; From 32b6b598cfa070f935c649ed04760da16d8fc3c6 Mon Sep 17 00:00:00 2001 From: jbrhm Date: Tue, 24 Sep 2024 14:30:35 -0400 Subject: [PATCH 06/12] Refactored to remove parsing from tensorrt --- .../object_detector/object_detector.cpp | 4 +- .../object_detector/object_detector.hpp | 9 +- .../object_detector.parsing.cpp | 83 ++++++++++++++++++ .../object_detector.processing.cpp | 11 ++- tensorrt/tensorrt.cpp | 85 +------------------ tensorrt/tensorrt.hpp | 10 +-- 6 files changed, 106 insertions(+), 96 deletions(-) create mode 100644 perception/object_detector/object_detector.parsing.cpp diff --git a/perception/object_detector/object_detector.cpp b/perception/object_detector/object_detector.cpp index f59e33dd..1e55f6f6 100644 --- a/perception/object_detector/object_detector.cpp +++ b/perception/object_detector/object_detector.cpp @@ -13,7 +13,7 @@ namespace mrover { {"hitcount_max", mObjMaxHitcount, 10}, {"model_name", mModelName, "Large-Dataset"}, {"model_score_threshold", mModelScoreThreshold, 0.75}, - {"model_nms_threshold", mModelNmsThreshold, 0.5}}; + {"model_nms_threshold", mModelNMSThreshold, 0.5}}; ParameterWrapper::declareParameters(this, params); @@ -30,7 +30,7 @@ namespace mrover { mDebugImgPub = create_publisher("object_detector/debug_img", 1); - RCLCPP_INFO_STREAM(get_logger(), std::format("Object detector initialized with model: {} and thresholds: {} and {}", mModelName, mModelScoreThreshold, mModelNmsThreshold)); + RCLCPP_INFO_STREAM(get_logger(), std::format("Object detector initialized with model: {} and thresholds: {} and {}", mModelName, mModelScoreThreshold, mModelNMSThreshold)); } StereoObjectDetector::StereoObjectDetector() { diff --git a/perception/object_detector/object_detector.hpp b/perception/object_detector/object_detector.hpp index 29bcd264..2c12c312 100644 --- a/perception/object_detector/object_detector.hpp +++ b/perception/object_detector/object_detector.hpp @@ 
-38,13 +38,14 @@ namespace mrover {
 
         // TODO(quintin): Do not hard code exactly two classes
         std::vector mObjectHitCounts{0, 0};
+        std::vector mClasses{"bottle", "hammer"};
 
         int mObjIncrementWeight{};
         int mObjDecrementWeight{};
         int mObjHitThreshold{};
         int mObjMaxHitcount{};
         float mModelScoreThreshold{};
-        float mModelNmsThreshold{};
+        float mModelNMSThreshold{};
 
         auto spiralSearchForValidPoint(sensor_msgs::msg::PointCloud2::UniquePtr const& cloudPtr,
                                        std::size_t u, std::size_t v,
@@ -58,6 +59,12 @@ namespace mrover {
 
         static auto drawDetectionBoxes(cv::InputOutputArray image, std::span detections) -> void;
 
+        auto static parseYOLOv8Output(cv::Mat& output,
+                                      std::vector& detections,
+                                      std::vector const& classes,
+                                      float modelScoreThreshold = 0.75,
+                                      float modelNMSThreshold = 0.5) -> void;
+
     public:
         explicit ObjectDetectorBase();
 
diff --git a/perception/object_detector/object_detector.parsing.cpp b/perception/object_detector/object_detector.parsing.cpp
new file mode 100644
index 00000000..a4a98b0c
--- /dev/null
+++ b/perception/object_detector/object_detector.parsing.cpp
@@ -0,0 +1,83 @@
+#include "object_detector.hpp"
+namespace mrover {
+    auto ObjectDetectorBase::parseYOLOv8Output(cv::Mat& output, std::vector& detections, std::vector const& classes, float modelScoreThreshold, float modelNMSThreshold) -> void {
+        // Parse model specific dimensioning from the output
+
+        // The input to this function is expecting a YOLOv8 style model, thus the dimensions should be > rows
+        if (output.cols <= output.rows) {
+            std::array message{};
+            snprintf(message.data(), message.size(), "Unexpected model output shape: %d cols and %d rows", output.cols, output.rows);
+            throw std::runtime_error(message.data());
+        }
+
+        int rows = output.cols;
+        int dimensions = output.rows;
+
+        // The output of the model is a batchSize x 84 x 8400 matrix
+        // Reshape and transpose it so that each row holds one candidate detection. TODO: Check this dimensioning
+        output = output.reshape(1, dimensions);
+        cv::transpose(output, output);
+
+        // This function expects the image to already be in the correct format thus no distortion is needed
+        float const xFactor = 1.0;
+        float const yFactor = 1.0;
+
+        // Intermediate Storage Containers
+        std::vector classIds;
+        std::vector confidences;
+        std::vector boxes;
+
+        // Each row of the output is a detection with a bounding box and associated class scores
+        for (int r = 0; r < rows; ++r) {
+            // Skip first four values as they are the box data
+            cv::Mat scores = output.row(r).colRange(4, dimensions);
+
+            cv::Point classId;
+            double maxClassScore;
+            cv::minMaxLoc(scores, nullptr, &maxClassScore, nullptr, &classId);
+
+            // Determine if the class is an acceptable confidence level, otherwise disregard
+            if (maxClassScore <= modelScoreThreshold) continue;
+
+            confidences.push_back(static_cast(maxClassScore));
+            classIds.push_back(classId.x);
+
+            // Get the bounding box data
+            cv::Mat box = output.row(r).colRange(0, 4);
+            auto x = box.at(0);
+            auto y = box.at(1);
+            auto w = box.at(2);
+            auto h = box.at(3);
+
+            // Cast the corners into integers to be used on pixels
+            auto left = static_cast((x - 0.5 * w) * xFactor);
+            auto top = static_cast((y - 0.5 * h) * yFactor);
+            auto width = static_cast(w * xFactor);
+            auto height = static_cast(h * yFactor);
+
+            // Push back the box into storage
+            boxes.emplace_back(left, top, width, height);
+        }
+
+        // Coalesce the boxes into a smaller number of distinct boxes
+        std::vector nmsResult;
+        cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nmsResult);
+
+        // Fill in the output Detections Vector
+        for (int i: nmsResult) {
+            Detection result;
+
+            // Fill in the id and confidence for the class
+            result.classId = classIds[i];
+            result.confidence = confidences[i];
+
+            // Fill in the class name and box information
+            result.className = classes[result.classId];
+            result.box = boxes[i];
+
+            // Push back the detection into the storage vector
+            detections.push_back(result);
+        }
+    }
+
+};
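For orientation: the 84 in parseYOLOv8Output's comment matches YOLOv8 trained on COCO (4 box values plus 80 class scores); with the two classes this detector uses ("bottle", "hammer") the dimension would be 4 + 2 = 6. A standalone sketch (not part of the patch) of the same reshape/transpose on a toy 6 x 5 output:

    #include <opencv2/core.hpp>
    #include <cstdio>

    int main() {
        cv::Mat output(6, 5, CV_32F); // delivered as [4 + numClasses] x [numAnchors]
        cv::randu(output, cv::Scalar(0), cv::Scalar(1));
        int rows = output.cols;       // 5 candidate detections
        int dimensions = output.rows; // cx, cy, w, h + 2 class scores
        output = output.reshape(1, dimensions);
        cv::transpose(output, output);
        // Each of the 5 rows is now one detection: cx, cy, w, h, score0, score1
        std::printf("%d x %d\n", output.rows, output.cols); // prints "5 x 6"
        return 0;
    }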
diff --git a/perception/object_detector/object_detector.processing.cpp b/perception/object_detector/object_detector.processing.cpp
index 881b6be1..39c56252 100644
--- a/perception/object_detector/object_detector.processing.cpp
+++ b/perception/object_detector/object_detector.processing.cpp
@@ -14,8 +14,13 @@ namespace mrover {
             RCLCPP_INFO_STREAM(get_logger(), std::format("Image size changed from [{}, {}] to [{}, {}]", mRgbImage.cols, mRgbImage.rows, msg->width, msg->height));
             mRgbImage = cv::Mat{static_cast(msg->height), static_cast(msg->width), CV_8UC3, cv::Scalar{0, 0, 0, 0}};
         }
-        convertPointCloudToRGB(msg, mRgbImage);
 
+        // If 0x0 image
+        if(!mRgbImage.total()){
+            return;
+        }
+
+        convertPointCloudToRGB(msg, mRgbImage);
 
         // Convert the RGB Image into the blob Image format
         cv::Mat blobSizedImage;
@@ -39,7 +44,9 @@ namespace mrover {
 
         // Run the blob through the model
         std::vector detections{};
-        mTensorRT.modelForwardPass(mImageBlob, detections, mModelScoreThreshold, mModelNmsThreshold);
+        cv::Mat outputTensor;
+        mTensorRT.modelForwardPass(mImageBlob, outputTensor);
+        ObjectDetectorBase::parseYOLOv8Output(outputTensor, detections, mClasses, mModelScoreThreshold, mModelNMSThreshold);
 
         mLoopProfiler.measureEvent("Execution");
 
diff --git a/tensorrt/tensorrt.cpp b/tensorrt/tensorrt.cpp
index f1d67415..0f6c157d 100644
--- a/tensorrt/tensorrt.cpp
+++ b/tensorrt/tensorrt.cpp
@@ -5,7 +5,6 @@ using namespace std;
 TensortRT::TensortRT() = default;
 
 TensortRT::TensortRT(string modelName, string packagePathString) : mModelName{std::move(modelName)} {
-
     std::filesystem::path packagePath{std::move(packagePathString)};
 
     mInferenceWrapper = InferenceWrapper{mModelName, packagePath};
 }
 
 TensortRT::~TensortRT() = default;
 
-auto TensortRT::modelForwardPass(cv::Mat const& blob, std::vector& detections, float modelScoreThreshold, float modelNMSThreshold) const -> void {
-    mInferenceWrapper.doDetections(blob);
-    cv::Mat output = mInferenceWrapper.getOutputTensor();
-    parseModelOutput(output, detections, modelScoreThreshold, modelNMSThreshold);
+auto TensortRT::modelForwardPass(cv::Mat const& inputBlobTensor, cv::Mat& outputTensor) const -> void {
+    mInferenceWrapper.doDetections(inputBlobTensor);
+    outputTensor = mInferenceWrapper.getOutputTensor();
 }
 
-auto TensortRT::parseModelOutput(cv::Mat& output, std::vector& detections, float modelScoreThreshold, float modelNMSThreshold) const -> void {
-    // Parse model specific dimensioning from the output
-
-    // The input to this function is expecting a YOLOv8 style model, thus the dimensions should be > rows
-    if (output.cols <= output.rows) {
-        throw std::runtime_error("Something is wrong Model with interpretation");
-    }
-
-    int rows = output.cols;
-    int dimensions = output.rows;
-
-    // The output of the model is a batchSizex84x8400 matrix
-    // This converts the model to a TODO: Check this dimensioning
-    output = output.reshape(1, dimensions);
-    cv::transpose(output, output);
-
-    // This function expects the image to already be in the correct format thus no distrotion is
needed - float const xFactor = 1.0; - float const yFactor = 1.0; - - // Intermediate Storage Containers - std::vector classIds; - std::vector confidences; - std::vector boxes; - - // Each row of the output is a detection with a bounding box and associated class scores - for (int r = 0; r < rows; ++r) { - // Skip first four values as they are the box data - cv::Mat scores = output.row(r).colRange(4, dimensions); - - cv::Point classId; - double maxClassScore; - cv::minMaxLoc(scores, nullptr, &maxClassScore, nullptr, &classId); - - // Determine if the class is an acceptable confidence level, otherwise disregard - if (maxClassScore <= modelScoreThreshold) continue; - - confidences.push_back(static_cast(maxClassScore)); - classIds.push_back(classId.x); - - // Get the bounding box data - cv::Mat box = output.row(r).colRange(0, 4); - auto x = box.at(0); - auto y = box.at(1); - auto w = box.at(2); - auto h = box.at(3); - - // Cast the corners into integers to be used on pixels - auto left = static_cast((x - 0.5 * w) * xFactor); - auto top = static_cast((y - 0.5 * h) * yFactor); - auto width = static_cast(w * xFactor); - auto height = static_cast(h * yFactor); - - // Push abck the box into storage - boxes.emplace_back(left, top, width, height); - } - - //Coalesce the boxes into a smaller number of distinct boxes - std::vector nmsResult; - cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nmsResult); - - // Fill in the output Detections Vector - for (int i: nmsResult) { - Detection result; - - //Fill in the id and confidence for the class - result.classId = classIds[i]; - result.confidence = confidences[i]; - - //Fill in the class name and box information - result.className = classes[result.classId]; - result.box = boxes[i]; - - //Push back the detection into the for storagevector - detections.push_back(result); - } -} auto TensortRT::getInputTensorSize() -> std::vector{ return mInferenceWrapper.getInputTensorSize(); diff --git a/tensorrt/tensorrt.hpp b/tensorrt/tensorrt.hpp index 72be9dbf..eb02f707 100644 --- a/tensorrt/tensorrt.hpp +++ b/tensorrt/tensorrt.hpp @@ -12,14 +12,9 @@ struct Detection { class TensortRT { std::string mModelName; - std::vector classes{"bottle", "hammer"}; InferenceWrapper mInferenceWrapper; - auto parseModelOutput(cv::Mat& output, - std::vector& detections, - float modelScoreThreshold = 0.75, - float modelNMSThreshold = 0.5) const -> void; public: TensortRT(); @@ -28,10 +23,7 @@ class TensortRT { ~TensortRT(); - auto modelForwardPass(cv::Mat const& blob, - std::vector& detections, - float modelScoreThreshold = 0.75, - float modelNMSThreshold = 0.5) const -> void; + auto modelForwardPass(cv::Mat const& inputBlobTensor, cv::Mat& outputTensor) const -> void; auto getInputTensorSize() -> std::vector; From 35525b4d7f06c5671961a595dadad345bec18251 Mon Sep 17 00:00:00 2001 From: jbrhm Date: Tue, 24 Sep 2024 14:50:48 -0400 Subject: [PATCH 07/12] Style Checking --- .../object_detector/object_detector.cpp | 6 +- .../object_detector/object_detector.hpp | 26 +-- .../object_detector.parsing.cpp | 162 +++++++++--------- .../object_detector.processing.cpp | 45 +++-- scripts/visualizer.py | 2 +- tensorrt/inference_wrapper.hpp | 4 +- tensorrt/tensorrt.cpp | 1 - tensorrt/tensorrt.hpp | 4 +- 8 files changed, 124 insertions(+), 126 deletions(-) diff --git a/perception/object_detector/object_detector.cpp b/perception/object_detector/object_detector.cpp index 1e55f6f6..efd9836d 100644 --- a/perception/object_detector/object_detector.cpp +++ 
b/perception/object_detector/object_detector.cpp
@@ -23,10 +23,10 @@ namespace mrover {
 
         RCLCPP_INFO_STREAM(get_logger(), "Found package path " << packagePath);
 
-        // Initialize TensorRT Inference Object and Get Important Output Information
-        mModelType = MODEL_TYPE::YOLOv8;
+        // Initialize TensorRT Inference Object and Get Important Output Information
+        mModelType = MODEL_TYPE::YOLOv8;
         mTensorRT = TensortRT{mModelName, packagePath.string()};
-        mInputTensorSize = mTensorRT.getInputTensorSize();
+        mInputTensorSize = mTensorRT.getInputTensorSize();
 
         mDebugImgPub = create_publisher("object_detector/debug_img", 1);
 
diff --git a/perception/object_detector/object_detector.hpp b/perception/object_detector/object_detector.hpp
index 2c12c312..8898584a 100644
--- a/perception/object_detector/object_detector.hpp
+++ b/perception/object_detector/object_detector.hpp
@@ -7,9 +7,9 @@ namespace mrover {
 
     class ObjectDetectorBase : public rclcpp::Node {
     protected:
-        enum MODEL_TYPE {
-            YOLOv8 = 0,
-        };
+        enum MODEL_TYPE {
+            YOLOv8 = 0,
+        };
 
         static constexpr char const* NODE_NAME = "object_detector";
 
@@ -17,15 +17,15 @@ namespace mrover {
         std::shared_ptr mTfBroadcaster = std::make_shared(this);
         std::shared_ptr mTfListener = std::make_shared(*mTfBuffer);
 
-        // TF Frames
+        // TF Frames
         std::string mCameraFrame;
         std::string mWorldFrame;
 
-        // Model Member Variables
-        MODEL_TYPE mModelType;
+        // Model Member Variables
+        MODEL_TYPE mModelType;
         std::string mModelName;
         cv::Mat mRgbImage, mImageBlob;
-        std::vector mInputTensorSize;
+        std::vector mInputTensorSize;
 
         LoopProfiler mLoopProfiler;
 
@@ -38,7 +38,7 @@ namespace mrover {
         // TODO(quintin): Do not hard code exactly two classes
         std::vector mObjectHitCounts{0, 0};
-        std::vector mClasses{"bottle", "hammer"};
+        std::vector mClasses{"bottle", "hammer"};
 
         int mObjIncrementWeight{};
         int mObjDecrementWeight{};
@@ -59,11 +59,11 @@ namespace mrover {
 
         static auto drawDetectionBoxes(cv::InputOutputArray image, std::span detections) -> void;
 
-        auto static parseYOLOv8Output(cv::Mat& output,
-                                      std::vector& detections,
-                                      std::vector const& classes,
-                                      float modelScoreThreshold = 0.75,
-                                      float modelNMSThreshold = 0.5) -> void;
+        auto static parseYOLOv8Output(cv::Mat& output,
+                                      std::vector& detections,
+                                      std::vector const& classes,
+                                      float modelScoreThreshold = 0.75,
+                                      float modelNMSThreshold = 0.5) -> void;
 
     public:
         explicit ObjectDetectorBase();
 
diff --git a/perception/object_detector/object_detector.parsing.cpp b/perception/object_detector/object_detector.parsing.cpp
index a4a98b0c..358e0441 100644
--- a/perception/object_detector/object_detector.parsing.cpp
+++ b/perception/object_detector/object_detector.parsing.cpp
@@ -1,83 +1,83 @@
 #include "object_detector.hpp"
 namespace mrover {
-    auto ObjectDetectorBase::parseYOLOv8Output(cv::Mat& output, std::vector& detections, std::vector const& classes, float modelScoreThreshold, float modelNMSThreshold) -> void {
-        // Parse model specific dimensioning from the output
-
-        // The input to this function is expecting a YOLOv8 style model, thus the dimensions should be > rows
-        if (output.cols <= output.rows) {
-            std::array message{};
-            snprintf(message.data(), message.size(), "Unexpected model output shape: %d cols and %d rows", output.cols, output.rows);
-            throw std::runtime_error(message.data());
-        }
-
-        int rows = output.cols;
-        int dimensions = output.rows;
-
-        // The output of the model is a batchSize x 84 x 8400 matrix
-        // Reshape and transpose it so that each row holds one candidate detection. TODO: Check this dimensioning
-        output = output.reshape(1, dimensions);
-        cv::transpose(output, output);
-
-        // This function expects the image to already be in the correct format thus no distortion is needed
-        float const xFactor = 1.0;
-        float const yFactor = 1.0;
-
-        // Intermediate Storage Containers
-        std::vector classIds;
-        std::vector confidences;
-        std::vector boxes;
-
-        // Each row of the output is a detection with a bounding box and associated class scores
-        for (int r = 0; r < rows; ++r) {
-            // Skip first four values as they are the box data
-            cv::Mat scores = output.row(r).colRange(4, dimensions);
-
-            cv::Point classId;
-            double maxClassScore;
-            cv::minMaxLoc(scores, nullptr, &maxClassScore, nullptr, &classId);
-
-            // Determine if the class is an acceptable confidence level, otherwise disregard
-            if (maxClassScore <= modelScoreThreshold) continue;
-
-            confidences.push_back(static_cast(maxClassScore));
-            classIds.push_back(classId.x);
-
-            // Get the bounding box data
-            cv::Mat box = output.row(r).colRange(0, 4);
-            auto x = box.at(0);
-            auto y = box.at(1);
-            auto w = box.at(2);
-            auto h = box.at(3);
-
-            // Cast the corners into integers to be used on pixels
-            auto left = static_cast((x - 0.5 * w) * xFactor);
-            auto top = static_cast((y - 0.5 * h) * yFactor);
-            auto width = static_cast(w * xFactor);
-            auto height = static_cast(h * yFactor);
-
-            // Push back the box into storage
-            boxes.emplace_back(left, top, width, height);
-        }
-
-        // Coalesce the boxes into a smaller number of distinct boxes
-        std::vector nmsResult;
-        cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nmsResult);
-
-        // Fill in the output Detections Vector
-        for (int i: nmsResult) {
-            Detection result;
-
-            // Fill in the id and confidence for the class
-            result.classId = classIds[i];
-            result.confidence = confidences[i];
-
-            // Fill in the class name and box information
-            result.className = classes[result.classId];
-            result.box = boxes[i];
-
-            // Push back the detection into the storage vector
-            detections.push_back(result);
-        }
-    }
-
-};
+    auto ObjectDetectorBase::parseYOLOv8Output(cv::Mat& output, std::vector& detections, std::vector const& classes, float modelScoreThreshold, float modelNMSThreshold) -> void {
+        // Parse model specific dimensioning from the output
+
+        // The input to this function is expecting a YOLOv8 style model, thus the dimensions should be > rows
+        if (output.cols <= output.rows) {
+            std::array message{};
+            snprintf(message.data(), message.size(), "Unexpected model output shape: %d cols and %d rows", output.cols, output.rows);
+            throw std::runtime_error(message.data());
+        }
+
+        int rows = output.cols;
+        int dimensions = output.rows;
+
+        // The output of the model is a batchSize x 84 x 8400 matrix
+        // Reshape and transpose it so that each row holds one candidate detection. TODO: Check this dimensioning
+        output = output.reshape(1, dimensions);
+        cv::transpose(output, output);
+
+        // This function expects the image to already be in the correct format thus no distortion is needed
+        float const xFactor = 1.0;
+        float const yFactor = 1.0;
+
+        // Intermediate Storage Containers
+        std::vector classIds;
+        std::vector confidences;
+        std::vector boxes;
+
+        // Each row of the output is a detection with a bounding box and associated class scores
+        for (int r = 0; r < rows; ++r) {
+            // Skip first four values as they are the box data
+            cv::Mat scores = output.row(r).colRange(4, dimensions);
+
+            cv::Point classId;
+            double maxClassScore;
+            cv::minMaxLoc(scores, nullptr, &maxClassScore, nullptr, &classId);
+
+            // Determine if the class is an acceptable confidence level, otherwise disregard
+            if (maxClassScore <= modelScoreThreshold) continue;
+
+            confidences.push_back(static_cast(maxClassScore));
+            classIds.push_back(classId.x);
+
+            // Get the bounding box data
+            cv::Mat box = output.row(r).colRange(0, 4);
+            auto x = box.at(0);
+            auto y = box.at(1);
+            auto w = box.at(2);
+            auto h = box.at(3);
+
+            // Cast the corners into integers to be used on pixels
+            auto left = static_cast((x - 0.5 * w) * xFactor);
+            auto top = static_cast((y - 0.5 * h) * yFactor);
+            auto width = static_cast(w * xFactor);
+            auto height = static_cast(h * yFactor);
+
+            // Push back the box into storage
+            boxes.emplace_back(left, top, width, height);
+        }
+
+        // Coalesce the boxes into a smaller number of distinct boxes
+        std::vector nmsResult;
+        cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nmsResult);
+
+        // Fill in the output Detections Vector
+        for (int i: nmsResult) {
+            Detection result;
+
+            // Fill in the id and confidence for the class
+            result.classId = classIds[i];
+            result.confidence = confidences[i];
+
+            // Fill in the class name and box information
+            result.className = classes[result.classId];
+            result.box = boxes[i];
+
+            // Push back the detection into the storage vector
+            detections.push_back(result);
+        }
+    }
+
+}; // namespace mrover
diff --git a/perception/object_detector/object_detector.processing.cpp b/perception/object_detector/object_detector.processing.cpp
index 39c56252..bb5f1506 100644
--- a/perception/object_detector/object_detector.processing.cpp
+++ b/perception/object_detector/object_detector.processing.cpp
@@ -15,38 +15,37 @@ namespace mrover {
             mRgbImage = cv::Mat{static_cast(msg->height), static_cast(msg->width), CV_8UC3, cv::Scalar{0, 0, 0, 0}};
         }
 
-        // If 0x0 image
-        if(!mRgbImage.total()){
-            return;
-        }
+        // If 0x0 image
+        if (!mRgbImage.total()) {
+            return;
+        }
 
         convertPointCloudToRGB(msg, mRgbImage);
-
-        // Convert the RGB Image into the blob Image format
+
+        // Convert the RGB Image into the blob Image format
         cv::Mat blobSizedImage;
-        switch(mModelType){
-            case MODEL_TYPE::YOLOv8:
-            {
-                if(mInputTensorSize.size() != 4){
-                    throw std::runtime_error("Expected Blob Size to be of size 4, are you using the correct model type?");
-                }
-
-                static constexpr double UCHAR_TO_DOUBLE = 1.0 / 255.0;
-
-                cv::Size blobSize{static_cast(mInputTensorSize[2]), static_cast(mInputTensorSize[3])};
-                cv::resize(mRgbImage, blobSizedImage, blobSize);
-                cv::dnn::blobFromImage(blobSizedImage, mImageBlob, UCHAR_TO_DOUBLE, blobSize, cv::Scalar{}, false, false);
-                break;
-            }
-        }
+        switch (mModelType) {
+            case MODEL_TYPE::YOLOv8: {
+                if (mInputTensorSize.size() != 4) {
+                    throw std::runtime_error("Expected Blob Size to be of size 4, are you using the correct model type?");
+                }
+
+                static constexpr double UCHAR_TO_DOUBLE = 1.0 / 255.0;
+
+                cv::Size blobSize{static_cast(mInputTensorSize[2]), static_cast(mInputTensorSize[3])};
+                cv::resize(mRgbImage, blobSizedImage, blobSize);
+                cv::dnn::blobFromImage(blobSizedImage, mImageBlob, UCHAR_TO_DOUBLE, blobSize, cv::Scalar{}, false, false);
+                break;
+            }
+        }
 
         mLoopProfiler.measureEvent("Conversion");
 
         // Run the blob through the model
         std::vector detections{};
-        cv::Mat outputTensor;
+        cv::Mat outputTensor;
         mTensorRT.modelForwardPass(mImageBlob, outputTensor);
-        ObjectDetectorBase::parseYOLOv8Output(outputTensor, detections, mClasses, mModelScoreThreshold, mModelNMSThreshold);
+        ObjectDetectorBase::parseYOLOv8Output(outputTensor, detections, mClasses, mModelScoreThreshold, mModelNMSThreshold);
mLoopProfiler.measureEvent("Execution"); diff --git a/scripts/visualizer.py b/scripts/visualizer.py index 5aae7a5d..336f9e67 100755 --- a/scripts/visualizer.py +++ b/scripts/visualizer.py @@ -112,7 +112,7 @@ def paintEvent(self, event): self.resize(self.renderer.defaultSize()) self.renderer.render(painter) - def update(self): + def update(self): # type: ignore[override] rclpy.spin_once(self.viz) with self.state_machine.mutex: if self.graph is None or self.state_machine.needs_redraw: diff --git a/tensorrt/inference_wrapper.hpp b/tensorrt/inference_wrapper.hpp index 954a9851..d6e15a36 100644 --- a/tensorrt/inference_wrapper.hpp +++ b/tensorrt/inference_wrapper.hpp @@ -19,7 +19,7 @@ class InferenceWrapper { // Retrieve the output tensor from the previous forward pass [[nodiscard]] auto getOutputTensor() const -> cv::Mat; - auto getInputTensorSize() -> std::vector; + [[nodiscard]] auto getInputTensorSize() -> std::vector; - auto getOutputTensorSize() -> std::vector; + [[nodiscard]] auto getOutputTensorSize() -> std::vector; }; diff --git a/tensorrt/tensorrt.cpp b/tensorrt/tensorrt.cpp index 0f6c157d..2c4dc3ff 100644 --- a/tensorrt/tensorrt.cpp +++ b/tensorrt/tensorrt.cpp @@ -17,7 +17,6 @@ auto TensortRT::modelForwardPass(cv::Mat const& inputBlobTensor, cv::Mat& output outputTensor = mInferenceWrapper.getOutputTensor(); } - auto TensortRT::getInputTensorSize() -> std::vector{ return mInferenceWrapper.getInputTensorSize(); } diff --git a/tensorrt/tensorrt.hpp b/tensorrt/tensorrt.hpp index eb02f707..2bd846fe 100644 --- a/tensorrt/tensorrt.hpp +++ b/tensorrt/tensorrt.hpp @@ -25,7 +25,7 @@ class TensortRT { auto modelForwardPass(cv::Mat const& inputBlobTensor, cv::Mat& outputTensor) const -> void; - auto getInputTensorSize() -> std::vector; + [[nodiscard]] auto getInputTensorSize() -> std::vector; - auto getOutputTensorSize() -> std::vector; + [[nodiscard]] auto getOutputTensorSize() -> std::vector; }; From 2cb094124c546f77e54f11005c2f708c25644824 Mon Sep 17 00:00:00 2001 From: jbrhm Date: Tue, 24 Sep 2024 14:52:02 -0400 Subject: [PATCH 08/12] Style Checking --- tensorrt/tensorrt.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/tensorrt/tensorrt.hpp b/tensorrt/tensorrt.hpp index 2bd846fe..dd45596b 100644 --- a/tensorrt/tensorrt.hpp +++ b/tensorrt/tensorrt.hpp @@ -12,10 +12,8 @@ struct Detection { class TensortRT { std::string mModelName; - InferenceWrapper mInferenceWrapper; - public: TensortRT(); From 317645deef2875b728624ce6450cacd9137e9c27 Mon Sep 17 00:00:00 2001 From: jbrhm Date: Wed, 25 Sep 2024 23:30:04 -0400 Subject: [PATCH 09/12] pre struct --- perception/object_detector/object_detector.processing.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/perception/object_detector/object_detector.processing.cpp b/perception/object_detector/object_detector.processing.cpp index bb5f1506..bdba3321 100644 --- a/perception/object_detector/object_detector.processing.cpp +++ b/perception/object_detector/object_detector.processing.cpp @@ -45,7 +45,12 @@ namespace mrover { std::vector detections{}; cv::Mat outputTensor; mTensorRT.modelForwardPass(mImageBlob, outputTensor); - ObjectDetectorBase::parseYOLOv8Output(outputTensor, detections, mClasses, mModelScoreThreshold, mModelNMSThreshold); + + switch (mModelType) { + case MODEL_TYPE::YOLOv8: { + ObjectDetectorBase::parseYOLOv8Output(outputTensor, detections, mClasses, mModelScoreThreshold, mModelNMSThreshold); + } + } mLoopProfiler.measureEvent("Execution"); From dc40933fba503f14e60dbf2b993638d88e4dbdaf Mon Sep 17 00:00:00 
2001 From: jbrhm Date: Thu, 26 Sep 2024 11:21:03 -0400 Subject: [PATCH 10/12] post model struct --- .../object_detector/object_detector.cpp | 22 ++++++--- .../object_detector/object_detector.hpp | 49 ++++++++++++------- .../object_detector.parsing.cpp | 21 ++++++-- .../object_detector.processing.cpp | 31 +++--------- perception/object_detector/pch.hpp | 1 + 5 files changed, 69 insertions(+), 55 deletions(-) diff --git a/perception/object_detector/object_detector.cpp b/perception/object_detector/object_detector.cpp index efd9836d..3fe57d43 100644 --- a/perception/object_detector/object_detector.cpp +++ b/perception/object_detector/object_detector.cpp @@ -4,6 +4,10 @@ namespace mrover { ObjectDetectorBase::ObjectDetectorBase() : rclcpp::Node(NODE_NAME), mLoopProfiler{get_logger()} { + std::string modelName; + float modelScoreThreshold{}; + float modelNMSThreshold{}; + std::vector params{ {"camera_frame", mCameraFrame, "zed_left_camera_frame"}, {"world_frame", mWorldFrame, "map"}, @@ -11,26 +15,28 @@ namespace mrover { {"decrement_weight", mObjDecrementWeight, 1}, {"hitcount_threshold", mObjHitThreshold, 5}, {"hitcount_max", mObjMaxHitcount, 10}, - {"model_name", mModelName, "Large-Dataset"}, - {"model_score_threshold", mModelScoreThreshold, 0.75}, - {"model_nms_threshold", mModelNMSThreshold, 0.5}}; + {"model_name", modelName, "Large-Dataset"}, + {"model_score_threshold", modelScoreThreshold, 0.75}, + {"model_nms_threshold", modelNMSThreshold, 0.5}}; ParameterWrapper::declareParameters(this, params); + + // All of these variables will be invalidated after calling this function std::filesystem::path packagePath = std::filesystem::path{ament_index_cpp::get_package_prefix("mrover")} / ".." / ".." / "src" / "mrover"; - RCLCPP_INFO_STREAM(get_logger(), "Opening Model " << mModelName); + RCLCPP_INFO_STREAM(get_logger(), "Opening Model " << modelName); RCLCPP_INFO_STREAM(get_logger(), "Found package path " << packagePath); // Initialize TensorRT Inference Object and Get Important Output Information - mModelType = MODEL_TYPE::YOLOv8; - mTensorRT = TensortRT{mModelName, packagePath.string()}; - mInputTensorSize = mTensorRT.getInputTensorSize(); + mTensorRT = TensortRT{modelName, packagePath.string()}; + + mModel = Model(modelName, {0, 0}, {"bottle", "hammer"}, mTensorRT.getInputTensorSize(), mTensorRT.getOutputTensorSize(), {modelScoreThreshold, modelNMSThreshold}, ObjectDetectorBase::preprocessYOLOv8Input, ObjectDetectorBase::parseYOLOv8Output); mDebugImgPub = create_publisher("object_detector/debug_img", 1); - RCLCPP_INFO_STREAM(get_logger(), std::format("Object detector initialized with model: {} and thresholds: {} and {}", mModelName, mModelScoreThreshold, mModelNMSThreshold)); + RCLCPP_INFO_STREAM(get_logger(), std::format("Object detector initialized with model: {} and thresholds: {} and {}", mModel.modelName, modelScoreThreshold, modelNMSThreshold)); } StereoObjectDetector::StereoObjectDetector() { diff --git a/perception/object_detector/object_detector.hpp b/perception/object_detector/object_detector.hpp index 8898584a..a605059a 100644 --- a/perception/object_detector/object_detector.hpp +++ b/perception/object_detector/object_detector.hpp @@ -7,9 +7,29 @@ namespace mrover { class ObjectDetectorBase : public rclcpp::Node { protected: - enum MODEL_TYPE { - YOLOv8 = 0, - }; + struct Model { + std::string modelName; + + std::vector objectHitCounts; + + std::vector classes; + + std::vector inputTensorSize; + + std::vector outputTensorSize; + + std::vector buffer; + + // Converts an rgb image to a 
blob
+        std::function rgbImageToBlob;
+
+        // Converts an output tensor into a vector of detections
+        std::function&)> outputTensorToDetections;
+
+        Model() = default;
+
+        Model(std::string _modelName, std::vector _objectHitCounts, std::vector _classes, std::vector _inputTensorSize, std::vector _outputTensorSize, std::vector _buffer, std::function _rgbImageToBlob, std::function&)> _outputTensorToDetections) : modelName{std::move(_modelName)}, objectHitCounts(std::move(_objectHitCounts)), classes(std::move(_classes)), inputTensorSize(std::move(_inputTensorSize)), outputTensorSize(std::move(_outputTensorSize)), buffer(std::move(_buffer)), rgbImageToBlob{std::move(_rgbImageToBlob)}, outputTensorToDetections{std::move(_outputTensorToDetections)} {}
+    };
 
         static constexpr char const* NODE_NAME = "object_detector";
 
@@ -17,15 +37,12 @@ namespace mrover {
         std::shared_ptr mTfBroadcaster = std::make_shared(this);
         std::shared_ptr mTfListener = std::make_shared(*mTfBuffer);
 
-        // TF Frames
+        Model mModel;
+
         std::string mCameraFrame;
         std::string mWorldFrame;
 
-        // Model Member Variables
-        MODEL_TYPE mModelType;
-        std::string mModelName;
         cv::Mat mRgbImage, mImageBlob;
-        std::vector mInputTensorSize;
 
         LoopProfiler mLoopProfiler;
 
@@ -36,16 +53,10 @@ namespace mrover {
         rclcpp::Publisher::SharedPtr mDebugImgPub;
         rclcpp::Subscription::SharedPtr mSensorSub;
 
-        // TODO(quintin): Do not hard code exactly two classes
-        std::vector mObjectHitCounts{0, 0};
-        std::vector mClasses{"bottle", "hammer"};
-
         int mObjIncrementWeight{};
         int mObjDecrementWeight{};
         int mObjHitThreshold{};
        int mObjMaxHitcount{};
-        float mModelScoreThreshold{};
-        float mModelNMSThreshold{};
 
         auto spiralSearchForValidPoint(sensor_msgs::msg::PointCloud2::UniquePtr const& cloudPtr,
                                        std::size_t u, std::size_t v,
@@ -59,11 +70,11 @@ namespace mrover {
 
         static auto drawDetectionBoxes(cv::InputOutputArray image, std::span detections) -> void;
 
-        auto static parseYOLOv8Output(cv::Mat& output,
-                                      std::vector& detections,
-                                      std::vector const& classes,
-                                      float modelScoreThreshold = 0.75,
-                                      float modelNMSThreshold = 0.5) -> void;
+        auto static parseYOLOv8Output(Model const& model,
+                                      cv::Mat& output,
+                                      std::vector& detections) -> void;
+
+        auto static preprocessYOLOv8Input(Model const& model, cv::Mat& rgbImage, cv::Mat& blob) -> void;
 
     public:
         explicit ObjectDetectorBase();
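The std::function members above turn per-model behavior into data: call sites invoke the stored hooks instead of switching on a model enum, so adding a model means constructing another Model rather than editing every call site. A minimal standalone sketch of that pattern (hypothetical names, not part of the patch):

    #include <functional>
    #include <iostream>
    #include <string>

    // Hypothetical stand-ins for the real image/detection types.
    struct Image {};
    struct Detections {};

    struct ModelHooks {
        std::string name;
        std::function<Detections(Image const&)> infer; // bound at construction
    };

    // The pipeline stays model-agnostic: it only calls the stored hook.
    Detections runPipeline(ModelHooks const& model, Image const& frame) {
        std::cout << "running " << model.name << '\n';
        return model.infer(frame);
    }

    int main() {
        ModelHooks yolo{"YOLOv8", [](Image const&) { return Detections{}; }};
        runPipeline(yolo, Image{});
    }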
diff --git a/perception/object_detector/object_detector.processing.cpp b/perception/object_detector/object_detector.processing.cpp
index bdba3321..ad2010ac 100644
--- a/perception/object_detector/object_detector.processing.cpp
+++ b/perception/object_detector/object_detector.processing.cpp
@@ -15,7 +15,7 @@ namespace mrover {
             mRgbImage = cv::Mat{static_cast<int>(msg->height), static_cast<int>(msg->width), CV_8UC3, cv::Scalar{0, 0, 0, 0}};
         }

-        // If 0x0 image
+        // if 0x0 image
         if (!mRgbImage.total()) {
             return;
         }
@@ -24,20 +24,7 @@ namespace mrover {

         // Convert the RGB Image into the blob Image format
         cv::Mat blobSizedImage;
-        switch (mModelType) {
-            case MODEL_TYPE::YOLOv8: {
-                if (mInputTensorSize.size() != 4) {
-                    throw std::runtime_error("Expected Blob Size to be of size 4, are you using the correct model type?");
-                }
-
-                static constexpr double UCHAR_TO_DOUBLE = 1.0 / 255.0;
-
-                cv::Size blobSize{static_cast<int>(mInputTensorSize[2]), static_cast<int>(mInputTensorSize[3])};
-                cv::resize(mRgbImage, blobSizedImage, blobSize);
-                cv::dnn::blobFromImage(blobSizedImage, mImageBlob, UCHAR_TO_DOUBLE, blobSize, cv::Scalar{}, false, false);
-                break;
-            }
-        }
+        mModel.rbgImageToBlob(mModel, mRgbImage, blobSizedImage);

         mLoopProfiler.measureEvent("Conversion");

@@ -46,11 +33,7 @@ namespace mrover {
         cv::Mat outputTensor;
         mTensorRT.modelForwardPass(mImageBlob, outputTensor);

-        switch (mModelType) {
-            case MODEL_TYPE::YOLOv8: {
-                ObjectDetectorBase::parseYOLOv8Output(outputTensor, detections, mClasses, mModelScoreThreshold, mModelNMSThreshold);
-            }
-        }
+        mModel.outputTensorToDetections(mModel, outputTensor, detections);

         mLoopProfiler.measureEvent("Execution");

@@ -69,7 +52,7 @@ namespace mrover {
         // Set of flags indicating if the given object has been seen
         // TODO(quintin): Do not hard code exactly two classes
         std::bitset<2> seenObjects{0b00};
-        for (auto const& [classId, className, confidence, box]: detections) {
+        for (auto const& [classId, className, confidence, box] : detections) {
             // Resize from blob space to image space
             cv::Point2f centerInBlob = cv::Point2f{box.tl()} + cv::Point2f{box.size()} / 2;
             float xRatio = static_cast<float>(msg->width) / static_cast<float>(imageSize.width);

             // Push the immediate detections to the camera frame
             SE3Conversions::pushToTfTree(*mTfBroadcaster, objectImmediateFrame, mCameraFrame, objectInCamera.value(), get_clock()->now());

             // Since the object is seen we need to increment the hit counter
-            mObjectHitCounts[classId] = std::min(mObjMaxHitcount, mObjectHitCounts[classId] + mObjIncrementWeight);
+            mModel.objectHitCounts[classId] = std::min(mObjMaxHitcount, mModel.objectHitCounts[classId] + mObjIncrementWeight);

             // Only publish to permanent if we are confident in the object
-            if (mObjectHitCounts[classId] > mObjHitThreshold) {
+            if (mModel.objectHitCounts[classId] > mObjHitThreshold) {
                 std::string objectPermanentFrame = className;
                 // Grab the object inside of the camera frame and push it into the map frame
                 SE3d objectInMap = SE3Conversions::fromTfTree(*mTfBuffer, objectImmediateFrame, mWorldFrame);
@@ -114,7 +97,7 @@ namespace mrover {
                 if (seenObjects[i]) continue;

-                assert(i < mObjectHitCounts.size());
-                mObjectHitCounts[i] = std::max(0, mObjectHitCounts[i] - mObjDecrementWeight);
+                assert(i < mModel.objectHitCounts.size());
+                mModel.objectHitCounts[i] = std::max(0, mModel.objectHitCounts[i] - mObjDecrementWeight);
             }
         }

diff --git a/perception/object_detector/pch.hpp b/perception/object_detector/pch.hpp
index 7ab5af34..026f6020 100644
--- a/perception/object_detector/pch.hpp
+++ b/perception/object_detector/pch.hpp
@@ -22,6 +22,7 @@
 #include
 #include
 #include
+#include <functional>
 #include
 #include
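Because detections are produced in blob coordinates, the loop above scales each box center by image/blob size ratios before sampling the point cloud. A standalone sketch of that mapping with assumed dimensions (the real values come from the point cloud message and from inputTensorSize):

    #include <cmath>
    #include <iostream>
    #include <opencv2/core.hpp>

    int main() {
        float imageWidth = 1280.0F, imageHeight = 720.0F; // camera resolution (assumed)
        float blobWidth = 640.0F, blobHeight = 640.0F;    // model input resolution (assumed)
        cv::Rect box{280, 300, 80, 40};                   // detection in blob space

        cv::Point2f centerInBlob = cv::Point2f{box.tl()} + cv::Point2f{box.size()} / 2; // (320, 320)
        float xRatio = imageWidth / blobWidth;   // 2.0
        float yRatio = imageHeight / blobHeight; // 1.125

        long centerXInImage = std::lround(centerInBlob.x * xRatio); // 640
        long centerYInImage = std::lround(centerInBlob.y * yRatio); // 360
        std::cout << centerXInImage << ", " << centerYInImage << '\n';
    }

Note that the stretch is not aspect-ratio preserving: a 1280x720 frame squeezed into 640x640 distorts boxes, which is acceptable here because only the center point is carried into the point cloud lookup.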
From 816bed3d9272461e8576f8e7451e92c2d0e522b3 Mon Sep 17 00:00:00 2001
From: jbrhm
Date: Thu, 26 Sep 2024 11:42:45 -0400
Subject: [PATCH 11/12] Works on zed

---
 perception/object_detector/object_detector.hpp            | 7 ++++---
 perception/object_detector/object_detector.parsing.cpp    | 7 +++++--
 perception/object_detector/object_detector.processing.cpp | 2 +-
 tensorrt/inference_wrapper.cpp                             | 4 ++++
 4 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/perception/object_detector/object_detector.hpp b/perception/object_detector/object_detector.hpp
index a605059a..766bee26 100644
--- a/perception/object_detector/object_detector.hpp
+++ b/perception/object_detector/object_detector.hpp
@@ -18,17 +18,18 @@ namespace mrover {

             std::vector<int64_t> outputTensorSize;

+            // Additional space for params that one algorithm might need and not others
             std::vector<float> buffer;

             // Converts an rgb image to a blob
-            std::function<void(Model const&, cv::Mat&, cv::Mat&)> rbgImageToBlob;
+            std::function<void(Model const&, cv::Mat&, cv::Mat&, cv::Mat&)> rbgImageToBlob;

             // Converts an output tensor into a vector of detections
             std::function<void(Model const&, cv::Mat&, std::vector<Detection>&)> outputTensorToDetections;

             Model() = default;

-            Model(std::string _modelName, std::vector<int> _objectHitCounts, std::vector<std::string> _classes, std::vector<int64_t> _inputTensorSize, std::vector<int64_t> _outputTensorSize, std::vector<float> _buffer, std::function<void(Model const&, cv::Mat&, cv::Mat&)> _rbgImageToBlob, std::function<void(Model const&, cv::Mat&, std::vector<Detection>&)> _outputTensorToDetections) : modelName{std::move(_modelName)}, objectHitCounts(std::move(_objectHitCounts)), classes(std::move(_classes)), inputTensorSize(std::move(_inputTensorSize)), outputTensorSize(std::move(_outputTensorSize)), buffer(std::move(_buffer)), rbgImageToBlob{std::move(_rbgImageToBlob)}, outputTensorToDetections{std::move(_outputTensorToDetections)} {}
+            Model(std::string _modelName, std::vector<int> _objectHitCounts, std::vector<std::string> _classes, std::vector<int64_t> _inputTensorSize, std::vector<int64_t> _outputTensorSize, std::vector<float> _buffer, std::function<void(Model const&, cv::Mat&, cv::Mat&, cv::Mat&)> _rbgImageToBlob, std::function<void(Model const&, cv::Mat&, std::vector<Detection>&)> _outputTensorToDetections) : modelName{std::move(_modelName)}, objectHitCounts(std::move(_objectHitCounts)), classes(std::move(_classes)), inputTensorSize(std::move(_inputTensorSize)), outputTensorSize(std::move(_outputTensorSize)), buffer(std::move(_buffer)), rbgImageToBlob{std::move(_rbgImageToBlob)}, outputTensorToDetections{std::move(_outputTensorToDetections)} {}
         };

         static constexpr char const* NODE_NAME = "object_detector";

@@ -74,7 +75,7 @@ namespace mrover {
                                       cv::Mat& output,
                                       std::vector<Detection>& detections) -> void;

-        auto static preprocessYOLOv8Input(Model const& model, cv::Mat& rgbImage, cv::Mat& blob) -> void;
+        auto static preprocessYOLOv8Input(Model const& model, cv::Mat& rgbImage, cv::Mat& blobSizedImage, cv::Mat& blob) -> void;

     public:
         explicit ObjectDetectorBase();

diff --git a/perception/object_detector/object_detector.parsing.cpp b/perception/object_detector/object_detector.parsing.cpp
index 7aab71ec..7daf4aaa 100644
--- a/perception/object_detector/object_detector.parsing.cpp
+++ b/perception/object_detector/object_detector.parsing.cpp
@@ -1,11 +1,14 @@
 #include "object_detector.hpp"

 namespace mrover {
-    auto ObjectDetectorBase::preprocessYOLOv8Input(Model const& model, cv::Mat& rgbImage, cv::Mat& blob) -> void{
-        cv::Mat blobSizedImage;
+    auto ObjectDetectorBase::preprocessYOLOv8Input(Model const& model, cv::Mat& rgbImage, cv::Mat& blobSizedImage, cv::Mat& blob) -> void{
         if (model.inputTensorSize.size() != 4) {
             throw std::runtime_error("Expected Blob Size to be of size 4, are you using the correct model type?");
         }

+        if (model.buffer.size() != 2){
+            throw std::runtime_error("Expected 2 additional parameters!");
+        }
+
         static constexpr double UCHAR_TO_DOUBLE = 1.0 / 255.0;

         cv::Size blobSize{static_cast<int>(model.inputTensorSize[2]), static_cast<int>(model.inputTensorSize[3])};

diff --git a/perception/object_detector/object_detector.processing.cpp b/perception/object_detector/object_detector.processing.cpp
index ad2010ac..434de661 100644
--- a/perception/object_detector/object_detector.processing.cpp
+++ b/perception/object_detector/object_detector.processing.cpp
@@ -24,7 +24,7 @@ namespace mrover {

         // Convert the RGB Image into the blob Image format
         cv::Mat blobSizedImage;
-        mModel.rbgImageToBlob(mModel, mRgbImage, blobSizedImage);
+        mModel.rbgImageToBlob(mModel, mRgbImage, blobSizedImage, mImageBlob);

         mLoopProfiler.measureEvent("Conversion");

diff --git a/tensorrt/inference_wrapper.cpp b/tensorrt/inference_wrapper.cpp
index d5f9e2f0..abd1a897 100644
--- a/tensorrt/inference_wrapper.cpp
+++ b/tensorrt/inference_wrapper.cpp
@@ -28,3 +28,7 @@ auto InferenceWrapper::getOutputTensor() const -> cv::Mat {
 auto InferenceWrapper::getInputTensorSize() -> std::vector<int64_t>{
     return mInference->getInputTensorSize();
 }
+
+auto InferenceWrapper::getOutputTensorSize() -> std::vector<int64_t>{
+    return mInference->getOutputTensorSize();
+}
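Splitting the preprocess signature so the caller receives both the resized image and the blob matters beyond style: mImageBlob feeds the engine, while blobSizedImage stays available to the caller, presumably for drawing the debug overlay on the resized frame (an assumption; the drawing code is outside this excerpt). A self-contained sketch of what the YOLOv8 preprocess produces, with assumed dimensions; the rank-4 guard in the patch exists because blobFromImage emits exactly this NCHW layout:

    #include <iostream>
    #include <opencv2/core.hpp>
    #include <opencv2/dnn.hpp>
    #include <opencv2/imgproc.hpp>

    int main() {
        // Stand-in for a camera frame; the real input is RGB pulled from the ZED point cloud.
        cv::Mat rgb{720, 1280, CV_8UC3, cv::Scalar{0, 255, 0}};

        cv::Size blobSize{640, 640}; // from inputTensorSize[3] and inputTensorSize[2]
        cv::Mat blobSizedImage, blob;
        cv::resize(rgb, blobSizedImage, blobSize);
        cv::dnn::blobFromImage(blobSizedImage, blob, 1.0 / 255.0, blobSize, cv::Scalar{}, false, false);

        // blob.size reports N x C x H x W, hence the inputTensorSize.size() != 4 guard.
        std::cout << blob.size[0] << 'x' << blob.size[1] << 'x'
                  << blob.size[2] << 'x' << blob.size[3] << '\n'; // prints 1x3x640x640
    }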
From 98283f0f3f9d4652fc322dbeeba03db7e04e00e9 Mon Sep 17 00:00:00 2001
From: jbrhm
Date: Fri, 27 Sep 2024 10:38:49 -0400
Subject: [PATCH 12/12] Style fixes

---
 .../object_detector/object_detector.cpp    |  8 ++--
 .../object_detector/object_detector.hpp    | 40 +++++++++----------
 .../object_detector.parsing.cpp            | 24 +++++------
 .../object_detector.processing.cpp         |  8 ++--
 perception/object_detector/pch.hpp         |  2 +-
 5 files changed, 41 insertions(+), 41 deletions(-)

diff --git a/perception/object_detector/object_detector.cpp b/perception/object_detector/object_detector.cpp
index 3fe57d43..545a0d96 100644
--- a/perception/object_detector/object_detector.cpp
+++ b/perception/object_detector/object_detector.cpp
@@ -4,7 +4,7 @@ namespace mrover {

     ObjectDetectorBase::ObjectDetectorBase() : rclcpp::Node(NODE_NAME), mLoopProfiler{get_logger()} {
-        std::string modelName;
+        std::string modelName;
         float modelScoreThreshold{};
         float modelNMSThreshold{};

@@ -20,8 +20,8 @@ namespace mrover {
                 {"model_nms_threshold", modelNMSThreshold, 0.5}};

         ParameterWrapper::declareParameters(this, params);
-
-        // All of these variables will be invalidated after calling this function
+
+        // All of these variables will be invalidated after calling this function
         std::filesystem::path packagePath = std::filesystem::path{ament_index_cpp::get_package_prefix("mrover")} / ".." / ".." / "src" / "mrover";

@@ -32,7 +32,7 @@ namespace mrover {
         // Initialize TensorRT Inference Object and Get Important Output Information
         mTensorRT = TensortRT{modelName, packagePath.string()};

-        mModel = Model(modelName, {0, 0}, {"bottle", "hammer"}, mTensorRT.getInputTensorSize(), mTensorRT.getOutputTensorSize(), {modelScoreThreshold, modelNMSThreshold}, ObjectDetectorBase::preprocessYOLOv8Input, ObjectDetectorBase::parseYOLOv8Output);
+        mModel = Model(modelName, {0, 0}, {"bottle", "hammer"}, mTensorRT.getInputTensorSize(), mTensorRT.getOutputTensorSize(), {modelScoreThreshold, modelNMSThreshold}, ObjectDetectorBase::preprocessYOLOv8Input, ObjectDetectorBase::parseYOLOv8Output);

         mDebugImgPub = create_publisher<sensor_msgs::msg::Image>("object_detector/debug_img", 1);
diff --git a/perception/object_detector/object_detector.hpp b/perception/object_detector/object_detector.hpp
index 766bee26..08893b24 100644
--- a/perception/object_detector/object_detector.hpp
+++ b/perception/object_detector/object_detector.hpp
@@ -7,30 +7,30 @@ namespace mrover {

     class ObjectDetectorBase : public rclcpp::Node {

     protected:
-        struct Model {
-            std::string modelName;
+        struct Model {
+            std::string modelName;

-            std::vector<int> objectHitCounts;
+            std::vector<int> objectHitCounts;

-            std::vector<std::string> classes;
+            std::vector<std::string> classes;

-            std::vector<int64_t> inputTensorSize;
+            std::vector<int64_t> inputTensorSize;

-            std::vector<int64_t> outputTensorSize;
+            std::vector<int64_t> outputTensorSize;

-            // Additional space for params that one algorithm might need and not others
-            std::vector<float> buffer;
-
-            // Converts an rgb image to a blob
-            std::function<void(Model const&, cv::Mat&, cv::Mat&, cv::Mat&)> rbgImageToBlob;
+            // Additional space for params that one algorithm might need and not others
+            std::vector<float> buffer;

-            // Converts an output tensor into a vector of detections
-            std::function<void(Model const&, cv::Mat&, std::vector<Detection>&)> outputTensorToDetections;
+            // Converts an rgb image to a blob
+            std::function<void(Model const&, cv::Mat&, cv::Mat&, cv::Mat&)> rbgImageToBlob;

-            Model() = default;
+            // Converts an output tensor into a vector of detections
+            std::function<void(Model const&, cv::Mat&, std::vector<Detection>&)> outputTensorToDetections;

-            Model(std::string _modelName, std::vector<int> _objectHitCounts, std::vector<std::string> _classes, std::vector<int64_t> _inputTensorSize, std::vector<int64_t> _outputTensorSize, std::vector<float> _buffer, std::function<void(Model const&, cv::Mat&, cv::Mat&, cv::Mat&)> _rbgImageToBlob, std::function<void(Model const&, cv::Mat&, std::vector<Detection>&)> _outputTensorToDetections) : modelName{std::move(_modelName)}, objectHitCounts(std::move(_objectHitCounts)), classes(std::move(_classes)), inputTensorSize(std::move(_inputTensorSize)), outputTensorSize(std::move(_outputTensorSize)), buffer(std::move(_buffer)), rbgImageToBlob{std::move(_rbgImageToBlob)}, outputTensorToDetections{std::move(_outputTensorToDetections)} {}
-        };
+            Model() = default;
+
+            Model(std::string _modelName, std::vector<int> _objectHitCounts, std::vector<std::string> _classes, std::vector<int64_t> _inputTensorSize, std::vector<int64_t> _outputTensorSize, std::vector<float> _buffer, std::function<void(Model const&, cv::Mat&, cv::Mat&, cv::Mat&)> _rbgImageToBlob, std::function<void(Model const&, cv::Mat&, std::vector<Detection>&)> _outputTensorToDetections) : modelName{std::move(_modelName)}, objectHitCounts(std::move(_objectHitCounts)), classes(std::move(_classes)), inputTensorSize(std::move(_inputTensorSize)), outputTensorSize(std::move(_outputTensorSize)), buffer(std::move(_buffer)), rbgImageToBlob{std::move(_rbgImageToBlob)}, outputTensorToDetections{std::move(_outputTensorToDetections)} {}
+        };

         static constexpr char const* NODE_NAME = "object_detector";
@@ -38,7 +38,7 @@ namespace mrover {
         std::shared_ptr<tf2_ros::TransformBroadcaster> mTfBroadcaster = std::make_shared<tf2_ros::TransformBroadcaster>(this);
         std::shared_ptr<tf2_ros::TransformListener> mTfListener = std::make_shared<tf2_ros::TransformListener>(*mTfBuffer);

-        Model mModel;
+        Model mModel;

         std::string mCameraFrame;
         std::string mWorldFrame;

@@ -72,10 +72,10 @@ namespace mrover {
         static auto drawDetectionBoxes(cv::InputOutputArray image, std::span<Detection const> detections) -> void;

         auto static parseYOLOv8Output(Model const& model,
-                                      cv::Mat& output,
-                                      std::vector<Detection>& detections) -> void;
+                                      cv::Mat& output,
+                                      std::vector<Detection>& detections) -> void;

-        auto static preprocessYOLOv8Input(Model const& model, cv::Mat& rgbImage, cv::Mat& blobSizedImage, cv::Mat& blob) -> void;
+        auto static preprocessYOLOv8Input(Model const& model, cv::Mat& rgbImage, cv::Mat& blobSizedImage, cv::Mat& blob) -> void;

     public:
         explicit ObjectDetectorBase();

diff --git a/perception/object_detector/object_detector.parsing.cpp b/perception/object_detector/object_detector.parsing.cpp
index 7daf4aaa..dc7d7167 100644
--- a/perception/object_detector/object_detector.parsing.cpp
+++ b/perception/object_detector/object_detector.parsing.cpp
@@ -1,20 +1,20 @@
 #include "object_detector.hpp"

 namespace mrover {
-    auto ObjectDetectorBase::preprocessYOLOv8Input(Model const& model, cv::Mat& rgbImage, cv::Mat& blobSizedImage, cv::Mat& blob) -> void{
-        if (model.inputTensorSize.size() != 4) {
-            throw std::runtime_error("Expected Blob Size to be of size 4, are you using the correct model type?");
-        }
+    auto ObjectDetectorBase::preprocessYOLOv8Input(Model const& model, cv::Mat& rgbImage, cv::Mat& blobSizedImage, cv::Mat& blob) -> void {
+        if (model.inputTensorSize.size() != 4) {
+            throw std::runtime_error("Expected Blob Size to be of size 4, are you using the correct model type?");
+        }

-        if (model.buffer.size() != 2){
-            throw std::runtime_error("Expected 2 additional parameters!");
-        }
+        if (model.buffer.size() != 2) {
+            throw std::runtime_error("Expected 2 additional parameters!");
+        }

-        static constexpr double UCHAR_TO_DOUBLE = 1.0 / 255.0;
+        static constexpr double UCHAR_TO_DOUBLE = 1.0 / 255.0;

-        cv::Size blobSize{static_cast<int>(model.inputTensorSize[2]), static_cast<int>(model.inputTensorSize[3])};
-        cv::resize(rgbImage, blobSizedImage, blobSize);
-        cv::dnn::blobFromImage(blobSizedImage, blob, UCHAR_TO_DOUBLE, blobSize, cv::Scalar{}, false, false);
-    }
+        cv::Size blobSize{static_cast<int>(model.inputTensorSize[2]), static_cast<int>(model.inputTensorSize[3])};
+        cv::resize(rgbImage, blobSizedImage, blobSize);
+        cv::dnn::blobFromImage(blobSizedImage, blob, UCHAR_TO_DOUBLE, blobSize, cv::Scalar{}, false, false);
+    }

     auto ObjectDetectorBase::parseYOLOv8Output(Model const& model, cv::Mat& output, std::vector<Detection>& detections) -> void {
         // Parse model specific dimensioning from the output

diff --git a/perception/object_detector/object_detector.processing.cpp b/perception/object_detector/object_detector.processing.cpp
index 434de661..29a3b9ba 100644
--- a/perception/object_detector/object_detector.processing.cpp
+++ b/perception/object_detector/object_detector.processing.cpp
@@ -24,7 +24,7 @@ namespace mrover {

         // Convert the RGB Image into the blob Image format
         cv::Mat blobSizedImage;
-        mModel.rbgImageToBlob(mModel, mRgbImage, blobSizedImage, mImageBlob);
+        mModel.rbgImageToBlob(mModel, mRgbImage, blobSizedImage, mImageBlob);

         mLoopProfiler.measureEvent("Conversion");

@@ -32,8 +32,8 @@ namespace mrover {
         mTensorRT.modelForwardPass(mImageBlob, outputTensor);
-
-        mModel.outputTensorToDetections(mModel, outputTensor, detections);
+
+        mModel.outputTensorToDetections(mModel, outputTensor, detections);

         mLoopProfiler.measureEvent("Execution");

@@ -52,7 +52,7 @@ namespace mrover {
         // Set of flags indicating if the given object has been seen
         // TODO(quintin): Do not hard code exactly two classes
         std::bitset<2> seenObjects{0b00};
-        for (auto const& [classId, className, confidence, box] : detections) {
+        for (auto const& [classId, className, confidence, box]: detections) {
             // Resize from blob space to image space
             cv::Point2f centerInBlob = cv::Point2f{box.tl()} + cv::Point2f{box.size()} / 2;
             float xRatio = static_cast<float>(msg->width) / static_cast<float>(imageSize.width);

diff --git a/perception/object_detector/pch.hpp b/perception/object_detector/pch.hpp
index 026f6020..ea7badcf 100644
--- a/perception/object_detector/pch.hpp
+++ b/perception/object_detector/pch.hpp
@@ -11,6 +11,7 @@
 #include
 #include
 #include
+#include <functional>
 #include
 #include
 #include
@@ -22,7 +23,6 @@
 #include
 #include
 #include
-#include <functional>
 #include
 #include
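With the style pass applied, the series leaves the detector with a single extension point: supporting a new architecture means writing one preprocess and one parse function and constructing a Model from them, with no switches to update. As a closing illustration, here is a self-contained sketch of how hooks could be selected by the existing model_name parameter; everything in it is invented for illustration, and the codebase as patched constructs the YOLOv8 pair unconditionally:

    #include <functional>
    #include <iostream>
    #include <map>
    #include <string>

    struct Hooks {
        std::function<void()> preprocess;
        std::function<void()> parse;
    };

    int main() {
        // Hypothetical registry keyed the same way model_name already selects the engine file.
        std::map<std::string, Hooks> registry{
                {"yolov8", {[] { std::cout << "yolov8 preprocess\n"; }, [] { std::cout << "yolov8 parse\n"; }}},
                {"ssd", {[] { std::cout << "ssd preprocess\n"; }, [] { std::cout << "ssd parse\n"; }}}};

        std::string modelName = "yolov8"; // would come from the model_name ROS parameter
        Hooks const& hooks = registry.at(modelName);
        hooks.preprocess();
        hooks.parse();
    }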