From c7e7a48cf9bc5679dc3cf71675c7bc7ffddf2958 Mon Sep 17 00:00:00 2001
From: jbrhm
Date: Mon, 23 Sep 2024 11:34:45 -0400
Subject: [PATCH 01/25] Get blob dimensions from model

---
 .../object_detector/object_detector.cpp  |  3 +++
 .../object_detector/object_detector.hpp  | 10 +++++++++-
 .../object_detector.processing.cpp       | 18 ++++++++++++++----
 tensorrt/inference.cu                    | 19 +++++++++++++++++++
 tensorrt/inference.cuh                   |  5 +++++
 tensorrt/inference_wrapper.cpp           |  4 ++++
 tensorrt/inference_wrapper.hpp           |  2 ++
 tensorrt/tensorrt.cpp                    |  4 ++++
 tensorrt/tensorrt.hpp                    |  2 ++
 9 files changed, 62 insertions(+), 5 deletions(-)

diff --git a/perception/object_detector/object_detector.cpp b/perception/object_detector/object_detector.cpp
index dc7f91e6..ad96761f 100644
--- a/perception/object_detector/object_detector.cpp
+++ b/perception/object_detector/object_detector.cpp
@@ -23,7 +23,10 @@ namespace mrover {
 
         RCLCPP_INFO_STREAM(get_logger(), "Found package path " << packagePath);
 
+        // Initialize TensorRT Inference Object and Get Important Output Information
+        mModelType = MODEL_TYPE::YOLOv8;
         mTensorRT = TensortRT{mModelName, packagePath.string()};
+        mBlobSize = mTensorRT.getInputBlobSize();
 
         mDebugImgPub = create_publisher<sensor_msgs::msg::Image>("object_detector/debug_img", 1);
 
diff --git a/perception/object_detector/object_detector.hpp b/perception/object_detector/object_detector.hpp
index 28ebe502..cc4056e6 100644
--- a/perception/object_detector/object_detector.hpp
+++ b/perception/object_detector/object_detector.hpp
@@ -7,22 +7,30 @@ namespace mrover {
 
     class ObjectDetectorBase : public rclcpp::Node {
 
     protected:
+        enum MODEL_TYPE {
+            YOLOv8 = 0,
+        };
+
        static constexpr char const* NODE_NAME = "object_detector";
 
        std::unique_ptr<tf2_ros::Buffer> mTfBuffer = std::make_unique<tf2_ros::Buffer>(get_clock());
        std::shared_ptr<tf2_ros::TransformBroadcaster> mTfBroadcaster = std::make_shared<tf2_ros::TransformBroadcaster>(this);
        std::shared_ptr<tf2_ros::TransformListener> mTfListener = std::make_shared<tf2_ros::TransformListener>(*mTfBuffer);
 
+        // TF Frames
        std::string mCameraFrame;
        std::string mWorldFrame;
 
+        // Model Member Variables
+        MODEL_TYPE mModelType;
        std::string mModelName;
+        cv::Mat mRgbImage, mImageBlob;
+        std::vector mBlobSize;
 
        LoopProfiler mLoopProfiler;
 
        TensortRT mTensorRT;
 
-        cv::Mat mRgbImage, mImageBlob;
        sensor_msgs::msg::Image mDetectionsImageMessage;
 
        rclcpp::Publisher<sensor_msgs::msg::Image>::SharedPtr mDebugImgPub;
 
diff --git a/perception/object_detector/object_detector.processing.cpp b/perception/object_detector/object_detector.processing.cpp
index 4be823b4..f7e48776 100644
--- a/perception/object_detector/object_detector.processing.cpp
+++ b/perception/object_detector/object_detector.processing.cpp
@@ -16,11 +16,21 @@ namespace mrover {
        }
 
        convertPointCloudToRGB(msg, mRgbImage);
-        // TODO(quintin): Avoid hard coding blob size
-        cv::Size blobSize{640, 640};
+
+        // Convert the RGB Image into the blob Image format
        cv::Mat blobSizedImage;
-        cv::resize(mRgbImage, blobSizedImage, blobSize);
-        cv::dnn::blobFromImage(blobSizedImage, mImageBlob, 1.0 / 255.0, blobSize, cv::Scalar{}, false, false);
+        switch(mModelType){
+            case MODEL_TYPE::YOLOv8:
+            {
+                if(mBlobSize.size() != 4){
+                    throw std::runtime_error("Expected Blob Size to be of size 4, are you using the correct model type?");
+                }
+                cv::Size blobSize{static_cast<int>(mBlobSize[2]), static_cast<int>(mBlobSize[3])};
+                cv::resize(mRgbImage, blobSizedImage, blobSize);
+                cv::dnn::blobFromImage(blobSizedImage, mImageBlob, 1.0 / 255.0, blobSize, cv::Scalar{}, false, false);
+                break;
+            }
+        }
 
        mLoopProfiler.measureEvent("Conversion");
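Note: the four values mBlobSize receives are the engine's input shape in NCHW order, so for a stock YOLOv8 engine they are {1, 3, 640, 640} and indices 2 and 3 are height and width. The patch passes index 2 as the cv::Size width, which is only safe because the YOLOv8 input is square. A standalone sketch of the conversion with illustrative values (not repository code):

    #include <cstdint>
    #include <vector>
    #include <opencv2/core.hpp>
    #include <opencv2/dnn.hpp>
    #include <opencv2/imgproc.hpp>

    int main() {
        std::vector<std::int64_t> inputShape{1, 3, 640, 640}; // batch, channels, height, width
        cv::Mat rgb{720, 1280, CV_8UC3, cv::Scalar{0, 0, 0}}; // stand-in camera frame

        // cv::Size takes (width, height), i.e. shape indices 3 then 2.
        cv::Size blobSize{static_cast<int>(inputShape[3]), static_cast<int>(inputShape[2])};

        cv::Mat resized, blob;
        cv::resize(rgb, resized, blobSize);
        // Scales bytes to [0, 1] floats and reorders HWC -> NCHW.
        cv::dnn::blobFromImage(resized, blob, 1.0 / 255.0, blobSize, cv::Scalar{}, false, false);
        CV_Assert(blob.dims == 4 && blob.size[1] == 3 && blob.size[2] == 640 && blob.size[3] == 640);
        return 0;
    }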
diff --git a/tensorrt/inference.cu b/tensorrt/inference.cu
index 17bda4c3..b0582bd6 100644
--- a/tensorrt/inference.cu
+++ b/tensorrt/inference.cu
@@ -170,3 +170,22 @@ auto Inference::getBindingInputIndex(IExecutionContext const* context) -> int {
     // Returns the id for the input tensor
     return context->getEngine().getTensorIOMode(context->getEngine().getIOTensorName(0)) != TensorIOMode::kINPUT; // 0 (false) if bindingIsInput(0), 1 (true) otherwise
 }
+
+
+auto Inference::getInputBlobSize() -> std::vector{
+    auto dims = mEngine->getTensorShape(INPUT_BINDING_NAME);
+    std::vector inputBlobSize;
+    inputBlobSize.reserve(dims.nbDims);
+
+    for(int32_t i = 0; i < dims.nbDims; ++i){
+        inputBlobSize.push_back(dims.d[i]);
+    }
+
+    for(int i = 0; i < dims.nbDims; ++i){
+        std::array message;
+        std::snprintf(message.data(), message.size(), "Blob Size %d %d", i, dims.d[i]);
+        mLogger.log(ILogger::Severity::kINFO, message.data());
+    }
+
+    return inputBlobSize;
+}
diff --git a/tensorrt/inference.cuh b/tensorrt/inference.cuh
index d0b65469..736d770e 100644
--- a/tensorrt/inference.cuh
+++ b/tensorrt/inference.cuh
@@ -72,4 +72,9 @@ public:
     * @brief Retrieves the mat where the output from the forward pass was stored
     */
    auto getOutputTensor() -> cv::Mat;
+
+    /**
+     * @brief Retrieves the expected input matrix size
+     */
+    auto getInputBlobSize() -> std::vector;
 };
diff --git a/tensorrt/inference_wrapper.cpp b/tensorrt/inference_wrapper.cpp
index d5849095..eeb504f0 100644
--- a/tensorrt/inference_wrapper.cpp
+++ b/tensorrt/inference_wrapper.cpp
@@ -24,3 +24,7 @@ auto InferenceWrapper::doDetections(cv::Mat const& img) const -> void {
 auto InferenceWrapper::getOutputTensor() const -> cv::Mat {
     return mInference->getOutputTensor();
 }
+
+auto InferenceWrapper::getInputBlobSize() -> std::vector{
+    return mInference->getInputBlobSize();
+}
diff --git a/tensorrt/inference_wrapper.hpp b/tensorrt/inference_wrapper.hpp
index f9d83b99..be9b1eb8 100644
--- a/tensorrt/inference_wrapper.hpp
+++ b/tensorrt/inference_wrapper.hpp
@@ -18,4 +18,6 @@ class InferenceWrapper {
 
     // Retrieve the output tensor from the previous forward pass
     [[nodiscard]] auto getOutputTensor() const -> cv::Mat;
+
+    auto getInputBlobSize() -> std::vector;
 };
diff --git a/tensorrt/tensorrt.cpp b/tensorrt/tensorrt.cpp
index 287b3f2a..5319c328 100644
--- a/tensorrt/tensorrt.cpp
+++ b/tensorrt/tensorrt.cpp
@@ -98,3 +98,7 @@ auto TensortRT::parseModelOutput(cv::Mat& output, std::vector<Detection>& detect
         detections.push_back(result);
     }
 }
+
+auto TensortRT::getInputBlobSize() -> std::vector{
+    return mInferenceWrapper.getInputBlobSize();
+}
diff --git a/tensorrt/tensorrt.hpp b/tensorrt/tensorrt.hpp
index 6d695636..70073a97 100644
--- a/tensorrt/tensorrt.hpp
+++ b/tensorrt/tensorrt.hpp
@@ -32,4 +32,6 @@ class TensortRT {
                          std::vector<Detection>& detections,
                          float modelScoreThreshold = 0.75,
                          float modelNMSThreshold = 0.5) const -> void;
+
+    auto getInputBlobSize() -> std::vector;
 };

From 9eab3dfab5c5613b91b2d8803ef2be56467a3910 Mon Sep 17 00:00:00 2001
From: jbrhm
Date: Mon, 23 Sep 2024 11:37:06 -0400
Subject: [PATCH 02/25] More Verbose

---
 perception/object_detector/object_detector.processing.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/perception/object_detector/object_detector.processing.cpp b/perception/object_detector/object_detector.processing.cpp
index f7e48776..5564a853 100644
--- a/perception/object_detector/object_detector.processing.cpp
+++ b/perception/object_detector/object_detector.processing.cpp
@@ -25,9 +25,12 @@ namespace mrover {
                if(mBlobSize.size() != 4){
                    throw std::runtime_error("Expected Blob Size to be of size 4, are you using the correct model type?");
                }
+
+                static constexpr double UCHAR_TO_DOUBLE = 1.0 / 255.0;
+
                cv::Size blobSize{static_cast<int>(mBlobSize[2]), static_cast<int>(mBlobSize[3])};
                cv::resize(mRgbImage, blobSizedImage, blobSize);
-                cv::dnn::blobFromImage(blobSizedImage, mImageBlob, 1.0 / 255.0, blobSize, cv::Scalar{}, false, false);
+                cv::dnn::blobFromImage(blobSizedImage, mImageBlob, UCHAR_TO_DOUBLE, blobSize, cv::Scalar{}, false, false);
                break;
            }
        }
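Note: getInputBlobSize() above is a thin wrapper over ICudaEngine::getTensorShape. A minimal sketch of that query in isolation (assumes the TensorRT 8.5+ name-based I/O API and an already-built engine; not repository code):

    #include <NvInfer.h>
    #include <cstdint>
    #include <vector>

    // Returns e.g. {1, 3, 640, 640} for a YOLOv8 "images" binding.
    std::vector<std::int64_t> shapeOf(nvinfer1::ICudaEngine const& engine, char const* tensorName) {
        nvinfer1::Dims dims = engine.getTensorShape(tensorName);
        std::vector<std::int64_t> shape;
        shape.reserve(dims.nbDims);
        for (std::int32_t i = 0; i < dims.nbDims; ++i)
            shape.push_back(dims.d[i]);
        return shape;
    }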
From 071bce0ac60d2b1d4b57d704c95b2f0af2e530a0 Mon Sep 17 00:00:00 2001
From: jbrhm
Date: Mon, 23 Sep 2024 12:36:25 -0400
Subject: [PATCH 03/25] Better Naming

---
 .../object_detector/object_detector.cpp |  2 +-
 .../object_detector/object_detector.hpp |  2 +-
 .../object_detector.processing.cpp      |  4 +-
 tensorrt/inference.cu                   | 53 ++++++++++++++-----
 tensorrt/inference.cuh                  | 11 +++-
 tensorrt/inference_wrapper.cpp          |  4 +-
 tensorrt/inference_wrapper.hpp          |  2 +-
 tensorrt/tensorrt.cpp                   |  4 +-
 tensorrt/tensorrt.hpp                   |  2 +-
 9 files changed, 59 insertions(+), 25 deletions(-)

diff --git a/perception/object_detector/object_detector.cpp b/perception/object_detector/object_detector.cpp
index ad96761f..f59e33dd 100644
--- a/perception/object_detector/object_detector.cpp
+++ b/perception/object_detector/object_detector.cpp
@@ -26,7 +26,7 @@ namespace mrover {
        // Initialize TensorRT Inference Object and Get Important Output Information
        mModelType = MODEL_TYPE::YOLOv8;
        mTensorRT = TensortRT{mModelName, packagePath.string()};
-        mBlobSize = mTensorRT.getInputBlobSize();
+        mInputTensorSize = mTensorRT.getInputTensorSize();
 
        mDebugImgPub = create_publisher<sensor_msgs::msg::Image>("object_detector/debug_img", 1);
 
diff --git a/perception/object_detector/object_detector.hpp b/perception/object_detector/object_detector.hpp
index cc4056e6..29bcd264 100644
--- a/perception/object_detector/object_detector.hpp
+++ b/perception/object_detector/object_detector.hpp
@@ -25,7 +25,7 @@ namespace mrover {
        MODEL_TYPE mModelType;
        std::string mModelName;
        cv::Mat mRgbImage, mImageBlob;
-        std::vector mBlobSize;
+        std::vector mInputTensorSize;
 
        LoopProfiler mLoopProfiler;
 
diff --git a/perception/object_detector/object_detector.processing.cpp b/perception/object_detector/object_detector.processing.cpp
index 5564a853..881b6be1 100644
--- a/perception/object_detector/object_detector.processing.cpp
+++ b/perception/object_detector/object_detector.processing.cpp
@@ -22,13 +22,13 @@ namespace mrover {
        switch(mModelType){
            case MODEL_TYPE::YOLOv8:
            {
-                if(mBlobSize.size() != 4){
+                if(mInputTensorSize.size() != 4){
                    throw std::runtime_error("Expected Blob Size to be of size 4, are you using the correct model type?");
                }
 
                static constexpr double UCHAR_TO_DOUBLE = 1.0 / 255.0;
 
-                cv::Size blobSize{static_cast<int>(mBlobSize[2]), static_cast<int>(mBlobSize[3])};
+                cv::Size blobSize{static_cast<int>(mInputTensorSize[2]), static_cast<int>(mInputTensorSize[3])};
                cv::resize(mRgbImage, blobSizedImage, blobSize);
                cv::dnn::blobFromImage(blobSizedImage, mImageBlob, UCHAR_TO_DOUBLE, blobSize, cv::Scalar{}, false, false);
                break;
diff --git a/tensorrt/inference.cu b/tensorrt/inference.cu
index b0582bd6..cc5a8eb3 100644
--- a/tensorrt/inference.cu
+++ b/tensorrt/inference.cu
@@ -11,8 +11,6 @@ using namespace nvinfer1;
  * Requires bindings, inputTensor, stream
  * Modifies stream, outputTensor
  */
-constexpr static auto INPUT_BINDING_NAME = "images";
-constexpr static auto OUTPUT_BINDING_NAME = "output0";
 
 Inference::Inference(std::filesystem::path const& onnxModelPath, std::string const& modelName, std::string packagePathString) : mPackagePath{std::move(packagePathString)} {
     mModelPath = onnxModelPath.string();
@@ -25,8 +23,32 @@ Inference::Inference(std::filesystem::path const& onnxModelPath, std::string con
 
     // Check some assumptions about the model
     if (mEngine->getNbIOTensors() != 2) throw std::runtime_error("Invalid Binding Count");
-    if (mEngine->getTensorIOMode(INPUT_BINDING_NAME) != TensorIOMode::kINPUT) throw std::runtime_error("Expected Input Binding 0 Is An Input");
-    if (mEngine->getTensorIOMode(OUTPUT_BINDING_NAME) != TensorIOMode::kOUTPUT) throw std::runtime_error("Expected Input Binding Input To Be 1");
+
+    // Store the IO Tensor Names
+    mInputTensorName = mEngine->getIOTensorName(0);
+    mOutputTensorName = mEngine->getIOTensorName(1);
+
+    if (mEngine->getTensorIOMode(mInputTensorName.c_str()) != TensorIOMode::kINPUT) throw std::runtime_error("Expected Binding 0 To Be An Input");
+    if (mEngine->getTensorIOMode(mOutputTensorName.c_str()) != TensorIOMode::kOUTPUT) throw std::runtime_error("Expected Binding 1 To Be An Output");
+
+    // Be verbose about the input tensor size
+    auto inputTensorSize = getInputTensorSize();
+    std::array message;
+    std::snprintf(message.data(), message.size(), "%s Tensor's Dimensions:", mInputTensorName.c_str());
+    mLogger.log(ILogger::Severity::kINFO, message.data());
+    for(size_t i = 0; i < inputTensorSize.size(); ++i){
+        std::snprintf(message.data(), message.size(), "Dimension: %zu Size: %zu", i, inputTensorSize[i]);
+        mLogger.log(ILogger::Severity::kINFO, message.data());
+    }
+
+    // Be verbose about the output tensor size
+    auto outputTensorSize = getOutputTensorSize();
+    std::snprintf(message.data(), message.size(), "%s Tensor's Dimensions:", mOutputTensorName.c_str());
+    mLogger.log(ILogger::Severity::kINFO, message.data());
+    for(size_t i = 0; i < outputTensorSize.size(); ++i){
+        std::snprintf(message.data(), message.size(), "Dimension: %zu Size: %zu", i, outputTensorSize[i]);
+        mLogger.log(ILogger::Severity::kINFO, message.data());
+    }
 
     createExecutionContext();
 
@@ -99,7 +121,7 @@ auto Inference::createExecutionContext() -> void {
     if (!mContext) throw std::runtime_error("Failed to create execution context");
 
     // Set up the input tensor sizing
-    mContext->setInputShape(INPUT_BINDING_NAME, mEngine->getTensorShape(INPUT_BINDING_NAME));
+    mContext->setInputShape(mInputTensorName.c_str(), mEngine->getTensorShape(mInputTensorName.c_str()));
 }
 
 auto Inference::doDetections(cv::Mat const& img) const -> void {
@@ -144,7 +166,6 @@ auto Inference::prepTensors() -> void {
        for(int32_t i = 0; i < rank; ++i){
            size *= extents[i];
        }
-        std::cout << tensorName << " is getting allocated to size " << size << std::endl;
 
        // Create GPU memory for TensorRT to operate on
        if (cudaError_t result = cudaMalloc(mBindings.data() + i, size * sizeof(float)); result != cudaSuccess)
@@ -153,7 +174,7 @@ auto Inference::prepTensors() -> void {
     assert(mContext);
 
     // Create an appropriately sized output tensor
-    auto const [nbDims, d] = mEngine->getTensorShape(OUTPUT_BINDING_NAME);
+    auto const [nbDims, d] = mEngine->getTensorShape(mOutputTensorName.c_str());
     for (int i = 0; i < nbDims; i++) {
        std::array message;
        std::snprintf(message.data(), message.size(), "Size %d %d", i, d[i]);
@@ -172,8 +193,8 @@ auto Inference::getBindingInputIndex(IExecutionContext const* context) -> int {
 }
 
 
-auto Inference::getInputBlobSize() -> std::vector{
-    auto dims = mEngine->getTensorShape(INPUT_BINDING_NAME);
+auto Inference::getInputTensorSize() -> std::vector{
+    auto dims = mEngine->getTensorShape(mInputTensorName.c_str());
     std::vector inputBlobSize;
     inputBlobSize.reserve(dims.nbDims);
 
@@ -181,10 +202,16 @@ auto Inference::getInputBlobSize() -> std::vector{
        inputBlobSize.push_back(dims.d[i]);
     }
 
-    for(int i = 0; i < dims.nbDims; ++i){
-        std::array message;
-        std::snprintf(message.data(), message.size(), "Blob Size %d %d", i, dims.d[i]);
-        mLogger.log(ILogger::Severity::kINFO, message.data());
+    return inputBlobSize;
+}
+
+auto Inference::getOutputTensorSize() -> std::vector{
+    auto dims = mEngine->getTensorShape(mOutputTensorName.c_str());
+    std::vector inputBlobSize;
+    inputBlobSize.reserve(dims.nbDims);
+
+    for(int32_t i = 0; i < dims.nbDims; ++i){
+        inputBlobSize.push_back(dims.d[i]);
     }
 
     return inputBlobSize;
diff --git a/tensorrt/inference.cuh b/tensorrt/inference.cuh
index 736d770e..29c6d4a8 100644
--- a/tensorrt/inference.cuh
+++ b/tensorrt/inference.cuh
@@ -21,7 +21,9 @@ class Inference {
     std::unique_ptr<IExecutionContext> mContext{};
 
     cv::Mat mInputTensor;
+    std::string mInputTensorName;
     cv::Mat mOutputTensor;
+    std::string mOutputTensorName;
 
     std::array mBindings{};
 
@@ -74,7 +76,12 @@ public:
     * @brief Retrieves the mat where the output from the forward pass was stored
     */
    auto getOutputTensor() -> cv::Mat;
 
    /**
-     * @brief Retrieves the expected input matrix size
+     * @brief Retrieves the expected input tensor size
     */
-    auto getInputBlobSize() -> std::vector;
+    auto getInputTensorSize() -> std::vector;
+
+    /**
+     * @brief Retrieves the expected output tensor size
+     */
+    auto getOutputTensorSize() -> std::vector;
 };
diff --git a/tensorrt/inference_wrapper.cpp b/tensorrt/inference_wrapper.cpp
index eeb504f0..0a52944c 100644
--- a/tensorrt/inference_wrapper.cpp
+++ b/tensorrt/inference_wrapper.cpp
@@ -25,6 +25,6 @@ auto InferenceWrapper::getOutputTensor() const -> cv::Mat {
     return mInference->getOutputTensor();
 }
 
-auto InferenceWrapper::getInputBlobSize() -> std::vector{
-    return mInference->getInputBlobSize();
+auto InferenceWrapper::getInputTensorSize() -> std::vector{
+    return mInference->getInputTensorSize();
 }
diff --git a/tensorrt/inference_wrapper.hpp b/tensorrt/inference_wrapper.hpp
index be9b1eb8..3d9c3aeb 100644
--- a/tensorrt/inference_wrapper.hpp
+++ b/tensorrt/inference_wrapper.hpp
@@ -19,5 +19,5 @@ class InferenceWrapper {
     // Retrieve the output tensor from the previous forward pass
     [[nodiscard]] auto getOutputTensor() const -> cv::Mat;
 
-    auto getInputBlobSize() -> std::vector;
+    auto getInputTensorSize() -> std::vector;
 };
diff --git a/tensorrt/tensorrt.cpp b/tensorrt/tensorrt.cpp
index 5319c328..5e122edd 100644
--- a/tensorrt/tensorrt.cpp
+++ b/tensorrt/tensorrt.cpp
@@ -99,6 +99,6 @@ auto TensortRT::parseModelOutput(cv::Mat& output, std::vector<Detection>& detect
     }
 }
 
-auto TensortRT::getInputBlobSize() -> std::vector{
-    return mInferenceWrapper.getInputBlobSize();
+auto TensortRT::getInputTensorSize() -> std::vector{
+    return mInferenceWrapper.getInputTensorSize();
 }
diff --git a/tensorrt/tensorrt.hpp b/tensorrt/tensorrt.hpp
index 70073a97..d7874ba7 100644
--- a/tensorrt/tensorrt.hpp
+++ b/tensorrt/tensorrt.hpp
@@ -33,5 +33,5 @@ class TensortRT {
                          float modelScoreThreshold = 0.75,
                          float modelNMSThreshold = 0.5) const -> void;
 
-    auto getInputBlobSize() -> std::vector;
+    auto getInputTensorSize() -> std::vector;
 };
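Note: patch 03 assumes binding 0 is the input and binding 1 is the output, then verifies that with getTensorIOMode. A sketch of an order-independent alternative (same TensorRT 8.5+ name-based API; this is not what the patch does, just a variant):

    #include <NvInfer.h>
    #include <cstdint>
    #include <stdexcept>
    #include <string>

    // Finds the first tensor with the requested I/O mode instead of relying on index order.
    std::string findTensorName(nvinfer1::ICudaEngine const& engine, nvinfer1::TensorIOMode mode) {
        for (std::int32_t i = 0; i < engine.getNbIOTensors(); ++i) {
            char const* name = engine.getIOTensorName(i);
            if (engine.getTensorIOMode(name) == mode) return name;
        }
        throw std::runtime_error("No tensor with the requested I/O mode");
    }
    // Usage: auto input = findTensorName(engine, nvinfer1::TensorIOMode::kINPUT);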
From 1f7bcd817b525bb22ad4b25f282ad6f7cf5870b9 Mon Sep 17 00:00:00 2001
From: jbrhm
Date: Mon, 23 Sep 2024 12:38:19 -0400
Subject: [PATCH 04/25] Better Naming

---
 tensorrt/inference.cu | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorrt/inference.cu b/tensorrt/inference.cu
index cc5a8eb3..d74457aa 100644
--- a/tensorrt/inference.cu
+++ b/tensorrt/inference.cu
@@ -174,6 +174,7 @@ auto Inference::prepTensors() -> void {
     assert(mContext);
 
     // Create an appropriately sized output tensor
+    // TODO: Fix this
     auto const [nbDims, d] = mEngine->getTensorShape(mOutputTensorName.c_str());
     for (int i = 0; i < nbDims; i++) {
        std::array message;
        std::snprintf(message.data(), message.size(), "Size %d %d", i, d[i]);

From 5024098b38e74312a6dd6ec1f0b7da90f4fa0b2e Mon Sep 17 00:00:00 2001
From: jbrhm
Date: Tue, 24 Sep 2024 13:58:45 -0400
Subject: [PATCH 05/25] Improved

---
 tensorrt/inference.cu          | 31 +++++++++++++++++++------------
 tensorrt/inference.cuh         |  9 ++++++---
 tensorrt/inference_wrapper.cpp |  4 ++--
 tensorrt/inference_wrapper.hpp |  4 +++-
 tensorrt/tensorrt.cpp          |  8 +++++---
 tensorrt/tensorrt.hpp          |  2 ++
 6 files changed, 37 insertions(+), 21 deletions(-)

diff --git a/tensorrt/inference.cu b/tensorrt/inference.cu
index d74457aa..6b4463d5 100644
--- a/tensorrt/inference.cu
+++ b/tensorrt/inference.cu
@@ -12,11 +12,21 @@ using namespace nvinfer1;
  * Modifies stream, outputTensor
  */
 
-Inference::Inference(std::filesystem::path const& onnxModelPath, std::string const& modelName, std::string packagePathString) : mPackagePath{std::move(packagePathString)} {
-    mModelPath = onnxModelPath.string();
+Inference::Inference(std::string modelName, std::string packagePathString) : mModelName{std::move(modelName)}, mPackagePath{std::move(packagePathString)}{
+
+    // Create ONNX and engine file paths
+    mLogger.log(ILogger::Severity::kINFO, mModelName.c_str());
+    mLogger.log(ILogger::Severity::kINFO, mPackagePath.c_str());
+    mONNXModelPath = std::filesystem::path{mPackagePath} / "data" / std::string(mModelName + ".onnx");
+    mEngineModelPath = std::filesystem::path{mPackagePath} / "data" / std::string("tensorrt-engine-" + mModelName + ".engine");
+
+    std::array message{};
+    std::snprintf(message.data(), message.size(), "Reading from ONNX model at %s and creating TensorRT engine at %s", mONNXModelPath.c_str(), mEngineModelPath.c_str());
+    mLogger.log(ILogger::Severity::kINFO, message.data());
 
     // Create the engine object from either the file or from onnx file
-    mEngine = std::unique_ptr<ICudaEngine>{createCudaEngine(onnxModelPath, modelName)};
+    mEngine = std::unique_ptr<ICudaEngine>{createCudaEngine()};
     if (!mEngine) throw std::runtime_error("Failed to create CUDA engine");
 
     mLogger.log(ILogger::Severity::kINFO, "Created CUDA Engine");
@@ -33,7 +43,6 @@ Inference::Inference(std::filesystem::path const& onnxModelPath, std::string con
     // Be verbose about the input tensor size
     auto inputTensorSize = getInputTensorSize();
-    std::array message;
     std::snprintf(message.data(), message.size(), "%s Tensor's Dimensions:", mInputTensorName.c_str());
     mLogger.log(ILogger::Severity::kINFO, message.data());
     for(size_t i = 0; i < inputTensorSize.size(); ++i){
@@ -55,7 +64,7 @@ Inference::Inference(std::filesystem::path const& onnxModelPath, std::string con
     prepTensors();
 }
 
-auto Inference::createCudaEngine(std::filesystem::path const& onnxModelPath, std::string const& modelName) -> ICudaEngine* {
+auto Inference::createCudaEngine() -> ICudaEngine* {
     constexpr auto explicitBatch = 1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
 
     std::unique_ptr<IBuilder> builder{createInferBuilder(mLogger)};
@@ -74,17 +83,15 @@ auto Inference::createCudaEngine(std::filesystem::path const& onnxModelPath, std
     if (!config) throw std::runtime_error("Failed to create Builder Config");
     mLogger.log(ILogger::Severity::kINFO, "Created Builder Config");
 
-    if (!parser->parseFromFile(onnxModelPath.c_str(), static_cast<int>(ILogger::Severity::kINFO))) {
+    if (!parser->parseFromFile(mONNXModelPath.c_str(), static_cast<int>(ILogger::Severity::kINFO))) {
        throw std::runtime_error("Failed to parse ONNX file");
     }
 
     IRuntime* runtime = createInferRuntime(mLogger);
 
     // Define the engine file location relative to the mrover package
-    std::filesystem::path packagePath{mPackagePath};
-    std::filesystem::path enginePath = packagePath / "data" / std::string("tensorrt-engine-").append(modelName).append(".engine");
 
     // Check if engine file exists
-    if (!exists(enginePath)) {
+    if (!exists(mEngineModelPath)) {
        std::cout << "Optimizing ONNX model for TensorRT. This may take a long time..." << std::endl;
 
        // Create the Engine from onnx file
@@ -97,15 +104,15 @@ auto Inference::createCudaEngine(std::filesystem::path const& onnxModelPath, std
 
        // Save Engine to File
        auto trtModelStream = tempEng->serialize();
-        std::ofstream outputFileStream{enginePath, std::ios::binary};
-        outputFileStream.write(static_cast<char const*>(trtModelStream->data()), trtModelStream->size());
+        std::ofstream outputFileStream{mEngineModelPath, std::ios::binary};
+        outputFileStream.write(static_cast<char const*>(trtModelStream->data()), static_cast<std::streamsize>(trtModelStream->size()));
        outputFileStream.close();
 
        return tempEng;
     }
 
     // Load engine from file
-    std::ifstream inputFileStream{enginePath, std::ios::binary};
+    std::ifstream inputFileStream{mEngineModelPath, std::ios::binary};
     std::stringstream engineBuffer;
 
     // Stream in the engine file to the buffer
diff --git a/tensorrt/inference.cuh b/tensorrt/inference.cuh
index 29c6d4a8..0df14ef0 100644
--- a/tensorrt/inference.cuh
+++ b/tensorrt/inference.cuh
@@ -11,9 +11,12 @@ using nvinfer1::IExecutionContext;
 
 class Inference {
-    std::string mModelPath;
+    std::string mModelName;
     std::string mPackagePath;
 
+    std::filesystem::path mONNXModelPath;
+    std::filesystem::path mEngineModelPath;
+
     nvinfer1::Logger mLogger;
 
     std::unique_ptr<ICudaEngine> mEngine{};
@@ -37,7 +40,7 @@ class Inference {
     *
     * Uses the file path to locate the onnx model and the modelName to locate the engine file
     */
-    auto createCudaEngine(std::filesystem::path const& onnxModelPath, std::string const& modelName) -> ICudaEngine*;
+    auto createCudaEngine() -> ICudaEngine*;
 
    /**
     * @brief Performs the Model forward pass and allocates the results in the output tensor
@@ -63,7 +66,7 @@ class Inference {
     auto createExecutionContext() -> void;
 
 public:
-    explicit Inference(std::filesystem::path const& onnxModelPath, std::string const& modelName, std::string packagePathString);
+    explicit Inference(std::string modelName, std::string packagePathString);
 
    /**
     * @brief Runs the forward pass on the given input image in CNN format
diff --git a/tensorrt/inference_wrapper.cpp b/tensorrt/inference_wrapper.cpp
index 0a52944c..d5f9e2f0 100644
--- a/tensorrt/inference_wrapper.cpp
+++ b/tensorrt/inference_wrapper.cpp
@@ -12,8 +12,8 @@ using namespace nvinfer1;
  * Modifies stream, outputTensor
  */
 
-InferenceWrapper::InferenceWrapper(std::string onnxModelPath, std::string const& modelName, std::string const& packagePath) {
-    mInference.reset(new Inference(std::move(onnxModelPath), modelName, packagePath));
+InferenceWrapper::InferenceWrapper(std::string const& modelName, std::string const& packagePath) {
+    mInference = std::make_shared<Inference>(modelName, packagePath);
 }
 
 auto InferenceWrapper::doDetections(cv::Mat const& img) const -> void {
diff --git a/tensorrt/inference_wrapper.hpp b/tensorrt/inference_wrapper.hpp
index 3d9c3aeb..954a9851 100644
--- a/tensorrt/inference_wrapper.hpp
+++ b/tensorrt/inference_wrapper.hpp
@@ -12,7 +12,7 @@ class InferenceWrapper {
 
     ~InferenceWrapper() = default;
 
-    explicit InferenceWrapper(std::string onnxModelPath, std::string const& modelName, std::string const& packagePath);
+    explicit InferenceWrapper(std::string const& modelName, std::string const& packagePath);
 
     auto doDetections(cv::Mat const& img) const -> void;
 
@@ -20,4 +20,6 @@ class InferenceWrapper {
     [[nodiscard]] auto getOutputTensor() const -> cv::Mat;
 
     auto getInputTensorSize() -> std::vector;
+
+    auto getOutputTensorSize() -> std::vector;
 };
diff --git a/tensorrt/tensorrt.cpp b/tensorrt/tensorrt.cpp
index 5e122edd..f1d67415 100644
--- a/tensorrt/tensorrt.cpp
+++ b/tensorrt/tensorrt.cpp
@@ -7,10 +7,8 @@ TensortRT::TensortRT() = default;
 
 TensortRT::TensortRT(string modelName, string packagePathString) : mModelName{std::move(modelName)} {
     std::filesystem::path packagePath{std::move(packagePathString)};
-    std::filesystem::path modelFileName = mModelName.append(".onnx");
-    std::filesystem::path modelPath = packagePath / "data" / modelFileName;
 
-    mInferenceWrapper = InferenceWrapper{modelPath, mModelName, packagePath};
+    mInferenceWrapper = InferenceWrapper{mModelName, packagePath};
 }
 
 TensortRT::~TensortRT() = default;
@@ -102,3 +100,7 @@ auto TensortRT::parseModelOutput(cv::Mat& output, std::vector<Detection>& detect
 auto TensortRT::getInputTensorSize() -> std::vector{
     return mInferenceWrapper.getInputTensorSize();
 }
+
+auto TensortRT::getOutputTensorSize() -> std::vector{
+    return mInferenceWrapper.getOutputTensorSize();
+}
diff --git a/tensorrt/tensorrt.hpp b/tensorrt/tensorrt.hpp
index d7874ba7..72be9dbf 100644
--- a/tensorrt/tensorrt.hpp
+++ b/tensorrt/tensorrt.hpp
@@ -34,4 +34,6 @@ class TensortRT {
                          float modelNMSThreshold = 0.5) const -> void;
 
     auto getInputTensorSize() -> std::vector;
+
+    auto getOutputTensorSize() -> std::vector;
 };
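Note: one thing patch 05 leaves open is that the raw IRuntime* from createInferRuntime is never released. A sketch of the cache-load path with RAII ownership (assumption: TensorRT 8+, where deserializeCudaEngine takes a pointer and a size; not repository code):

    #include <NvInfer.h>
    #include <fstream>
    #include <memory>
    #include <sstream>
    #include <string>

    std::unique_ptr<nvinfer1::ICudaEngine> loadCachedEngine(nvinfer1::IRuntime& runtime, std::string const& enginePath) {
        std::ifstream in{enginePath, std::ios::binary};
        std::stringstream buffer;
        buffer << in.rdbuf(); // slurp the serialized engine
        std::string bytes = buffer.str();
        return std::unique_ptr<nvinfer1::ICudaEngine>{
                runtime.deserializeCudaEngine(bytes.data(), bytes.size())};
    }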
From 32b6b598cfa070f935c649ed04760da16d8fc3c6 Mon Sep 17 00:00:00 2001
From: jbrhm
Date: Tue, 24 Sep 2024 14:30:35 -0400
Subject: [PATCH 06/25] Refactored to remove parsing from tensorrt

---
 .../object_detector/object_detector.cpp |  4 +-
 .../object_detector/object_detector.hpp |  9 +-
 .../object_detector.parsing.cpp         | 83 ++++++++++++++++++
 .../object_detector.processing.cpp      | 11 ++-
 tensorrt/tensorrt.cpp                   | 85 +------------------
 tensorrt/tensorrt.hpp                   | 10 +--
 6 files changed, 106 insertions(+), 96 deletions(-)
 create mode 100644 perception/object_detector/object_detector.parsing.cpp

diff --git a/perception/object_detector/object_detector.cpp b/perception/object_detector/object_detector.cpp
index f59e33dd..1e55f6f6 100644
--- a/perception/object_detector/object_detector.cpp
+++ b/perception/object_detector/object_detector.cpp
@@ -13,7 +13,7 @@ namespace mrover {
                {"hitcount_max", mObjMaxHitcount, 10},
                {"model_name", mModelName, "Large-Dataset"},
                {"model_score_threshold", mModelScoreThreshold, 0.75},
-                {"model_nms_threshold", mModelNmsThreshold, 0.5}};
+                {"model_nms_threshold", mModelNMSThreshold, 0.5}};
 
        ParameterWrapper::declareParameters(this, params);
 
@@ -30,7 +30,7 @@ namespace mrover {
 
        mDebugImgPub = create_publisher<sensor_msgs::msg::Image>("object_detector/debug_img", 1);
 
-        RCLCPP_INFO_STREAM(get_logger(), std::format("Object detector initialized with model: {} and thresholds: {} and {}", mModelName, mModelScoreThreshold, mModelNmsThreshold));
+        RCLCPP_INFO_STREAM(get_logger(), std::format("Object detector initialized with model: {} and thresholds: {} and {}", mModelName, mModelScoreThreshold, mModelNMSThreshold));
     }
 
     StereoObjectDetector::StereoObjectDetector() {
diff --git a/perception/object_detector/object_detector.hpp b/perception/object_detector/object_detector.hpp
index 29bcd264..2c12c312 100644
--- a/perception/object_detector/object_detector.hpp
+++ b/perception/object_detector/object_detector.hpp
@@ -38,13 +38,14 @@ namespace mrover {
        // TODO(quintin): Do not hard code exactly two classes
        std::vector<int> mObjectHitCounts{0, 0};
+        std::vector<std::string> mClasses{"bottle", "hammer"};
 
        int mObjIncrementWeight{};
        int mObjDecrementWeight{};
        int mObjHitThreshold{};
        int mObjMaxHitcount{};
        float mModelScoreThreshold{};
-        float mModelNmsThreshold{};
+        float mModelNMSThreshold{};
 
        auto spiralSearchForValidPoint(sensor_msgs::msg::PointCloud2::UniquePtr const& cloudPtr,
                                       std::size_t u, std::size_t v,
@@ -58,6 +59,12 @@ namespace mrover {
 
        static auto drawDetectionBoxes(cv::InputOutputArray image, std::span<Detection const> detections) -> void;
 
+        auto static parseYOLOv8Output(cv::Mat& output,
+                                      std::vector<Detection>& detections,
+                                      std::vector<std::string> const& classes,
+                                      float modelScoreThreshold = 0.75,
+                                      float modelNMSThreshold = 0.5) -> void;
+
    public:
        explicit ObjectDetectorBase();
 
diff --git a/perception/object_detector/object_detector.parsing.cpp b/perception/object_detector/object_detector.parsing.cpp
new file mode 100644
index 00000000..a4a98b0c
--- /dev/null
+++ b/perception/object_detector/object_detector.parsing.cpp
@@ -0,0 +1,83 @@
+#include "object_detector.hpp"
+namespace mrover {
+    auto ObjectDetectorBase::parseYOLOv8Output(cv::Mat& output, std::vector<Detection>& detections, std::vector<std::string> const& classes, float modelScoreThreshold, float modelNMSThreshold) -> void {
+        // Parse model specific dimensioning from the output
+
+        // The input to this function is expecting a YOLOv8 style model, thus the dimensions should be > rows
+        if (output.cols <= output.rows) {
+            std::array message{};
+            snprintf(message.data(), message.size(), "Something is wrong with the model interpretation, %d cols and %d rows", output.cols, output.rows);
+            throw std::runtime_error(message.data());
+        }
+
+        int rows = output.cols;
+        int dimensions = output.rows;
+
+        // The output of the model is a batchSizex84x8400 matrix
+        // This converts the model to a TODO: Check this dimensioning
+        output = output.reshape(1, dimensions);
+        cv::transpose(output, output);
+
+        // This function expects the image to already be in the correct format thus no distortion is needed
+        float const xFactor = 1.0;
+        float const yFactor = 1.0;
+
+        // Intermediate Storage Containers
+        std::vector<int> classIds;
+        std::vector<float> confidences;
+        std::vector<cv::Rect> boxes;
+
+        // Each row of the output is a detection with a bounding box and associated class scores
+        for (int r = 0; r < rows; ++r) {
+            // Skip first four values as they are the box data
+            cv::Mat scores = output.row(r).colRange(4, dimensions);
+
+            cv::Point classId;
+            double maxClassScore;
+            cv::minMaxLoc(scores, nullptr, &maxClassScore, nullptr, &classId);
+
+            // Determine if the class is an acceptable confidence level, otherwise disregard
+            if (maxClassScore <= modelScoreThreshold) continue;
+
+            confidences.push_back(static_cast<float>(maxClassScore));
+            classIds.push_back(classId.x);
+
+            // Get the bounding box data
+            cv::Mat box = output.row(r).colRange(0, 4);
+            auto x = box.at<float>(0);
+            auto y = box.at<float>(1);
+            auto w = box.at<float>(2);
+            auto h = box.at<float>(3);
+
+            // Cast the corners into integers to be used on pixels
+            auto left = static_cast<int>((x - 0.5 * w) * xFactor);
+            auto top = static_cast<int>((y - 0.5 * h) * yFactor);
+            auto width = static_cast<int>(w * xFactor);
+            auto height = static_cast<int>(h * yFactor);
+
+            // Push back the box into storage
+            boxes.emplace_back(left, top, width, height);
+        }
+
+        //Coalesce the boxes into a smaller number of distinct boxes
+        std::vector<int> nmsResult;
+        cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nmsResult);
+
+        // Fill in the output Detections Vector
+        for (int i: nmsResult) {
+            Detection result;
+
+            //Fill in the id and confidence for the class
+            result.classId = classIds[i];
+            result.confidence = confidences[i];
+
+            //Fill in the class name and box information
+            result.className = classes[result.classId];
+            result.box = boxes[i];
+
+            //Push back the detection into the vector for storage
+            detections.push_back(result);
+        }
+    }
+
+};
diff --git a/perception/object_detector/object_detector.processing.cpp b/perception/object_detector/object_detector.processing.cpp
index 881b6be1..39c56252 100644
--- a/perception/object_detector/object_detector.processing.cpp
+++ b/perception/object_detector/object_detector.processing.cpp
@@ -14,8 +14,13 @@ namespace mrover {
            RCLCPP_INFO_STREAM(get_logger(), std::format("Image size changed from [{}, {}] to [{}, {}]", mRgbImage.cols, mRgbImage.rows, msg->width, msg->height));
            mRgbImage = cv::Mat{static_cast<int>(msg->height), static_cast<int>(msg->width), CV_8UC3, cv::Scalar{0, 0, 0, 0}};
        }
-        convertPointCloudToRGB(msg, mRgbImage);
+
+        // If 0x0 image
+        if(!mRgbImage.total()){
+            return;
+        }
+
+        convertPointCloudToRGB(msg, mRgbImage);
 
        // Convert the RGB Image into the blob Image format
        cv::Mat blobSizedImage;
@@ -39,7 +44,9 @@ namespace mrover {
 
        // Run the blob through the model
        std::vector<Detection> detections{};
-        mTensorRT.modelForwardPass(mImageBlob, detections, mModelScoreThreshold, mModelNmsThreshold);
+        cv::Mat outputTensor;
+        mTensorRT.modelForwardPass(mImageBlob, outputTensor);
+        ObjectDetectorBase::parseYOLOv8Output(outputTensor, detections, mClasses, mModelScoreThreshold, mModelNMSThreshold);
 
        mLoopProfiler.measureEvent("Execution");
 
diff --git a/tensorrt/tensorrt.cpp b/tensorrt/tensorrt.cpp
index f1d67415..0f6c157d 100644
--- a/tensorrt/tensorrt.cpp
+++ b/tensorrt/tensorrt.cpp
@@ -5,7 +5,6 @@ using namespace std;
 TensortRT::TensortRT() = default;
 
 TensortRT::TensortRT(string modelName, string packagePathString) : mModelName{std::move(modelName)} {
-
     std::filesystem::path packagePath{std::move(packagePathString)};
 
     mInferenceWrapper = InferenceWrapper{mModelName, packagePath};
@@ -13,89 +12,11 @@ TensortRT::TensortRT(string modelName, string packagePathString) : mModelName{st
 
 TensortRT::~TensortRT() = default;
 
-auto TensortRT::modelForwardPass(cv::Mat const& blob, std::vector<Detection>& detections, float modelScoreThreshold, float modelNMSThreshold) const -> void {
-    mInferenceWrapper.doDetections(blob);
-    cv::Mat output = mInferenceWrapper.getOutputTensor();
-    parseModelOutput(output, detections, modelScoreThreshold, modelNMSThreshold);
+auto TensortRT::modelForwardPass(cv::Mat const& inputBlobTensor, cv::Mat& outputTensor) const -> void {
+    mInferenceWrapper.doDetections(inputBlobTensor);
+    outputTensor = mInferenceWrapper.getOutputTensor();
 }
 
-auto TensortRT::parseModelOutput(cv::Mat& output, std::vector<Detection>& detections, float modelScoreThreshold, float modelNMSThreshold) const -> void {
-    // Parse model specific dimensioning from the output
-
-    // The input to this function is expecting a YOLOv8 style model, thus the dimensions should be > rows
-    if (output.cols <= output.rows) {
-        throw std::runtime_error("Something is wrong Model with interpretation");
-    }
-
-    int rows = output.cols;
-    int dimensions = output.rows;
-
-    // The output of the model is a batchSizex84x8400 matrix
-    // This converts the model to a TODO: Check this dimensioning
-    output = output.reshape(1, dimensions);
-    cv::transpose(output, output);
-
-    // This function expects the image to already be in the correct format thus no distrotion is needed
-    float const xFactor = 1.0;
-    float const yFactor = 1.0;
-
-    // Intermediate Storage Containers
-    std::vector<int> classIds;
-    std::vector<float> confidences;
-    std::vector<cv::Rect> boxes;
-
-    // Each row of the output is a detection with a bounding box and associated class scores
-    for (int r = 0; r < rows; ++r) {
-        // Skip first four values as they are the box data
-        cv::Mat scores = output.row(r).colRange(4, dimensions);
-
-        cv::Point classId;
-        double maxClassScore;
-        cv::minMaxLoc(scores, nullptr, &maxClassScore, nullptr, &classId);
-
-        // Determine if the class is an acceptable confidence level, otherwise disregard
-        if (maxClassScore <= modelScoreThreshold) continue;
-
-        confidences.push_back(static_cast<float>(maxClassScore));
-        classIds.push_back(classId.x);
-
-        // Get the bounding box data
-        cv::Mat box = output.row(r).colRange(0, 4);
-        auto x = box.at<float>(0);
-        auto y = box.at<float>(1);
-        auto w = box.at<float>(2);
-        auto h = box.at<float>(3);
-
-        // Cast the corners into integers to be used on pixels
-        auto left = static_cast<int>((x - 0.5 * w) * xFactor);
-        auto top = static_cast<int>((y - 0.5 * h) * yFactor);
-        auto width = static_cast<int>(w * xFactor);
-        auto height = static_cast<int>(h * yFactor);
-
-        // Push abck the box into storage
-        boxes.emplace_back(left, top, width, height);
-    }
-
-    //Coalesce the boxes into a smaller number of distinct boxes
-    std::vector<int> nmsResult;
-    cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nmsResult);
-
-    // Fill in the output Detections Vector
-    for (int i: nmsResult) {
-        Detection result;
-
-        //Fill in the id and confidence for the class
-        result.classId = classIds[i];
-        result.confidence = confidences[i];
-
-        //Fill in the class name and box information
-        result.className = classes[result.classId];
-        result.box = boxes[i];
-
-        //Push back the detection into the for storagevector
-        detections.push_back(result);
-    }
-}
 
 auto TensortRT::getInputTensorSize() -> std::vector{
     return mInferenceWrapper.getInputTensorSize();
diff --git a/tensorrt/tensorrt.hpp b/tensorrt/tensorrt.hpp
index 72be9dbf..eb02f707 100644
--- a/tensorrt/tensorrt.hpp
+++ b/tensorrt/tensorrt.hpp
@@ -12,14 +12,9 @@ struct Detection {
 
 class TensortRT {
     std::string mModelName;
-    std::vector<std::string> classes{"bottle", "hammer"};
 
     InferenceWrapper mInferenceWrapper;
 
-    auto parseModelOutput(cv::Mat& output,
-                          std::vector<Detection>& detections,
-                          float modelScoreThreshold = 0.75,
-                          float modelNMSThreshold = 0.5) const -> void;
 
 public:
     TensortRT();
@@ -28,10 +23,7 @@ class TensortRT {
 
     ~TensortRT();
 
-    auto modelForwardPass(cv::Mat const& blob,
-                          std::vector<Detection>& detections,
-                          float modelScoreThreshold = 0.75,
-                          float modelNMSThreshold = 0.5) const -> void;
+    auto modelForwardPass(cv::Mat const& inputBlobTensor, cv::Mat& outputTensor) const -> void;
 
     auto getInputTensorSize() -> std::vector;
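Note: for reference, the tensor parseYOLOv8Output() receives for this two-class model is [1 x 6 x 8400]: rows 0-3 hold cx, cy, w, h and rows 4-5 the per-class scores, one candidate per column, which is why the function transposes before scanning rows. A self-contained sketch with made-up values (not repository code):

    #include <iostream>
    #include <opencv2/core.hpp>

    int main() {
        cv::Mat raw = cv::Mat::zeros(6, 8400, CV_32F); // 4 box values + 2 class scores
        raw.at<float>(0, 0) = 320.f; // cx
        raw.at<float>(1, 0) = 320.f; // cy
        raw.at<float>(2, 0) = 100.f; // w
        raw.at<float>(3, 0) = 80.f;  // h
        raw.at<float>(4, 0) = 0.9f;  // "bottle" score

        cv::Mat candidates;
        cv::transpose(raw, candidates); // 8400 x 6, one candidate per row

        cv::Mat scores = candidates.row(0).colRange(4, 6);
        double best;
        cv::Point id;
        cv::minMaxLoc(scores, nullptr, &best, nullptr, &id);
        std::cout << "class " << id.x << " score " << best << '\n'; // class 0 score 0.9
        return 0;
    }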
From 35525b4d7f06c5671961a595dadad345bec18251 Mon Sep 17 00:00:00 2001
From: jbrhm
Date: Tue, 24 Sep 2024 14:50:48 -0400
Subject: [PATCH 07/25] Style Checking

---
 .../object_detector/object_detector.cpp |   6 +-
 .../object_detector/object_detector.hpp |  26 +--
 .../object_detector.parsing.cpp         | 162 +++++++++---------
 .../object_detector.processing.cpp      |  45 +++--
 scripts/visualizer.py                   |   2 +-
 tensorrt/inference_wrapper.hpp          |   4 +-
 tensorrt/tensorrt.cpp                   |   1 -
 tensorrt/tensorrt.hpp                   |   4 +-
 8 files changed, 124 insertions(+), 126 deletions(-)

diff --git a/perception/object_detector/object_detector.cpp b/perception/object_detector/object_detector.cpp
index 1e55f6f6..efd9836d 100644
--- a/perception/object_detector/object_detector.cpp
+++ b/perception/object_detector/object_detector.cpp
@@ -23,10 +23,10 @@ namespace mrover {
 
        RCLCPP_INFO_STREAM(get_logger(), "Found package path " << packagePath);
 
-        // Initialize TensorRT Inference Object and Get Important Output Information
-        mModelType = MODEL_TYPE::YOLOv8;
+        // Initialize TensorRT Inference Object and Get Important Output Information
+        mModelType = MODEL_TYPE::YOLOv8;
        mTensorRT = TensortRT{mModelName, packagePath.string()};
-        mInputTensorSize = mTensorRT.getInputTensorSize();
+        mInputTensorSize = mTensorRT.getInputTensorSize();
 
        mDebugImgPub = create_publisher<sensor_msgs::msg::Image>("object_detector/debug_img", 1);
 
diff --git a/perception/object_detector/object_detector.hpp b/perception/object_detector/object_detector.hpp
index 2c12c312..8898584a 100644
--- a/perception/object_detector/object_detector.hpp
+++ b/perception/object_detector/object_detector.hpp
@@ -7,9 +7,9 @@ namespace mrover {
     class ObjectDetectorBase : public rclcpp::Node {
 
     protected:
-        enum MODEL_TYPE {
-            YOLOv8 = 0,
-        };
+        enum MODEL_TYPE {
+            YOLOv8 = 0,
+        };
 
        static constexpr char const* NODE_NAME = "object_detector";
 
@@ -17,15 +17,15 @@ namespace mrover {
        std::shared_ptr<tf2_ros::TransformBroadcaster> mTfBroadcaster = std::make_shared<tf2_ros::TransformBroadcaster>(this);
        std::shared_ptr<tf2_ros::TransformListener> mTfListener = std::make_shared<tf2_ros::TransformListener>(*mTfBuffer);
 
-        // TF Frames
+        // TF Frames
        std::string mCameraFrame;
        std::string mWorldFrame;
 
-        // Model Member Variables
-        MODEL_TYPE mModelType;
+        // Model Member Variables
+        MODEL_TYPE mModelType;
        std::string mModelName;
        cv::Mat mRgbImage, mImageBlob;
-        std::vector mInputTensorSize;
+        std::vector mInputTensorSize;
 
        LoopProfiler mLoopProfiler;
 
@@ -38,7 +38,7 @@ namespace mrover {
        // TODO(quintin): Do not hard code exactly two classes
        std::vector<int> mObjectHitCounts{0, 0};
-        std::vector<std::string> mClasses{"bottle", "hammer"};
+        std::vector<std::string> mClasses{"bottle", "hammer"};
 
        int mObjIncrementWeight{};
        int mObjDecrementWeight{};
@@ -59,11 +59,11 @@ namespace mrover {
 
        static auto drawDetectionBoxes(cv::InputOutputArray image, std::span<Detection const> detections) -> void;
 
-        auto static parseYOLOv8Output(cv::Mat& output,
-                                      std::vector<Detection>& detections,
-                                      std::vector<std::string> const& classes,
-                                      float modelScoreThreshold = 0.75,
-                                      float modelNMSThreshold = 0.5) -> void;
+        auto static parseYOLOv8Output(cv::Mat& output,
+                                      std::vector<Detection>& detections,
+                                      std::vector<std::string> const& classes,
+                                      float modelScoreThreshold = 0.75,
+                                      float modelNMSThreshold = 0.5) -> void;
 
    public:
        explicit ObjectDetectorBase();
 
diff --git a/perception/object_detector/object_detector.parsing.cpp b/perception/object_detector/object_detector.parsing.cpp
index a4a98b0c..358e0441 100644
--- a/perception/object_detector/object_detector.parsing.cpp
+++ b/perception/object_detector/object_detector.parsing.cpp
@@ -1,83 +1,83 @@
 #include "object_detector.hpp"
 namespace mrover {
-    auto ObjectDetectorBase::parseYOLOv8Output(cv::Mat& output, std::vector<Detection>& detections, std::vector<std::string> const& classes, float modelScoreThreshold, float modelNMSThreshold) -> void {
-        // Parse model specific dimensioning from the output
-
-        // The input to this function is expecting a YOLOv8 style model, thus the dimensions should be > rows
-        if (output.cols <= output.rows) {
-            std::array message{};
-            snprintf(message.data(), message.size(), "Something is wrong with the model interpretation, %d cols and %d rows", output.cols, output.rows);
-            throw std::runtime_error(message.data());
-        }
-
-        int rows = output.cols;
-        int dimensions = output.rows;
-
-        // The output of the model is a batchSizex84x8400 matrix
-        // This converts the model to a TODO: Check this dimensioning
-        output = output.reshape(1, dimensions);
-        cv::transpose(output, output);
-
-        // This function expects the image to already be in the correct format thus no distortion is needed
-        float const xFactor = 1.0;
-        float const yFactor = 1.0;
-
-        // Intermediate Storage Containers
-        std::vector<int> classIds;
-        std::vector<float> confidences;
-        std::vector<cv::Rect> boxes;
-
-        // Each row of the output is a detection with a bounding box and associated class scores
-        for (int r = 0; r < rows; ++r) {
-            // Skip first four values as they are the box data
-            cv::Mat scores = output.row(r).colRange(4, dimensions);
-
-            cv::Point classId;
-            double maxClassScore;
-            cv::minMaxLoc(scores, nullptr, &maxClassScore, nullptr, &classId);
-
-            // Determine if the class is an acceptable confidence level, otherwise disregard
-            if (maxClassScore <= modelScoreThreshold) continue;
-
-            confidences.push_back(static_cast<float>(maxClassScore));
-            classIds.push_back(classId.x);
-
-            // Get the bounding box data
-            cv::Mat box = output.row(r).colRange(0, 4);
-            auto x = box.at<float>(0);
-            auto y = box.at<float>(1);
-            auto w = box.at<float>(2);
-            auto h = box.at<float>(3);
-
-            // Cast the corners into integers to be used on pixels
-            auto left = static_cast<int>((x - 0.5 * w) * xFactor);
-            auto top = static_cast<int>((y - 0.5 * h) * yFactor);
-            auto width = static_cast<int>(w * xFactor);
-            auto height = static_cast<int>(h * yFactor);
-
-            // Push back the box into storage
-            boxes.emplace_back(left, top, width, height);
-        }
-
-        //Coalesce the boxes into a smaller number of distinct boxes
-        std::vector<int> nmsResult;
-        cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nmsResult);
-
-        // Fill in the output Detections Vector
-        for (int i: nmsResult) {
-            Detection result;
-
-            //Fill in the id and confidence for the class
-            result.classId = classIds[i];
-            result.confidence = confidences[i];
-
-            //Fill in the class name and box information
-            result.className = classes[result.classId];
-            result.box = boxes[i];
-
-            //Push back the detection into the vector for storage
-            detections.push_back(result);
-        }
-    }
-
-};
+    auto ObjectDetectorBase::parseYOLOv8Output(cv::Mat& output, std::vector<Detection>& detections, std::vector<std::string> const& classes, float modelScoreThreshold, float modelNMSThreshold) -> void {
+        // Parse model specific dimensioning from the output
+
+        // The input to this function is expecting a YOLOv8 style model, thus the dimensions should be > rows
+        if (output.cols <= output.rows) {
+            std::array message{};
+            snprintf(message.data(), message.size(), "Something is wrong with the model interpretation, %d cols and %d rows", output.cols, output.rows);
+            throw std::runtime_error(message.data());
+        }
+
+        int rows = output.cols;
+        int dimensions = output.rows;
+
+        // The output of the model is a batchSizex84x8400 matrix
+        // This converts the model to a TODO: Check this dimensioning
+        output = output.reshape(1, dimensions);
+        cv::transpose(output, output);
+
+        // This function expects the image to already be in the correct format thus no distortion is needed
+        float const xFactor = 1.0;
+        float const yFactor = 1.0;
+
+        // Intermediate Storage Containers
+        std::vector<int> classIds;
+        std::vector<float> confidences;
+        std::vector<cv::Rect> boxes;
+
+        // Each row of the output is a detection with a bounding box and associated class scores
+        for (int r = 0; r < rows; ++r) {
+            // Skip first four values as they are the box data
+            cv::Mat scores = output.row(r).colRange(4, dimensions);
+
+            cv::Point classId;
+            double maxClassScore;
+            cv::minMaxLoc(scores, nullptr, &maxClassScore, nullptr, &classId);
+
+            // Determine if the class is an acceptable confidence level, otherwise disregard
+            if (maxClassScore <= modelScoreThreshold) continue;
+
+            confidences.push_back(static_cast<float>(maxClassScore));
+            classIds.push_back(classId.x);
+
+            // Get the bounding box data
+            cv::Mat box = output.row(r).colRange(0, 4);
+            auto x = box.at<float>(0);
+            auto y = box.at<float>(1);
+            auto w = box.at<float>(2);
+            auto h = box.at<float>(3);
+
+            // Cast the corners into integers to be used on pixels
+            auto left = static_cast<int>((x - 0.5 * w) * xFactor);
+            auto top = static_cast<int>((y - 0.5 * h) * yFactor);
+            auto width = static_cast<int>(w * xFactor);
+            auto height = static_cast<int>(h * yFactor);
+
+            // Push back the box into storage
+            boxes.emplace_back(left, top, width, height);
+        }
+
+        //Coalesce the boxes into a smaller number of distinct boxes
+        std::vector<int> nmsResult;
+        cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nmsResult);
+
+        // Fill in the output Detections Vector
+        for (int i: nmsResult) {
+            Detection result;
+
+            //Fill in the id and confidence for the class
+            result.classId = classIds[i];
+            result.confidence = confidences[i];
+
+            //Fill in the class name and box information
+            result.className = classes[result.classId];
+            result.box = boxes[i];
+
+            //Push back the detection into the vector for storage
+            detections.push_back(result);
+        }
+    }
+
+}; // namespace mrover
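Note: the NMS call at the heart of the parser above both applies the score threshold and collapses overlapping boxes. A small worked example of cv::dnn::NMSBoxes semantics with made-up boxes (not repository code):

    #include <iostream>
    #include <vector>
    #include <opencv2/core.hpp>
    #include <opencv2/dnn.hpp>

    int main() {
        std::vector<cv::Rect> boxes{{100, 100, 50, 50}, {102, 98, 52, 50}, {300, 300, 40, 40}};
        std::vector<float> confidences{0.9f, 0.8f, 0.7f};

        std::vector<int> kept;
        cv::dnn::NMSBoxes(boxes, confidences, /*scoreThreshold=*/0.75f, /*nmsThreshold=*/0.5f, kept);

        // Prints only 0: box 1 overlaps box 0 and loses on confidence,
        // box 2 fails the 0.75 score threshold outright.
        for (int i: kept) std::cout << "kept box " << i << '\n';
        return 0;
    }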
diff --git a/perception/object_detector/object_detector.processing.cpp b/perception/object_detector/object_detector.processing.cpp
index 39c56252..bb5f1506 100644
--- a/perception/object_detector/object_detector.processing.cpp
+++ b/perception/object_detector/object_detector.processing.cpp
@@ -15,38 +15,37 @@ namespace mrover {
            mRgbImage = cv::Mat{static_cast<int>(msg->height), static_cast<int>(msg->width), CV_8UC3, cv::Scalar{0, 0, 0, 0}};
        }
 
-        // If 0x0 image
-        if(!mRgbImage.total()){
-            return;
-        }
+        // If 0x0 image
+        if (!mRgbImage.total()) {
+            return;
+        }
 
        convertPointCloudToRGB(msg, mRgbImage);
-
-        // Convert the RGB Image into the blob Image format
+
+        // Convert the RGB Image into the blob Image format
        cv::Mat blobSizedImage;
-        switch(mModelType){
-            case MODEL_TYPE::YOLOv8:
-            {
-                if(mInputTensorSize.size() != 4){
-                    throw std::runtime_error("Expected Blob Size to be of size 4, are you using the correct model type?");
-                }
-
-                static constexpr double UCHAR_TO_DOUBLE = 1.0 / 255.0;
-
-                cv::Size blobSize{static_cast<int>(mInputTensorSize[2]), static_cast<int>(mInputTensorSize[3])};
-                cv::resize(mRgbImage, blobSizedImage, blobSize);
-                cv::dnn::blobFromImage(blobSizedImage, mImageBlob, UCHAR_TO_DOUBLE, blobSize, cv::Scalar{}, false, false);
-                break;
-            }
-        }
+        switch (mModelType) {
+            case MODEL_TYPE::YOLOv8: {
+                if (mInputTensorSize.size() != 4) {
+                    throw std::runtime_error("Expected Blob Size to be of size 4, are you using the correct model type?");
+                }
+
+                static constexpr double UCHAR_TO_DOUBLE = 1.0 / 255.0;
+
+                cv::Size blobSize{static_cast<int>(mInputTensorSize[2]), static_cast<int>(mInputTensorSize[3])};
+                cv::resize(mRgbImage, blobSizedImage, blobSize);
+                cv::dnn::blobFromImage(blobSizedImage, mImageBlob, UCHAR_TO_DOUBLE, blobSize, cv::Scalar{}, false, false);
+                break;
+            }
+        }
 
        mLoopProfiler.measureEvent("Conversion");
 
        // Run the blob through the model
        std::vector<Detection> detections{};
        cv::Mat outputTensor;
        mTensorRT.modelForwardPass(mImageBlob, outputTensor);
-        ObjectDetectorBase::parseYOLOv8Output(outputTensor, detections, mClasses, mModelScoreThreshold, mModelNMSThreshold);
+        ObjectDetectorBase::parseYOLOv8Output(outputTensor, detections, mClasses, mModelScoreThreshold, mModelNMSThreshold);
 
        mLoopProfiler.measureEvent("Execution");
 
diff --git a/scripts/visualizer.py b/scripts/visualizer.py
index 5aae7a5d..336f9e67 100755
--- a/scripts/visualizer.py
+++ b/scripts/visualizer.py
@@ -112,7 +112,7 @@ def paintEvent(self, event):
         self.resize(self.renderer.defaultSize())
         self.renderer.render(painter)
 
-    def update(self):
+    def update(self):  # type: ignore[override]
         rclpy.spin_once(self.viz)
         with self.state_machine.mutex:
             if self.graph is None or self.state_machine.needs_redraw:
diff --git a/tensorrt/inference_wrapper.hpp b/tensorrt/inference_wrapper.hpp
index 954a9851..d6e15a36 100644
--- a/tensorrt/inference_wrapper.hpp
+++ b/tensorrt/inference_wrapper.hpp
@@ -19,7 +19,7 @@ class InferenceWrapper {
     // Retrieve the output tensor from the previous forward pass
     [[nodiscard]] auto getOutputTensor() const -> cv::Mat;
 
-    auto getInputTensorSize() -> std::vector;
+    [[nodiscard]] auto getInputTensorSize() -> std::vector;
 
-    auto getOutputTensorSize() -> std::vector;
+    [[nodiscard]] auto getOutputTensorSize() -> std::vector;
 };
diff --git a/tensorrt/tensorrt.cpp b/tensorrt/tensorrt.cpp
index 0f6c157d..2c4dc3ff 100644
--- a/tensorrt/tensorrt.cpp
+++ b/tensorrt/tensorrt.cpp
@@ -17,7 +17,6 @@ auto TensortRT::modelForwardPass(cv::Mat const& inputBlobTensor, cv::Mat& output
     outputTensor = mInferenceWrapper.getOutputTensor();
 }
 
-
 auto TensortRT::getInputTensorSize() -> std::vector{
     return mInferenceWrapper.getInputTensorSize();
 }
diff --git a/tensorrt/tensorrt.hpp b/tensorrt/tensorrt.hpp
index 72be9dbf..2bd846fe 100644
--- a/tensorrt/tensorrt.hpp
+++ b/tensorrt/tensorrt.hpp
@@ -25,7 +25,7 @@ class TensortRT {
 
     auto modelForwardPass(cv::Mat const& inputBlobTensor, cv::Mat& outputTensor) const -> void;
 
-    auto getInputTensorSize() -> std::vector;
+    [[nodiscard]] auto getInputTensorSize() -> std::vector;
 
-    auto getOutputTensorSize() -> std::vector;
+    [[nodiscard]] auto getOutputTensorSize() -> std::vector;
 };

From 2cb094124c546f77e54f11005c2f708c25644824 Mon Sep 17 00:00:00 2001
From: jbrhm
Date: Tue, 24 Sep 2024 14:52:02 -0400
Subject: [PATCH 08/25] Style Checking

---
 tensorrt/tensorrt.hpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tensorrt/tensorrt.hpp b/tensorrt/tensorrt.hpp
index 2bd846fe..dd45596b 100644
--- a/tensorrt/tensorrt.hpp
+++ b/tensorrt/tensorrt.hpp
@@ -12,10 +12,8 @@ struct Detection {
 
 class TensortRT {
     std::string mModelName;
-
     InferenceWrapper mInferenceWrapper;
-
 public:
     TensortRT();
 

From 317645deef2875b728624ce6450cacd9137e9c27 Mon Sep 17 00:00:00 2001
From: jbrhm
Date: Wed, 25 Sep 2024 23:30:04 -0400
Subject: [PATCH 09/25] pre struct

---
 perception/object_detector/object_detector.processing.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/perception/object_detector/object_detector.processing.cpp b/perception/object_detector/object_detector.processing.cpp
index bb5f1506..bdba3321 100644
--- a/perception/object_detector/object_detector.processing.cpp
+++ b/perception/object_detector/object_detector.processing.cpp
@@ -45,7 +45,12 @@ namespace mrover {
        std::vector<Detection> detections{};
        cv::Mat outputTensor;
        mTensorRT.modelForwardPass(mImageBlob, outputTensor);
-        ObjectDetectorBase::parseYOLOv8Output(outputTensor, detections, mClasses, mModelScoreThreshold, mModelNMSThreshold);
+
+        switch (mModelType) {
+            case MODEL_TYPE::YOLOv8: {
+                ObjectDetectorBase::parseYOLOv8Output(outputTensor, detections, mClasses, mModelScoreThreshold, mModelNMSThreshold);
+            }
+        }
 
        mLoopProfiler.measureEvent("Execution");
 
From dc40933fba503f14e60dbf2b993638d88e4dbdaf Mon Sep 17 00:00:00 2001
From: jbrhm
Date: Thu, 26 Sep 2024 11:21:03 -0400
Subject: [PATCH 10/25] post model struct

---
 .../object_detector/object_detector.cpp |  22 ++++++---
 .../object_detector/object_detector.hpp |  49 ++++++++++++-------
 .../object_detector.parsing.cpp         |  21 ++++++--
 .../object_detector.processing.cpp      |  31 +++---------
 perception/object_detector/pch.hpp      |   1 +
 5 files changed, 69 insertions(+), 55 deletions(-)

diff --git a/perception/object_detector/object_detector.cpp b/perception/object_detector/object_detector.cpp
index efd9836d..3fe57d43 100644
--- a/perception/object_detector/object_detector.cpp
+++ b/perception/object_detector/object_detector.cpp
@@ -4,6 +4,10 @@ namespace mrover {
 
     ObjectDetectorBase::ObjectDetectorBase() : rclcpp::Node(NODE_NAME), mLoopProfiler{get_logger()} {
+        std::string modelName;
+        float modelScoreThreshold{};
+        float modelNMSThreshold{};
+
        std::vector<ParameterWrapper> params{
                {"camera_frame", mCameraFrame, "zed_left_camera_frame"},
                {"world_frame", mWorldFrame, "map"},
@@ -11,26 +15,28 @@ namespace mrover {
                {"decrement_weight", mObjDecrementWeight, 1},
                {"hitcount_threshold", mObjHitThreshold, 5},
                {"hitcount_max", mObjMaxHitcount, 10},
-                {"model_name", mModelName, "Large-Dataset"},
-                {"model_score_threshold", mModelScoreThreshold, 0.75},
-                {"model_nms_threshold", mModelNMSThreshold, 0.5}};
+                {"model_name", modelName, "Large-Dataset"},
+                {"model_score_threshold", modelScoreThreshold, 0.75},
+                {"model_nms_threshold", modelNMSThreshold, 0.5}};
 
        ParameterWrapper::declareParameters(this, params);
+
+        // All of these variables will be invalidated after calling this function
 
        std::filesystem::path packagePath = std::filesystem::path{ament_index_cpp::get_package_prefix("mrover")} / ".." / ".." / "src" / "mrover";
 
-        RCLCPP_INFO_STREAM(get_logger(), "Opening Model " << mModelName);
+        RCLCPP_INFO_STREAM(get_logger(), "Opening Model " << modelName);
 
        RCLCPP_INFO_STREAM(get_logger(), "Found package path " << packagePath);
 
        // Initialize TensorRT Inference Object and Get Important Output Information
-        mTensorRT = TensortRT{mModelName, packagePath.string()};
+        mTensorRT = TensortRT{modelName, packagePath.string()};
+
+        mModel = Model(modelName, {0, 0}, {"bottle", "hammer"}, mTensorRT.getInputTensorSize(), mTensorRT.getOutputTensorSize(), {modelScoreThreshold, modelNMSThreshold}, ObjectDetectorBase::preprocessYOLOv8Input, ObjectDetectorBase::parseYOLOv8Output);
 
        mDebugImgPub = create_publisher<sensor_msgs::msg::Image>("object_detector/debug_img", 1);
 
-        RCLCPP_INFO_STREAM(get_logger(), std::format("Object detector initialized with model: {} and thresholds: {} and {}", mModelName, mModelScoreThreshold, mModelNMSThreshold));
+        RCLCPP_INFO_STREAM(get_logger(), std::format("Object detector initialized with model: {} and thresholds: {} and {}", mModel.modelName, modelScoreThreshold, modelNMSThreshold));
     }
 
     StereoObjectDetector::StereoObjectDetector() {
diff --git a/perception/object_detector/object_detector.hpp b/perception/object_detector/object_detector.hpp
index 8898584a..a605059a 100644
--- a/perception/object_detector/object_detector.hpp
+++ b/perception/object_detector/object_detector.hpp
@@ -7,9 +7,29 @@ namespace mrover {
     class ObjectDetectorBase : public rclcpp::Node {
 
     protected:
-        enum MODEL_TYPE {
-            YOLOv8 = 0,
-        };
+        struct Model {
+            std::string modelName;
+
+            std::vector<int> objectHitCounts;
+
+            std::vector<std::string> classes;
+
+            std::vector inputTensorSize;
+
+            std::vector outputTensorSize;
+
+            std::vector<float> buffer;
+
+            // Converts an rgb image to a blob
+            std::function<void(Model const&, cv::Mat&, cv::Mat&)> rgbImageToBlob;
+
+            // Converts an output tensor into a vector of detections
+            std::function<void(Model const&, cv::Mat&, std::vector<Detection>&)> outputTensorToDetections;
+
+            Model() = default;
+
+            Model(std::string _modelName, std::vector<int> _objectHitCounts, std::vector<std::string> _classes, std::vector _inputTensorSize, std::vector _outputTensorSize, std::vector<float> _buffer, std::function<void(Model const&, cv::Mat&, cv::Mat&)> _rgbImageToBlob, std::function<void(Model const&, cv::Mat&, std::vector<Detection>&)> _outputTensorToDetections) : modelName{std::move(_modelName)}, objectHitCounts(std::move(_objectHitCounts)), classes(std::move(_classes)), inputTensorSize(std::move(_inputTensorSize)), outputTensorSize(std::move(_outputTensorSize)), buffer(std::move(_buffer)), rgbImageToBlob{std::move(_rgbImageToBlob)}, outputTensorToDetections{std::move(_outputTensorToDetections)} {}
+        };
 
        static constexpr char const* NODE_NAME = "object_detector";
 
@@ -17,15 +37,12 @@ namespace mrover {
        std::shared_ptr<tf2_ros::TransformBroadcaster> mTfBroadcaster = std::make_shared<tf2_ros::TransformBroadcaster>(this);
        std::shared_ptr<tf2_ros::TransformListener> mTfListener = std::make_shared<tf2_ros::TransformListener>(*mTfBuffer);
 
-        // TF Frames
+        Model mModel;
+
        std::string mCameraFrame;
        std::string mWorldFrame;
 
-        // Model Member Variables
-        MODEL_TYPE mModelType;
-        std::string mModelName;
        cv::Mat mRgbImage, mImageBlob;
-        std::vector mInputTensorSize;
 
        LoopProfiler mLoopProfiler;
 
@@ -36,16 +53,10 @@ namespace mrover {
        rclcpp::Publisher<sensor_msgs::msg::Image>::SharedPtr mDebugImgPub;
        rclcpp::Subscription<sensor_msgs::msg::PointCloud2>::SharedPtr mSensorSub;
 
-        // TODO(quintin): Do not hard code exactly two classes
-        std::vector<int> mObjectHitCounts{0, 0};
-        std::vector<std::string> mClasses{"bottle", "hammer"};
-
        int mObjIncrementWeight{};
        int mObjDecrementWeight{};
        int mObjHitThreshold{};
        int mObjMaxHitcount{};
-        float mModelScoreThreshold{};
-        float mModelNMSThreshold{};
 
        auto spiralSearchForValidPoint(sensor_msgs::msg::PointCloud2::UniquePtr const& cloudPtr,
                                       std::size_t u, std::size_t v,
@@ -59,11 +70,11 @@ namespace mrover {
 
        static auto drawDetectionBoxes(cv::InputOutputArray image, std::span<Detection const> detections) -> void;
 
-        auto static parseYOLOv8Output(cv::Mat& output,
-                                      std::vector<Detection>& detections,
-                                      std::vector<std::string> const& classes,
-                                      float modelScoreThreshold = 0.75,
-                                      float modelNMSThreshold = 0.5) -> void;
+        auto static parseYOLOv8Output(Model const& model,
+                                      cv::Mat& output,
+                                      std::vector<Detection>& detections) -> void;
+
+        auto static preprocessYOLOv8Input(Model const& model, cv::Mat& rgbImage, cv::Mat& blob) -> void;
 
    public:
        explicit ObjectDetectorBase();
 
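Note: the Model struct bundles per-architecture behavior as std::function members, so supporting a new network means supplying new callables rather than editing switch statements. A simplified, self-contained sketch of that pattern (simplified stand-in types, not the repository's actual Model):

    #include <functional>
    #include <iostream>
    #include <string>
    #include <vector>

    struct Model {
        std::string modelName;
        std::vector<std::string> classes;
        // Stand-in for the cv::Mat-based preprocessing hook.
        std::function<void(Model const&, std::string const&, std::string&)> rgbImageToBlob;
    };

    int main() {
        Model yolo{"YOLOv8",
                   {"bottle", "hammer"},
                   [](Model const& m, std::string const& in, std::string& out) {
                       out = "blob(" + in + ") for " + m.modelName; // placeholder for resize + blobFromImage
                   }};
        std::string blob;
        yolo.rgbImageToBlob(yolo, "rgb frame", blob);
        std::cout << blob << '\n';
        return 0;
    }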
std::vector& detections) -> void { // Parse model specific dimensioning from the output // The input to this function is expecting a YOLOv8 style model, thus the dimensions should be > rows @@ -37,7 +50,7 @@ namespace mrover { cv::minMaxLoc(scores, nullptr, &maxClassScore, nullptr, &classId); // Determine if the class is an acceptable confidence level, otherwise disregard - if (maxClassScore <= modelScoreThreshold) continue; + if (maxClassScore <= model.buffer[0]) continue; confidences.push_back(static_cast(maxClassScore)); classIds.push_back(classId.x); @@ -61,7 +74,7 @@ namespace mrover { //Coalesce the boxes into a smaller number of distinct boxes std::vector nmsResult; - cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nmsResult); + cv::dnn::NMSBoxes(boxes, confidences, model.buffer[0], model.buffer[1], nmsResult); // Fill in the output Detections Vector for (int i: nmsResult) { @@ -72,7 +85,7 @@ namespace mrover { result.confidence = confidences[i]; //Fill in the class name and box information - result.className = classes[result.classId]; + result.className = model.classes[result.classId]; result.box = boxes[i]; //Push back the detection into the for storagevector diff --git a/perception/object_detector/object_detector.processing.cpp b/perception/object_detector/object_detector.processing.cpp index bdba3321..ad2010ac 100644 --- a/perception/object_detector/object_detector.processing.cpp +++ b/perception/object_detector/object_detector.processing.cpp @@ -15,7 +15,7 @@ namespace mrover { mRgbImage = cv::Mat{static_cast(msg->height), static_cast(msg->width), CV_8UC3, cv::Scalar{0, 0, 0, 0}}; } - // If 0x0 image + // if 0x0 image if (!mRgbImage.total()) { return; } @@ -24,20 +24,7 @@ namespace mrover { // Convert the RGB Image into the blob Image format cv::Mat blobSizedImage; - switch (mModelType) { - case MODEL_TYPE::YOLOv8: { - if (mInputTensorSize.size() != 4) { - throw std::runtime_error("Expected Blob Size to be of size 4, are you using the correct model type?"); - } - - static constexpr double UCHAR_TO_DOUBLE = 1.0 / 255.0; - - cv::Size blobSize{static_cast(mInputTensorSize[2]), static_cast(mInputTensorSize[3])}; - cv::resize(mRgbImage, blobSizedImage, blobSize); - cv::dnn::blobFromImage(blobSizedImage, mImageBlob, UCHAR_TO_DOUBLE, blobSize, cv::Scalar{}, false, false); - break; - } - } + mModel.rbgImageToBlob(mModel, mRgbImage, blobSizedImage); mLoopProfiler.measureEvent("Conversion"); @@ -46,11 +33,7 @@ namespace mrover { cv::Mat outputTensor; mTensorRT.modelForwardPass(mImageBlob, outputTensor); - switch (mModelType) { - case MODEL_TYPE::YOLOv8: { - ObjectDetectorBase::parseYOLOv8Output(outputTensor, detections, mClasses, mModelScoreThreshold, mModelNMSThreshold); - } - } + mModel.outputTensorToDetections(mModel, outputTensor, detections); mLoopProfiler.measureEvent("Execution"); @@ -69,7 +52,7 @@ namespace mrover { // Set of flags indicating if the given object has been seen // TODO(quintin): Do not hard code exactly two classes std::bitset<2> seenObjects{0b00}; - for (auto const& [classId, className, confidence, box]: detections) { + for (auto const& [classId, className, confidence, box] : detections) { // Resize from blob space to image space cv::Point2f centerInBlob = cv::Point2f{box.tl()} + cv::Point2f{box.size()} / 2; float xRatio = static_cast(msg->width) / static_cast(imageSize.width); @@ -90,10 +73,10 @@ namespace mrover { // Push the immediate detections to the camera frame SE3Conversions::pushToTfTree(*mTfBroadcaster, objectImmediateFrame, 
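The parser above is the standard YOLOv8 decode: transpose so each row is one candidate, split off the four box coordinates, take the best class score, then let NMS collapse overlapping boxes. A compact standalone version, assuming the raw output is already a CV_32F [4 + numClasses, numAnchors] cv::Mat (6 x 8400 for a two-class model); the default thresholds are illustrative:

#include <opencv2/core.hpp>
#include <opencv2/dnn.hpp>
#include <vector>

struct Det { int classId; float confidence; cv::Rect box; };

auto decodeYolo(cv::Mat const& output, int numClasses,
                float scoreThresh = 0.75f, float nmsThresh = 0.5f) -> std::vector<Det> {
    cv::Mat preds = output.t(); // [C, N] -> [N, C]: one candidate per row

    std::vector<int> classIds;
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;

    for (int i = 0; i < preds.rows; ++i) {
        float* row = preds.ptr<float>(i);
        // Columns 0..3 are cx, cy, w, h in input pixels; the rest are class scores
        cv::Mat scores{1, numClasses, CV_32F, row + 4};
        cv::Point classId;
        double maxScore;
        cv::minMaxLoc(scores, nullptr, &maxScore, nullptr, &classId);
        if (maxScore <= scoreThresh) continue; // discard low-confidence candidates

        boxes.emplace_back(static_cast<int>(row[0] - 0.5f * row[2]),
                           static_cast<int>(row[1] - 0.5f * row[3]),
                           static_cast<int>(row[2]), static_cast<int>(row[3]));
        confidences.push_back(static_cast<float>(maxScore));
        classIds.push_back(classId.x);
    }

    std::vector<int> keep; // indices that survive non-maximum suppression
    cv::dnn::NMSBoxes(boxes, confidences, scoreThresh, nmsThresh, keep);

    std::vector<Det> detections;
    for (int i: keep) detections.push_back({classIds[i], confidences[i], boxes[i]});
    return detections;
}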
mCameraFrame, objectInCamera.value(), get_clock()->now()); // Since the object is seen we need to increment the hit counter - mObjectHitCounts[classId] = std::min(mObjMaxHitcount, mObjectHitCounts[classId] + mObjIncrementWeight); + mModel.objectHitCounts[classId] = std::min(mObjMaxHitcount, mModel.objectHitCounts[classId] + mObjIncrementWeight); // Only publish to permament if we are confident in the object - if (mObjectHitCounts[classId] > mObjHitThreshold) { + if (mModel.objectHitCounts[classId] > mObjHitThreshold) { std::string objectPermanentFrame = className; // Grab the object inside of the camera frame and push it into the map frame SE3d objectInMap = SE3Conversions::fromTfTree(*mTfBuffer, objectImmediateFrame, mWorldFrame); @@ -114,7 +97,7 @@ namespace mrover { if (seenObjects[i]) continue; assert(i < mObjectHitCounts.size()); - mObjectHitCounts[i] = std::max(0, mObjectHitCounts[i] - mObjDecrementWeight); + mModel.objectHitCounts[i] = std::max(0, mModel.objectHitCounts[i] - mObjDecrementWeight); } } diff --git a/perception/object_detector/pch.hpp b/perception/object_detector/pch.hpp index 7ab5af34..026f6020 100644 --- a/perception/object_detector/pch.hpp +++ b/perception/object_detector/pch.hpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include From 816bed3d9272461e8576f8e7451e92c2d0e522b3 Mon Sep 17 00:00:00 2001 From: jbrhm Date: Thu, 26 Sep 2024 11:42:45 -0400 Subject: [PATCH 11/25] Works on zed --- perception/object_detector/object_detector.hpp | 7 ++++--- perception/object_detector/object_detector.parsing.cpp | 7 +++++-- perception/object_detector/object_detector.processing.cpp | 2 +- tensorrt/inference_wrapper.cpp | 4 ++++ 4 files changed, 14 insertions(+), 6 deletions(-) diff --git a/perception/object_detector/object_detector.hpp b/perception/object_detector/object_detector.hpp index a605059a..766bee26 100644 --- a/perception/object_detector/object_detector.hpp +++ b/perception/object_detector/object_detector.hpp @@ -18,17 +18,18 @@ namespace mrover { std::vector outputTensorSize; + // Additional space for params that one algorithm might need and not others std::vector buffer; // Converts an rgb image to a blob - std::function rbgImageToBlob; + std::function rbgImageToBlob; // Converts an output tensor into a vector of detections std::function&)> outputTensorToDetections; Model() = default; - Model(std::string _modelName, std::vector _objectHitCounts, std::vector _classes, std::vector _inputTensorSize, std::vector _outputTensorSize, std::vector _buffer, std::function _rbgImageToBlob, std::function&)> _outputTensorToDetections) : modelName{std::move(_modelName)}, objectHitCounts(std::move(_objectHitCounts)), classes(std::move(_classes)), inputTensorSize(std::move(_inputTensorSize)), outputTensorSize(std::move(_outputTensorSize)), buffer(std::move(_buffer)), rbgImageToBlob{std::move(_rbgImageToBlob)}, outputTensorToDetections{std::move(_outputTensorToDetections)} {} + Model(std::string _modelName, std::vector _objectHitCounts, std::vector _classes, std::vector _inputTensorSize, std::vector _outputTensorSize, std::vector _buffer, std::function _rbgImageToBlob, std::function&)> _outputTensorToDetections) : modelName{std::move(_modelName)}, objectHitCounts(std::move(_objectHitCounts)), classes(std::move(_classes)), inputTensorSize(std::move(_inputTensorSize)), outputTensorSize(std::move(_outputTensorSize)), buffer(std::move(_buffer)), rbgImageToBlob{std::move(_rbgImageToBlob)}, outputTensorToDetections{std::move(_outputTensorToDetections)} {} }; static 
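The hit-count bookkeeping above is a small hysteresis filter: a class must be detected repeatedly before it is trusted enough to publish to the permanent frame, and it fades back out when it stops appearing. The same logic in isolation, with illustrative weights standing in for the node's parameters:

#include <algorithm>
#include <cstddef>
#include <vector>

struct HitCounter {
    int incrementWeight{2}, decrementWeight{1}; // illustrative, not the tuned values
    int threshold{5}, maxCount{10};
    std::vector<int> counts;

    explicit HitCounter(std::size_t numClasses) : counts(numClasses, 0) {}

    void update(std::vector<bool> const& seenThisFrame) {
        for (std::size_t i = 0; i < counts.size(); ++i)
            counts[i] = seenThisFrame[i]
                                ? std::min(maxCount, counts[i] + incrementWeight) // saturate upward
                                : std::max(0, counts[i] - decrementWeight);       // decay toward zero
    }

    // Gate for pushing a detection to the permanent TF frame
    [[nodiscard]] bool confident(std::size_t classId) const { return counts[classId] > threshold; }
};

Saturating at maxCount keeps one long sighting from taking arbitrarily long to decay, while the threshold below it suppresses one-frame false positives.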
constexpr char const* NODE_NAME = "object_detector"; @@ -74,7 +75,7 @@ namespace mrover { cv::Mat& output, std::vector& detections) -> void; - auto static preprocessYOLOv8Input(Model const& model, cv::Mat& rgbImage, cv::Mat& blob) -> void; + auto static preprocessYOLOv8Input(Model const& model, cv::Mat& rgbImage, cv::Mat& blobSizedImage, cv::Mat& blob) -> void; public: explicit ObjectDetectorBase(); diff --git a/perception/object_detector/object_detector.parsing.cpp b/perception/object_detector/object_detector.parsing.cpp index 7aab71ec..7daf4aaa 100644 --- a/perception/object_detector/object_detector.parsing.cpp +++ b/perception/object_detector/object_detector.parsing.cpp @@ -1,11 +1,14 @@ #include "object_detector.hpp" namespace mrover { - auto ObjectDetectorBase::preprocessYOLOv8Input(Model const& model, cv::Mat& rgbImage, cv::Mat& blob) -> void{ - cv::Mat blobSizedImage; + auto ObjectDetectorBase::preprocessYOLOv8Input(Model const& model, cv::Mat& rgbImage, cv::Mat& blobSizedImage, cv::Mat& blob) -> void{ if (model.inputTensorSize.size() != 4) { throw std::runtime_error("Expected Blob Size to be of size 4, are you using the correct model type?"); } + if (model.buffer.size() != 2){ + throw std::runtime_error("Expected 2 additional parameters!"); + } + static constexpr double UCHAR_TO_DOUBLE = 1.0 / 255.0; cv::Size blobSize{static_cast(model.inputTensorSize[2]), static_cast(model.inputTensorSize[3])}; diff --git a/perception/object_detector/object_detector.processing.cpp b/perception/object_detector/object_detector.processing.cpp index ad2010ac..434de661 100644 --- a/perception/object_detector/object_detector.processing.cpp +++ b/perception/object_detector/object_detector.processing.cpp @@ -24,7 +24,7 @@ namespace mrover { // Convert the RGB Image into the blob Image format cv::Mat blobSizedImage; - mModel.rbgImageToBlob(mModel, mRgbImage, blobSizedImage); + mModel.rbgImageToBlob(mModel, mRgbImage, blobSizedImage, mImageBlob); mLoopProfiler.measureEvent("Conversion"); diff --git a/tensorrt/inference_wrapper.cpp b/tensorrt/inference_wrapper.cpp index d5f9e2f0..abd1a897 100644 --- a/tensorrt/inference_wrapper.cpp +++ b/tensorrt/inference_wrapper.cpp @@ -28,3 +28,7 @@ auto InferenceWrapper::getOutputTensor() const -> cv::Mat { auto InferenceWrapper::getInputTensorSize() -> std::vector{ return mInference->getInputTensorSize(); } + +auto InferenceWrapper::getOutputTensorSize() -> std::vector{ + return mInference->getInputTensorSize(); +} From 98283f0f3f9d4652fc322dbeeba03db7e04e00e9 Mon Sep 17 00:00:00 2001 From: jbrhm Date: Fri, 27 Sep 2024 10:38:49 -0400 Subject: [PATCH 12/25] Style fixes --- .../object_detector/object_detector.cpp | 8 ++-- .../object_detector/object_detector.hpp | 40 +++++++++---------- .../object_detector.parsing.cpp | 24 +++++------ .../object_detector.processing.cpp | 8 ++-- perception/object_detector/pch.hpp | 2 +- 5 files changed, 41 insertions(+), 41 deletions(-) diff --git a/perception/object_detector/object_detector.cpp b/perception/object_detector/object_detector.cpp index 3fe57d43..545a0d96 100644 --- a/perception/object_detector/object_detector.cpp +++ b/perception/object_detector/object_detector.cpp @@ -4,7 +4,7 @@ namespace mrover { ObjectDetectorBase::ObjectDetectorBase() : rclcpp::Node(NODE_NAME), mLoopProfiler{get_logger()} { - std::string modelName; + std::string modelName; float modelScoreThreshold{}; float modelNMSThreshold{}; @@ -20,8 +20,8 @@ namespace mrover { {"model_nms_threshold", modelNMSThreshold, 0.5}}; ParameterWrapper::declareParameters(this, 
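Both wrapper layers above just forward the engine's reported static tensor shape outward; note that at this point in the series getOutputTensorSize still forwards to getInputTensorSize, a copy-paste slip a later patch in the series corrects. The underlying lookup is roughly the following, written against the name-based TensorRT API (8.5 and newer); the example binding name is an assumption:

#include <NvInfer.h>
#include <cstdint>
#include <vector>

auto tensorShape(nvinfer1::ICudaEngine const& engine, char const* tensorName) -> std::vector<std::int64_t> {
    nvinfer1::Dims dims = engine.getTensorShape(tensorName); // static shape baked into the engine
    std::vector<std::int64_t> shape;
    shape.reserve(dims.nbDims);
    for (std::int32_t i = 0; i < dims.nbDims; ++i)
        shape.push_back(dims.d[i]);
    return shape; // e.g. {1, 3, 640, 640} for a YOLOv8 input binding named "images"
}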
params); - - // All of these variables will be invalidated after calling this function + + // All of these variables will be invalidated after calling this function std::filesystem::path packagePath = std::filesystem::path{ament_index_cpp::get_package_prefix("mrover")} / ".." / ".." / "src" / "mrover"; @@ -32,7 +32,7 @@ namespace mrover { // Initialize TensorRT Inference Object and Get Important Output Information mTensorRT = TensortRT{modelName, packagePath.string()}; - mModel = Model(modelName, {0, 0}, {"bottle", "hammer"}, mTensorRT.getInputTensorSize(), mTensorRT.getOutputTensorSize(), {modelScoreThreshold, modelNMSThreshold}, ObjectDetectorBase::preprocessYOLOv8Input, ObjectDetectorBase::parseYOLOv8Output); + mModel = Model(modelName, {0, 0}, {"bottle", "hammer"}, mTensorRT.getInputTensorSize(), mTensorRT.getOutputTensorSize(), {modelScoreThreshold, modelNMSThreshold}, ObjectDetectorBase::preprocessYOLOv8Input, ObjectDetectorBase::parseYOLOv8Output); mDebugImgPub = create_publisher("object_detector/debug_img", 1); diff --git a/perception/object_detector/object_detector.hpp b/perception/object_detector/object_detector.hpp index 766bee26..08893b24 100644 --- a/perception/object_detector/object_detector.hpp +++ b/perception/object_detector/object_detector.hpp @@ -7,30 +7,30 @@ namespace mrover { class ObjectDetectorBase : public rclcpp::Node { protected: - struct Model { - std::string modelName; + struct Model { + std::string modelName; - std::vector objectHitCounts; + std::vector objectHitCounts; - std::vector classes; + std::vector classes; - std::vector inputTensorSize; + std::vector inputTensorSize; - std::vector outputTensorSize; + std::vector outputTensorSize; - // Additional space for params that one algorithm might need and not others - std::vector buffer; - - // Converts an rgb image to a blob - std::function rbgImageToBlob; + // Additional space for params that one algorithm might need and not others + std::vector buffer; - // Converts an output tensor into a vector of detections - std::function&)> outputTensorToDetections; + // Converts an rgb image to a blob + std::function rbgImageToBlob; - Model() = default; + // Converts an output tensor into a vector of detections + std::function&)> outputTensorToDetections; - Model(std::string _modelName, std::vector _objectHitCounts, std::vector _classes, std::vector _inputTensorSize, std::vector _outputTensorSize, std::vector _buffer, std::function _rbgImageToBlob, std::function&)> _outputTensorToDetections) : modelName{std::move(_modelName)}, objectHitCounts(std::move(_objectHitCounts)), classes(std::move(_classes)), inputTensorSize(std::move(_inputTensorSize)), outputTensorSize(std::move(_outputTensorSize)), buffer(std::move(_buffer)), rbgImageToBlob{std::move(_rbgImageToBlob)}, outputTensorToDetections{std::move(_outputTensorToDetections)} {} - }; + Model() = default; + + Model(std::string _modelName, std::vector _objectHitCounts, std::vector _classes, std::vector _inputTensorSize, std::vector _outputTensorSize, std::vector _buffer, std::function _rbgImageToBlob, std::function&)> _outputTensorToDetections) : modelName{std::move(_modelName)}, objectHitCounts(std::move(_objectHitCounts)), classes(std::move(_classes)), inputTensorSize(std::move(_inputTensorSize)), outputTensorSize(std::move(_outputTensorSize)), buffer(std::move(_buffer)), rbgImageToBlob{std::move(_rbgImageToBlob)}, outputTensorToDetections{std::move(_outputTensorToDetections)} {} + }; static constexpr char const* NODE_NAME = "object_detector"; @@ -38,7 +38,7 @@ 
namespace mrover { std::shared_ptr mTfBroadcaster = std::make_shared(this); std::shared_ptr mTfListener = std::make_shared(*mTfBuffer); - Model mModel; + Model mModel; std::string mCameraFrame; std::string mWorldFrame; @@ -72,10 +72,10 @@ namespace mrover { static auto drawDetectionBoxes(cv::InputOutputArray image, std::span detections) -> void; auto static parseYOLOv8Output(Model const& model, - cv::Mat& output, - std::vector& detections) -> void; + cv::Mat& output, + std::vector& detections) -> void; - auto static preprocessYOLOv8Input(Model const& model, cv::Mat& rgbImage, cv::Mat& blobSizedImage, cv::Mat& blob) -> void; + auto static preprocessYOLOv8Input(Model const& model, cv::Mat& rgbImage, cv::Mat& blobSizedImage, cv::Mat& blob) -> void; public: explicit ObjectDetectorBase(); diff --git a/perception/object_detector/object_detector.parsing.cpp b/perception/object_detector/object_detector.parsing.cpp index 7daf4aaa..dc7d7167 100644 --- a/perception/object_detector/object_detector.parsing.cpp +++ b/perception/object_detector/object_detector.parsing.cpp @@ -1,20 +1,20 @@ #include "object_detector.hpp" namespace mrover { - auto ObjectDetectorBase::preprocessYOLOv8Input(Model const& model, cv::Mat& rgbImage, cv::Mat& blobSizedImage, cv::Mat& blob) -> void{ - if (model.inputTensorSize.size() != 4) { - throw std::runtime_error("Expected Blob Size to be of size 4, are you using the correct model type?"); - } + auto ObjectDetectorBase::preprocessYOLOv8Input(Model const& model, cv::Mat& rgbImage, cv::Mat& blobSizedImage, cv::Mat& blob) -> void { + if (model.inputTensorSize.size() != 4) { + throw std::runtime_error("Expected Blob Size to be of size 4, are you using the correct model type?"); + } - if (model.buffer.size() != 2){ - throw std::runtime_error("Expected 2 additional parameters!"); - } + if (model.buffer.size() != 2) { + throw std::runtime_error("Expected 2 additional parameters!"); + } - static constexpr double UCHAR_TO_DOUBLE = 1.0 / 255.0; + static constexpr double UCHAR_TO_DOUBLE = 1.0 / 255.0; - cv::Size blobSize{static_cast(model.inputTensorSize[2]), static_cast(model.inputTensorSize[3])}; - cv::resize(rgbImage, blobSizedImage, blobSize); - cv::dnn::blobFromImage(blobSizedImage, blob, UCHAR_TO_DOUBLE, blobSize, cv::Scalar{}, false, false); - } + cv::Size blobSize{static_cast(model.inputTensorSize[2]), static_cast(model.inputTensorSize[3])}; + cv::resize(rgbImage, blobSizedImage, blobSize); + cv::dnn::blobFromImage(blobSizedImage, blob, UCHAR_TO_DOUBLE, blobSize, cv::Scalar{}, false, false); + } auto ObjectDetectorBase::parseYOLOv8Output(Model const& model, cv::Mat& output, std::vector& detections) -> void { // Parse model specific dimensioning from the output diff --git a/perception/object_detector/object_detector.processing.cpp b/perception/object_detector/object_detector.processing.cpp index 434de661..29a3b9ba 100644 --- a/perception/object_detector/object_detector.processing.cpp +++ b/perception/object_detector/object_detector.processing.cpp @@ -24,7 +24,7 @@ namespace mrover { // Convert the RGB Image into the blob Image format cv::Mat blobSizedImage; - mModel.rbgImageToBlob(mModel, mRgbImage, blobSizedImage, mImageBlob); + mModel.rbgImageToBlob(mModel, mRgbImage, blobSizedImage, mImageBlob); mLoopProfiler.measureEvent("Conversion"); @@ -32,8 +32,8 @@ namespace mrover { std::vector detections{}; cv::Mat outputTensor; mTensorRT.modelForwardPass(mImageBlob, outputTensor); - - mModel.outputTensorToDetections(mModel, outputTensor, detections); + + 
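The net effect of the Model struct above: everything architecture-specific now lives in two std::function members bound once in the constructor, so the old MODEL_TYPE enum and its switch statements disappear from the hot path. A trimmed illustration of the pattern, with simplified stand-in types rather than the project's real signatures:

#include <functional>
#include <string>
#include <vector>

struct Detection { int classId{}; float confidence{}; };

struct ModelDesc {
    std::string name;
    // One hook per architecture-specific stage, bound at construction
    std::function<void(ModelDesc const&, std::vector<float>&)> preprocess;
    std::function<void(ModelDesc const&, std::vector<float> const&, std::vector<Detection>&)> parse;
};

// The call site stays architecture-agnostic:
void runOnce(ModelDesc const& model, std::vector<float>& tensor, std::vector<Detection>& out) {
    model.preprocess(model, tensor); // e.g. a YOLOv8 blob conversion
    model.parse(model, tensor, out); // e.g. a YOLOv8 output decode
}

Supporting another architecture then means writing two static functions and one constructor call; no call sites change.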
mModel.outputTensorToDetections(mModel, outputTensor, detections); mLoopProfiler.measureEvent("Execution"); @@ -52,7 +52,7 @@ namespace mrover { // Set of flags indicating if the given object has been seen // TODO(quintin): Do not hard code exactly two classes std::bitset<2> seenObjects{0b00}; - for (auto const& [classId, className, confidence, box] : detections) { + for (auto const& [classId, className, confidence, box]: detections) { // Resize from blob space to image space cv::Point2f centerInBlob = cv::Point2f{box.tl()} + cv::Point2f{box.size()} / 2; float xRatio = static_cast(msg->width) / static_cast(imageSize.width); diff --git a/perception/object_detector/pch.hpp b/perception/object_detector/pch.hpp index 026f6020..ea7badcf 100644 --- a/perception/object_detector/pch.hpp +++ b/perception/object_detector/pch.hpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -22,7 +23,6 @@ #include #include #include -#include #include #include From 5835ccb62ecd0294dd0ae2833d25b569ec403553 Mon Sep 17 00:00:00 2001 From: jbrhm Date: Fri, 27 Sep 2024 15:04:36 -0400 Subject: [PATCH 13/25] boiler plate --- perception/usb_camera/pch.hpp | 33 ++++++++++++++++++++++++++++ perception/usb_camera/usb_camera.cpp | 0 perception/usb_camera/usb_camera.hpp | 0 3 files changed, 33 insertions(+) create mode 100644 perception/usb_camera/pch.hpp create mode 100644 perception/usb_camera/usb_camera.cpp create mode 100644 perception/usb_camera/usb_camera.hpp diff --git a/perception/usb_camera/pch.hpp b/perception/usb_camera/pch.hpp new file mode 100644 index 00000000..451516c2 --- /dev/null +++ b/perception/usb_camera/pch.hpp @@ -0,0 +1,33 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include diff --git a/perception/usb_camera/usb_camera.cpp b/perception/usb_camera/usb_camera.cpp new file mode 100644 index 00000000..e69de29b diff --git a/perception/usb_camera/usb_camera.hpp b/perception/usb_camera/usb_camera.hpp new file mode 100644 index 00000000..e69de29b From b7be39c2bf272793fe645e6d2bfc2e544fe5c87a Mon Sep 17 00:00:00 2001 From: jbrhm Date: Fri, 27 Sep 2024 15:13:49 -0400 Subject: [PATCH 14/25] old code --- perception/usb_camera/usb_camera.cpp | 143 +++++++++++++++++++++++++++ perception/usb_camera/usb_camera.hpp | 38 +++++++ 2 files changed, 181 insertions(+) diff --git a/perception/usb_camera/usb_camera.cpp b/perception/usb_camera/usb_camera.cpp index e69de29b..f0053533 100644 --- a/perception/usb_camera/usb_camera.cpp +++ b/perception/usb_camera/usb_camera.cpp @@ -0,0 +1,143 @@ +#include "usb_camera.hpp" + +namespace mrover { + + template + auto gstCheck(T* t) -> T* { + if (!t) throw std::runtime_error{"Failed to create"}; + return t; + } + + auto UsbCameraNodelet::onInit() -> void { + try { + mNh = getMTNodeHandle(); + mPnh = getMTPrivateNodeHandle(); + + mWidth = mPnh.param("width", 640); + mHeight = mPnh.param("height", 480); + auto framerate = mPnh.param("framerate", 30); + auto device = mPnh.param("device", "/dev/video0"); + auto imageTopicName = mPnh.param("image_topic", "/image"); + auto cameraInfoTopicName = mPnh.param("camera_info_topic", "/camera_info"); + auto watchdogTimeout = mPnh.param("watchdog_timeout", 1.0); + mRestartDelay = mPnh.param("restart_delay", 2.0); + auto decodeJpegFromDevice = 
mPnh.param("decode_jpeg_from_device", true); + + mImgPub = mNh.advertise(imageTopicName, 1); + mCamInfoPub = mNh.advertise(cameraInfoTopicName, 1); + + gst_init(nullptr, nullptr); + + mMainLoop = gstCheck(g_main_loop_new(nullptr, FALSE)); + + std::string launch = std::format("v4l2src device={} ", device); + if (decodeJpegFromDevice) { + launch += std::format("! image/jpeg,width={},height={},framerate={}/1 ", mWidth, mHeight, framerate); + if (gst_element_factory_find("nvv4l2decoder")) { + launch += "! nvv4l2decoder mjpeg=1 ! nvvidconv ! video/x-raw,format=YUY2 "; + } else { + throw std::runtime_error{"Not supported yet!"}; + } + } else { + launch += std::format("! video/x-raw,format=YUY2,width={},height={},framerate={}/1 ", mWidth, mHeight, framerate); + } + launch += "! appsink name=streamSink sync=false"; + NODELET_INFO_STREAM(std::format("GStreamer launch string: {}", launch)); + + mPipeline = gstCheck(gst_parse_launch(launch.c_str(), nullptr)); + + mStreamSink = gstCheck(gst_bin_get_by_name(GST_BIN(mPipeline), "streamSink")); + + if (gst_element_set_state(mPipeline, GST_STATE_PAUSED) == GST_STATE_CHANGE_FAILURE) + throw std::runtime_error{"Failed initial pause on GStreamer pipeline"}; + + mMainLoopThread = std::thread{[this] { + ROS_INFO("Started GStreamer main loop"); + g_main_loop_run(mMainLoop); + std::cout << "Stopped GStreamer main loop" << std::endl; + }}; + + mStreamSinkThread = std::thread{[this] { + ROS_INFO("Started stream sink thread"); + pullSampleLoop(); + std::cout << "Stopped stream sink thread" << std::endl; + }}; + + if (gst_element_set_state(mPipeline, GST_STATE_PLAYING) == GST_STATE_CHANGE_FAILURE) + throw std::runtime_error{"Failed to play GStreamer pipeline"}; + + mWatchdogTimer = mNh.createTimer(ros::Duration{watchdogTimeout}, &UsbCameraNodelet::watchdogTriggered, this); + + NODELET_INFO_STREAM("Initialized and started GStreamer pipeline"); + + } catch (std::exception const& e) { + ROS_ERROR_STREAM(std::format("Exception initializing USB camera: {}", e.what())); + ros::requestShutdown(); + } + } + + auto UsbCameraNodelet::watchdogTriggered(ros::TimerEvent const&) -> void { + mWatchdogTimer.stop(); + ROS_ERROR("Watchdog hit! 
Attemping to restart..."); + + // Attempt to restart the pipeline + if (gst_element_set_state(mPipeline, GST_STATE_NULL) == GST_STATE_CHANGE_FAILURE || + gst_element_set_state(mPipeline, GST_STATE_PLAYING) == GST_STATE_CHANGE_FAILURE) + ROS_ERROR_STREAM("Failed to restart GStreamer pipeline"); + + if (mStreamSinkThread.joinable()) { + mStreamSinkThread.join(); + mStreamSinkThread = std::thread{[this] { + ROS_INFO("Started stream sink thread"); + pullSampleLoop(); + std::cout << "Stopped stream sink thread" << std::endl; + }}; + } + + ros::Duration{mRestartDelay}.sleep(); + mWatchdogTimer.start(); + } + + auto UsbCameraNodelet::pullSampleLoop() -> void { + while (GstSample* sample = gst_app_sink_pull_sample(GST_APP_SINK(mStreamSink))) { + mWatchdogTimer.stop(); + mWatchdogTimer.start(); + + GstBuffer* buffer = gst_sample_get_buffer(sample); + GstMapInfo map; + gst_buffer_map(buffer, &map, GST_MAP_READ); + + sensor_msgs::Image image; + image.header.stamp = ros::Time::now(); + image.encoding = sensor_msgs::image_encodings::BGRA8; + image.is_bigendian = __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__; + image.height = mHeight; + image.width = mWidth; + image.step = mWidth * 4; + image.data.resize(image.step * mHeight); + // These do not clone the data (due to passing the pointer), just provide a way for OpenCV to access it + // We are converting directly from GStreamer YUV2 memory to the ROS image BGRA memory + cv::Mat yuvView{mHeight, mWidth, CV_8UC2, map.data}; + cv::Mat bgraView{mHeight, mWidth, CV_8UC4, image.data.data()}; + cv::cvtColor(yuvView, bgraView, cv::COLOR_YUV2BGRA_YUY2); + mImgPub.publish(image); + + gst_buffer_unmap(buffer, &map); + gst_sample_unref(sample); + } + } + + UsbCameraNodelet::~UsbCameraNodelet() { + if (mMainLoop) { + g_main_loop_quit(mMainLoop); + mMainLoopThread.join(); + g_main_loop_unref(mMainLoop); + } + + if (mPipeline) { + gst_element_set_state(mPipeline, GST_STATE_NULL); + mStreamSinkThread.join(); + gst_object_unref(mPipeline); + } + } +} // namespace mrover diff --git a/perception/usb_camera/usb_camera.hpp b/perception/usb_camera/usb_camera.hpp index e69de29b..e42f6555 100644 --- a/perception/usb_camera/usb_camera.hpp +++ b/perception/usb_camera/usb_camera.hpp @@ -0,0 +1,38 @@ +#pragma once + +#include "pch.hpp" + +namespace mrover { + + class UsbCameraNodelet final : public nodelet::Nodelet { + + ros::NodeHandle mNh, mPnh; + + ros::Publisher mCamInfoPub; + ros::Publisher mImgPub; + + int mWidth{}, mHeight{}; + double mRestartDelay{}; + + GstElement *mStreamSink{}, *mPipeline{}; + GMainLoop* mMainLoop{}; + + std::thread mMainLoopThread, mStreamSinkThread; + + ros::Timer mWatchdogTimer; + + LoopProfiler mGrabThreadProfiler{"Long Range Cam Grab"}; + + auto onInit() -> void override; + + auto watchdogTriggered(ros::TimerEvent const&) -> void; + + public: + UsbCameraNodelet() = default; + + ~UsbCameraNodelet() override; + + auto pullSampleLoop() -> void; + }; + +} // namespace mrover From abf847f15e15d7980af12ff316d3a901325922e4 Mon Sep 17 00:00:00 2001 From: jbrhm Date: Sun, 29 Sep 2024 09:52:28 -0400 Subject: [PATCH 15/25] removed --- perception/usb_camera/pch.hpp | 33 ------- perception/usb_camera/usb_camera.cpp | 143 --------------------------- perception/usb_camera/usb_camera.hpp | 38 ------- 3 files changed, 214 deletions(-) delete mode 100644 perception/usb_camera/pch.hpp delete mode 100644 perception/usb_camera/usb_camera.cpp delete mode 100644 perception/usb_camera/usb_camera.hpp diff --git a/perception/usb_camera/pch.hpp b/perception/usb_camera/pch.hpp 
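The USB camera nodelet this pair of patches restores and then deletes is built around GStreamer's blocking appsink pull loop: wait for a sample, map its buffer read-only, convert, then unmap and unref in matched pairs. A minimal self-contained sketch of that loop, with videotestsrc standing in for the real v4l2 pipeline:

#include <gst/app/gstappsink.h>
#include <gst/gst.h>

int main(int argc, char** argv) {
    gst_init(&argc, &argv);

    GstElement* pipeline = gst_parse_launch(
            "videotestsrc num-buffers=10 ! video/x-raw,format=YUY2,width=640,height=480 "
            "! appsink name=streamSink sync=false", nullptr);
    GstElement* sink = gst_bin_get_by_name(GST_BIN(pipeline), "streamSink");
    gst_element_set_state(pipeline, GST_STATE_PLAYING);

    while (GstSample* sample = gst_app_sink_pull_sample(GST_APP_SINK(sink))) { // blocks; NULL on EOS
        GstBuffer* buffer = gst_sample_get_buffer(sample);
        GstMapInfo map;
        if (gst_buffer_map(buffer, &map, GST_MAP_READ)) {
            g_print("frame: %zu bytes\n", map.size); // a real consumer converts YUY2 here
            gst_buffer_unmap(buffer, &map);          // must pair with gst_buffer_map
        }
        gst_sample_unref(sample);                    // must pair with pull_sample
    }

    gst_element_set_state(pipeline, GST_STATE_NULL);
    gst_object_unref(sink);
    gst_object_unref(pipeline);
    return 0;
}

num-buffers=10 makes the test source send EOS after ten frames so the loop terminates; the real nodelet instead relies on its watchdog timer to restart a stalled pipeline.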
deleted file mode 100644 index 451516c2..00000000 --- a/perception/usb_camera/pch.hpp +++ /dev/null @@ -1,33 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include diff --git a/perception/usb_camera/usb_camera.cpp b/perception/usb_camera/usb_camera.cpp deleted file mode 100644 index f0053533..00000000 --- a/perception/usb_camera/usb_camera.cpp +++ /dev/null @@ -1,143 +0,0 @@ -#include "usb_camera.hpp" - -namespace mrover { - - template - auto gstCheck(T* t) -> T* { - if (!t) throw std::runtime_error{"Failed to create"}; - return t; - } - - auto UsbCameraNodelet::onInit() -> void { - try { - mNh = getMTNodeHandle(); - mPnh = getMTPrivateNodeHandle(); - - mWidth = mPnh.param("width", 640); - mHeight = mPnh.param("height", 480); - auto framerate = mPnh.param("framerate", 30); - auto device = mPnh.param("device", "/dev/video0"); - auto imageTopicName = mPnh.param("image_topic", "/image"); - auto cameraInfoTopicName = mPnh.param("camera_info_topic", "/camera_info"); - auto watchdogTimeout = mPnh.param("watchdog_timeout", 1.0); - mRestartDelay = mPnh.param("restart_delay", 2.0); - auto decodeJpegFromDevice = mPnh.param("decode_jpeg_from_device", true); - - mImgPub = mNh.advertise(imageTopicName, 1); - mCamInfoPub = mNh.advertise(cameraInfoTopicName, 1); - - gst_init(nullptr, nullptr); - - mMainLoop = gstCheck(g_main_loop_new(nullptr, FALSE)); - - std::string launch = std::format("v4l2src device={} ", device); - if (decodeJpegFromDevice) { - launch += std::format("! image/jpeg,width={},height={},framerate={}/1 ", mWidth, mHeight, framerate); - if (gst_element_factory_find("nvv4l2decoder")) { - launch += "! nvv4l2decoder mjpeg=1 ! nvvidconv ! video/x-raw,format=YUY2 "; - } else { - throw std::runtime_error{"Not supported yet!"}; - } - } else { - launch += std::format("! video/x-raw,format=YUY2,width={},height={},framerate={}/1 ", mWidth, mHeight, framerate); - } - launch += "! appsink name=streamSink sync=false"; - NODELET_INFO_STREAM(std::format("GStreamer launch string: {}", launch)); - - mPipeline = gstCheck(gst_parse_launch(launch.c_str(), nullptr)); - - mStreamSink = gstCheck(gst_bin_get_by_name(GST_BIN(mPipeline), "streamSink")); - - if (gst_element_set_state(mPipeline, GST_STATE_PAUSED) == GST_STATE_CHANGE_FAILURE) - throw std::runtime_error{"Failed initial pause on GStreamer pipeline"}; - - mMainLoopThread = std::thread{[this] { - ROS_INFO("Started GStreamer main loop"); - g_main_loop_run(mMainLoop); - std::cout << "Stopped GStreamer main loop" << std::endl; - }}; - - mStreamSinkThread = std::thread{[this] { - ROS_INFO("Started stream sink thread"); - pullSampleLoop(); - std::cout << "Stopped stream sink thread" << std::endl; - }}; - - if (gst_element_set_state(mPipeline, GST_STATE_PLAYING) == GST_STATE_CHANGE_FAILURE) - throw std::runtime_error{"Failed to play GStreamer pipeline"}; - - mWatchdogTimer = mNh.createTimer(ros::Duration{watchdogTimeout}, &UsbCameraNodelet::watchdogTriggered, this); - - NODELET_INFO_STREAM("Initialized and started GStreamer pipeline"); - - } catch (std::exception const& e) { - ROS_ERROR_STREAM(std::format("Exception initializing USB camera: {}", e.what())); - ros::requestShutdown(); - } - } - - auto UsbCameraNodelet::watchdogTriggered(ros::TimerEvent const&) -> void { - mWatchdogTimer.stop(); - ROS_ERROR("Watchdog hit! 
Attemping to restart..."); - - // Attempt to restart the pipeline - if (gst_element_set_state(mPipeline, GST_STATE_NULL) == GST_STATE_CHANGE_FAILURE || - gst_element_set_state(mPipeline, GST_STATE_PLAYING) == GST_STATE_CHANGE_FAILURE) - ROS_ERROR_STREAM("Failed to restart GStreamer pipeline"); - - if (mStreamSinkThread.joinable()) { - mStreamSinkThread.join(); - mStreamSinkThread = std::thread{[this] { - ROS_INFO("Started stream sink thread"); - pullSampleLoop(); - std::cout << "Stopped stream sink thread" << std::endl; - }}; - } - - ros::Duration{mRestartDelay}.sleep(); - mWatchdogTimer.start(); - } - - auto UsbCameraNodelet::pullSampleLoop() -> void { - while (GstSample* sample = gst_app_sink_pull_sample(GST_APP_SINK(mStreamSink))) { - mWatchdogTimer.stop(); - mWatchdogTimer.start(); - - GstBuffer* buffer = gst_sample_get_buffer(sample); - GstMapInfo map; - gst_buffer_map(buffer, &map, GST_MAP_READ); - - sensor_msgs::Image image; - image.header.stamp = ros::Time::now(); - image.encoding = sensor_msgs::image_encodings::BGRA8; - image.is_bigendian = __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__; - image.height = mHeight; - image.width = mWidth; - image.step = mWidth * 4; - image.data.resize(image.step * mHeight); - // These do not clone the data (due to passing the pointer), just provide a way for OpenCV to access it - // We are converting directly from GStreamer YUV2 memory to the ROS image BGRA memory - cv::Mat yuvView{mHeight, mWidth, CV_8UC2, map.data}; - cv::Mat bgraView{mHeight, mWidth, CV_8UC4, image.data.data()}; - cv::cvtColor(yuvView, bgraView, cv::COLOR_YUV2BGRA_YUY2); - mImgPub.publish(image); - - gst_buffer_unmap(buffer, &map); - gst_sample_unref(sample); - } - } - - UsbCameraNodelet::~UsbCameraNodelet() { - if (mMainLoop) { - g_main_loop_quit(mMainLoop); - mMainLoopThread.join(); - g_main_loop_unref(mMainLoop); - } - - if (mPipeline) { - gst_element_set_state(mPipeline, GST_STATE_NULL); - mStreamSinkThread.join(); - gst_object_unref(mPipeline); - } - } -} // namespace mrover diff --git a/perception/usb_camera/usb_camera.hpp b/perception/usb_camera/usb_camera.hpp deleted file mode 100644 index e42f6555..00000000 --- a/perception/usb_camera/usb_camera.hpp +++ /dev/null @@ -1,38 +0,0 @@ -#pragma once - -#include "pch.hpp" - -namespace mrover { - - class UsbCameraNodelet final : public nodelet::Nodelet { - - ros::NodeHandle mNh, mPnh; - - ros::Publisher mCamInfoPub; - ros::Publisher mImgPub; - - int mWidth{}, mHeight{}; - double mRestartDelay{}; - - GstElement *mStreamSink{}, *mPipeline{}; - GMainLoop* mMainLoop{}; - - std::thread mMainLoopThread, mStreamSinkThread; - - ros::Timer mWatchdogTimer; - - LoopProfiler mGrabThreadProfiler{"Long Range Cam Grab"}; - - auto onInit() -> void override; - - auto watchdogTriggered(ros::TimerEvent const&) -> void; - - public: - UsbCameraNodelet() = default; - - ~UsbCameraNodelet() override; - - auto pullSampleLoop() -> void; - }; - -} // namespace mrover From 5d86825e4ff06bc250e93d047789492c06c6c1bb Mon Sep 17 00:00:00 2001 From: jbrhm Date: Sun, 29 Sep 2024 11:26:54 -0400 Subject: [PATCH 16/25] Chunky Camera is just cooked --- CMakeLists.txt | 2 +- esw/usb_camera/pch.hpp | 2 ++ esw/usb_camera/usb_camera.cpp | 42 ++++++++++++++++------------------- esw/usb_camera/usb_camera.hpp | 2 +- 4 files changed, 23 insertions(+), 25 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c0ad9825..d51f90d9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -283,7 +283,7 @@ ament_target_dependencies(arm_controller rclcpp) if (Gst_FOUND AND GstApp_FOUND) 
mrover_add_node(usb_camera esw/usb_camera/*.cpp esw/usb_camera/pch.hpp) - target_link_libraries(usb_camera PkgConfig::Gst PkgConfig::GstApp opencv_core opencv_imgcodecs) + target_link_libraries(usb_camera PkgConfig::Gst PkgConfig::GstApp opencv_core opencv_imgcodecs parameter_utils) ament_target_dependencies(usb_camera rclcpp) if (LibUdev_FOUND) diff --git a/esw/usb_camera/pch.hpp b/esw/usb_camera/pch.hpp index 76b73552..da4a1ba0 100644 --- a/esw/usb_camera/pch.hpp +++ b/esw/usb_camera/pch.hpp @@ -28,3 +28,5 @@ #include #include #include + +#include diff --git a/esw/usb_camera/usb_camera.cpp b/esw/usb_camera/usb_camera.cpp index 20d83109..d0a68c5d 100644 --- a/esw/usb_camera/usb_camera.cpp +++ b/esw/usb_camera/usb_camera.cpp @@ -13,29 +13,25 @@ namespace mrover { UsbCamera::UsbCamera() : Node{"usb_camera", rclcpp::NodeOptions{}.use_intra_process_comms(true)} { try { /* Parameters */ - declare_parameter("width", rclcpp::ParameterType::PARAMETER_INTEGER); - declare_parameter("height", rclcpp::ParameterType::PARAMETER_INTEGER); - declare_parameter("framerate", rclcpp::ParameterType::PARAMETER_INTEGER); - declare_parameter("device", rclcpp::ParameterType::PARAMETER_STRING, [] { - rcl_interfaces::msg::ParameterDescriptor d; - d.read_only = true; - d.description = "The device path to the USB camera, e.g. /dev/video0"; - return d; - }()); - declare_parameter("decode_jpeg_from_device", rclcpp::ParameterType::PARAMETER_BOOL, [] { - rcl_interfaces::msg::ParameterDescriptor d; - d.read_only = true; - d.description = "If true, decode JPEG directly from the USB device instead of YUY2. " - "This decreases image quality but increases performance and reduces USB bus load. " - "It also reduces the 'jelly' effect caused by large YUY2 formats."; - return d; - }()); - - mWidth = get_parameter("width").as_int(); - mHeight = get_parameter("height").as_int(); - std::int64_t framerate = get_parameter("framerate").as_int(); - std::string device = get_parameter("device").as_string(); - bool decodeJpegFromDevice = get_parameter("decode_jpeg_from_device").as_bool(); + int framerate{}; + std::string device{}; + std::string imageTopicName{}; + std::string cameraInfoTopicName{}; + double watchdogTimeout{}; + bool decodeJpegFromDevice{}; + + std::vector params{ + {"width", mWidth, 640}, + {"height", mHeight, 480}, + {"framerate", framerate, 30}, + {"device", device, "/dev/video0"}, + {"image_topic", imageTopicName, "/image"}, + {"camera_info_topic", cameraInfoTopicName, "/camera_info"}, + {"watchdog_timeout", watchdogTimeout, 1.0}, + {"decode_jpeg_from_device", decodeJpegFromDevice, false} + }; + + ParameterWrapper::declareParameters(this, params); /* Interfaces */ mImgPub = create_publisher("image", 1); diff --git a/esw/usb_camera/usb_camera.hpp b/esw/usb_camera/usb_camera.hpp index 27212a90..3c9eecff 100644 --- a/esw/usb_camera/usb_camera.hpp +++ b/esw/usb_camera/usb_camera.hpp @@ -9,7 +9,7 @@ namespace mrover { rclcpp::Publisher::SharedPtr mCamInfoPub; rclcpp::Publisher::SharedPtr mImgPub; - std::uint16_t mWidth{}, mHeight{}; + int mWidth{}, mHeight{}; GstElement *mStreamSink{}, *mPipeline{}; GMainLoop* mMainLoop{}; From 8bc512179a4e21c6e53e41d84bb44e0699b35000 Mon Sep 17 00:00:00 2001 From: jbrhm Date: Sun, 29 Sep 2024 12:06:46 -0400 Subject: [PATCH 17/25] Builds --- esw/usb_camera/usb_camera.cpp | 8 +-- .../object_detector/object_detector.hpp | 17 ++++++- .../object_detector.processing.cpp | 50 +++++++++++++++++++ perception/object_detector/pch.hpp | 2 + 4 files changed, 72 insertions(+), 5 deletions(-) diff --git 
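ParameterWrapper is project-internal; for comparison, the same declare-with-default behavior in plain rclcpp looks roughly like this, using the names and defaults the hunk above declares:

#include <rclcpp/rclcpp.hpp>
#include <string>

class UsbCameraParamsDemo : public rclcpp::Node {
public:
    UsbCameraParamsDemo() : Node{"usb_camera_params_demo"} {
        // declare_parameter yields the override if one was passed in, else the default
        int width = declare_parameter<int>("width", 640);
        int height = declare_parameter<int>("height", 480);
        int framerate = declare_parameter<int>("framerate", 30);
        std::string device = declare_parameter<std::string>("device", "/dev/video0");
        RCLCPP_INFO(get_logger(), "%s: %dx%d @ %d fps", device.c_str(), width, height, framerate);
    }
};

The table-driven wrapper buys one thing over this: the name, the default, and the destination variable sit together on one line each, which is harder to get out of sync than separate declare and get calls.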
a/esw/usb_camera/usb_camera.cpp b/esw/usb_camera/usb_camera.cpp index d0a68c5d..1d90298d 100644 --- a/esw/usb_camera/usb_camera.cpp +++ b/esw/usb_camera/usb_camera.cpp @@ -25,8 +25,8 @@ namespace mrover { {"height", mHeight, 480}, {"framerate", framerate, 30}, {"device", device, "/dev/video0"}, - {"image_topic", imageTopicName, "/image"}, - {"camera_info_topic", cameraInfoTopicName, "/camera_info"}, + {"image_topic", imageTopicName, "/usb_camera/image"}, + {"camera_info_topic", cameraInfoTopicName, "/usb_camera/camera_info"}, {"watchdog_timeout", watchdogTimeout, 1.0}, {"decode_jpeg_from_device", decodeJpegFromDevice, false} }; @@ -34,8 +34,8 @@ namespace mrover { ParameterWrapper::declareParameters(this, params); /* Interfaces */ - mImgPub = create_publisher("image", 1); - mCamInfoPub = create_publisher("camera_info", 1); + mImgPub = create_publisher(imageTopicName, 1); + mCamInfoPub = create_publisher(cameraInfoTopicName, 1); /* Pipeline */ gst_init(nullptr, nullptr); diff --git a/perception/object_detector/object_detector.hpp b/perception/object_detector/object_detector.hpp index 08893b24..46a7dcb2 100644 --- a/perception/object_detector/object_detector.hpp +++ b/perception/object_detector/object_detector.hpp @@ -52,7 +52,6 @@ namespace mrover { sensor_msgs::msg::Image mDetectionsImageMessage; rclcpp::Publisher::SharedPtr mDebugImgPub; - rclcpp::Subscription::SharedPtr mSensorSub; int mObjIncrementWeight{}; int mObjDecrementWeight{}; @@ -87,8 +86,24 @@ namespace mrover { public: explicit StereoObjectDetector(); + rclcpp::Subscription::SharedPtr mSensorSub; + static auto convertPointCloudToRGB(sensor_msgs::msg::PointCloud2::UniquePtr const& msg, cv::Mat const& image) -> void; auto pointCloudCallback(sensor_msgs::msg::PointCloud2::UniquePtr const& msg) -> void; }; + + class ImageObjectDetector final : public ObjectDetectorBase { + rclcpp::Publisher::SharedPtr mTargetsPub; + + rclcpp::Subscription::SharedPtr mSensorSub; + + float mCameraHorizontalFov{}; + + explicit ImageObjectDetector(); + + auto getTagBearing(cv::InputArray image, cv::Rect const& box) const -> float; + + auto imageCallback(sensor_msgs::msg::Image::SharedPtr const& msg) -> void; + }; } // namespace mrover diff --git a/perception/object_detector/object_detector.processing.cpp b/perception/object_detector/object_detector.processing.cpp index 29a3b9ba..8f6caef5 100644 --- a/perception/object_detector/object_detector.processing.cpp +++ b/perception/object_detector/object_detector.processing.cpp @@ -177,4 +177,54 @@ namespace mrover { }); } + auto ImageObjectDetector::imageCallback(sensor_msgs::msg::Image::SharedPtr const& msg) -> void { + assert(msg); + assert(msg->height > 0); + assert(msg->width > 0); + assert(msg->encoding == sensor_msgs::image_encodings::BGRA8); + + mLoopProfiler.beginLoop(); + + cv::Mat bgraImage{static_cast(msg->height), static_cast(msg->width), CV_8UC4, const_cast(msg->data.data())}; + cv::cvtColor(bgraImage, mRgbImage, cv::COLOR_BGRA2RGB); + + // Convert the RGB Image into the blob Image format + cv::Mat blobSizedImage; + mModel.rbgImageToBlob(mModel, mRgbImage, blobSizedImage, mImageBlob); + + mLoopProfiler.measureEvent("Conversion"); + + // Run the blob through the model + std::vector detections{}; + cv::Mat outputTensor; + mTensorRT.modelForwardPass(mImageBlob, outputTensor); + + mModel.outputTensorToDetections(mModel, outputTensor, detections); + + mLoopProfiler.measureEvent("Execution"); + + mrover::msg::ImageTargets targets; + for (auto const& [classId, className, confidence, box]: detections) { 
+ mrover::msg::ImageTarget target; + target.name = className; + target.bearing = getTagBearing(blobSizedImage, box); + targets.targets.push_back(target); + } + + mTargetsPub->publish(targets); + + drawDetectionBoxes(blobSizedImage, detections); + publishDetectedObjects(blobSizedImage); + + mLoopProfiler.measureEvent("Publication"); + } + + auto ImageObjectDetector::getTagBearing(cv::InputArray image, cv::Rect const& box) const -> float { + cv::Point2f center = cv::Point2f{box.tl()} + cv::Point2f{box.size()} / 2; + float xNormalized = center.x / static_cast(image.cols()); + float xRecentered = 0.5f - xNormalized; + float bearingDegrees = xRecentered * mCameraHorizontalFov; + return bearingDegrees * std::numbers::pi_v / 180.0f; + } + } // namespace mrover diff --git a/perception/object_detector/pch.hpp b/perception/object_detector/pch.hpp index ea7badcf..d1589dbd 100644 --- a/perception/object_detector/pch.hpp +++ b/perception/object_detector/pch.hpp @@ -45,6 +45,8 @@ #include #include #include +#include +#include #include #include From 054602033a9e050bb5d96fd26160ebf60e2e00e8 Mon Sep 17 00:00:00 2001 From: jbrhm Date: Sun, 29 Sep 2024 15:01:05 -0400 Subject: [PATCH 18/25] bruuhhhhh --- perception/object_detector/object_detector.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/perception/object_detector/object_detector.cpp b/perception/object_detector/object_detector.cpp index 545a0d96..510590e8 100644 --- a/perception/object_detector/object_detector.cpp +++ b/perception/object_detector/object_detector.cpp @@ -49,7 +49,8 @@ namespace mrover { auto main(int argc, char** argv) -> int { rclcpp::init(argc, argv); - rclcpp::spin(std::make_shared()); + //rclcpp::spin(std::make_shared()); + rclcpp::spin(std::make_shared()); rclcpp::shutdown(); return EXIT_SUCCESS; } From 8260544a6988938304a36fbdd98b203125606cc0 Mon Sep 17 00:00:00 2001 From: jbrhm Date: Mon, 30 Sep 2024 08:44:53 -0400 Subject: [PATCH 19/25] Builds --- perception/object_detector/object_detector.cpp | 11 +++++++++++ perception/object_detector/object_detector.hpp | 11 ++++++++--- .../object_detector/object_detector.processing.cpp | 2 +- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/perception/object_detector/object_detector.cpp b/perception/object_detector/object_detector.cpp index 510590e8..35f4ecfd 100644 --- a/perception/object_detector/object_detector.cpp +++ b/perception/object_detector/object_detector.cpp @@ -44,6 +44,17 @@ namespace mrover { StereoObjectDetector::pointCloudCallback(msg); }); } + + ImageObjectDetector::ImageObjectDetector() { + mSensorSub = create_subscription("/usb_camera/image", 1, [this](sensor_msgs::msg::Image::UniquePtr const& msg) { + ImageObjectDetector::imageCallback(msg); + }); + + std::vector params{ + {"long_range_camera/fov", mCameraHorizontalFov, 80.0}}; + + ParameterWrapper::declareParameters(this, params); + } } // namespace mrover diff --git a/perception/object_detector/object_detector.hpp b/perception/object_detector/object_detector.hpp index 46a7dcb2..2a544799 100644 --- a/perception/object_detector/object_detector.hpp +++ b/perception/object_detector/object_detector.hpp @@ -83,27 +83,32 @@ namespace mrover { }; class StereoObjectDetector final : public ObjectDetectorBase { - public: - explicit StereoObjectDetector(); + private: rclcpp::Subscription::SharedPtr mSensorSub; + public: + explicit StereoObjectDetector(); + static auto convertPointCloudToRGB(sensor_msgs::msg::PointCloud2::UniquePtr const& msg, cv::Mat const& image) -> void; auto 
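getTagBearing above maps a detection's horizontal position to an angle off the camera axis; note that despite the bearingDegrees variable name, the returned value is in radians. The arithmetic, worked for the 80 degree default FOV and a 640 pixel wide image:

#include <numbers>

// box center x = 480  ->  xNormalized = 480 / 640 = 0.75
// xRecentered  = 0.5 - 0.75 = -0.25          (negative: right of center)
// bearing      = -0.25 * 80 deg = -20 deg    = about -0.349 rad
constexpr auto bearingRad(float centerX, float imageWidth, float fovDeg) -> float {
    float xRecentered = 0.5f - centerX / imageWidth;
    return xRecentered * fovDeg * std::numbers::pi_v<float> / 180.0f;
}
static_assert(bearingRad(320.0f, 640.0f, 80.0f) == 0.0f); // image center maps to zero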
pointCloudCallback(sensor_msgs::msg::PointCloud2::UniquePtr const& msg) -> void; }; class ImageObjectDetector final : public ObjectDetectorBase { + private: rclcpp::Publisher::SharedPtr mTargetsPub; rclcpp::Subscription::SharedPtr mSensorSub; float mCameraHorizontalFov{}; + + public: explicit ImageObjectDetector(); auto getTagBearing(cv::InputArray image, cv::Rect const& box) const -> float; - auto imageCallback(sensor_msgs::msg::Image::SharedPtr const& msg) -> void; + auto imageCallback(sensor_msgs::msg::Image::UniquePtr const& msg) -> void; }; } // namespace mrover diff --git a/perception/object_detector/object_detector.processing.cpp b/perception/object_detector/object_detector.processing.cpp index 8f6caef5..abef8096 100644 --- a/perception/object_detector/object_detector.processing.cpp +++ b/perception/object_detector/object_detector.processing.cpp @@ -177,7 +177,7 @@ namespace mrover { }); } - auto ImageObjectDetector::imageCallback(sensor_msgs::msg::Image::SharedPtr const& msg) -> void { + auto ImageObjectDetector::imageCallback(sensor_msgs::msg::Image::UniquePtr const& msg) -> void { assert(msg); assert(msg->height > 0); assert(msg->width > 0); From 703a82a23c83d7fb5c11d5194f61726feab488db Mon Sep 17 00:00:00 2001 From: jbrhm Date: Mon, 30 Sep 2024 09:18:52 -0400 Subject: [PATCH 20/25] USB based OD works --- esw/usb_camera/usb_camera.cpp | 2 +- perception/object_detector/object_detector.cpp | 2 ++ perception/object_detector/object_detector.processing.cpp | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/esw/usb_camera/usb_camera.cpp b/esw/usb_camera/usb_camera.cpp index 1d90298d..d338e198 100644 --- a/esw/usb_camera/usb_camera.cpp +++ b/esw/usb_camera/usb_camera.cpp @@ -24,7 +24,7 @@ namespace mrover { {"width", mWidth, 640}, {"height", mHeight, 480}, {"framerate", framerate, 30}, - {"device", device, "/dev/video0"}, + {"device", device, "/dev/video4"}, {"image_topic", imageTopicName, "/usb_camera/image"}, {"camera_info_topic", cameraInfoTopicName, "/usb_camera/camera_info"}, {"watchdog_timeout", watchdogTimeout, 1.0}, diff --git a/perception/object_detector/object_detector.cpp b/perception/object_detector/object_detector.cpp index 35f4ecfd..0e462cdf 100644 --- a/perception/object_detector/object_detector.cpp +++ b/perception/object_detector/object_detector.cpp @@ -54,6 +54,8 @@ namespace mrover { {"long_range_camera/fov", mCameraHorizontalFov, 80.0}}; ParameterWrapper::declareParameters(this, params); + + mTargetsPub = create_publisher("/long_range_camera/objects", 1); } } // namespace mrover diff --git a/perception/object_detector/object_detector.processing.cpp b/perception/object_detector/object_detector.processing.cpp index abef8096..1c9fb1e5 100644 --- a/perception/object_detector/object_detector.processing.cpp +++ b/perception/object_detector/object_detector.processing.cpp @@ -203,7 +203,7 @@ namespace mrover { mLoopProfiler.measureEvent("Execution"); - mrover::msg::ImageTargets targets; + mrover::msg::ImageTargets targets{}; for (auto const& [classId, className, confidence, box]: detections) { mrover::msg::ImageTarget target; target.name = className; From 0886e3bb09ffc120000a349787c6f0e9ed41d71a Mon Sep 17 00:00:00 2001 From: jbrhm Date: Mon, 30 Sep 2024 10:55:44 -0400 Subject: [PATCH 21/25] Segfault --- .../object_detector/object_detector.cpp | 35 ++++++++++++++----- tensorrt/inference.cu | 3 +- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/perception/object_detector/object_detector.cpp b/perception/object_detector/object_detector.cpp 
index 0e462cdf..11798fab 100644 --- a/perception/object_detector/object_detector.cpp +++ b/perception/object_detector/object_detector.cpp @@ -3,11 +3,13 @@ namespace mrover { ObjectDetectorBase::ObjectDetectorBase() : rclcpp::Node(NODE_NAME), mLoopProfiler{get_logger()} { + RCLCPP_INFO_STREAM(get_logger(), "Opening Model"); std::string modelName; float modelScoreThreshold{}; float modelNMSThreshold{}; + RCLCPP_INFO_STREAM(get_logger(), "Opening Model"); std::vector params{ {"camera_frame", mCameraFrame, "zed_left_camera_frame"}, {"world_frame", mWorldFrame, "map"}, @@ -19,51 +21,68 @@ namespace mrover { {"model_score_threshold", modelScoreThreshold, 0.75}, {"model_nms_threshold", modelNMSThreshold, 0.5}}; + RCLCPP_INFO_STREAM(get_logger(), "Opening Model"); ParameterWrapper::declareParameters(this, params); // All of these variables will be invalidated after calling this function std::filesystem::path packagePath = std::filesystem::path{ament_index_cpp::get_package_prefix("mrover")} / ".." / ".." / "src" / "mrover"; + RCLCPP_INFO_STREAM(get_logger(), "Opening Model"); RCLCPP_INFO_STREAM(get_logger(), "Opening Model " << modelName); + RCLCPP_INFO_STREAM(get_logger(), "Opening Model"); RCLCPP_INFO_STREAM(get_logger(), "Found package path " << packagePath); // Initialize TensorRT Inference Object and Get Important Output Information mTensorRT = TensortRT{modelName, packagePath.string()}; + RCLCPP_INFO_STREAM(get_logger(), "Opening Model"); mModel = Model(modelName, {0, 0}, {"bottle", "hammer"}, mTensorRT.getInputTensorSize(), mTensorRT.getOutputTensorSize(), {modelScoreThreshold, modelNMSThreshold}, ObjectDetectorBase::preprocessYOLOv8Input, ObjectDetectorBase::parseYOLOv8Output); - mDebugImgPub = create_publisher("object_detector/debug_img", 1); - + RCLCPP_INFO_STREAM(get_logger(), "Opening Model"); RCLCPP_INFO_STREAM(get_logger(), std::format("Object detector initialized with model: {} and thresholds: {} and {}", mModel.modelName, modelScoreThreshold, modelNMSThreshold)); } StereoObjectDetector::StereoObjectDetector() { + mDebugImgPub = create_publisher("/stereo_object_detector/debug_img", 1); + RCLCPP_INFO_STREAM(get_logger(), "Opening Model"); + mSensorSub = create_subscription("/camera/left/points", 1, [this](sensor_msgs::msg::PointCloud2::UniquePtr const& msg) { StereoObjectDetector::pointCloudCallback(msg); }); + RCLCPP_INFO_STREAM(get_logger(), "Opening Model"); } ImageObjectDetector::ImageObjectDetector() { - mSensorSub = create_subscription("/usb_camera/image", 1, [this](sensor_msgs::msg::Image::UniquePtr const& msg) { - ImageObjectDetector::imageCallback(msg); - }); - std::vector params{ {"long_range_camera/fov", mCameraHorizontalFov, 80.0}}; + RCLCPP_INFO_STREAM(get_logger(), "Opening Model"); ParameterWrapper::declareParameters(this, params); + RCLCPP_INFO_STREAM(get_logger(), "Opening Model"); + mDebugImgPub = create_publisher("/long_range_object_detector/debug_img", 1); + + mSensorSub = create_subscription("/usb_camera/image", 1, [this](sensor_msgs::msg::Image::UniquePtr const& msg) { + ImageObjectDetector::imageCallback(msg); + }); + RCLCPP_INFO_STREAM(get_logger(), "Opening Model"); + mTargetsPub = create_publisher("/long_range_camera/objects", 1); + RCLCPP_INFO_STREAM(get_logger(), "Opening Model"); } } // namespace mrover auto main(int argc, char** argv) -> int { rclcpp::init(argc, argv); - //rclcpp::spin(std::make_shared()); - rclcpp::spin(std::make_shared()); + + rclcpp::executors::SingleThreadedExecutor executor; + //executor.add_node(std::make_shared()); + 
executor.add_node(std::make_shared()); + executor.spin(); + rclcpp::shutdown(); return EXIT_SUCCESS; } diff --git a/tensorrt/inference.cu b/tensorrt/inference.cu index 6b4463d5..f9fb6b5a 100644 --- a/tensorrt/inference.cu +++ b/tensorrt/inference.cu @@ -65,9 +65,10 @@ Inference::Inference(std::string modelName, std::string packagePathString) : mM } auto Inference::createCudaEngine() -> ICudaEngine* { + mLogger.log(ILogger::Severity::kINFO, "Creating engine building tools...") constexpr auto explicitBatch = 1U << static_cast(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); - std::unique_ptr builder{createInferBuilder(mLogger)}; + IBuilder* builder = createInferBuilder(mLogger); if (!builder) throw std::runtime_error("Failed to create Infer Builder"); mLogger.log(ILogger::Severity::kINFO, "Created Infer Builder"); From 5f58d94e8c26ef04bb21b0046bcde2692062bd74 Mon Sep 17 00:00:00 2001 From: jbrhm Date: Mon, 30 Sep 2024 11:21:00 -0400 Subject: [PATCH 22/25] Both OD work at the same tiem --- esw/usb_camera/usb_camera.cpp | 2 +- .../object_detector/object_detector.cpp | 26 +++++++------------ tensorrt/inference.cu | 2 +- tensorrt/inference_wrapper.cpp | 2 +- 4 files changed, 13 insertions(+), 19 deletions(-) diff --git a/esw/usb_camera/usb_camera.cpp b/esw/usb_camera/usb_camera.cpp index d338e198..d05549a0 100644 --- a/esw/usb_camera/usb_camera.cpp +++ b/esw/usb_camera/usb_camera.cpp @@ -24,7 +24,7 @@ namespace mrover { {"width", mWidth, 640}, {"height", mHeight, 480}, {"framerate", framerate, 30}, - {"device", device, "/dev/video4"}, + {"device", device, "/dev/video2"}, {"image_topic", imageTopicName, "/usb_camera/image"}, {"camera_info_topic", cameraInfoTopicName, "/usb_camera/camera_info"}, {"watchdog_timeout", watchdogTimeout, 1.0}, diff --git a/perception/object_detector/object_detector.cpp b/perception/object_detector/object_detector.cpp index 11798fab..985351c8 100644 --- a/perception/object_detector/object_detector.cpp +++ b/perception/object_detector/object_detector.cpp @@ -3,13 +3,10 @@ namespace mrover { ObjectDetectorBase::ObjectDetectorBase() : rclcpp::Node(NODE_NAME), mLoopProfiler{get_logger()} { - RCLCPP_INFO_STREAM(get_logger(), "Opening Model"); - std::string modelName; float modelScoreThreshold{}; float modelNMSThreshold{}; - RCLCPP_INFO_STREAM(get_logger(), "Opening Model"); std::vector params{ {"camera_frame", mCameraFrame, "zed_left_camera_frame"}, {"world_frame", mWorldFrame, "map"}, @@ -21,56 +18,49 @@ namespace mrover { {"model_score_threshold", modelScoreThreshold, 0.75}, {"model_nms_threshold", modelNMSThreshold, 0.5}}; - RCLCPP_INFO_STREAM(get_logger(), "Opening Model"); ParameterWrapper::declareParameters(this, params); // All of these variables will be invalidated after calling this function std::filesystem::path packagePath = std::filesystem::path{ament_index_cpp::get_package_prefix("mrover")} / ".." / ".." 
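Patch 21 above also swaps the builder from std::unique_ptr back to a raw IBuilder*, presumably while chasing the segfault in the subject line; as written, the raw pointer is never freed. For reference, the conventional leak-free shape of that creation step under TensorRT 8+ (where plain delete is the supported teardown) is sketched below; the logger type is a stand-in, not the project's Logger:

#include <NvInfer.h>
#include <cstdio>
#include <memory>
#include <stdexcept>

struct StdoutLogger final : nvinfer1::ILogger {
    void log(Severity severity, char const* msg) noexcept override {
        if (severity <= Severity::kWARNING) std::printf("[TRT] %s\n", msg);
    }
};

auto makeBuilder(StdoutLogger& logger) -> std::unique_ptr<nvinfer1::IBuilder> {
    std::unique_ptr<nvinfer1::IBuilder> builder{nvinfer1::createInferBuilder(logger)};
    if (!builder) throw std::runtime_error{"Failed to create Infer Builder"};
    return builder; // deleted automatically when the owner goes out of scope
}

The caveat that pushes people toward raw pointers here is destruction order: builder, network, parser, and engine must be torn down in reverse creation order, and member declaration order in a wrapper class does not always guarantee that.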
/ "src" / "mrover"; - RCLCPP_INFO_STREAM(get_logger(), "Opening Model"); RCLCPP_INFO_STREAM(get_logger(), "Opening Model " << modelName); - RCLCPP_INFO_STREAM(get_logger(), "Opening Model"); RCLCPP_INFO_STREAM(get_logger(), "Found package path " << packagePath); // Initialize TensorRT Inference Object and Get Important Output Information mTensorRT = TensortRT{modelName, packagePath.string()}; - RCLCPP_INFO_STREAM(get_logger(), "Opening Model"); mModel = Model(modelName, {0, 0}, {"bottle", "hammer"}, mTensorRT.getInputTensorSize(), mTensorRT.getOutputTensorSize(), {modelScoreThreshold, modelNMSThreshold}, ObjectDetectorBase::preprocessYOLOv8Input, ObjectDetectorBase::parseYOLOv8Output); - RCLCPP_INFO_STREAM(get_logger(), "Opening Model"); RCLCPP_INFO_STREAM(get_logger(), std::format("Object detector initialized with model: {} and thresholds: {} and {}", mModel.modelName, modelScoreThreshold, modelNMSThreshold)); } StereoObjectDetector::StereoObjectDetector() { + RCLCPP_INFO_STREAM(get_logger(), "Creating Stereo Object Detector..."); + mDebugImgPub = create_publisher("/stereo_object_detector/debug_img", 1); - RCLCPP_INFO_STREAM(get_logger(), "Opening Model"); mSensorSub = create_subscription("/camera/left/points", 1, [this](sensor_msgs::msg::PointCloud2::UniquePtr const& msg) { StereoObjectDetector::pointCloudCallback(msg); }); - RCLCPP_INFO_STREAM(get_logger(), "Opening Model"); } ImageObjectDetector::ImageObjectDetector() { + RCLCPP_INFO_STREAM(get_logger(), "Creating Image Object Detector..."); + std::vector params{ {"long_range_camera/fov", mCameraHorizontalFov, 80.0}}; - RCLCPP_INFO_STREAM(get_logger(), "Opening Model"); ParameterWrapper::declareParameters(this, params); - RCLCPP_INFO_STREAM(get_logger(), "Opening Model"); mDebugImgPub = create_publisher("/long_range_object_detector/debug_img", 1); mSensorSub = create_subscription("/usb_camera/image", 1, [this](sensor_msgs::msg::Image::UniquePtr const& msg) { ImageObjectDetector::imageCallback(msg); }); - RCLCPP_INFO_STREAM(get_logger(), "Opening Model"); mTargetsPub = create_publisher("/long_range_camera/objects", 1); - RCLCPP_INFO_STREAM(get_logger(), "Opening Model"); } } // namespace mrover @@ -78,9 +68,13 @@ namespace mrover { auto main(int argc, char** argv) -> int { rclcpp::init(argc, argv); + // DO NOT REMOVE OR ELSE REF COUNT WILL GO TO ZERO + auto imgOD = std::make_shared(); + auto stereoOD = std::make_shared(); + rclcpp::executors::SingleThreadedExecutor executor; - //executor.add_node(std::make_shared()); - executor.add_node(std::make_shared()); + executor.add_node(imgOD); + executor.add_node(stereoOD); executor.spin(); rclcpp::shutdown(); diff --git a/tensorrt/inference.cu b/tensorrt/inference.cu index f9fb6b5a..0e288c79 100644 --- a/tensorrt/inference.cu +++ b/tensorrt/inference.cu @@ -65,7 +65,7 @@ Inference::Inference(std::string modelName, std::string packagePathString) : mM } auto Inference::createCudaEngine() -> ICudaEngine* { - mLogger.log(ILogger::Severity::kINFO, "Creating engine building tools...") + mLogger.log(ILogger::Severity::kINFO, "Creating engine building tools..."); constexpr auto explicitBatch = 1U << static_cast(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); IBuilder* builder = createInferBuilder(mLogger); diff --git a/tensorrt/inference_wrapper.cpp b/tensorrt/inference_wrapper.cpp index abd1a897..54f5c87d 100644 --- a/tensorrt/inference_wrapper.cpp +++ b/tensorrt/inference_wrapper.cpp @@ -30,5 +30,5 @@ auto InferenceWrapper::getInputTensorSize() -> std::vector{ } auto 
From ba2da04469224a2273c801e7485e7ded5865a4fb Mon Sep 17 00:00:00 2001
From: jbrhm
Date: Mon, 30 Sep 2024 17:22:10 -0400
Subject: [PATCH 23/25] style

---
 esw/usb_camera/usb_camera.cpp | 37 +++++++++----------
 .../object_detector/object_detector.cpp | 16 ++++----
 .../object_detector/object_detector.hpp | 12 +++---
 .../object_detector.processing.cpp | 6 +--
 perception/object_detector/pch.hpp | 4 +-
 5 files changed, 36 insertions(+), 39 deletions(-)

diff --git a/esw/usb_camera/usb_camera.cpp b/esw/usb_camera/usb_camera.cpp
index d05549a0..46677ba6 100644
--- a/esw/usb_camera/usb_camera.cpp
+++ b/esw/usb_camera/usb_camera.cpp
@@ -13,25 +13,24 @@ namespace mrover {
     UsbCamera::UsbCamera() : Node{"usb_camera", rclcpp::NodeOptions{}.use_intra_process_comms(true)} {
         try {
             /* Parameters */
-                int framerate{};
-                std::string device{};
-                std::string imageTopicName{};
-                std::string cameraInfoTopicName{};
-                double watchdogTimeout{};
-                bool decodeJpegFromDevice{};
-
-                std::vector<ParameterWrapper> params{
-                        {"width", mWidth, 640},
-                        {"height", mHeight, 480},
-                        {"framerate", framerate, 30},
-                        {"device", device, "/dev/video2"},
-                        {"image_topic", imageTopicName, "/usb_camera/image"},
-                        {"camera_info_topic", cameraInfoTopicName, "/usb_camera/camera_info"},
-                        {"watchdog_timeout", watchdogTimeout, 1.0},
-                        {"decode_jpeg_from_device", decodeJpegFromDevice, false}
-                };
-
-                ParameterWrapper::declareParameters(this, params);
+            int framerate{};
+            std::string device{};
+            std::string imageTopicName{};
+            std::string cameraInfoTopicName{};
+            double watchdogTimeout{};
+            bool decodeJpegFromDevice{};
+
+            std::vector<ParameterWrapper> params{
+                    {"width", mWidth, 640},
+                    {"height", mHeight, 480},
+                    {"framerate", framerate, 30},
+                    {"device", device, "/dev/video2"},
+                    {"image_topic", imageTopicName, "/usb_camera/image"},
+                    {"camera_info_topic", cameraInfoTopicName, "/usb_camera/camera_info"},
+                    {"watchdog_timeout", watchdogTimeout, 1.0},
+                    {"decode_jpeg_from_device", decodeJpegFromDevice, false}};
+
+            ParameterWrapper::declareParameters(this, params);
 
             /* Interfaces */
             mImgPub = create_publisher<sensor_msgs::msg::Image>(imageTopicName, 1);
diff --git a/perception/object_detector/object_detector.cpp b/perception/object_detector/object_detector.cpp
index 985351c8..4a8f0895 100644
--- a/perception/object_detector/object_detector.cpp
+++ b/perception/object_detector/object_detector.cpp
@@ -68,14 +68,14 @@ namespace mrover {
 auto main(int argc, char** argv) -> int {
     rclcpp::init(argc, argv);
 
-        // DO NOT REMOVE OR ELSE REF COUNT WILL GO TO ZERO
-        auto imgOD = std::make_shared<mrover::ImageObjectDetector>();
-        auto stereoOD = std::make_shared<mrover::StereoObjectDetector>();
-
-        rclcpp::executors::SingleThreadedExecutor executor;
-        executor.add_node(imgOD);
-        executor.add_node(stereoOD);
-        executor.spin();
+    // DO NOT REMOVE OR ELSE REF COUNT WILL GO TO ZERO
+    auto imgOD = std::make_shared<mrover::ImageObjectDetector>();
+    auto stereoOD = std::make_shared<mrover::StereoObjectDetector>();
+
+    rclcpp::executors::SingleThreadedExecutor executor;
+    executor.add_node(imgOD);
+    executor.add_node(stereoOD);
+    executor.spin();
     rclcpp::shutdown();
 
     return EXIT_SUCCESS;
diff --git a/perception/object_detector/object_detector.hpp b/perception/object_detector/object_detector.hpp
index 2a544799..1e266d74 100644
--- a/perception/object_detector/object_detector.hpp
+++ b/perception/object_detector/object_detector.hpp
@@ -83,8 +83,7 @@ namespace mrover {
     };
 
     class StereoObjectDetector final : public ObjectDetectorBase {
-    private:
-
+    private:
         rclcpp::Subscription<sensor_msgs::msg::PointCloud2>::SharedPtr mSensorSub;
 
     public:
@@ -95,17 +94,16 @@ namespace mrover {
         auto pointCloudCallback(sensor_msgs::msg::PointCloud2::UniquePtr const& msg) -> void;
     };
 
-        class ImageObjectDetector final : public ObjectDetectorBase {
-        private:
+    class ImageObjectDetector final : public ObjectDetectorBase {
+    private:
         rclcpp::Publisher<mrover::msg::ImageTargets>::SharedPtr mTargetsPub;
         rclcpp::Subscription<sensor_msgs::msg::Image>::SharedPtr mSensorSub;
 
         float mCameraHorizontalFov{};
-
-        public:
-            explicit ImageObjectDetector();
+    public:
+        explicit ImageObjectDetector();
 
         auto getTagBearing(cv::InputArray image, cv::Rect const& box) const -> float;
diff --git a/perception/object_detector/object_detector.processing.cpp b/perception/object_detector/object_detector.processing.cpp
index 1c9fb1e5..92a628bf 100644
--- a/perception/object_detector/object_detector.processing.cpp
+++ b/perception/object_detector/object_detector.processing.cpp
@@ -177,7 +177,7 @@ namespace mrover {
         });
     }
 
-        auto ImageObjectDetector::imageCallback(sensor_msgs::msg::Image::UniquePtr const& msg) -> void {
+    auto ImageObjectDetector::imageCallback(sensor_msgs::msg::Image::UniquePtr const& msg) -> void {
         assert(msg);
         assert(msg->height > 0);
         assert(msg->width > 0);
@@ -203,9 +203,9 @@ namespace mrover {
 
         mLoopProfiler.measureEvent("Execution");
 
-            mrover::msg::ImageTargets targets{};
+        mrover::msg::ImageTargets targets{};
         for (auto const& [classId, className, confidence, box]: detections) {
-                mrover::msg::ImageTarget target;
+            mrover::msg::ImageTarget target;
             target.name = className;
             target.bearing = getTagBearing(blobSizedImage, box);
             targets.targets.push_back(target);
diff --git a/perception/object_detector/pch.hpp b/perception/object_detector/pch.hpp
index d1589dbd..1ed375b0 100644
--- a/perception/object_detector/pch.hpp
+++ b/perception/object_detector/pch.hpp
@@ -37,6 +37,8 @@
 // Messages
 #include "sensor_msgs/image_encodings.hpp"
 #include "sensor_msgs/msg/image.hpp"
+#include
+#include
 #include
 #include
 #include
@@ -45,8 +47,6 @@
 #include
 #include
 #include
-#include
-#include
 #include
 #include
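For readers outside this codebase: ParameterWrapper appears to be a small declare-and-bind helper over rclcpp's parameter API, pairing a parameter name with a member variable and a default. A rough plain-rclcpp equivalent of a single {name, variable, default} entry, as a sketch rather than the helper's actual implementation:

    // Inside a node's constructor: declare_parameter registers the name with a
    // default value, get_parameter reads back whatever was set at launch time.
    declare_parameter("device", "/dev/video2");
    std::string device = get_parameter("device").as_string();

    declare_parameter("framerate", 30);
    int framerate = static_cast<int>(get_parameter("framerate").as_int());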
From 2bb7799eea22302f49aeca499d8124996d05c687 Mon Sep 17 00:00:00 2001
From: John <134429827+jbrhm@users.noreply.github.com>
Date: Tue, 1 Oct 2024 14:01:43 -0400
Subject: [PATCH 24/25] Fixed topic names for ZED wrapper and Sim (#19)

* ZED Naming conflicts

* OD in the sim and on ZED

* style

* style
---
 perception/object_detector/object_detector.cpp | 2 +-
 perception/zed_wrapper/zed_wrapper.cpp | 10 +++++-----
 scripts/visualizer.py | 2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/perception/object_detector/object_detector.cpp b/perception/object_detector/object_detector.cpp
index dc7f91e6..c5848f47 100644
--- a/perception/object_detector/object_detector.cpp
+++ b/perception/object_detector/object_detector.cpp
@@ -31,7 +31,7 @@ namespace mrover {
     }
 
     StereoObjectDetector::StereoObjectDetector() {
-        mSensorSub = create_subscription<sensor_msgs::msg::PointCloud2>("/camera/left/points", 1, [this](sensor_msgs::msg::PointCloud2::UniquePtr const& msg) {
+        mSensorSub = create_subscription<sensor_msgs::msg::PointCloud2>("/zed/left/points", 1, [this](sensor_msgs::msg::PointCloud2::UniquePtr const& msg) {
             StereoObjectDetector::pointCloudCallback(msg);
         });
     }
diff --git a/perception/zed_wrapper/zed_wrapper.cpp b/perception/zed_wrapper/zed_wrapper.cpp
index cf35ca6b..3309cbcf 100644
--- a/perception/zed_wrapper/zed_wrapper.cpp
+++ b/perception/zed_wrapper/zed_wrapper.cpp
@@ -18,13 +18,13 @@ namespace mrover {
         RCLCPP_INFO(this->get_logger(), "Created Zed Wrapper Node, %s", NODE_NAME);
 
         // Publishers
-        mRightImgPub = create_publisher<sensor_msgs::msg::Image>("right/image", 1);
-        mLeftImgPub = create_publisher<sensor_msgs::msg::Image>("left/image", 1);
+        mRightImgPub = create_publisher<sensor_msgs::msg::Image>("zed/right/image", 1);
+        mLeftImgPub = create_publisher<sensor_msgs::msg::Image>("zed/left/image", 1);
         mImuPub = create_publisher<sensor_msgs::msg::Imu>("zed_imu/data_raw", 1);
         mMagPub = create_publisher<sensor_msgs::msg::MagneticField>("zed_imu/mag", 1);
-        mPcPub = create_publisher<sensor_msgs::msg::PointCloud2>("camera/left/points", 1);
-        mRightCamInfoPub = create_publisher<sensor_msgs::msg::CameraInfo>("camera/right/camera_info", 1);
-        mLeftCamInfoPub = create_publisher<sensor_msgs::msg::CameraInfo>("camera/left/camera_info", 1);
+        mPcPub = create_publisher<sensor_msgs::msg::PointCloud2>("zed/left/points", 1);
+        mRightCamInfoPub = create_publisher<sensor_msgs::msg::CameraInfo>("zed/right/camera_info", 1);
+        mLeftCamInfoPub = create_publisher<sensor_msgs::msg::CameraInfo>("zed/left/camera_info", 1);
 
         // Declare and set Params
         int imageWidth{};
diff --git a/scripts/visualizer.py b/scripts/visualizer.py
index 5aae7a5d..336f9e67 100755
--- a/scripts/visualizer.py
+++ b/scripts/visualizer.py
@@ -112,7 +112,7 @@ def paintEvent(self, event):
         self.resize(self.renderer.defaultSize())
         self.renderer.render(painter)
 
-    def update(self):
+    def update(self):  # type: ignore[override]
         rclpy.spin_once(self.viz)
         with self.state_machine.mutex:
             if self.graph is None or self.state_machine.needs_redraw:
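One detail worth keeping in mind with renames like the ones in this patch: ROS 2 matches topic names as exact strings after namespace resolution, so a publisher on zed/left/points and a subscriber still on /camera/left/points will simply never connect, and no error is raised. When editing source is undesirable, the same fix can be applied per node with a remap rule; a small sketch, with node and topic names illustrative only:

    #include <rclcpp/rclcpp.hpp>

    // Remap the old topic to the new one at construction time; this mirrors the
    // CLI form: ros2 run <pkg> <exe> --ros-args -r /camera/left/points:=/zed/left/points
    auto makeRemappedNode() -> std::shared_ptr<rclcpp::Node> {
        auto options = rclcpp::NodeOptions{}.arguments(
                {"--ros-args", "-r", "/camera/left/points:=/zed/left/points"});
        return std::make_shared<rclcpp::Node>("stereo_object_detector", options);
    }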
create_publisher("left/image", 1); + mRightImgPub = create_publisher("zed/right/image", 1); + mLeftImgPub = create_publisher("zed/left/image", 1); mImuPub = create_publisher("zed_imu/data_raw", 1); mMagPub = create_publisher("zed_imu/mag", 1); - mPcPub = create_publisher("camera/left/points", 1); - mRightCamInfoPub = create_publisher("camera/right/camera_info", 1); - mLeftCamInfoPub = create_publisher("camera/left/camera_info", 1); + mPcPub = create_publisher("zed/left/points", 1); + mRightCamInfoPub = create_publisher("zed/right/camera_info", 1); + mLeftCamInfoPub = create_publisher("zed/left/camera_info", 1); // Declare and set Params int imageWidth{}; diff --git a/scripts/visualizer.py b/scripts/visualizer.py index 5aae7a5d..336f9e67 100755 --- a/scripts/visualizer.py +++ b/scripts/visualizer.py @@ -112,7 +112,7 @@ def paintEvent(self, event): self.resize(self.renderer.defaultSize()) self.renderer.render(painter) - def update(self): + def update(self): # type: ignore[override] rclpy.spin_once(self.viz) with self.state_machine.mutex: if self.graph is None or self.state_machine.needs_redraw: From 9b3d105a98529c1e189bc66377e125d08d483bf0 Mon Sep 17 00:00:00 2001 From: jbrhm Date: Sat, 5 Oct 2024 21:48:59 -0400 Subject: [PATCH 25/25] fixed build --- perception/object_detector/object_detector.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/perception/object_detector/object_detector.cpp b/perception/object_detector/object_detector.cpp index 53e3821b..8ed72b0d 100644 --- a/perception/object_detector/object_detector.cpp +++ b/perception/object_detector/object_detector.cpp @@ -37,15 +37,11 @@ namespace mrover { } StereoObjectDetector::StereoObjectDetector() { -<<<<<<< HEAD RCLCPP_INFO_STREAM(get_logger(), "Creating Stereo Object Detector..."); mDebugImgPub = create_publisher("/stereo_object_detector/debug_img", 1); - mSensorSub = create_subscription("/camera/left/points", 1, [this](sensor_msgs::msg::PointCloud2::UniquePtr const& msg) { -======= mSensorSub = create_subscription("/zed/left/points", 1, [this](sensor_msgs::msg::PointCloud2::UniquePtr const& msg) { ->>>>>>> main StereoObjectDetector::pointCloudCallback(msg); }); }