diff --git a/perception/object_detector/object_detector.cpp b/perception/object_detector/object_detector.cpp index c5848f47..9260337e 100644 --- a/perception/object_detector/object_detector.cpp +++ b/perception/object_detector/object_detector.cpp @@ -4,6 +4,10 @@ namespace mrover { ObjectDetectorBase::ObjectDetectorBase() : rclcpp::Node(NODE_NAME), mLoopProfiler{get_logger()} { + std::string modelName; + float modelScoreThreshold{}; + float modelNMSThreshold{}; + std::vector params{ {"camera_frame", mCameraFrame, "zed_left_camera_frame"}, {"world_frame", mWorldFrame, "map"}, @@ -11,23 +15,28 @@ namespace mrover { {"decrement_weight", mObjDecrementWeight, 1}, {"hitcount_threshold", mObjHitThreshold, 5}, {"hitcount_max", mObjMaxHitcount, 10}, - {"model_name", mModelName, "Large-Dataset"}, - {"model_score_threshold", mModelScoreThreshold, 0.75}, - {"model_nms_threshold", mModelNmsThreshold, 0.5}}; + {"model_name", modelName, "Large-Dataset"}, + {"model_score_threshold", modelScoreThreshold, 0.75}, + {"model_nms_threshold", modelNMSThreshold, 0.5}}; ParameterWrapper::declareParameters(this, params); + // All of these variables will be invalidated after calling this function + std::filesystem::path packagePath = std::filesystem::path{ament_index_cpp::get_package_prefix("mrover")} / ".." / ".." / "src" / "mrover"; - RCLCPP_INFO_STREAM(get_logger(), "Opening Model " << mModelName); + RCLCPP_INFO_STREAM(get_logger(), "Opening Model " << modelName); RCLCPP_INFO_STREAM(get_logger(), "Found package path " << packagePath); - mTensorRT = TensortRT{mModelName, packagePath.string()}; + // Initialize TensorRT Inference Object and Get Important Output Information + mTensorRT = TensortRT{modelName, packagePath.string()}; + + mModel = Model(modelName, {0, 0}, {"bottle", "hammer"}, mTensorRT.getInputTensorSize(), mTensorRT.getOutputTensorSize(), {modelScoreThreshold, modelNMSThreshold}, ObjectDetectorBase::preprocessYOLOv8Input, ObjectDetectorBase::parseYOLOv8Output); mDebugImgPub = create_publisher("object_detector/debug_img", 1); - RCLCPP_INFO_STREAM(get_logger(), std::format("Object detector initialized with model: {} and thresholds: {} and {}", mModelName, mModelScoreThreshold, mModelNmsThreshold)); + RCLCPP_INFO_STREAM(get_logger(), std::format("Object detector initialized with model: {} and thresholds: {} and {}", mModel.modelName, modelScoreThreshold, modelNMSThreshold)); } StereoObjectDetector::StereoObjectDetector() { diff --git a/perception/object_detector/object_detector.hpp b/perception/object_detector/object_detector.hpp index 28ebe502..08893b24 100644 --- a/perception/object_detector/object_detector.hpp +++ b/perception/object_detector/object_detector.hpp @@ -7,36 +7,57 @@ namespace mrover { class ObjectDetectorBase : public rclcpp::Node { protected: + struct Model { + std::string modelName; + + std::vector objectHitCounts; + + std::vector classes; + + std::vector inputTensorSize; + + std::vector outputTensorSize; + + // Additional space for params that one algorithm might need and not others + std::vector buffer; + + // Converts an rgb image to a blob + std::function rbgImageToBlob; + + // Converts an output tensor into a vector of detections + std::function&)> outputTensorToDetections; + + Model() = default; + + Model(std::string _modelName, std::vector _objectHitCounts, std::vector _classes, std::vector _inputTensorSize, std::vector _outputTensorSize, std::vector _buffer, std::function _rbgImageToBlob, std::function&)> _outputTensorToDetections) : modelName{std::move(_modelName)}, objectHitCounts(std::move(_objectHitCounts)), classes(std::move(_classes)), inputTensorSize(std::move(_inputTensorSize)), outputTensorSize(std::move(_outputTensorSize)), buffer(std::move(_buffer)), rbgImageToBlob{std::move(_rbgImageToBlob)}, outputTensorToDetections{std::move(_outputTensorToDetections)} {} + }; + static constexpr char const* NODE_NAME = "object_detector"; std::unique_ptr mTfBuffer = std::make_unique(get_clock()); std::shared_ptr mTfBroadcaster = std::make_shared(this); std::shared_ptr mTfListener = std::make_shared(*mTfBuffer); + Model mModel; + std::string mCameraFrame; std::string mWorldFrame; - std::string mModelName; + cv::Mat mRgbImage, mImageBlob; LoopProfiler mLoopProfiler; TensortRT mTensorRT; - cv::Mat mRgbImage, mImageBlob; sensor_msgs::msg::Image mDetectionsImageMessage; rclcpp::Publisher::SharedPtr mDebugImgPub; rclcpp::Subscription::SharedPtr mSensorSub; - // TODO(quintin): Do not hard code exactly two classes - std::vector mObjectHitCounts{0, 0}; - int mObjIncrementWeight{}; int mObjDecrementWeight{}; int mObjHitThreshold{}; int mObjMaxHitcount{}; - float mModelScoreThreshold{}; - float mModelNmsThreshold{}; auto spiralSearchForValidPoint(sensor_msgs::msg::PointCloud2::UniquePtr const& cloudPtr, std::size_t u, std::size_t v, @@ -50,6 +71,12 @@ namespace mrover { static auto drawDetectionBoxes(cv::InputOutputArray image, std::span detections) -> void; + auto static parseYOLOv8Output(Model const& model, + cv::Mat& output, + std::vector& detections) -> void; + + auto static preprocessYOLOv8Input(Model const& model, cv::Mat& rgbImage, cv::Mat& blobSizedImage, cv::Mat& blob) -> void; + public: explicit ObjectDetectorBase(); diff --git a/perception/object_detector/object_detector.parsing.cpp b/perception/object_detector/object_detector.parsing.cpp new file mode 100644 index 00000000..dc7d7167 --- /dev/null +++ b/perception/object_detector/object_detector.parsing.cpp @@ -0,0 +1,99 @@ +#include "object_detector.hpp" +namespace mrover { + auto ObjectDetectorBase::preprocessYOLOv8Input(Model const& model, cv::Mat& rgbImage, cv::Mat& blobSizedImage, cv::Mat& blob) -> void { + if (model.inputTensorSize.size() != 4) { + throw std::runtime_error("Expected Blob Size to be of size 4, are you using the correct model type?"); + } + + if (model.buffer.size() != 2) { + throw std::runtime_error("Expected 2 additional parameters!"); + } + + static constexpr double UCHAR_TO_DOUBLE = 1.0 / 255.0; + + cv::Size blobSize{static_cast(model.inputTensorSize[2]), static_cast(model.inputTensorSize[3])}; + cv::resize(rgbImage, blobSizedImage, blobSize); + cv::dnn::blobFromImage(blobSizedImage, blob, UCHAR_TO_DOUBLE, blobSize, cv::Scalar{}, false, false); + } + + auto ObjectDetectorBase::parseYOLOv8Output(Model const& model, cv::Mat& output, std::vector& detections) -> void { + // Parse model specific dimensioning from the output + + // The input to this function is expecting a YOLOv8 style model, thus the dimensions should be > rows + if (output.cols <= output.rows) { + std::array message{}; + snprintf(message.data(), message.size(), "Something is wrong model interpretation, %d cols and %d rows", output.cols, output.rows); + throw std::runtime_error(message.data()); + } + + int rows = output.cols; + int dimensions = output.rows; + + // The output of the model is a batchSizex84x8400 matrix + // This converts the model to a TODO: Check this dimensioning + output = output.reshape(1, dimensions); + cv::transpose(output, output); + + // This function expects the image to already be in the correct format thus no distrotion is needed + float const xFactor = 1.0; + float const yFactor = 1.0; + + // Intermediate Storage Containers + std::vector classIds; + std::vector confidences; + std::vector boxes; + + // Each row of the output is a detection with a bounding box and associated class scores + for (int r = 0; r < rows; ++r) { + // Skip first four values as they are the box data + cv::Mat scores = output.row(r).colRange(4, dimensions); + + cv::Point classId; + double maxClassScore; + cv::minMaxLoc(scores, nullptr, &maxClassScore, nullptr, &classId); + + // Determine if the class is an acceptable confidence level, otherwise disregard + if (maxClassScore <= model.buffer[0]) continue; + + confidences.push_back(static_cast(maxClassScore)); + classIds.push_back(classId.x); + + // Get the bounding box data + cv::Mat box = output.row(r).colRange(0, 4); + auto x = box.at(0); + auto y = box.at(1); + auto w = box.at(2); + auto h = box.at(3); + + // Cast the corners into integers to be used on pixels + auto left = static_cast((x - 0.5 * w) * xFactor); + auto top = static_cast((y - 0.5 * h) * yFactor); + auto width = static_cast(w * xFactor); + auto height = static_cast(h * yFactor); + + // Push abck the box into storage + boxes.emplace_back(left, top, width, height); + } + + //Coalesce the boxes into a smaller number of distinct boxes + std::vector nmsResult; + cv::dnn::NMSBoxes(boxes, confidences, model.buffer[0], model.buffer[1], nmsResult); + + // Fill in the output Detections Vector + for (int i: nmsResult) { + Detection result; + + //Fill in the id and confidence for the class + result.classId = classIds[i]; + result.confidence = confidences[i]; + + //Fill in the class name and box information + result.className = model.classes[result.classId]; + result.box = boxes[i]; + + //Push back the detection into the for storagevector + detections.push_back(result); + } + } + +}; // namespace mrover diff --git a/perception/object_detector/object_detector.processing.cpp b/perception/object_detector/object_detector.processing.cpp index 4be823b4..29a3b9ba 100644 --- a/perception/object_detector/object_detector.processing.cpp +++ b/perception/object_detector/object_detector.processing.cpp @@ -14,19 +14,26 @@ namespace mrover { RCLCPP_INFO_STREAM(get_logger(), std::format("Image size changed from [{}, {}] to [{}, {}]", mRgbImage.cols, mRgbImage.rows, msg->width, msg->height)); mRgbImage = cv::Mat{static_cast(msg->height), static_cast(msg->width), CV_8UC3, cv::Scalar{0, 0, 0, 0}}; } + + // if 0x0 image + if (!mRgbImage.total()) { + return; + } + convertPointCloudToRGB(msg, mRgbImage); - // TODO(quintin): Avoid hard coding blob size - cv::Size blobSize{640, 640}; + // Convert the RGB Image into the blob Image format cv::Mat blobSizedImage; - cv::resize(mRgbImage, blobSizedImage, blobSize); - cv::dnn::blobFromImage(blobSizedImage, mImageBlob, 1.0 / 255.0, blobSize, cv::Scalar{}, false, false); + mModel.rbgImageToBlob(mModel, mRgbImage, blobSizedImage, mImageBlob); mLoopProfiler.measureEvent("Conversion"); // Run the blob through the model std::vector detections{}; - mTensorRT.modelForwardPass(mImageBlob, detections, mModelScoreThreshold, mModelNmsThreshold); + cv::Mat outputTensor; + mTensorRT.modelForwardPass(mImageBlob, outputTensor); + + mModel.outputTensorToDetections(mModel, outputTensor, detections); mLoopProfiler.measureEvent("Execution"); @@ -66,10 +73,10 @@ namespace mrover { // Push the immediate detections to the camera frame SE3Conversions::pushToTfTree(*mTfBroadcaster, objectImmediateFrame, mCameraFrame, objectInCamera.value(), get_clock()->now()); // Since the object is seen we need to increment the hit counter - mObjectHitCounts[classId] = std::min(mObjMaxHitcount, mObjectHitCounts[classId] + mObjIncrementWeight); + mModel.objectHitCounts[classId] = std::min(mObjMaxHitcount, mModel.objectHitCounts[classId] + mObjIncrementWeight); // Only publish to permament if we are confident in the object - if (mObjectHitCounts[classId] > mObjHitThreshold) { + if (mModel.objectHitCounts[classId] > mObjHitThreshold) { std::string objectPermanentFrame = className; // Grab the object inside of the camera frame and push it into the map frame SE3d objectInMap = SE3Conversions::fromTfTree(*mTfBuffer, objectImmediateFrame, mWorldFrame); @@ -90,7 +97,7 @@ namespace mrover { if (seenObjects[i]) continue; assert(i < mObjectHitCounts.size()); - mObjectHitCounts[i] = std::max(0, mObjectHitCounts[i] - mObjDecrementWeight); + mModel.objectHitCounts[i] = std::max(0, mModel.objectHitCounts[i] - mObjDecrementWeight); } } diff --git a/perception/object_detector/pch.hpp b/perception/object_detector/pch.hpp index 7ab5af34..ea7badcf 100644 --- a/perception/object_detector/pch.hpp +++ b/perception/object_detector/pch.hpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/tensorrt/inference.cu b/tensorrt/inference.cu index 17bda4c3..6b4463d5 100644 --- a/tensorrt/inference.cu +++ b/tensorrt/inference.cu @@ -11,29 +11,60 @@ using namespace nvinfer1; * Requires bindings, inputTensor, stream * Modifies stream, outputTensor */ -constexpr static auto INPUT_BINDING_NAME = "images"; -constexpr static auto OUTPUT_BINDING_NAME = "output0"; -Inference::Inference(std::filesystem::path const& onnxModelPath, std::string const& modelName, std::string packagePathString) : mPackagePath{std::move(packagePathString)} { - mModelPath = onnxModelPath.string(); +Inference::Inference(std::string modelName, std::string packagePathString) : mModelName{std::move(modelName)}, mPackagePath{std::move(packagePathString)}{ + + //Create ONNX and engine file paths + + mLogger.log(ILogger::Severity::kINFO, mModelName.c_str()); + mLogger.log(ILogger::Severity::kINFO, mPackagePath.c_str()); + mONNXModelPath = std::filesystem::path{packagePathString} / "data" / std::string(mModelName + ".onnx"); + mEngineModelPath = std::filesystem::path{packagePathString} / "data" / std::string("tensorrt-engine-" + mModelName + ".engine");; + + std::array message{}; + std::snprintf(message.data(), message.size(), "Reading from ONNX model at %s and creating TensorRT engine at %s", mONNXModelPath.c_str(), mEngineModelPath.c_str()); + mLogger.log(ILogger::Severity::kINFO, message.data()); // Create the engine object from either the file or from onnx file - mEngine = std::unique_ptr{createCudaEngine(onnxModelPath, modelName)}; + mEngine = std::unique_ptr{createCudaEngine()}; if (!mEngine) throw std::runtime_error("Failed to create CUDA engine"); mLogger.log(ILogger::Severity::kINFO, "Created CUDA Engine"); // Check some assumptions about the model if (mEngine->getNbIOTensors() != 2) throw std::runtime_error("Invalid Binding Count"); - if (mEngine->getTensorIOMode(INPUT_BINDING_NAME) != TensorIOMode::kINPUT) throw std::runtime_error("Expected Input Binding 0 Is An Input"); - if (mEngine->getTensorIOMode(OUTPUT_BINDING_NAME) != TensorIOMode::kOUTPUT) throw std::runtime_error("Expected Input Binding Input To Be 1"); + + // Store the IO Tensor Names + mInputTensorName = mEngine->getIOTensorName(0); + mOutputTensorName = mEngine->getIOTensorName(1); + + if (mEngine->getTensorIOMode(mInputTensorName.c_str()) != TensorIOMode::kINPUT) throw std::runtime_error("Expected Input Binding 0 Is An Input"); + if (mEngine->getTensorIOMode(mOutputTensorName.c_str()) != TensorIOMode::kOUTPUT) throw std::runtime_error("Expected Output Binding Input To Be 1"); + + // Be verbose about the input tensor size + auto inputTensorSize = getInputTensorSize(); + std::snprintf(message.data(), message.size(), "%s Tensor's Dimensions:", mInputTensorName.c_str()); + mLogger.log(ILogger::Severity::kINFO, message.data()); + for(size_t i = 0; i < inputTensorSize.size(); ++i){ + std::snprintf(message.data(), message.size(), "Dimension: %zu Size: %zu", i, inputTensorSize[i]); + mLogger.log(ILogger::Severity::kINFO, message.data()); + } + + // Be verbose about the input tensor size + auto outputTensorSize = getInputTensorSize(); + std::snprintf(message.data(), message.size(), "%s Tensor's Dimensions:", mOutputTensorName.c_str()); + mLogger.log(ILogger::Severity::kINFO, message.data()); + for(size_t i = 0; i < outputTensorSize.size(); ++i){ + std::snprintf(message.data(), message.size(), "Dimension: %zu Size: %zu", i, outputTensorSize[i]); + mLogger.log(ILogger::Severity::kINFO, message.data()); + } createExecutionContext(); prepTensors(); } -auto Inference::createCudaEngine(std::filesystem::path const& onnxModelPath, std::string const& modelName) -> ICudaEngine* { +auto Inference::createCudaEngine() -> ICudaEngine* { constexpr auto explicitBatch = 1U << static_cast(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); std::unique_ptr builder{createInferBuilder(mLogger)}; @@ -52,17 +83,15 @@ auto Inference::createCudaEngine(std::filesystem::path const& onnxModelPath, std if (!config) throw std::runtime_error("Failed to create Builder Config"); mLogger.log(ILogger::Severity::kINFO, "Created Builder Config"); - if (!parser->parseFromFile(onnxModelPath.c_str(), static_cast(ILogger::Severity::kINFO))) { + if (!parser->parseFromFile(mONNXModelPath.c_str(), static_cast(ILogger::Severity::kINFO))) { throw std::runtime_error("Failed to parse ONNX file"); } IRuntime* runtime = createInferRuntime(mLogger); // Define the engine file location relative to the mrover package - std::filesystem::path packagePath{mPackagePath}; - std::filesystem::path enginePath = packagePath / "data" / std::string("tensorrt-engine-").append(modelName).append(".engine"); // Check if engine file exists - if (!exists(enginePath)) { + if (!exists(mEngineModelPath)) { std::cout << "Optimizing ONXX model for TensorRT. This make take a long time..." << std::endl; // Create the Engine from onnx file @@ -75,15 +104,15 @@ auto Inference::createCudaEngine(std::filesystem::path const& onnxModelPath, std // Save Engine to File auto trtModelStream = tempEng->serialize(); - std::ofstream outputFileStream{enginePath, std::ios::binary}; - outputFileStream.write(static_cast(trtModelStream->data()), trtModelStream->size()); + std::ofstream outputFileStream{mEngineModelPath, std::ios::binary}; + outputFileStream.write(static_cast(trtModelStream->data()), static_cast(trtModelStream->size())); outputFileStream.close(); return tempEng; } // Load engine from file - std::ifstream inputFileStream{enginePath, std::ios::binary}; + std::ifstream inputFileStream{mEngineModelPath, std::ios::binary}; std::stringstream engineBuffer; // Stream in the engine file to the buffer @@ -99,7 +128,7 @@ auto Inference::createExecutionContext() -> void { if (!mContext) throw std::runtime_error("Failed to create execution context"); // Set up the input tensor sizing - mContext->setInputShape(INPUT_BINDING_NAME, mEngine->getTensorShape(INPUT_BINDING_NAME)); + mContext->setInputShape(mInputTensorName.c_str(), mEngine->getTensorShape(mInputTensorName.c_str())); } auto Inference::doDetections(cv::Mat const& img) const -> void { @@ -144,7 +173,6 @@ auto Inference::prepTensors() -> void { for(int32_t i = 0; i < rank; ++i){ size *= extents[i]; } - std::cout << tensorName << " is getting allocated to size " << size << std::endl; // Create GPU memory for TensorRT to operate on if (cudaError_t result = cudaMalloc(mBindings.data() + i, size * sizeof(float)); result != cudaSuccess) @@ -153,7 +181,8 @@ auto Inference::prepTensors() -> void { assert(mContext); // Create an appropriately sized output tensor - auto const [nbDims, d] = mEngine->getTensorShape(OUTPUT_BINDING_NAME); + // TODO: Fix this + auto const [nbDims, d] = mEngine->getTensorShape(mOutputTensorName.c_str()); for (int i = 0; i < nbDims; i++) { std::array message; std::snprintf(message.data(), message.size(), "Size %d %d", i, d[i]); @@ -170,3 +199,28 @@ auto Inference::getBindingInputIndex(IExecutionContext const* context) -> int { // Returns the id for the input tensor return context->getEngine().getTensorIOMode(context->getEngine().getIOTensorName(0)) != TensorIOMode::kINPUT; // 0 (false) if bindingIsInput(0), 1 (true) otherwise } + + +auto Inference::getInputTensorSize() -> std::vector{ + auto dims = mEngine->getTensorShape(mInputTensorName.c_str()); + std::vector inputBlobSize; + inputBlobSize.reserve(dims.nbDims); + + for(int32_t i = 0; i < dims.nbDims; ++i){ + inputBlobSize.push_back(dims.d[i]); + } + + return inputBlobSize; +} + +auto Inference::getOutputTensorSize() -> std::vector{ + auto dims = mEngine->getTensorShape(mOutputTensorName.c_str()); + std::vector inputBlobSize; + inputBlobSize.reserve(dims.nbDims); + + for(int32_t i = 0; i < dims.nbDims; ++i){ + inputBlobSize.push_back(dims.d[i]); + } + + return inputBlobSize; +} diff --git a/tensorrt/inference.cuh b/tensorrt/inference.cuh index d0b65469..0df14ef0 100644 --- a/tensorrt/inference.cuh +++ b/tensorrt/inference.cuh @@ -11,9 +11,12 @@ using nvinfer1::IExecutionContext; class Inference { - std::string mModelPath; + std::string mModelName; std::string mPackagePath; + std::filesystem::path mONNXModelPath; + std::filesystem::path mEngineModelPath; + nvinfer1::Logger mLogger; std::unique_ptr mEngine{}; @@ -21,7 +24,9 @@ class Inference { std::unique_ptr mContext{}; cv::Mat mInputTensor; + std::string mInputTensorName; cv::Mat mOutputTensor; + std::string mOutputTensorName; std::array mBindings{}; @@ -35,7 +40,7 @@ class Inference { * * Uses the file path to locate the onnx model and the modelName to locate the engine file */ - auto createCudaEngine(std::filesystem::path const& onnxModelPath, std::string const& modelName) -> ICudaEngine*; + auto createCudaEngine() -> ICudaEngine*; /** * @brief Performs the Model forward pass and allocates the reults in the output tensor @@ -61,7 +66,7 @@ class Inference { auto createExecutionContext() -> void; public: - explicit Inference(std::filesystem::path const& onnxModelPath, std::string const& modelName, std::string packagePathString); + explicit Inference(std::string modelName, std::string packagePathString); /** * @brief Runs the forward pass on the given input image in CNN format @@ -72,4 +77,14 @@ public: * @brief Retrieves the mat where the output from the forward pass was stored */ auto getOutputTensor() -> cv::Mat; + + /** + * @brief Retrives the expected input tensor size + */ + auto getInputTensorSize() -> std::vector; + + /** + * @brief Retrives the expected input tensor size + */ + auto getOutputTensorSize() -> std::vector; }; diff --git a/tensorrt/inference_wrapper.cpp b/tensorrt/inference_wrapper.cpp index d5849095..abd1a897 100644 --- a/tensorrt/inference_wrapper.cpp +++ b/tensorrt/inference_wrapper.cpp @@ -12,8 +12,8 @@ using namespace nvinfer1; * Modifies stream, outputTensor */ -InferenceWrapper::InferenceWrapper(std::string onnxModelPath, std::string const& modelName, std::string const& packagePath) { - mInference.reset(new Inference(std::move(onnxModelPath), modelName, packagePath)); +InferenceWrapper::InferenceWrapper(std::string const& modelName, std::string const& packagePath) { + mInference = std::make_shared(modelName, packagePath); } auto InferenceWrapper::doDetections(cv::Mat const& img) const -> void { @@ -24,3 +24,11 @@ auto InferenceWrapper::doDetections(cv::Mat const& img) const -> void { auto InferenceWrapper::getOutputTensor() const -> cv::Mat { return mInference->getOutputTensor(); } + +auto InferenceWrapper::getInputTensorSize() -> std::vector{ + return mInference->getInputTensorSize(); +} + +auto InferenceWrapper::getOutputTensorSize() -> std::vector{ + return mInference->getInputTensorSize(); +} diff --git a/tensorrt/inference_wrapper.hpp b/tensorrt/inference_wrapper.hpp index f9d83b99..d6e15a36 100644 --- a/tensorrt/inference_wrapper.hpp +++ b/tensorrt/inference_wrapper.hpp @@ -12,10 +12,14 @@ class InferenceWrapper { ~InferenceWrapper() = default; - explicit InferenceWrapper(std::string onnxModelPath, std::string const& modelName, std::string const& packagePath); + explicit InferenceWrapper(std::string const& modelName, std::string const& packagePath); auto doDetections(cv::Mat const& img) const -> void; // Retrieve the output tensor from the previous forward pass [[nodiscard]] auto getOutputTensor() const -> cv::Mat; + + [[nodiscard]] auto getInputTensorSize() -> std::vector; + + [[nodiscard]] auto getOutputTensorSize() -> std::vector; }; diff --git a/tensorrt/tensorrt.cpp b/tensorrt/tensorrt.cpp index 287b3f2a..2c4dc3ff 100644 --- a/tensorrt/tensorrt.cpp +++ b/tensorrt/tensorrt.cpp @@ -5,96 +5,22 @@ using namespace std; TensortRT::TensortRT() = default; TensortRT::TensortRT(string modelName, string packagePathString) : mModelName{std::move(modelName)} { - std::filesystem::path packagePath{std::move(packagePathString)}; - std::filesystem::path modelFileName = mModelName.append(".onnx"); - std::filesystem::path modelPath = packagePath / "data" / modelFileName; - mInferenceWrapper = InferenceWrapper{modelPath, mModelName, packagePath}; + mInferenceWrapper = InferenceWrapper{mModelName, packagePath}; } TensortRT::~TensortRT() = default; -auto TensortRT::modelForwardPass(cv::Mat const& blob, std::vector& detections, float modelScoreThreshold, float modelNMSThreshold) const -> void { - mInferenceWrapper.doDetections(blob); - cv::Mat output = mInferenceWrapper.getOutputTensor(); - parseModelOutput(output, detections, modelScoreThreshold, modelNMSThreshold); +auto TensortRT::modelForwardPass(cv::Mat const& inputBlobTensor, cv::Mat& outputTensor) const -> void { + mInferenceWrapper.doDetections(inputBlobTensor); + outputTensor = mInferenceWrapper.getOutputTensor(); } -auto TensortRT::parseModelOutput(cv::Mat& output, std::vector& detections, float modelScoreThreshold, float modelNMSThreshold) const -> void { - // Parse model specific dimensioning from the output - - // The input to this function is expecting a YOLOv8 style model, thus the dimensions should be > rows - if (output.cols <= output.rows) { - throw std::runtime_error("Something is wrong Model with interpretation"); - } - - int rows = output.cols; - int dimensions = output.rows; - - // The output of the model is a batchSizex84x8400 matrix - // This converts the model to a TODO: Check this dimensioning - output = output.reshape(1, dimensions); - cv::transpose(output, output); - - // This function expects the image to already be in the correct format thus no distrotion is needed - float const xFactor = 1.0; - float const yFactor = 1.0; - - // Intermediate Storage Containers - std::vector classIds; - std::vector confidences; - std::vector boxes; - - // Each row of the output is a detection with a bounding box and associated class scores - for (int r = 0; r < rows; ++r) { - // Skip first four values as they are the box data - cv::Mat scores = output.row(r).colRange(4, dimensions); - - cv::Point classId; - double maxClassScore; - cv::minMaxLoc(scores, nullptr, &maxClassScore, nullptr, &classId); - - // Determine if the class is an acceptable confidence level, otherwise disregard - if (maxClassScore <= modelScoreThreshold) continue; - - confidences.push_back(static_cast(maxClassScore)); - classIds.push_back(classId.x); - - // Get the bounding box data - cv::Mat box = output.row(r).colRange(0, 4); - auto x = box.at(0); - auto y = box.at(1); - auto w = box.at(2); - auto h = box.at(3); - - // Cast the corners into integers to be used on pixels - auto left = static_cast((x - 0.5 * w) * xFactor); - auto top = static_cast((y - 0.5 * h) * yFactor); - auto width = static_cast(w * xFactor); - auto height = static_cast(h * yFactor); - - // Push abck the box into storage - boxes.emplace_back(left, top, width, height); - } - - //Coalesce the boxes into a smaller number of distinct boxes - std::vector nmsResult; - cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nmsResult); - - // Fill in the output Detections Vector - for (int i: nmsResult) { - Detection result; - - //Fill in the id and confidence for the class - result.classId = classIds[i]; - result.confidence = confidences[i]; - - //Fill in the class name and box information - result.className = classes[result.classId]; - result.box = boxes[i]; +auto TensortRT::getInputTensorSize() -> std::vector{ + return mInferenceWrapper.getInputTensorSize(); +} - //Push back the detection into the for storagevector - detections.push_back(result); - } +auto TensortRT::getOutputTensorSize() -> std::vector{ + return mInferenceWrapper.getOutputTensorSize(); } diff --git a/tensorrt/tensorrt.hpp b/tensorrt/tensorrt.hpp index 6d695636..dd45596b 100644 --- a/tensorrt/tensorrt.hpp +++ b/tensorrt/tensorrt.hpp @@ -12,15 +12,8 @@ struct Detection { class TensortRT { std::string mModelName; - std::vector classes{"bottle", "hammer"}; - InferenceWrapper mInferenceWrapper; - auto parseModelOutput(cv::Mat& output, - std::vector& detections, - float modelScoreThreshold = 0.75, - float modelNMSThreshold = 0.5) const -> void; - public: TensortRT(); @@ -28,8 +21,9 @@ class TensortRT { ~TensortRT(); - auto modelForwardPass(cv::Mat const& blob, - std::vector& detections, - float modelScoreThreshold = 0.75, - float modelNMSThreshold = 0.5) const -> void; + auto modelForwardPass(cv::Mat const& inputBlobTensor, cv::Mat& outputTensor) const -> void; + + [[nodiscard]] auto getInputTensorSize() -> std::vector; + + [[nodiscard]] auto getOutputTensorSize() -> std::vector; };