From fd1be80a84e1d20428c9480ba7afed604ba1ee7b Mon Sep 17 00:00:00 2001
From: jbrhm
Date: Tue, 26 Nov 2024 10:34:42 -0500
Subject: [PATCH] fixed formatting

---
 tensorrt/inference.cu          | 224 ++++++++++++++++-----------------
 tensorrt/inference_wrapper.cpp |  20 +-
 2 files changed, 122 insertions(+), 122 deletions(-)

diff --git a/tensorrt/inference.cu b/tensorrt/inference.cu
index 0e288c79..eb2673ba 100644
--- a/tensorrt/inference.cu
+++ b/tensorrt/inference.cu
@@ -5,19 +5,19 @@ using namespace nvinfer1;
 #include

 /**
-* cudaMemcpys CPU memory in inputTensor to GPU based on bindings
-* Queues that tensor to be passed through model
-* cudaMemcpys the result back to CPU memory
-* Requires bindings, inputTensor, stream
-* Modifies stream, outputTensor
-*/
+ * cudaMemcpys CPU memory in inputTensor to GPU based on bindings
+ * Queues that tensor to be passed through model
+ * cudaMemcpys the result back to CPU memory
+ * Requires bindings, inputTensor, stream
+ * Modifies stream, outputTensor
+ */
 Inference::Inference(std::string modelName, std::string packagePathString) : mModelName{std::move(modelName)}, mPackagePath{std::move(packagePathString)}{
     //Create ONNX and engine file paths
-    mLogger.log(ILogger::Severity::kINFO, mModelName.c_str());
-    mLogger.log(ILogger::Severity::kINFO, mPackagePath.c_str());
+    mLogger.log(ILogger::Severity::kINFO, mModelName.c_str());
+    mLogger.log(ILogger::Severity::kINFO, mPackagePath.c_str());
     mONNXModelPath = std::filesystem::path{packagePathString} / "data" / std::string(mModelName + ".onnx");
     mEngineModelPath = std::filesystem::path{packagePathString} / "data" / std::string("tensorrt-engine-" + mModelName + ".engine");

@@ -25,29 +25,29 @@ Inference::Inference(std::string modelName, std::string packagePathString) : mM
     std::snprintf(message.data(), message.size(), "Reading from ONNX model at %s and creating TensorRT engine at %s", mONNXModelPath.c_str(), mEngineModelPath.c_str());
     mLogger.log(ILogger::Severity::kINFO, message.data());

-    // Create the engine object from either the file or from onnx file
-    mEngine = std::unique_ptr<ICudaEngine>{createCudaEngine()};
-    if (!mEngine) throw std::runtime_error("Failed to create CUDA engine");
+    // Create the engine object from either the file or from onnx file
+    mEngine = std::unique_ptr<ICudaEngine>{createCudaEngine()};
+    if (!mEngine) throw std::runtime_error("Failed to create CUDA engine");

-    mLogger.log(ILogger::Severity::kINFO, "Created CUDA Engine");
+    mLogger.log(ILogger::Severity::kINFO, "Created CUDA Engine");

-    // Check some assumptions about the model
-    if (mEngine->getNbIOTensors() != 2) throw std::runtime_error("Invalid Binding Count");
+    // Check some assumptions about the model
+    if (mEngine->getNbIOTensors() != 2) throw std::runtime_error("Invalid Binding Count");

     // Store the IO Tensor Names
     mInputTensorName = mEngine->getIOTensorName(0);
     mOutputTensorName = mEngine->getIOTensorName(1);

-    if (mEngine->getTensorIOMode(mInputTensorName.c_str()) != TensorIOMode::kINPUT) throw std::runtime_error("Expected Input Binding 0 Is An Input");
-    if (mEngine->getTensorIOMode(mOutputTensorName.c_str()) != TensorIOMode::kOUTPUT) throw std::runtime_error("Expected Output Binding Input To Be 1");
+    if (mEngine->getTensorIOMode(mInputTensorName.c_str()) != TensorIOMode::kINPUT) throw std::runtime_error("Expected Input Binding 0 Is An Input");
+    if (mEngine->getTensorIOMode(mOutputTensorName.c_str()) != TensorIOMode::kOUTPUT) throw std::runtime_error("Expected Output Binding Input To Be 1");
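
The constructor hard-codes the assumption that the engine exposes exactly two IO tensors, with index 0 as the input and index 1 as the output. For comparison, here is a minimal sketch, using the same TensorRT IO-tensor calls the constructor already relies on, that collects input and output names without that assumption; the helper name and signature are hypothetical and not part of this patch.

    #include <NvInfer.h>

    #include <cstdint>
    #include <string>
    #include <utility>
    #include <vector>

    // Hypothetical helper: split an engine's IO tensors into input and output names
    // instead of assuming binding 0 is the input and binding 1 is the output.
    auto collectIOTensorNames(nvinfer1::ICudaEngine const& engine)
            -> std::pair<std::vector<std::string>, std::vector<std::string>> {
        std::vector<std::string> inputs, outputs;
        for (std::int32_t i = 0; i < engine.getNbIOTensors(); ++i) {
            char const* name = engine.getIOTensorName(i);
            if (engine.getTensorIOMode(name) == nvinfer1::TensorIOMode::kINPUT)
                inputs.emplace_back(name);
            else
                outputs.emplace_back(name);
        }
        return {inputs, outputs};
    }
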

     // Be verbose about the input tensor size
     auto inputTensorSize = getInputTensorSize();
     std::snprintf(message.data(), message.size(), "%s Tensor's Dimensions:", mInputTensorName.c_str());
-    mLogger.log(ILogger::Severity::kINFO, message.data());
+    mLogger.log(ILogger::Severity::kINFO, message.data());
     for(size_t i = 0; i < inputTensorSize.size(); ++i){
-        std::snprintf(message.data(), message.size(), "Dimension: %zu Size: %zu", i, inputTensorSize[i]);
-        mLogger.log(ILogger::Severity::kINFO, message.data());
+        std::snprintf(message.data(), message.size(), "Dimension: %zu Size: %zu", i, inputTensorSize[i]);
+        mLogger.log(ILogger::Severity::kINFO, message.data());
     }

     // Be verbose about the output tensor size
@@ -55,150 +55,150 @@ Inference::Inference(std::string modelName, std::string packagePathString) : mM
     std::snprintf(message.data(), message.size(), "%s Tensor's Dimensions:", mOutputTensorName.c_str());
     mLogger.log(ILogger::Severity::kINFO, message.data());
     for(size_t i = 0; i < outputTensorSize.size(); ++i){
-        std::snprintf(message.data(), message.size(), "Dimension: %zu Size: %zu", i, outputTensorSize[i]);
-        mLogger.log(ILogger::Severity::kINFO, message.data());
+        std::snprintf(message.data(), message.size(), "Dimension: %zu Size: %zu", i, outputTensorSize[i]);
+        mLogger.log(ILogger::Severity::kINFO, message.data());
     }

-    createExecutionContext();
+    createExecutionContext();

-    prepTensors();
+    prepTensors();
 }

 auto Inference::createCudaEngine() -> ICudaEngine* {
     mLogger.log(ILogger::Severity::kINFO, "Creating engine building tools...");
-    constexpr auto explicitBatch = 1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
+    constexpr auto explicitBatch = 1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);

-    IBuilder* builder = createInferBuilder(mLogger);
-    if (!builder) throw std::runtime_error("Failed to create Infer Builder");
-    mLogger.log(ILogger::Severity::kINFO, "Created Infer Builder");
+    IBuilder* builder = createInferBuilder(mLogger);
+    if (!builder) throw std::runtime_error("Failed to create Infer Builder");
+    mLogger.log(ILogger::Severity::kINFO, "Created Infer Builder");

-    std::unique_ptr<INetworkDefinition> network{builder->createNetworkV2(explicitBatch)};
-    if (!network) throw std::runtime_error("Failed to create Network Definition");
-    mLogger.log(ILogger::Severity::kINFO, "Created Network Definition");
+    std::unique_ptr<INetworkDefinition> network{builder->createNetworkV2(explicitBatch)};
+    if (!network) throw std::runtime_error("Failed to create Network Definition");
+    mLogger.log(ILogger::Severity::kINFO, "Created Network Definition");

-    std::unique_ptr<nvonnxparser::IParser> parser{nvonnxparser::createParser(*network, mLogger)};
-    if (!parser) throw std::runtime_error("Failed to create ONNX Parser");
-    mLogger.log(ILogger::Severity::kINFO, "Created ONNX Parser");
+    std::unique_ptr<nvonnxparser::IParser> parser{nvonnxparser::createParser(*network, mLogger)};
+    if (!parser) throw std::runtime_error("Failed to create ONNX Parser");
+    mLogger.log(ILogger::Severity::kINFO, "Created ONNX Parser");

-    std::unique_ptr<IBuilderConfig> config{builder->createBuilderConfig()};
-    if (!config) throw std::runtime_error("Failed to create Builder Config");
-    mLogger.log(ILogger::Severity::kINFO, "Created Builder Config");
+    std::unique_ptr<IBuilderConfig> config{builder->createBuilderConfig()};
+    if (!config) throw std::runtime_error("Failed to create Builder Config");
+    mLogger.log(ILogger::Severity::kINFO, "Created Builder Config");

-    if (!parser->parseFromFile(mONNXModelPath.c_str(), static_cast<int32_t>(ILogger::Severity::kINFO))) {
-        throw std::runtime_error("Failed to parse ONNX file");
-    }
+    if (!parser->parseFromFile(mONNXModelPath.c_str(), static_cast<int32_t>(ILogger::Severity::kINFO))) {
+        throw std::runtime_error("Failed to parse ONNX file");
+    }
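
When parseFromFile fails, the parser records per-error diagnostics that the generic "Failed to parse ONNX file" exception discards. A minimal sketch, reusing the parser, mLogger, and mONNXModelPath names from the function above, of surfacing them before throwing; this is an illustration of the nvonnxparser error API, not a change made by this patch.

    if (!parser->parseFromFile(mONNXModelPath.c_str(), static_cast<int32_t>(ILogger::Severity::kINFO))) {
        // Log each recorded parser error so the failure is actionable.
        for (int32_t e = 0; e < parser->getNbErrors(); ++e) {
            mLogger.log(ILogger::Severity::kERROR, parser->getError(e)->desc());
        }
        throw std::runtime_error("Failed to parse ONNX file");
    }
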

-    IRuntime* runtime = createInferRuntime(mLogger);
+    IRuntime* runtime = createInferRuntime(mLogger);

-    // Define the engine file location relative to the mrover package
-    // Check if engine file exists
-    if (!exists(mEngineModelPath)) {
+    // Define the engine file location relative to the mrover package
+    // Check if engine file exists
+    if (!exists(mEngineModelPath)) {
         std::cout << "Optimizing ONNX model for TensorRT. This may take a long time..." << std::endl;

-        // Create the Engine from onnx file
-        IHostMemory* serializedEngine = builder->buildSerializedNetwork(*network, *config);
-        if (!serializedEngine) throw std::runtime_error("Failed to serialize engine");
+        // Create the Engine from onnx file
+        IHostMemory* serializedEngine = builder->buildSerializedNetwork(*network, *config);
+        if (!serializedEngine) throw std::runtime_error("Failed to serialize engine");

-        // Create temporary engine for serializing
-        ICudaEngine* tempEng = runtime->deserializeCudaEngine(serializedEngine->data(), serializedEngine->size());
-        if (!tempEng) throw std::runtime_error("Failed to create temporary engine");
+        // Create temporary engine for serializing
+        ICudaEngine* tempEng = runtime->deserializeCudaEngine(serializedEngine->data(), serializedEngine->size());
+        if (!tempEng) throw std::runtime_error("Failed to create temporary engine");

-        // Save Engine to File
-        auto trtModelStream = tempEng->serialize();
-        std::ofstream outputFileStream{mEngineModelPath, std::ios::binary};
-        outputFileStream.write(static_cast<char const*>(trtModelStream->data()), static_cast<std::streamsize>(trtModelStream->size()));
-        outputFileStream.close();
+        // Save Engine to File
+        auto trtModelStream = tempEng->serialize();
+        std::ofstream outputFileStream{mEngineModelPath, std::ios::binary};
+        outputFileStream.write(static_cast<char const*>(trtModelStream->data()), static_cast<std::streamsize>(trtModelStream->size()));
+        outputFileStream.close();

-        return tempEng;
-    }
+        return tempEng;
+    }

-    // Load engine from file
-    std::ifstream inputFileStream{mEngineModelPath, std::ios::binary};
-    std::stringstream engineBuffer;
+    // Load engine from file
+    std::ifstream inputFileStream{mEngineModelPath, std::ios::binary};
+    std::stringstream engineBuffer;

-    // Stream in the engine file to the buffer
-    engineBuffer << inputFileStream.rdbuf();
-    std::string enginePlan = engineBuffer.str();
-    // Deserialize the Cuda Engine file from the buffer
-    return runtime->deserializeCudaEngine(enginePlan.data(), enginePlan.size());
+    // Stream in the engine file to the buffer
+    engineBuffer << inputFileStream.rdbuf();
+    std::string enginePlan = engineBuffer.str();
+    // Deserialize the Cuda Engine file from the buffer
+    return runtime->deserializeCudaEngine(enginePlan.data(), enginePlan.size());
 }
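
The IBuilderConfig passed to buildSerializedNetwork above is left at its defaults. Purely for illustration (none of this is enabled by the patch, and availability depends on the TensorRT version and target GPU), a sketch of the kind of options typically set on the config before building:

    #include <NvInfer.h>

    // Hypothetical example: request FP16 kernels and cap the builder's scratch
    // workspace at 1 GiB before buildSerializedNetwork() is called.
    void configureBuilderConfig(nvinfer1::IBuilderConfig& config) {
        config.setFlag(nvinfer1::BuilderFlag::kFP16);
        config.setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, 1ULL << 30);
    }
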

 auto Inference::createExecutionContext() -> void {
-    // Create Execution Context
-    mContext.reset(mEngine->createExecutionContext());
-    if (!mContext) throw std::runtime_error("Failed to create execution context");
+    // Create Execution Context
+    mContext.reset(mEngine->createExecutionContext());
+    if (!mContext) throw std::runtime_error("Failed to create execution context");

-    // Set up the input tensor sizing
-    mContext->setInputShape(mInputTensorName.c_str(), mEngine->getTensorShape(mInputTensorName.c_str()));
+    // Set up the input tensor sizing
+    mContext->setInputShape(mInputTensorName.c_str(), mEngine->getTensorShape(mInputTensorName.c_str()));
 }

 auto Inference::doDetections(cv::Mat const& img) const -> void {
-    // Do the forward pass on the network
-    launchInference(img, mOutputTensor);
+    // Do the forward pass on the network
+    launchInference(img, mOutputTensor);
 }

 auto Inference::getOutputTensor() -> cv::Mat {
-    return mOutputTensor;
+    return mOutputTensor;
 }

 auto Inference::launchInference(cv::Mat const& input, cv::Mat const& output) const -> void {
-    //Assert these items have been initialized
-    assert(!input.empty());
-    assert(!output.empty());
-    assert(input.isContinuous());
-    assert(output.isContinuous());
-    assert(mContext);
+    //Assert these items have been initialized
+    assert(!input.empty());
+    assert(!output.empty());
+    assert(input.isContinuous());
+    assert(output.isContinuous());
+    assert(mContext);

-    // Get the binding id for the input tensor
-    int inputId = getBindingInputIndex(mContext.get());
+    // Get the binding id for the input tensor
+    int inputId = getBindingInputIndex(mContext.get());

-    // Memcpy the input tensor from the host to the gpu
-    cudaMemcpy(mBindings[inputId], input.data, input.total() * input.elemSize(), cudaMemcpyHostToDevice);
+    // Memcpy the input tensor from the host to the gpu
+    cudaMemcpy(mBindings[inputId], input.data, input.total() * input.elemSize(), cudaMemcpyHostToDevice);

-    // Execute the model on the gpu
-    mContext->executeV2(mBindings.data());
+    // Execute the model on the gpu
+    mContext->executeV2(mBindings.data());

-    // Memcpy the output tensor from the gpu to the host
-    cudaMemcpy(output.data, mBindings[1 - inputId], output.total() * output.elemSize(), cudaMemcpyDeviceToHost);
+    // Memcpy the output tensor from the gpu to the host
+    cudaMemcpy(output.data, mBindings[1 - inputId], output.total() * output.elemSize(), cudaMemcpyDeviceToHost);
 }
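
The comment block at the top of the file mentions a CUDA stream, but launchInference above copies and executes synchronously with cudaMemcpy and executeV2. A minimal sketch of the stream-based variant, assuming TensorRT's setTensorAddress/enqueueV3 API and device buffers like the ones this class allocates; the function name and parameters are hypothetical, and this is not the method used in the patch.

    #include <NvInfer.h>
    #include <cuda_runtime.h>
    #include <opencv2/core.hpp>

    // Hypothetical asynchronous variant: the two copies and the inference are queued
    // on one stream, and only the final synchronize blocks the CPU.
    void launchInferenceAsync(nvinfer1::IExecutionContext& context,
                              char const* inputName, char const* outputName,
                              void* inputDevice, void* outputDevice,
                              cv::Mat const& input, cv::Mat const& output,
                              cudaStream_t stream) {
        cudaMemcpyAsync(inputDevice, input.data, input.total() * input.elemSize(),
                        cudaMemcpyHostToDevice, stream);
        context.setTensorAddress(inputName, inputDevice);
        context.setTensorAddress(outputName, outputDevice);
        context.enqueueV3(stream);
        cudaMemcpyAsync(output.data, outputDevice, output.total() * output.elemSize(),
                        cudaMemcpyDeviceToHost, stream);
        cudaStreamSynchronize(stream);
    }
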

 auto Inference::prepTensors() -> void {
-    // Assign the properties to the input and output tensors
-    for (int i = 0; i < mEngine->getNbIOTensors(); i++) {
-        char const* tensorName = mEngine->getIOTensorName(i);
-        auto [rank, extents] = mEngine->getTensorShape(tensorName);
-
-        // Multiply sizeof(float) by the product of the extents
-        // This is essentially: element count * size of each element
-        std::size_t size = 1;
+    // Assign the properties to the input and output tensors
+    for (int i = 0; i < mEngine->getNbIOTensors(); i++) {
+        char const* tensorName = mEngine->getIOTensorName(i);
+        auto [rank, extents] = mEngine->getTensorShape(tensorName);
+
+        // Multiply sizeof(float) by the product of the extents
+        // This is essentially: element count * size of each element
+        std::size_t size = 1;
         for(int32_t i = 0; i < rank; ++i){
             size *= extents[i];
         }

         // Create GPU memory for TensorRT to operate on
-        if (cudaError_t result = cudaMalloc(mBindings.data() + i, size * sizeof(float)); result != cudaSuccess)
-            throw std::runtime_error{"Failed to allocate GPU memory: " + std::string{cudaGetErrorString(result)}};
-    }
+        if (cudaError_t result = cudaMalloc(mBindings.data() + i, size * sizeof(float)); result != cudaSuccess)
+            throw std::runtime_error{"Failed to allocate GPU memory: " + std::string{cudaGetErrorString(result)}};
+    }

-    assert(mContext);
-    // Create an appropriately sized output tensor
+    assert(mContext);
+    // Create an appropriately sized output tensor
     // TODO: Fix this
-    auto const [nbDims, d] = mEngine->getTensorShape(mOutputTensorName.c_str());
-    for (int i = 0; i < nbDims; i++) {
-        std::array message;
-        std::snprintf(message.data(), message.size(), "Size %d %d", i, d[i]);
-        mLogger.log(ILogger::Severity::kINFO, message.data());
-    }
-
-    // Create the mat wrapper around the output matrix for ease of use
-    assert(nbDims == 3);
-    assert(d[0] == 1);
-    mOutputTensor = cv::Mat::zeros(d[1], d[2], CV_32FC1);
+    auto const [nbDims, d] = mEngine->getTensorShape(mOutputTensorName.c_str());
+    for (int i = 0; i < nbDims; i++) {
+        std::array message;
+        std::snprintf(message.data(), message.size(), "Size %d %d", i, d[i]);
+        mLogger.log(ILogger::Severity::kINFO, message.data());
+    }
+
+    // Create the mat wrapper around the output matrix for ease of use
+    assert(nbDims == 3);
+    assert(d[0] == 1);
+    mOutputTensor = cv::Mat::zeros(d[1], d[2], CV_32FC1);
 }

 auto Inference::getBindingInputIndex(IExecutionContext const* context) -> int {
-    // Returns the id for the input tensor
-    return context->getEngine().getTensorIOMode(context->getEngine().getIOTensorName(0)) != TensorIOMode::kINPUT; // 0 (false) if bindingIsInput(0), 1 (true) otherwise
+    // Returns the id for the input tensor
+    return context->getEngine().getTensorIOMode(context->getEngine().getIOTensorName(0)) != TensorIOMode::kINPUT; // 0 (false) if bindingIsInput(0), 1 (true) otherwise
 }
diff --git a/tensorrt/inference_wrapper.cpp b/tensorrt/inference_wrapper.cpp
index 54f5c87d..e8356f7a 100644
--- a/tensorrt/inference_wrapper.cpp
+++ b/tensorrt/inference_wrapper.cpp
@@ -5,24 +5,24 @@ using namespace nvinfer1;

 /**
-* cudaMemcpys CPU memory in inputTensor to GPU based on bindings
-* Queues that tensor to be passed through model
-* cudaMemcpys the result back to CPU memory
-* Requires bindings, inputTensor, stream
-* Modifies stream, outputTensor
-*/
+ * cudaMemcpys CPU memory in inputTensor to GPU based on bindings
+ * Queues that tensor to be passed through model
+ * cudaMemcpys the result back to CPU memory
+ * Requires bindings, inputTensor, stream
+ * Modifies stream, outputTensor
+ */
 InferenceWrapper::InferenceWrapper(std::string const& modelName, std::string const& packagePath) {
-    mInference = std::make_shared<Inference>(modelName, packagePath);
+    mInference = std::make_shared<Inference>(modelName, packagePath);
 }

 auto InferenceWrapper::doDetections(cv::Mat const& img) const -> void {
-    // Execute the forward pass on the inference object
-    mInference->doDetections(img);
+    // Execute the forward pass on the inference object
+    mInference->doDetections(img);
 }

 auto InferenceWrapper::getOutputTensor() const -> cv::Mat {
-    return mInference->getOutputTensor();
+    return mInference->getOutputTensor();
 }

 auto InferenceWrapper::getInputTensorSize() -> std::vector<size_t>{
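
Taken together, the wrapper exposes a small surface: construct it with a model name and package path, feed a preprocessed blob to doDetections, and read the result back with getOutputTensor. A minimal usage sketch of that calling pattern; the header name, model name, package path, and blob dimensions are placeholders, and the preprocessing is assumed to already match the engine's input tensor shape.

    #include <opencv2/core.hpp>

    #include "inference_wrapper.hpp" // header name assumed

    int main() {
        // Placeholder model name and package path; real values depend on the deployment.
        InferenceWrapper inference{"yolov8", "/path/to/package"};

        // Placeholder input blob; it must already be laid out as the engine expects
        // (getInputTensorSize() reports the per-dimension sizes logged by the constructor).
        cv::Mat blob = cv::Mat::zeros(640, 640, CV_32FC3);

        inference.doDetections(blob);
        cv::Mat detections = inference.getOutputTensor();
        return 0;
    }
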