
Commit

fixed formatting
jbrhm committed Nov 26, 2024
1 parent f575c3a commit fd1be80
Showing 2 changed files with 122 additions and 122 deletions.
224 changes: 112 additions & 112 deletions tensorrt/inference.cu
@@ -5,200 +5,200 @@ using namespace nvinfer1;
#include <NvOnnxParser.h>

/**
 * cudaMemcpys CPU memory in inputTensor to GPU based on bindings
 * Queues that tensor to be passed through model
 * cudaMemcpys the result back to CPU memory
 * Requires bindings, inputTensor, stream
 * Modifies stream, outputTensor
 */

Inference::Inference(std::string modelName, std::string packagePathString) : mModelName{std::move(modelName)}, mPackagePath{std::move(packagePathString)} {

    // Create the ONNX and engine file paths
    mLogger.log(ILogger::Severity::kINFO, mModelName.c_str());
    mLogger.log(ILogger::Severity::kINFO, mPackagePath.c_str());
    mONNXModelPath = std::filesystem::path{packagePathString} / "data" / std::string(mModelName + ".onnx");
    mEngineModelPath = std::filesystem::path{packagePathString} / "data" / std::string("tensorrt-engine-" + mModelName + ".engine");

    std::array<char, 150> message{};
    std::snprintf(message.data(), message.size(), "Reading from ONNX model at %s and creating TensorRT engine at %s", mONNXModelPath.c_str(), mEngineModelPath.c_str());
    mLogger.log(ILogger::Severity::kINFO, message.data());

    // Create the engine object, either from the cached engine file or from the ONNX file
    mEngine = std::unique_ptr<ICudaEngine>{createCudaEngine()};
    if (!mEngine) throw std::runtime_error("Failed to create CUDA engine");

    mLogger.log(ILogger::Severity::kINFO, "Created CUDA Engine");

    // Check some assumptions about the model
    if (mEngine->getNbIOTensors() != 2) throw std::runtime_error("Invalid binding count: expected exactly one input and one output tensor");

    // Store the IO tensor names
    mInputTensorName = mEngine->getIOTensorName(0);
    mOutputTensorName = mEngine->getIOTensorName(1);

    if (mEngine->getTensorIOMode(mInputTensorName.c_str()) != TensorIOMode::kINPUT) throw std::runtime_error("Expected binding 0 to be an input");
    if (mEngine->getTensorIOMode(mOutputTensorName.c_str()) != TensorIOMode::kOUTPUT) throw std::runtime_error("Expected binding 1 to be an output");

    // Be verbose about the input tensor size
    auto inputTensorSize = getInputTensorSize();
    std::snprintf(message.data(), message.size(), "%s Tensor's Dimensions:", mInputTensorName.c_str());
    mLogger.log(ILogger::Severity::kINFO, message.data());
    for (size_t i = 0; i < inputTensorSize.size(); ++i) {
        std::snprintf(message.data(), message.size(), "Dimension: %zu Size: %zu", i, inputTensorSize[i]);
        mLogger.log(ILogger::Severity::kINFO, message.data());
    }

    // Be verbose about the output tensor size
    auto outputTensorSize = getInputTensorSize();
    std::snprintf(message.data(), message.size(), "%s Tensor's Dimensions:", mOutputTensorName.c_str());
    mLogger.log(ILogger::Severity::kINFO, message.data());
    for (size_t i = 0; i < outputTensorSize.size(); ++i) {
        std::snprintf(message.data(), message.size(), "Dimension: %zu Size: %zu", i, outputTensorSize[i]);
        mLogger.log(ILogger::Severity::kINFO, message.data());
    }

    createExecutionContext();

    prepTensors();
}
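
For orientation, a minimal usage sketch of this class follows; the model name, package path, header name, and 640x640 blob size are illustrative assumptions, not values taken from this commit.

// Hypothetical usage sketch; model name, package path, header, and input size are assumptions.
#include <cstdio>
#include <opencv2/dnn.hpp>
#include <opencv2/imgcodecs.hpp>

#include "inference.cuh" // assumed header declaring the Inference class

int main() {
    // First run builds the engine from <package>/data/yolov8.onnx; later runs load the cached .engine file
    Inference inference{"yolov8", "/home/user/ws/src/mrover"};

    // Pack a frame into the NCHW float blob the network is assumed to expect
    cv::Mat frame = cv::imread("frame.png");
    cv::Mat blob = cv::dnn::blobFromImage(frame, 1.0 / 255.0, cv::Size{640, 640}, cv::Scalar{}, true, false);

    inference.doDetections(blob);                     // forward pass on the GPU
    cv::Mat detections = inference.getOutputTensor(); // raw output tensor as a cv::Mat
    std::printf("output tensor: %d x %d\n", detections.rows, detections.cols);
    return 0;
}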

auto Inference::createCudaEngine() -> ICudaEngine* {
    mLogger.log(ILogger::Severity::kINFO, "Creating engine building tools...");
    constexpr auto explicitBatch = 1U << static_cast<std::uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);

    IBuilder* builder = createInferBuilder(mLogger);
    if (!builder) throw std::runtime_error("Failed to create Infer Builder");
    mLogger.log(ILogger::Severity::kINFO, "Created Infer Builder");

    std::unique_ptr<INetworkDefinition> network{builder->createNetworkV2(explicitBatch)};
    if (!network) throw std::runtime_error("Failed to create Network Definition");
    mLogger.log(ILogger::Severity::kINFO, "Created Network Definition");

    std::unique_ptr<nvonnxparser::IParser> parser{nvonnxparser::createParser(*network, mLogger)};
    if (!parser) throw std::runtime_error("Failed to create ONNX Parser");
    mLogger.log(ILogger::Severity::kINFO, "Created ONNX Parser");

    std::unique_ptr<IBuilderConfig> config{builder->createBuilderConfig()};
    if (!config) throw std::runtime_error("Failed to create Builder Config");
    mLogger.log(ILogger::Severity::kINFO, "Created Builder Config");

    if (!parser->parseFromFile(mONNXModelPath.c_str(), static_cast<int>(ILogger::Severity::kINFO))) {
        throw std::runtime_error("Failed to parse ONNX file");
    }

    IRuntime* runtime = createInferRuntime(mLogger);

    // The engine file lives in the mrover package's data directory; check whether it already exists
    if (!exists(mEngineModelPath)) {
        std::cout << "Optimizing ONNX model for TensorRT. This may take a long time..." << std::endl;

        // Build the engine from the ONNX network
        IHostMemory* serializedEngine = builder->buildSerializedNetwork(*network, *config);
        if (!serializedEngine) throw std::runtime_error("Failed to serialize engine");

        // Create a temporary engine for serializing
        ICudaEngine* tempEng = runtime->deserializeCudaEngine(serializedEngine->data(), serializedEngine->size());
        if (!tempEng) throw std::runtime_error("Failed to create temporary engine");

        // Save the engine to a file so future runs can skip the build
        auto trtModelStream = tempEng->serialize();
        std::ofstream outputFileStream{mEngineModelPath, std::ios::binary};
        outputFileStream.write(static_cast<char const*>(trtModelStream->data()), static_cast<int32_t>(trtModelStream->size()));
        outputFileStream.close();

        return tempEng;
    }

    // Load the engine from the cached file
    std::ifstream inputFileStream{mEngineModelPath, std::ios::binary};
    std::stringstream engineBuffer;

    // Stream the engine file into the buffer
    engineBuffer << inputFileStream.rdbuf();
    std::string enginePlan = engineBuffer.str();
    // Deserialize the CUDA engine from the buffer
    return runtime->deserializeCudaEngine(enginePlan.data(), enginePlan.size());
}
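
Note that the IBuilderConfig above is passed to buildSerializedNetwork with default settings. As a hedged aside, here is a sketch of options that could be set on it before building, assuming a TensorRT 8.4+ API; none of this appears in the commit.

// Sketch only (not in this commit): optional build settings, assuming TensorRT 8.4+.
config->setMemoryPoolLimit(MemoryPoolType::kWORKSPACE, 1ULL << 30); // cap builder workspace at 1 GiB
if (builder->platformHasFastFp16()) {
    config->setFlag(BuilderFlag::kFP16); // allow FP16 kernels where the platform supports them
}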

auto Inference::createExecutionContext() -> void {
    // Create the execution context
    mContext.reset(mEngine->createExecutionContext());
    if (!mContext) throw std::runtime_error("Failed to create execution context");

    // Set up the input tensor sizing
    mContext->setInputShape(mInputTensorName.c_str(), mEngine->getTensorShape(mInputTensorName.c_str()));
}

auto Inference::doDetections(cv::Mat const& img) const -> void {
    // Do the forward pass on the network
    launchInference(img, mOutputTensor);
}

auto Inference::getOutputTensor() -> cv::Mat {
    return mOutputTensor;
}

auto Inference::launchInference(cv::Mat const& input, cv::Mat const& output) const -> void {
    // Assert these items have been initialized
    assert(!input.empty());
    assert(!output.empty());
    assert(input.isContinuous());
    assert(output.isContinuous());
    assert(mContext);

    // Get the binding id for the input tensor
    int inputId = getBindingInputIndex(mContext.get());

    // Memcpy the input tensor from the host to the GPU
    cudaMemcpy(mBindings[inputId], input.data, input.total() * input.elemSize(), cudaMemcpyHostToDevice);

    // Execute the model on the GPU
    mContext->executeV2(mBindings.data());

    // Memcpy the output tensor from the GPU back to the host
    cudaMemcpy(output.data, mBindings[1 - inputId], output.total() * output.elemSize(), cudaMemcpyDeviceToHost);
}
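
The comment at the top of this file mentions a stream, but launchInference above is fully synchronous (blocking cudaMemcpy plus executeV2). For comparison, here is a hedged sketch of a stream-based variant using the tensor-address API, assuming TensorRT 8.5 or newer; this hypothetical launchInferenceAsync member is not part of this commit.

// Sketch only: an asynchronous variant using a CUDA stream, assuming TensorRT 8.5+.
auto Inference::launchInferenceAsync(cv::Mat const& input, cv::Mat const& output, cudaStream_t stream) const -> void {
    int inputId = getBindingInputIndex(mContext.get());

    // Copy the input to the GPU without blocking the host
    cudaMemcpyAsync(mBindings[inputId], input.data, input.total() * input.elemSize(), cudaMemcpyHostToDevice, stream);

    // Tell the context where each tensor lives, then enqueue the inference on the stream
    mContext->setTensorAddress(mInputTensorName.c_str(), mBindings[inputId]);
    mContext->setTensorAddress(mOutputTensorName.c_str(), mBindings[1 - inputId]);
    mContext->enqueueV3(stream);

    // Copy the result back and wait for the whole pipeline to finish
    cudaMemcpyAsync(output.data, mBindings[1 - inputId], output.total() * output.elemSize(), cudaMemcpyDeviceToHost, stream);
    cudaStreamSynchronize(stream);
}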

auto Inference::prepTensors() -> void {
    // Assign the properties to the input and output tensors
    for (int i = 0; i < mEngine->getNbIOTensors(); i++) {
        char const* tensorName = mEngine->getIOTensorName(i);
        auto [rank, extents] = mEngine->getTensorShape(tensorName);

        // Multiply sizeof(float) by the product of the extents
        // This is essentially: element count * size of each element
        std::size_t size = 1;
        for (int32_t dim = 0; dim < rank; ++dim) {
            size *= extents[dim];
        }

        // Allocate GPU memory for TensorRT to operate on
        if (cudaError_t result = cudaMalloc(mBindings.data() + i, size * sizeof(float)); result != cudaSuccess)
            throw std::runtime_error{"Failed to allocate GPU memory: " + std::string{cudaGetErrorString(result)}};
    }

    assert(mContext);
    // Create an appropriately sized output tensor
    // TODO: Fix this
    auto const [nbDims, d] = mEngine->getTensorShape(mOutputTensorName.c_str());
    for (int i = 0; i < nbDims; i++) {
        std::array<char, 512> message;
        std::snprintf(message.data(), message.size(), "Size %d %d", i, d[i]);
        mLogger.log(ILogger::Severity::kINFO, message.data());
    }

    // Create the cv::Mat wrapper around the output tensor for ease of use
    assert(nbDims == 3);
    assert(d[0] == 1);
    mOutputTensor = cv::Mat::zeros(d[1], d[2], CV_32FC1);
}
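
To make the arithmetic above concrete: for a hypothetical output shape of 1 x 84 x 8400, the extents multiply out to 705,600 elements, so the cudaMalloc above would reserve 705,600 * sizeof(float) = 2,822,400 bytes (roughly 2.7 MiB). A standalone sketch of the same computation:

// Worked example of the allocation-size computation, using a hypothetical 1x84x8400 shape.
#include <cstddef>
#include <cstdio>

int main() {
    int const extents[] = {1, 84, 8400}; // stand-in for Dims::d
    std::size_t size = 1;
    for (int dim = 0; dim < 3; ++dim) size *= extents[dim];
    std::printf("%zu elements -> %zu bytes\n", size, size * sizeof(float)); // prints: 705600 elements -> 2822400 bytes
    return 0;
}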

auto Inference::getBindingInputIndex(IExecutionContext const* context) -> int {
    // Returns the id for the input tensor
    return context->getEngine().getTensorIOMode(context->getEngine().getIOTensorName(0)) != TensorIOMode::kINPUT; // 0 (false) if bindingIsInput(0), 1 (true) otherwise
}


20 changes: 10 additions & 10 deletions tensorrt/inference_wrapper.cpp

@@ -5,24 +5,24 @@
using namespace nvinfer1;

/**
 * cudaMemcpys CPU memory in inputTensor to GPU based on bindings
 * Queues that tensor to be passed through model
 * cudaMemcpys the result back to CPU memory
 * Requires bindings, inputTensor, stream
 * Modifies stream, outputTensor
 */

InferenceWrapper::InferenceWrapper(std::string const& modelName, std::string const& packagePath) {
    mInference = std::make_shared<Inference>(modelName, packagePath);
}

auto InferenceWrapper::doDetections(cv::Mat const& img) const -> void {
    // Execute the forward pass on the inference object
    mInference->doDetections(img);
}

auto InferenceWrapper::getOutputTensor() const -> cv::Mat {
    return mInference->getOutputTensor();
}
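
Because the wrapper only stores a std::shared_ptr<Inference>, copies of it are assumed to be cheap and to share a single engine and execution context; a hedged sketch follows (this relies on the compiler-generated copy operations, which the diff does not show).

// Sketch only: wrapper copies are assumed to share one Inference instance.
#include <opencv2/core.hpp>

#include "inference_wrapper.hpp" // assumed header name

void runSharedExample(cv::Mat const& blob) {
    InferenceWrapper wrapper{"yolov8", "/home/user/ws/src/mrover"}; // hypothetical model/package
    InferenceWrapper alias = wrapper;        // copies the shared_ptr, not the engine
    alias.doDetections(blob);                // runs on the same engine/context as wrapper
    cv::Mat detections = wrapper.getOutputTensor();
    (void) detections;
}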

auto InferenceWrapper::getInputTensorSize() -> std::vector<int64_t>{
