Add zero copy optimization on inputs for the CPU device
sspintel committed Aug 24, 2023
1 parent fb51fae · commit 8175bb0
Showing 1 changed file with 10 additions and 2 deletions.
onnxruntime/core/providers/openvino/backends/basic_backend.cc (12 changes: 10 additions & 2 deletions)
@@ -197,15 +197,23 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
   auto tensor_info = tensor.GetTensorTypeAndShapeInfo();
   auto tensor_shape = tensor_info.GetShape();
   auto tensor_size = tensor_shape.size();
+  const char* tensor_data = tensor.GetTensorData<char>();
   auto tensor_iter = 0;
   ov::Shape input_tensor_shape = ov::Shape(tensor_size, 0);
   for (auto i = tensor_shape.begin(); i != tensor_shape.end(); ++i) {
     input_tensor_shape[tensor_iter] = *i;
     tensor_iter += 1;
   }
   auto input = ie_cnn_network_->get_parameters().at(input_idx);
-  OVTensorPtr tensor_ptr = std::make_shared<ov::Tensor>(input->get_element_type(), input_tensor_shape);
-  FillInputBlob(tensor_ptr, batch_slice_idx, input_name, context, subgraph_context_);
+  OVTensorPtr tensor_ptr;
+  // avoid input copies on the CPU device
+  if (global_context_.device_type.find("CPU") != std::string::npos) {
+    tensor_ptr = std::make_shared<ov::Tensor>(input->get_element_type(), input_tensor_shape, (void*)tensor_data);
+  } else {
+    tensor_ptr = std::make_shared<ov::Tensor>(input->get_element_type(), input_tensor_shape);
+    FillInputBlob(tensor_ptr, batch_slice_idx, input_name, context, subgraph_context_);
+  }
+
   try {
     infer_request->SetTensor(input_name, tensor_ptr);
   } catch (const char* msg) {
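For context, a minimal standalone sketch of the zero-copy idea this commit applies, assuming the OpenVINO 2.0 C++ API (ov::Tensor); the buffer, shape, and element type below are illustrative, not taken from the commit. Constructing an ov::Tensor with a host pointer wraps existing memory instead of allocating fresh memory and copying into it:

// A minimal sketch, not part of the commit: wrap vs. copy with ov::Tensor.
#include <openvino/openvino.hpp>

#include <cstring>
#include <vector>

int main() {
  // Stand-in for an input buffer that ONNX Runtime already owns.
  std::vector<float> ort_buffer(1 * 3 * 224 * 224, 0.5f);
  ov::Shape shape{1, 3, 224, 224};

  // Copying path (previous behavior for all devices): the tensor owns
  // freshly allocated memory and the input data is copied into it.
  ov::Tensor copied(ov::element::f32, shape);
  std::memcpy(copied.data(), ort_buffer.data(), copied.get_byte_size());

  // Zero-copy path (new CPU-only behavior): the tensor wraps the existing
  // buffer. It does not own the memory, so ort_buffer must outlive any
  // inference that reads this tensor.
  ov::Tensor wrapped(ov::element::f32, shape, ort_buffer.data());

  return 0;
}

The commit takes the wrapping path only when device_type contains "CPU", matching its in-diff comment about avoiding input copies on that device; other device types keep the allocate-and-fill path through FillInputBlob. The (void*)tensor_data cast in the diff is needed because GetTensorData<char>() returns a const pointer while the wrapping ov::Tensor constructor takes a non-const void*.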
