Add zero copy optimization on inputs for the CPU device
sspintel committed Aug 24, 2023
1 parent fb51fae · commit 8175bb0
Showing 1 changed file with 10 additions and 2 deletions.
onnxruntime/core/providers/openvino/backends/basic_backend.cc (12 changes: 10 additions & 2 deletions)
@@ -197,15 +197,23 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
   auto tensor_info = tensor.GetTensorTypeAndShapeInfo();
   auto tensor_shape = tensor_info.GetShape();
   auto tensor_size = tensor_shape.size();
+  const char* tensor_data = tensor.GetTensorData<char>();
   auto tensor_iter = 0;
   ov::Shape input_tensor_shape = ov::Shape(tensor_size, 0);
   for (auto i = tensor_shape.begin(); i != tensor_shape.end(); ++i) {
     input_tensor_shape[tensor_iter] = *i;
     tensor_iter += 1;
   }
   auto input = ie_cnn_network_->get_parameters().at(input_idx);
-  OVTensorPtr tensor_ptr = std::make_shared<ov::Tensor>(input->get_element_type(), input_tensor_shape);
-  FillInputBlob(tensor_ptr, batch_slice_idx, input_name, context, subgraph_context_);
+  OVTensorPtr tensor_ptr;
+  // avoid input copies on the CPU device
+  if (global_context_.device_type.find("CPU") != std::string::npos) {
+    tensor_ptr = std::make_shared<ov::Tensor>(input->get_element_type(), input_tensor_shape, (void*)tensor_data);
+  } else {
+    tensor_ptr = std::make_shared<ov::Tensor>(input->get_element_type(), input_tensor_shape);
+    FillInputBlob(tensor_ptr, batch_slice_idx, input_name, context, subgraph_context_);
+  }
+
   try {
     infer_request->SetTensor(input_name, tensor_ptr);
   } catch (const char* msg) {
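For context, a minimal standalone sketch of the zero-copy idea this commit applies, assuming the OpenVINO 2.0 C++ API (ov::Tensor); the buffer, shape, and element type below are illustrative, not taken from the commit. Constructing an ov::Tensor with a host pointer wraps existing memory instead of allocating fresh memory and copying into it:

// A minimal sketch, not part of the commit: wrap vs. copy with ov::Tensor.
#include <openvino/openvino.hpp>

#include <cstring>
#include <vector>

int main() {
  // Stand-in for an input buffer that ONNX Runtime already owns.
  std::vector<float> ort_buffer(1 * 3 * 224 * 224, 0.5f);
  ov::Shape shape{1, 3, 224, 224};

  // Copying path (previous behavior for all devices): the tensor owns
  // freshly allocated memory and the input data is copied into it.
  ov::Tensor copied(ov::element::f32, shape);
  std::memcpy(copied.data(), ort_buffer.data(), copied.get_byte_size());

  // Zero-copy path (new CPU-only behavior): the tensor wraps the existing
  // buffer. It does not own the memory, so ort_buffer must outlive any
  // inference that reads this tensor.
  ov::Tensor wrapped(ov::element::f32, shape, ort_buffer.data());

  return 0;
}

The commit takes the wrapping path only when device_type contains "CPU", matching its in-diff comment about avoiding input copies on that device; other device types keep the allocate-and-fill path through FillInputBlob. The (void*)tensor_data cast in the diff is needed because GetTensorData<char>() returns a const pointer while the wrapping ov::Tensor constructor takes a non-const void*.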
