diff --git a/prj-tensorrt/centerface.py b/prj-tensorrt/centerface.py index 9ed6006..f2633b3 100644 --- a/prj-tensorrt/centerface.py +++ b/prj-tensorrt/centerface.py @@ -14,6 +14,9 @@ def __init__(self, landmarks=True): runtime = trt.Runtime(self.trt_logger) self.net = runtime.deserialize_cuda_engine(f.read()) self.img_h_new, self.img_w_new, self.scale_h, self.scale_w = 0, 0, 0, 0 + self.engine = self.net + # Create the context for this engine + self.context = self.engine.create_execution_context() def __call__(self, img, height, width, threshold=0.5): h, w = img.shape[:2] @@ -67,12 +70,8 @@ def do_inference(context, bindings, inputs, outputs, stream, batch_size=1): image_cv = cv2.resize(img, dsize=(self.img_w_new, self.img_h_new)) blob = np.expand_dims(image_cv[:, :, (2, 1, 0)].transpose(2, 0, 1), axis=0).astype("float32") - engine = self.net - - # Create the context for this engine - context = engine.create_execution_context() - # Allocate buffers for input and output - inputs, outputs, bindings, stream = allocate_buffers(engine) # input, output: host # bindings + + inputs, outputs, bindings, stream = allocate_buffers(self.engine) # input, output: host # bindings # Do inference shape_of_output = [(1, 1, int(self.img_h_new / 4), int(self.img_w_new / 4)), @@ -82,7 +81,7 @@ def do_inference(context, bindings, inputs, outputs, stream, batch_size=1): # Load data to the buffer inputs[0].host = blob.reshape(-1) begin = datetime.datetime.now() - trt_outputs = do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream) # numpy data + trt_outputs = do_inference(self.context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream) # numpy data end = datetime.datetime.now() print("gpu times = ", end - begin)