1. 使用 TensorRT 对人脸 68 个特征点进行推理的 demo 代码
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit
import numpy as np
import cv2
def get_engine(engine_path):
    """Deserialize a TensorRT engine from a serialized engine file.

    The file is read in full and handed to a ``trt.Runtime`` created with
    the module-level ``TRT_LOGGER``. No engine is built here: the file must
    already contain a serialized engine.

    Args:
        engine_path: Path of the serialized engine file to read.

    Returns:
        The deserialized engine object.

    Raises:
        OSError: If the file cannot be opened or read.
        RuntimeError: If TensorRT could not deserialize the file contents.
    """
    print("Reading engine from file {}".format(engine_path))
    with open(engine_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
        engine = runtime.deserialize_cuda_engine(f.read())
        # deserialize_cuda_engine signals failure by returning None rather
        # than raising; fail here instead of with an unrelated error later.
        if engine is None:
            raise RuntimeError(
                "Failed to deserialize TensorRT engine from {}".format(engine_path)
            )
        return engine
# Module-level TensorRT state shared by the functions in this file.
TRT_LOGGER = trt.Logger()

# Deserialize the engine once and reuse it for both the binding report
# below and the execution context.
engine = get_engine("pfld.engine")

# Print the numpy dtype of every binding exposed by the engine. The shape
# and element count are computed alongside it for inspection.
for binding in engine:
    dims = engine.get_binding_shape(binding)
    size = trt.volume(dims)
    dtype = trt.nptype(engine.get_binding_dtype(binding))
    print("dtype = ", dtype)

# Execution context used by get_landmarks() for every inference call.
context = engine.create_execution_context()
def get_landmarks(img):
    """Run the TensorRT engine on one image and return its raw output.

    The image is resized to 112x112, converted from BGR to RGB, rearranged
    to channel-first layout with a leading batch axis of 1, cast to
    float32 and divided by 255. The result is copied to the GPU, run
    through the module-level execution ``context``, and the output binding
    is copied back to the host.

    Args:
        img: Image array accepted by ``cv2.resize`` and by ``cv2.cvtColor``
            with ``COLOR_BGR2RGB`` (presumably an HxWx3 BGR image as
            returned by ``cv2.imread`` -- confirm against callers).

    Returns:
        A flat float32 page-locked host array holding the contents of
        binding 1 once the inference stream has completed.

    Raises:
        ValueError: If the preprocessed image does not have the same
            number of elements as binding 0 (raised by ``np.copyto``).
    """
    resized = cv2.resize(img, (112, 112), interpolation=cv2.INTER_LINEAR)
    img_in = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
    # NOTE(review): debug dump of the network input. cv2.imwrite treats the
    # data as BGR, so the saved file has red/blue swapped.
    cv2.imwrite("tmp.jpg", img_in)

    # HWC -> CHW, add the batch axis, then scale pixel values by 1/255.
    img_in = np.transpose(img_in, (2, 0, 1)).astype(np.float32)
    img_in = np.expand_dims(img_in, axis=0)
    img_in /= 255.0
    print("Shape of the network input: ", img_in.shape)

    # Page-locked host buffers sized from the context's binding shapes.
    # Assumes binding 0 is the input and binding 1 is the output.
    h_input = cuda.pagelocked_empty(
        trt.volume(context.get_binding_shape(0)), dtype=np.float32
    )
    h_output = cuda.pagelocked_empty(
        trt.volume(context.get_binding_shape(1)), dtype=np.float32
    )
    # Matching device buffers for the input and output.
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)
    # Stream on which the copies and the inference are queued.
    stream = cuda.Stream()

    # Flatten the preprocessed image into the host input buffer.
    np.copyto(h_input, img_in.ravel())

    # Host -> device, inference, device -> host, all asynchronous.
    cuda.memcpy_htod_async(d_input, h_input, stream)
    context.execute_async_v2(
        bindings=[int(d_input), int(d_output)], stream_handle=stream.handle
    )
    cuda.memcpy_dtoh_async(h_output, d_output, stream)
    # Everything above is only queued on the stream; wait for it to finish
    # so h_output actually contains the predictions before it is returned.
    stream.synchronize()
    return h_output
# Demo driver: run landmark inference on one image and save an annotated copy.
img1 = cv2.imread("./s_28.jpg")
# cv2.imread returns None instead of raising when the file cannot be read.
if img1 is None:
    raise OSError("Could not read image ./s_28.jpg")

# View the flat network output as one (x, y) pair per row.
output = get_landmarks(img1).reshape(-1, 2)

# get_landmarks() stretches the whole image to the network input size, so
# coordinates are mapped back using the real image size rather than a fixed
# 112. For a 112x112 image this is identical to the previous scaling.
# Assumes the outputs are normalized to [0, 1] -- TODO confirm for the model.
img_h, img_w = img1.shape[:2]
for xy in output:
    x = xy[0] * img_w
    y = xy[1] * img_h
    cv2.circle(img1, (int(x), int(y)), 2, (0, 255, 0), -1)
cv2.imwrite("out.jpg", img1)