14、TX2上运行YOLO5

一、下载yolov5、tensorrtx,yolov5s.pt

预备基础环境:
python3
torch1.7或者更高

1、生成权重文件(可以在pc上或者是在TX2上生成都可以)

1、在TX2的终端直接输入下面的命令

git clone https://github.com/wang-xinyu/tensorrtx.git
git clone https://github.com/ultralytics/yolov5.git

2、下载yolov5s.pt

https://github.com/ultralytics/yolov5/releases/download/v4.0/yolov5s.pt

3、文件搬移

cp tensorrtx/yolov5/gen_wts.py yolov5


将yolov5s.pt(预训练模型)放到yolov5的weights文件夹下,当然也可以是其他位置,具体调整可以参考下面的python脚本
14、TX2上运行YOLO5
4、生成权重文件

python gen_wts.py

14、TX2上运行YOLO5
执行这个脚本的时候可能会出现如下问题,大概如下(可能不全,但是方法是一样的)
①其实是需要一些依赖的环境和包,14、TX2上运行YOLO5
这是环境中缺少了tqdm进度条的安装包,需要使用conda或者pip命令进行安装

pip install tqdm

缺少其他的包就按照同样的方法即可。

②python库中urllib3 (1.22) or chardet (2.2.1) 的版本不兼容14、TX2上运行YOLO5
解决方法如下:

pip uninstall urllib3
pip uninstall chardet
pip install requests

2、生成部署引擎(必须在部署的硬件平台上编译,此处在TX2上)

将yolov5s.wts文件放到tensorrtx/yolov5文件夹中

mv yolov5s.wts  ~/tensorrtx/yolov5

在yolov5.cpp文件中还可以修改fp16还是fp32(nano不支持int8)、device(选择哪一个GPU设备)、nms_thresh(nms的阈值)、conf_thresh(conf的置信度)、batch_size(批次大小),此处可以使用默认的参数。
14、TX2上运行YOLO5

打开yololayer.h文件,修改他的num总数,根据你训练模型的类个数来(如果是你自己针对特定的数据集训练的模型的话)。
14、TX2上运行YOLO5

除此之外,我们还可以修改输入图片的尺寸,但必须是32的倍数。缩小输入尺寸可以一定程度上加快推理速度。
14、TX2上运行YOLO5

然后反序列化引擎对图像做处理

sudo ./yolov5 -d yolov5s.engine ../samples

14、TX2上运行YOLO5
14、TX2上运行YOLO5

二、视频检测

代码如下:

import time
import cv2
import pycuda.autoinit  # This is needed for initializing CUDA driver
import numpy as np
import ctypes
import tensorrt as trt
import pycuda.driver as cuda

import threading
import random

INPUT_W = 608
INPUT_H = 608
CONF_THRESH = 0.2
IOU_THRESHOLD = 0.4

categories = ['vehicle','bicyle','pedestrain','road_sign']


def plot_one_box(x, img, color=None, label=None, line_thickness=None):
    """
    description: Plots one bounding box on image img,
                 this function comes from YoLov5 project.
    param:
        x:      a box likes [x1,y1,x2,y2]
        img:    a opencv image object
        color:  color to draw rectangle, such as (0,255,0)
        label:  str
        line_thickness: int
    return:
        no return

    """
    tl = (
            line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1
    )  # line/font thickness
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
    cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
    if label:
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
        cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled
        cv2.putText(
            img,
            label,
            (c1[0], c1[1] - 2),
            0,
            tl / 3,
            [225, 255, 255],
            thickness=tf,
            lineType=cv2.LINE_AA,
        )


def draw_boxes(image_raw, result_boxes, result_scores, result_classid):
    for i in range(len(result_boxes)):
        box = result_boxes[i]
        plot_one_box(
            box,
            image_raw,
            label="{}:{:.2f}".format(
                categories[int(result_classid[i])], result_scores[i]
            ),
        )
    return image_raw
class YoLov5TRT(object):
    """
    description: A YOLOv5 class that warps TensorRT ops, preprocess and postprocess ops.
    """

    def __init__(self, engine_file_path):
        # Create a Context on this device,
        self.cfx = cuda.Device(0).make_context()
        stream = cuda.Stream()
        TRT_LOGGER = trt.Logger(trt.Logger.INFO)
        runtime = trt.Runtime(TRT_LOGGER)

        # Deserialize the engine from file
        with open(engine_file_path, "rb") as f:
            engine = runtime.deserialize_cuda_engine(f.read())
        context = engine.create_execution_context()

        host_inputs = []
        cuda_inputs = []
        host_outputs = []
        cuda_outputs = []
        bindings = []

        for binding in engine:
            size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
            dtype = trt.nptype(engine.get_binding_dtype(binding))
            # Allocate host and device buffers
            host_mem = cuda.pagelocked_empty(size, dtype)
            cuda_mem = cuda.mem_alloc(host_mem.nbytes)
            # Append the device buffer to device bindings.
            bindings.append(int(cuda_mem))
            # Append to the appropriate list.
            if engine.binding_is_input(binding):
                host_inputs.append(host_mem)
                cuda_inputs.append(cuda_mem)
            else:
                host_outputs.append(host_mem)
                cuda_outputs.append(cuda_mem)

        # Store
        self.stream = stream
        self.context = context
        self.engine = engine
        self.host_inputs = host_inputs
        self.cuda_inputs = cuda_inputs
        self.host_outputs = host_outputs
        self.cuda_outputs = cuda_outputs
        self.bindings = bindings

    # 释放引擎,释放GPU显存,释放CUDA流
    def __del__(self):
        print("delete object to release memory")

    def infer(self, image_raw):
        threading.Thread.__init__(self)
        # Make self the active context, pushing it on top of the context stack.
        self.cfx.push()
        # Restore
        stream = self.stream
        context = self.context
        engine = self.engine
        host_inputs = self.host_inputs
        cuda_inputs = self.cuda_inputs
        host_outputs = self.host_outputs
        cuda_outputs = self.cuda_outputs
        bindings = self.bindings
        # Do image preprocess
        input_image, image_raw, origin_h, origin_w = self.preprocess_image(
            image_raw
        )
        # Copy input image to host buffer
        np.copyto(host_inputs[0], input_image.ravel())
        # Transfer input data  to the GPU.
        cuda.memcpy_htod_async(cuda_inputs[0], host_inputs[0], stream)
        # Run inference.
        context.execute_async(bindings=bindings, stream_handle=stream.handle)
        # Transfer predictions back from the GPU.
        cuda.memcpy_dtoh_async(host_outputs[0], cuda_outputs[0], stream)
        # Synchronize the stream
        stream.synchronize()
        # Remove any context from the top of the context stack, deactivating it.
        self.cfx.pop()
        # Here we use the first row of output in that batch_size = 1
        output = host_outputs[0]
        # Do postprocess
        result_boxes, result_scores, result_classid = self.post_process(
            output, origin_h, origin_w
        )

        return image_raw, result_boxes, result_scores, result_classid

    def destroy(self):
        # Remove any context from the top of the context stack, deactivating it.
        self.cfx.pop()

    def preprocess_image(self, image_raw):
        """
        description: Read an image from image path, convert it to RGB,
                     resize and pad it to target size, normalize to [0,1],
                     transform to NCHW format.
        param:
            input_image_path: str, image path
        return:
            image:  the processed image
            image_raw: the original image
            h: original height
            w: original width
        """
        h, w, c = image_raw.shape
        image = cv2.cvtColor(image_raw, cv2.COLOR_BGR2RGB)
        # Calculate widht and height and paddings
        r_w = INPUT_W / w
        r_h = INPUT_H / h
        if r_h > r_w:
            tw = INPUT_W
            th = int(r_w * h)
            tx1 = tx2 = 0
            ty1 = int((INPUT_H - th) / 2)
            ty2 = INPUT_H - th - ty1
        else:
            tw = int(r_h * w)
            th = INPUT_H
            tx1 = int((INPUT_W - tw) / 2)
            tx2 = INPUT_W - tw - tx1
            ty1 = ty2 = 0
        # Resize the image with long side while maintaining ratio
        image = cv2.resize(image, (tw, th))
        # Pad the short side with (128,128,128)
        image = cv2.copyMakeBorder(
            image, ty1, ty2, tx1, tx2, cv2.BORDER_CONSTANT, (128, 128, 128)
        )
        image = image.astype(np.float32)
        # Normalize to [0,1]
        image /= 255.0
        # HWC to CHW format:
        image = np.transpose(image, [2, 0, 1])
        # CHW to NCHW format
        image = np.expand_dims(image, axis=0)
        # Convert the image to row-major order, also known as "C order":
        image = np.ascontiguousarray(image)
        return image, image_raw, h, w

    def xywh2xyxy(self, origin_h, origin_w, x):
        """
        description:    Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
        param:
            origin_h:   height of original image
            origin_w:   width of original image
            x:          A boxes tensor, each row is a box [center_x, center_y, w, h]
        return:
            y:          A boxes tensor, each row is a box [x1, y1, x2, y2]
        """
        # y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
        y = np.zeros_like(x)
        r_w = INPUT_W / origin_w
        r_h = INPUT_H / origin_h
        if r_h > r_w:
            y[:, 0] = x[:, 0] - x[:, 2] / 2
            y[:, 2] = x[:, 0] + x[:, 2] / 2
            y[:, 1] = x[:, 1] - x[:, 3] / 2 - (INPUT_H - r_w * origin_h) / 2
            y[:, 3] = x[:, 1] + x[:, 3] / 2 - (INPUT_H - r_w * origin_h) / 2
            y /= r_w
        else:
            y[:, 0] = x[:, 0] - x[:, 2] / 2 - (INPUT_W - r_h * origin_w) / 2
            y[:, 2] = x[:, 0] + x[:, 2] / 2 - (INPUT_W - r_h * origin_w) / 2
            y[:, 1] = x[:, 1] - x[:, 3] / 2
            y[:, 3] = x[:, 1] + x[:, 3] / 2
            y /= r_h

        return y

    def nms(self, boxes, scores, iou_threshold=IOU_THRESHOLD):
        x1 = boxes[:, 0]
        y1 = boxes[:, 1]
        x2 = boxes[:, 2]
        y2 = boxes[:, 3]
        areas = (y2 - y1 + 1) * (x2 - x1 + 1)
        scores = scores
        keep = []
        index = scores.argsort()[::-1]
        while index.size > 0:
            i = index[0]  # every time the first is the biggst, and add it directly
            keep.append(i)

            x11 = np.maximum(x1[i], x1[index[1:]])  # calculate the points of overlap
            y11 = np.maximum(y1[i], y1[index[1:]])
            x22 = np.minimum(x2[i], x2[index[1:]])
            y22 = np.minimum(y2[i], y2[index[1:]])

            w = np.maximum(0, x22 - x11 + 1)  # the weights of overlap
            h = np.maximum(0, y22 - y11 + 1)  # the height of overlap

            overlaps = w * h
            ious = overlaps / (areas[i] + areas[index[1:]] - overlaps)

            idx = np.where(ious <= iou_threshold)[0]
            index = index[idx + 1]  # because index start from 1

        return keep

    def post_process(self, output, origin_h, origin_w):
        """
        description: postprocess the prediction
        param:
            output:     A tensor likes [num_boxes,cx,cy,w,h,conf,cls_id, cx,cy,w,h,conf,cls_id, ...]
            origin_h:   height of original image
            origin_w:   width of original image
        return:
            result_boxes: finally boxes, a boxes tensor, each row is a box [x1, y1, x2, y2]
            result_scores: finally scores, a tensor, each element is the score correspoing to box
            result_classid: finally classid, a tensor, each element is the classid correspoing to box
        """
        # Get the num of boxes detected
        num = int(output[0])
        # Reshape to a two dimentional ndarray
        pred = np.reshape(output[1:], (-1, 6))[:num, :]
        # to a torch Tensor
        # pred = torch.Tensor(pred).cuda()
        # Get the boxes
        boxes = pred[:, :4]
        # Get the scores
        scores = pred[:, 4]
        # Get the classid
        classid = pred[:, 5]
        # Choose those boxes that score > CONF_THRESH
        si = scores > CONF_THRESH
        boxes = boxes[si, :]
        scores = scores[si]
        classid = classid[si]
        # Trandform bbox from [center_x, center_y, w, h] to [x1, y1, x2, y2]
        boxes = self.xywh2xyxy(origin_h, origin_w, boxes)
        # Do nms
        # indices = torchvision.ops.nms(boxes, scores, iou_threshold=IOU_THRESHOLD).cpu()
        # result_boxes = boxes[indices, :].cpu()
        # result_scores = scores[indices].cpu()
        # result_classid = classid[indices].cpu()
        # return result_boxes, result_scores, result_classid
        indices = self.nms(boxes, scores, IOU_THRESHOLD)
        result_boxes = boxes[indices, :]
        result_scores = scores[indices]
        result_classid = classid[indices]
        return result_boxes, result_scores, result_classid


def detect_one(img, yolov5_wrapper):
    full_scrn = False
    tic = time.clock()
    ##开始检测,并将结果写到result.jpg中
    img, result_boxes, result_scores, result_classid = yolov5_wrapper.infer(img)
    toc = time.clock()
    curr_fps = (toc - tic)
    print("boxes: "+str(result_boxes))
    print("clss: "+str(result_classid))
    print("confs: "+str(result_scores))
    img = draw_boxes(img, result_boxes, result_scores, result_classid)
    cv2.imwrite("result.jpg",img)
    print("time: "+str(curr_fps)+"(sec)")

def main_one():
    filename = "1.jpg"  
    img = cv2.imread(filename)
    # load custom plugins
    #PLUGIN_LIBRARY = "yolov5s/libmyplugins.so"
    PLUGIN_LIBRARY = "/home/nvidia/tensorrtx/yolov5/build/libmyplugins.so"
    ctypes.CDLL(PLUGIN_LIBRARY)
    engine_file_path = "/home/nvidia/tensorrtx/yolov5/build/yolov5s.engine"

    # a  YoLov5TRT instance
    yolov5_wrapper = YoLov5TRT(engine_file_path)
    print("start detection!")
    detect_one(img, yolov5_wrapper)
    cv2.destroyAllWindows()
    print("finish!")
from IPython.display import Image
main_one()
Image("result.jpg")

def detect_video(video, yolov5_wrapper):
    full_scrn = False
    fps = 0.0
    tic = time.time()
    frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = video.get(cv2.CAP_PROP_FPS)
    #print(str(frame_width)+str(frame_height))
    ##定义输入编码
    fourcc = cv2.VideoWriter_fourcc('M', 'P', '4', 'V')
    videoWriter = cv2.VideoWriter('result.AVI', fourcc, fps, (frame_width,frame_height))
    ##开始循环检测,并将结果写到result.mp4中
    while True:
        ret,img = video.read()
        if img is not None:
            img, result_boxes, result_scores, result_classid = yolov5_wrapper.infer(img)
            img = draw_boxes(img, result_boxes, result_scores, result_classid)
            videoWriter.write(img)
            toc = time.time()
            curr_fps = 1.0 / (toc - tic)
            fps = curr_fps if fps == 0.0 else (fps*0.95 + curr_fps*0.05)
            tic = toc
            print("\rfps: "+str(fps),end="")
        else:
            break
def main_loop():
    filename = "video2.mp4"
    video = cv2.VideoCapture(filename)
    # load custom plugins
    PLUGIN_LIBRARY = "yolov5x/libmyplugins.so"
    ctypes.CDLL(PLUGIN_LIBRARY)
    engine_file_path = "yolov5x/yolov5x.engine"

    # a  YoLov5TRT instance
    yolov5_wrapper = YoLov5TRT(engine_file_path)
    print("start detection!")
    detect_video(video, yolov5_wrapper)
    video.release()
    cv2.destroyAllWindows()
    print("\nfinish!")
main_loop()

from IPython.display import Video
Video("result-ffmpeg4.mp4")

报错:
①少了cv2的模块
14、TX2上运行YOLO5
事实上这里是启动了默认的conda环境,在这个环境里面没有cv2,所以应该关闭这个环境。
②找不到文件
14、TX2上运行YOLO5
这里刚开始是以为不能在/usr/lib找到库文件的原因,所以下面采用软连接的方式。

sudo find / -iname "找不到的库文件名"
ln -s ~/tensorrtx/yolov5/build /usr/lib
sudo ldconfig

但是并没有解决,于是循着报错信息进一步排错。
打开yolov5_trt.py文件,发现文件路径写错了。
14、TX2上运行YOLO5
但是修改过后又出现了新的问题。
14、TX2上运行YOLO5
代更--------

上一篇:TX2 调试 思岚激光雷达 A3


下一篇:使用Doxygen生成html/chm范例,方便源码阅读