Facebook detectron2训练faster rcnn

Facebook detectron2训练faster rcnn

detectron2是Facebook开源的一套框架,适用于很多的deep learning task,例如instance segmentation等,本文主要就如何利用detectron2训练faster rcnn,并得到模型,主要描述大体的思路

0. 前期

前期请看官方的教程,大致了解怎么回事
github
colab

1. 安装detectron2

detectron2的使用方式不像github上的一些项目那样,把仓库clone下来直接修改代码;它是一个框架,我们需要先安装这个框架,再在自己的代码中调用它。
如何安装,请见这里
建议直接pip安装,而不是编译安装:
Facebook detectron2训练faster rcnn
安装后,还需要opencv等,这里不描述了。

2. 数据集的准备

我们需要准备一个数据集,这个数据集存放在本机的一个位置,然后我们需要用一个列表装下每张图片的信息,每张图片需要用dict方式存储,举例如下:

{
    'file_name': '/exdata3/RuoyuChen/detectron2/VOC2007/JPEGImages/Black_Footed_Albatross_0009_34.jpg', 
    'image_id': 0, 
    'height': 336, 
    'width': 500, 
    'annotations': [
        {
	        'bbox': [139, 30, 292, 294], 
	        'bbox_mode': <BoxMode.XYXY_ABS: 0>, 
	        'category_id': 0, 
	        'iscrowd': 0
        }
    ]
}

这个只是一张图片的字典可视化,具体解释如下:

file_name:图片路径
image_id:图片编号,从0开始依次加1,没有顺序要求
height:图片的高度
width:图片的宽度
annotations:放box信息,是一个列表,其中1个{}代表一个框
bbox:[xmin,ymin,xmax,ymax]
bbox_mode:固定就好,详细见官方给的例子,在colab上
category_id:类别数字,从0开始
iscrowd:目标检测默认是0就行

假如1张图片的字典如上所示,并且在python中用变量record保存,那么整个数据集就是一个由这些字典组成的列表(list)datasets_dict:先令datasets_dict = [],之后每得到一个record就执行datasets_dict.append(record),最终即可得到detectron2所需的完整数据集格式。

3. 注册数据集

这里每次训练前都需要执行类似的操作:

# The registered callable is invoked without arguments and must return the
# list of per-image dicts, so a plain lambda is sufficient here.
DatasetCatalog.register("CUB_train", lambda: train_dataset_dicts)
# thing_classes maps each category_id (its index) to a readable class name.
MetadataCatalog.get("CUB_train").set(thing_classes=classes)

train_dataset_dicts即对应训练的字典。

4. 训练

模仿官方的配置即可

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("CUB_train",)
cfg.DATASETS.TEST = ()
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")  # Let training initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025  # pick a good LR
cfg.SOLVER.MAX_ITER = 3000    # number of training iterations; increase for larger datasets
cfg.SOLVER.STEPS = []        # do not decay learning rate
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128   # smaller than the default (512), so training is faster
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 200  # number of object classes in the dataset (see https://detectron2.readthedocs.io/tutorials/datasets.html#update-the-config-for-new-datasets)

模型的yaml文件类型可在官方仓库看:https://github.com/facebookresearch/detectron2/tree/master/configs

5. 完整的伪代码,供参考

import os
import cv2
import numpy as np

from detectron2.structures import BoxMode
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog

from detectron2.engine import DefaultTrainer

from tqdm import tqdm

# Directory holding the image files; get_json() joins it with each image's base file name.
image_dir = "/exdata3/detectron2/VOC2007/JPEGImages"
# Text table whose second column is the image path ("<class folder>/<file name>").
image_list_dir = "/exdata3/CUB_200_2011/images.txt"
# Text table of boxes; columns 1-4 are read as x, y, width, height.
box_dir = "/exdata3/CUB_200_2011/bounding_boxes.txt"
# Text table whose second column is 1 for training images (anything else goes to the test split).
if_training_dir = "/exdata3/CUB_200_2011/train_test_split.txt"

def get_json():
    """Build detectron2-style dataset dicts for the train and test splits.

    Reads the bounding-box, image-list and train/test-split tables from the
    module-level paths, opens every image to obtain its size, and produces
    one record per image with a single box annotation.

    Returns:
        A tuple ``(train_dataset_dicts, test_dataset_dicts)``; each is a list
        of dicts with the keys ``file_name``, ``image_id``, ``height``,
        ``width`` and ``annotations``.

    Raises:
        FileNotFoundError: if an image listed in the table cannot be read.
    """
    # ndmin=2 keeps the (rows, columns) layout even if a file has one line.
    boxes = np.loadtxt(box_dir, ndmin=2)
    img_path = np.loadtxt(image_list_dir, dtype=bytes, ndmin=2).astype(str)
    if_training = np.loadtxt(if_training_dir, ndmin=2)

    train_dataset_dicts = []
    test_dataset_dicts = []
    for i in tqdm(range(boxes.shape[0])):
        # Second column looks like "<class folder>/<file name>".
        relative_path = img_path[i][1]
        image_name = os.path.join(image_dir, relative_path.split('/')[-1])
        # The class folder starts with a 1-based number ("<number>.<name>");
        # shift it so category ids start at 0.
        class_name = relative_path.split('/')[0]
        class_id = int(class_name.split('.')[0]) - 1

        # Boxes are stored as x, y, width, height; convert to corner format.
        xmin = int(boxes[i][1])
        xmax = int(boxes[i][1] + boxes[i][3])
        ymin = int(boxes[i][2])
        ymax = int(boxes[i][2] + boxes[i][4])

        image = cv2.imread(image_name)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(
                "cv2.imread could not read image: " + image_name)
        height, width = image.shape[:2]

        # Pick the split once; image ids count up from 0 within each split,
        # which equals the number of records already stored for that split.
        if if_training[i][1] == 1:
            target = train_dataset_dicts
        else:
            target = test_dataset_dicts

        target.append({
            'file_name': image_name,
            'image_id': len(target),
            'height': height,
            'width': width,
            # One box per image; a "segmentation" entry would additionally
            # be needed for Mask R-CNN.
            'annotations': [{
                "bbox": [xmin, ymin, xmax, ymax],
                "bbox_mode": BoxMode.XYXY_ABS,
                "category_id": class_id,
                "iscrowd": 0,
            }],
        })
    return train_dataset_dicts, test_dataset_dicts

def get_classes_name():
    """Return the class names stored in the second column of classes.txt.

    Returns:
        A list with one name per row of the file, in file order.
    """
    source_file = "/exdata3/RuoyuChen/CUB_200_2011/classes.txt"
    table = np.loadtxt(source_file, dtype=bytes).astype(str)
    # Column 0 is skipped; column 1 holds the name.
    return [table[row][1] for row in tqdm(range(table.shape[0]))]

def register():
    """Register the "CUB_train" and "CUB_test" datasets with detectron2.

    Loads both splits via get_json() and the class names via
    get_classes_name(), then registers each split together with its
    ``thing_classes`` metadata.
    """
    train_dataset_dicts, test_dataset_dicts = get_json()
    classes = get_classes_name()
    splits = (
        ("train", train_dataset_dicts),
        ("test", test_dataset_dicts),
    )
    for split, records in splits:
        dataset_name = "CUB_" + split
        # Bind the records as a default argument so each registered callable
        # keeps its own split instead of the loop's last value.
        DatasetCatalog.register(dataset_name, lambda records=records: records)
        MetadataCatalog.get(dataset_name).set(thing_classes=classes)

def main():
    """Register the datasets, configure Faster R-CNN and run training."""
    register()

    # The same model-zoo entry provides the base config and the initial weights.
    zoo_entry = "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"

    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(zoo_entry))

    # Data: train on the registered training split, no evaluation set.
    cfg.DATASETS.TRAIN = ("CUB_train",)
    cfg.DATASETS.TEST = ()
    cfg.DATALOADER.NUM_WORKERS = 2

    # Model: initialise from the model zoo checkpoint.
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(zoo_entry)
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128  # smaller than the default (512), so faster
    # Number of object classes (not num_classes + 1); see
    # https://detectron2.readthedocs.io/tutorials/datasets.html#update-the-config-for-new-datasets
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 200

    # Solver: fixed learning rate for the whole run.
    cfg.SOLVER.IMS_PER_BATCH = 2
    cfg.SOLVER.BASE_LR = 0.00025
    cfg.SOLVER.MAX_ITER = 3000
    cfg.SOLVER.STEPS = []  # empty list: do not decay the learning rate

    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    trainer = DefaultTrainer(cfg)
    trainer.resume_or_load(resume=False)
    trainer.train()

if __name__ == '__main__':
    # Default to GPU index 2, but keep a CUDA_VISIBLE_DEVICES value that the
    # user or a job scheduler has already set instead of overwriting it.
    os.environ.setdefault("CUDA_VISIBLE_DEVICES", "2")
    main()

Facebook detectron2训练faster rcnn

上一篇:第十九课--伪类(Pseudo-classes)


下一篇:keras 中 model.predect() 与model.predict_classes()的区别