Facebook detectron2训练faster rcnn
detectron2是Facebook开源的一套框架,适用于很多的deep learning task,例如instance segmentation等,本文主要就如何利用detectron2训练faster rcnn,并得到模型,主要描述大体的思路
0. 前期
1. 安装detectron2
与GitHub上一些需要把仓库clone下来再修改代码的目标检测项目不同,detectron2本身是一个框架,我们需要先安装这个框架,再基于它编写自己的训练脚本。
如何安装,请见detectron2官方仓库中的安装说明(INSTALL.md)。
建议直接pip安装,而不是编译安装:
安装后,还需要opencv等,这里不描述了。
2. 数据集的准备
我们需要准备一个数据集,这个数据集存放在本机的一个位置,然后我们需要用一个列表装下每张图片的信息,每张图片需要用dict方式存储,举例如下:
{
'file_name': '/exdata3/RuoyuChen/detectron2/VOC2007/JPEGImages/Black_Footed_Albatross_0009_34.jpg',
'image_id': 0,
'height': 336,
'width': 500,
'annotations': [
{
'bbox': [139, 30, 292, 294],
'bbox_mode': <BoxMode.XYXY_ABS: 0>,
'category_id': 0,
'iscrowd': 0
}
]
}
这个只是一张图片的字典可视化,具体解释如下:
file_name:图片路径
image_id:图片编号,从0开始依次加1,没有顺序要求
height:图片的高度
width:图片的宽度
annotations:存放box信息的列表,其中每个{}代表一个框
bbox:[xmin,ymin,xmax,ymax]
bbox_mode:固定就好,详细见官方给的例子,在colab上
category_id:类别数字,从0开始
iscrowd:目标检测默认是0就行
假设上面这样一张图片的字典在python中的变量名为record,而整个数据集用一个列表datasets_dict存放,那么每次得到一个record后,执行datasets_dict.append(record),最终即可得到完整的数据集列表(列表中的每个元素是一张图片的字典)。
3. 注册数据集
这里每次训练前都需要执行类似的操作:
# Register the training split under the name "CUB_train". The second argument
# is a zero-argument callable returning the list of per-image dicts; the
# original `d=d` default was dropped because `d` is not defined in this snippet.
DatasetCatalog.register("CUB_train", lambda: train_dataset_dicts)
# Attach the class-name list so category ids can be mapped back to names.
MetadataCatalog.get("CUB_train").set(thing_classes=classes)
其中train_dataset_dicts即训练集对应的字典列表(每个元素是一张图片的字典)。
4. 训练
模仿官方的配置即可
# Start from detectron2's default config, then overlay the Faster R-CNN
# R50-FPN 3x settings from the model zoo.
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
# Train on the dataset registered as "CUB_train"; no test set is evaluated during training.
cfg.DATASETS.TRAIN = ("CUB_train",)
cfg.DATASETS.TEST = ()
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml") # Let training initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025 # pick a good LR
cfg.SOLVER.MAX_ITER = 3000 # total training iterations; a practical dataset may need more
cfg.SOLVER.STEPS = [] # do not decay learning rate
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128 # smaller than the default of 512 to train faster
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 200 # number of classes in the dataset (not num_classes+1); see https://detectron2.readthedocs.io/tutorials/datasets.html#update-the-config-for-new-datasets
模型的yaml文件类型可在官方仓库看:https://github.com/facebookresearch/detectron2/tree/master/configs
5. 完整的伪代码,供参考
import os
import cv2
import numpy as np
from detectron2.structures import BoxMode
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.engine import DefaultTrainer
from tqdm import tqdm
# Directory holding the image files; get_json() joins it with each image's file name.
image_dir = "/exdata3/detectron2/VOC2007/JPEGImages"
# Text file whose second column is the image's relative path ("<class dir>/<file name>").
image_list_dir = "/exdata3/CUB_200_2011/images.txt"
# Text file whose columns 1-4 are read as x, y, width, height of each image's box.
box_dir = "/exdata3/CUB_200_2011/bounding_boxes.txt"
# Text file whose second column is 1 for training images; any other value means test.
if_training_dir = "/exdata3/CUB_200_2011/train_test_split.txt"
def get_json():
    """Build detectron2-style dataset dicts for the train and test splits.

    Reads the module-level annotation files ``box_dir``, ``image_list_dir``
    and ``if_training_dir`` with ``np.loadtxt`` and opens every image under
    ``image_dir`` to obtain its height and width. Row ``i`` of the three
    files is assumed to describe the same image.

    Each record has the keys ``file_name``, ``image_id``, ``height``,
    ``width`` and ``annotations`` (a single-box list in ``XYXY_ABS`` mode).
    ``image_id`` counts from 0 independently within each split.

    Returns:
        tuple: ``(train_dataset_dicts, test_dataset_dicts)``, two lists of
        per-image dicts.

    Raises:
        FileNotFoundError: If an image listed in ``image_list_dir`` cannot be
            read (``cv2.imread`` returned ``None``).
    """
    boxes = np.loadtxt(box_dir)
    img_path = np.loadtxt(image_list_dir, dtype=bytes).astype(str)
    if_training = np.loadtxt(if_training_dir)

    train_dataset_dicts = []
    test_dataset_dicts = []

    for i in tqdm(range(boxes.shape[0])):
        # Second column looks like "<class dir>/<file name>"; images are
        # looked up by file name only, directly under image_dir.
        relative_path = img_path[i][1]
        image_name = os.path.join(image_dir, relative_path.split('/')[-1])

        # The class directory is prefixed with a 1-based number ("NNN.name");
        # convert it to a 0-based category id.
        class_name = relative_path.split('/')[0]
        class_id = int(class_name.split('.')[0]) - 1

        # Box columns are x, y, width, height; convert to corner coordinates.
        xmin = int(boxes[i][1])
        ymin = int(boxes[i][2])
        xmax = int(boxes[i][1] + boxes[i][3])
        ymax = int(boxes[i][2] + boxes[i][4])

        image = cv2.imread(image_name)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(
                "Could not read image: {}".format(image_name)
            )
        height, width = image.shape[:2]

        # Pick the split once; the id within a split is simply the number of
        # records already collected for it.
        if if_training[i][1] == 1:
            target = train_dataset_dicts
        else:
            target = test_dataset_dicts

        record = {
            'file_name': image_name,
            'image_id': len(target),
            'height': height,
            'width': width,
            'annotations': [
                {
                    "bbox": [xmin, ymin, xmax, ymax],
                    "bbox_mode": BoxMode.XYXY_ABS,
                    # A "segmentation" polygon would also be needed here for
                    # Mask R-CNN.
                    "category_id": class_id,
                    "iscrowd": 0,
                }
            ],
        }
        target.append(record)

    return train_dataset_dicts, test_dataset_dicts
def get_classes_name():
    """Return the class names read from the CUB ``classes.txt`` file.

    The file is loaded as a table of strings and the second column of every
    row is collected, in file order.

    Returns:
        list: One class-name string per row of the file.
    """
    classes_file = "/exdata3/RuoyuChen/CUB_200_2011/classes.txt"
    table = np.loadtxt(classes_file, dtype=bytes).astype(str)
    # Column 1 holds the name; column 0 is not used.
    return [table[row][1] for row in tqdm(range(table.shape[0]))]
def register():
    """Register the train and test splits with detectron2.

    Builds both splits via ``get_json()`` and the class-name list via
    ``get_classes_name()``, then registers them as ``"CUB_train"`` and
    ``"CUB_test"`` and attaches the class names as ``thing_classes`` metadata.
    """
    train_dataset_dicts, test_dataset_dicts = get_json()
    classes = get_classes_name()

    splits = (
        ("train", train_dataset_dicts),
        ("test", test_dataset_dicts),
    )
    for split, dicts in splits:
        dataset_name = "CUB_" + split
        # Bind the list as a default so each callable returns its own split.
        DatasetCatalog.register(dataset_name, lambda dicts=dicts: dicts)
        MetadataCatalog.get(dataset_name).set(thing_classes=classes)
def main():
    """Register the datasets, configure Faster R-CNN and run training.

    The config starts from the model-zoo Faster R-CNN R50-FPN 3x file, is
    pointed at the ``"CUB_train"`` dataset, and is then handed to
    ``DefaultTrainer``. Outputs go to ``cfg.OUTPUT_DIR``, which is created if
    it does not exist.
    """
    register()

    zoo_config = "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"

    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(zoo_config))

    # Data: train on the registered training split; no test set during training.
    cfg.DATASETS.TRAIN = ("CUB_train",)
    cfg.DATASETS.TEST = ()
    cfg.DATALOADER.NUM_WORKERS = 2

    # Model: initialize from the model-zoo checkpoint of the same config.
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(zoo_config)
    # Smaller than the default of 512 to train faster.
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
    # NOTE: this is the number of classes itself, not num_classes+1 as some
    # unofficial tutorials use.
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 200

    # Solver: fixed learning rate (no decay steps) for 3000 iterations.
    cfg.SOLVER.IMS_PER_BATCH = 2
    cfg.SOLVER.BASE_LR = 0.00025
    cfg.SOLVER.MAX_ITER = 3000
    cfg.SOLVER.STEPS = []

    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

    trainer = DefaultTrainer(cfg)
    # resume=False: do not resume from a previous run.
    trainer.resume_or_load(resume=False)
    trainer.train()
if __name__ == "__main__":
    # Expose only GPU index 2 to CUDA for this process, then start training.
    os.environ["CUDA_VISIBLE_DEVICES"] = "2"
    main()