#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""
@Author  :   {FirstElfin}
@License :   (C) Copyright 2013-2020, {DHWL}
@Contact :   {2968793701@qq.com}
@Software:   PyCharm
@File    :   test.py
@Time    :   11/22/19 11:55 AM
"""
# Build a class-filtered copy of a VOC-style dataset.
#
# For every annotation XML in ``ann_filepath`` the script drops each
# <object> whose <name> is not listed in ``classes``.  Annotations that
# still contain at least one object are written to ``ann_savepath`` and the
# image with the same base name (".jpg") is copied from ``img_filepath`` to
# ``img_savepath``.
import os
import xml.etree.ElementTree as ET
import shutil

# Source dataset (read only) and destination dataset (generated).
ann_filepath = './VOC2007_22/Annotations/'
img_filepath = './VOC2007_22/JPEGImages/'
img_savepath = './VOC2007/JPEGImages/'
ann_savepath = './VOC2007/Annotations/'

# makedirs (not mkdir): the parent "./VOC2007" usually does not exist yet,
# and exist_ok avoids failing when the script is run a second time.
os.makedirs(img_savepath, exist_ok=True)
os.makedirs(ann_savepath, exist_ok=True)

# Object class names to keep; everything else is removed from the annotations.
classes = ['bicycle', 'bus', 'car', 'motorbike', 'train']


def save_annotation(file, src_dir=None, dst_dir=None, keep_classes=None):
    """Filter one annotation file and save it if any wanted object remains.

    Args:
        file: File name (not a path) of the annotation XML inside ``src_dir``.
        src_dir: Directory to read from; defaults to ``ann_filepath``.
        dst_dir: Directory to write to; defaults to ``ann_savepath``.  It is
            created if it does not exist.
        keep_classes: Iterable of class names to keep; defaults to ``classes``.

    Returns:
        True if at least one <object> was kept and the filtered XML was
        written under the same file name in ``dst_dir``; False if nothing
        was kept (no file is written in that case).

    Raises:
        xml.etree.ElementTree.ParseError: if the file is not well-formed XML.
        OSError: if the file cannot be read or the result cannot be written.
    """
    src_dir = ann_filepath if src_dir is None else src_dir
    dst_dir = ann_savepath if dst_dir is None else dst_dir
    wanted = set(classes if keep_classes is None else keep_classes)

    tree = ET.parse(os.path.join(src_dir, file))
    root = tree.getroot()

    kept_any = False
    # findall() returns a list of the root's direct children, so removing
    # elements from the root while looping over that list is safe.
    for obj in root.findall("object"):
        # findtext() yields None for an <object> without <name>; such an
        # object is treated as unwanted instead of raising AttributeError.
        if obj.findtext("name") in wanted:
            kept_any = True
        else:
            root.remove(obj)

    if not kept_any:
        return False

    os.makedirs(dst_dir, exist_ok=True)
    tree.write(os.path.join(dst_dir, file))
    return True


def save_images(file, src_dir=None, dst_dir=None):
    """Copy the ".jpg" image that belongs to an annotation file.

    Args:
        file: Annotation file name; its extension is replaced by ".jpg" to
            obtain the image file name.
        src_dir: Directory holding the images; defaults to ``img_filepath``.
        dst_dir: Directory to copy into; defaults to ``img_savepath``.  It is
            created if it does not exist.

    Returns:
        True once the image has been copied.

    Raises:
        FileNotFoundError: if the image does not exist in ``src_dir``.
    """
    src_dir = img_filepath if src_dir is None else src_dir
    dst_dir = img_savepath if dst_dir is None else dst_dir

    image_name = os.path.splitext(file)[0] + ".jpg"
    os.makedirs(dst_dir, exist_ok=True)
    shutil.copy(os.path.join(src_dir, image_name), dst_dir)
    return True


def main():
    """Filter every annotation in ``ann_filepath`` and copy the kept images."""
    for entry in sorted(os.listdir(ann_filepath)):
        # Skip stray non-annotation files (e.g. editor/OS metadata) that
        # would otherwise make the XML parser abort the whole run.
        if not entry.lower().endswith(".xml"):
            continue
        if save_annotation(entry):
            save_images(entry)


if __name__ == '__main__':
    main()
./VOC2007_22 是备份的原始数据集,./VOC2007 是我们要生成的数据集(即自制的数据集)。
通过修改 classes 列表可以选择需要保留的类别;VOC 的原始类别为:
classes = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
           'bus', 'car', 'cat', 'chair', 'cow', 'diningtable',
           'dog', 'horse', 'motorbike', 'pottedplant',
           'sheep', 'sofa', 'train', 'tvmonitor', 'person']