注意:
使用本文代码时,建议把图片和标注文件分开存放:
图片存放在 images 文件夹下(本脚本用不到);
标注文件(.xml)存放在 annotations 文件夹下。
代码还使用了 tqdm 库来显示进度条;如果不需要进度条,可以把相关代码注释掉。
import xml.etree.ElementTree as ET #导入xml模块
import pickle
import os
import glob
from os import listdir, getcwd
from os.path import join
from tqdm import tqdm#缺包的需要安装
def class_num(_dir, class_name, dataset, nums):
    """Count annotated objects per class across the XML files in a directory.

    Every ``*.xml`` file directly inside ``_dir`` is parsed and each
    ``<object>`` element is tallied by the text of its ``<name>`` child.

    Args:
        _dir: Directory holding the annotation files. A trailing path
            separator is optional.
        class_name: Iterable of the label strings to count individually.
        dataset: Short label for the data split; only used in the progress
            bar description.
        nums: Expected number of files; only used as the progress bar total.

    Returns:
        A dict with one counter per entry of ``class_name``, plus ``"other"``
        (objects whose label is not in ``class_name`` or that have no
        ``<name>`` child) and ``"sum"`` (all objects seen).

    Raises:
        xml.etree.ElementTree.ParseError: If a file is not well-formed XML.
        OSError: If a matched file cannot be opened.
    """
    # Imported lazily so counting still works when tqdm is not installed or
    # its module-level import has been commented out; the bar is cosmetic.
    try:
        from tqdm import tqdm as make_progress
    except ImportError:
        make_progress = None

    known_labels = set(class_name)
    result = dict.fromkeys(class_name, 0)  # per-class object counts
    result["other"] = 0  # objects with an unlisted or missing label
    result["sum"] = 0  # every object encountered

    pbar = None
    if make_progress is not None:
        # ``total`` sets the full length of the progress bar.
        pbar = make_progress(total=nums, desc="%s-porcess" % dataset, unit="xml")

    try:
        # os.path.join makes the trailing separator on ``_dir`` optional.
        for xml_path in glob.glob(os.path.join(_dir, "*.xml")):
            if pbar is not None:
                pbar.update(1)
            with open(xml_path, "r", encoding="utf-8") as f:
                tree = ET.parse(f)
            for obj in tree.iter("object"):
                result["sum"] += 1
                name_node = obj.find("name")
                label = None if name_node is None else name_node.text
                if label in known_labels:
                    result[label] += 1
                else:
                    result["other"] += 1
    finally:
        # Release the progress bar even when a file fails to parse.
        if pbar is not None:
            pbar.close()
    return result
if __name__ == '__main__':
    # Script entry point: report per-class object counts for the train and
    # test annotation directories. Paths are machine-specific; edit as needed.
    train_dir = "E:/DL/detectron2/SwinT_detectron2/datasets/new/train/annotations/"
    test_dir = "E:/DL/detectron2/SwinT_detectron2/datasets/test/annotations/"
    class_name = ["0", "1", "2"]  # label names used during annotation

    # Fail fast on a wrong path instead of silently reporting zero files.
    for annotation_dir in (train_dir, test_dir):
        if not os.path.isdir(annotation_dir):
            raise FileNotFoundError(annotation_dir)

    # Count only *.xml files: those are the only entries class_num visits, so
    # any other file in the directory would make the progress total wrong.
    train_num = len(glob.glob(os.path.join(train_dir, "*.xml")))
    test_num = len(glob.glob(os.path.join(test_dir, "*.xml")))
    print(train_num, test_num)

    results1 = class_num(train_dir, class_name, "train", train_num)
    results2 = class_num(test_dir, class_name, "test", test_num)
    print("\n\n训练集: ", results1)
    print("\n测试集: ", results2)