统计标注的各类目标数量(xml标注格式)

注意:

本文代码建议把图片和标注文件分开放

图片存在 images 文件夹下(这里用不到)
标注文件存在 annotations 文件夹下

这里还用到 tqdm 库来显示进度条,不需要可以注释掉

import xml.etree.ElementTree as ET #导入xml模块
import pickle
import os
import glob
from os import listdir, getcwd
from os.path import join
from tqdm import tqdm#缺包的需要安装


def class_num(_dir, class_name, dataset, nums):
    """Count annotated objects per class across the XML files of a directory.

    Every ``<object>`` element found in every ``*.xml`` file directly inside
    ``_dir`` is tallied by the text of its ``<name>`` child.

    Args:
        _dir: Directory holding the ``*.xml`` annotation files. A trailing
            path separator is accepted but not required.
        class_name: Label names to count individually.
        dataset: Short tag shown in the progress-bar description
            (e.g. ``"train"``).
        nums: Expected number of XML files; used only as the progress-bar
            total.

    Returns:
        dict mapping each entry of ``class_name`` to its object count, plus
        ``"other"`` (objects whose label is not in ``class_name`` or that
        have no ``<name>`` element) and ``"sum"`` (all objects seen).

    Raises:
        OSError, UnicodeDecodeError, xml.etree.ElementTree.ParseError:
            propagated unchanged when a file cannot be read or parsed.
    """
    result = {clss: 0 for clss in class_name}
    # Snapshot the real class labels before the bookkeeping keys are added,
    # so "other"/"sum" are never mistaken for annotation labels.
    known = set(result)
    result["other"] = 0  # objects with an unlisted (or missing) label
    result["sum"] = 0    # every object, regardless of label

    # os.path.join copes with directories given with or without a trailing
    # separator; plain string concatenation silently matched nothing when
    # the separator was missing.
    pattern = os.path.join(_dir, "*.xml")

    # The context manager closes the bar even if a file fails to parse.
    with tqdm(total=nums, desc="%s-porcess" % dataset, unit="xml") as pbar:
        for xml_path in glob.glob(pattern):
            pbar.update(1)
            with open(xml_path, "r", encoding="utf-8") as f:
                tree = ET.parse(f)
            for obj in tree.iter('object'):
                result["sum"] += 1
                name_node = obj.find("name")
                # An <object> without a <name> child has no usable label;
                # count it under "other" instead of crashing.
                label = name_node.text if name_node is not None else None
                if label in known:
                    result[label] += 1
                else:
                    result["other"] += 1
    return result

if __name__ == '__main__':
    # Script entry point: count the labelled objects of the train and test
    # annotation directories and print both summaries.

    train_dir="E:/DL/detectron2/SwinT_detectron2/datasets/new/train/annotations/"
    test_dir="E:/DL/detectron2/SwinT_detectron2/datasets/test/annotations/"

    class_name = ["0","1","2"]  # label names used when annotating

    # Count only the *.xml files: class_num visits nothing else, so counting
    # every directory entry (sub-folders, stray images, ...) would give the
    # progress bar a total it can never reach.
    train_num = len(glob.glob(os.path.join(train_dir, "*.xml")))
    test_num = len(glob.glob(os.path.join(test_dir, "*.xml")))
    print(train_num,test_num)

    results1 = class_num(train_dir,class_name,"train",train_num)
    results2 = class_num(test_dir,class_name,"test",test_num)

    print("\n\n训练集: ",results1)
    print("\n测试集: ",results2)



上一篇:雷霆战机9.5全新上线,Python+Pygame开发飞机大战完整游戏项目(附源码)


下一篇:方块