训练的标注数据格式如下:
[ { "name": "235_2_t20201127123021723_CAM2.jpg", "image_height": 6000, "image_width": 8192, "category": 5, "bbox": [ 1876.06, 998.04, 1883.06, 1004.04 ] }, { "name": "235_2_t20201127123021723_CAM2.jpg", "image_height": 6000, "image_width": 8192, "category": 5, "bbox": [ 1655.06, 1094.04, 1663.06, 1102.04 ] } ]
聚类 anchor box 时只需要 bbox 中左上角与右下角的坐标(x_min, y_min, x_max, y_max),并由此计算出每个框的宽和高
k-means 聚类代码:
import json

import numpy as np


def iou(box, clusters):
    """
    Compute the IoU between one box and every cluster centre.

    Boxes and clusters are described by (width, height) only, i.e. they are
    treated as if they shared the same top-left corner at the origin.

    param:
        box: tuple or array, shifted to the origin (i. e. width and height)
        clusters: numpy array of shape (k, 2) where k is the number of clusters
    return:
        numpy array of shape (k,) where k is the number of clusters
    raises:
        ValueError: if the overlap has a zero width or height, i.e. the box or
            one of the clusters has a zero side
    """
    x = np.minimum(clusters[:, 0], box[0])
    y = np.minimum(clusters[:, 1], box[1])
    if np.any(x == 0) or np.any(y == 0):
        raise ValueError("Box has no area")

    intersection = x * y
    box_area = box[0] * box[1]
    cluster_area = clusters[:, 0] * clusters[:, 1]

    # The small epsilon keeps the denominator away from zero.
    return intersection / (box_area + cluster_area - intersection + 1e-10)


def avg_iou(boxes, clusters):
    """
    Compute the average, over all boxes, of the best IoU with any cluster.

    param:
        boxes: numpy array of shape (r, 2), where r is the number of rows
        clusters: numpy array of shape (k, 2) where k is the number of clusters
    return:
        average IoU as a single float
    """
    return np.mean([np.max(iou(box, clusters)) for box in boxes])


def translate_boxes(boxes):
    """
    Translate all boxes to the origin, keeping only width and height.

    param:
        boxes: numpy array of shape (r, 4) holding x_min, y_min, x_max, y_max
    return:
        numpy array of shape (r, 2) holding |x_max - x_min|, |y_max - y_min|
    """
    boxes = np.asarray(boxes)
    return np.abs(boxes[:, 2:4] - boxes[:, 0:2])


def kmeans(boxes, k, dist=np.median, seed=None):
    """
    Run k-means clustering using 1 - IoU as the distance metric.

    param:
        boxes: numpy array of shape (r, 2), where r is the number of rows
        k: number of clusters
        dist: function used to reduce the members of a cluster to its new
            centre; called as dist(members, axis=0)
        seed: optional seed for the random choice of the initial centres;
            None (the default) gives a different initialisation on every run
    return:
        numpy array of shape (k, 2)
    raises:
        ValueError: if k is larger than the number of boxes, or if a box has
            no area (raised by iou)
    """
    rows = boxes.shape[0]

    distances = np.empty((rows, k))
    # -1 is never a valid cluster index, so the first pass can never be
    # mistaken for convergence (an all-zero start would be when every box
    # initially lands in cluster 0, e.g. whenever k == 1).
    last_clusters = np.full(rows, -1)

    # A local generator avoids reseeding numpy's global random state.
    rng = np.random.RandomState(seed)

    # Forgy initialisation: pick k distinct rows as the initial centres.
    # The method degenerates if the whole array contains the same rows.
    # Centres are kept as floats so that medians/means are not truncated
    # when `boxes` has an integer dtype.
    clusters = boxes[rng.choice(rows, k, replace=False)].astype(float)

    while True:
        # Distance is d(box, centroid) = 1 - IoU(box, centroid): a larger
        # IoU means a smaller distance.
        for row in range(rows):
            distances[row] = 1 - iou(boxes[row], clusters)

        # Assign every box to its closest centre.
        nearest_clusters = np.argmin(distances, axis=1)

        # Stop once the assignment no longer changes.
        if (last_clusters == nearest_clusters).all():
            break

        # Move every centre to dist() (the median by default) of its members.
        for cluster in range(k):
            members = boxes[nearest_clusters == cluster]
            # An empty cluster keeps its previous centre; reducing an empty
            # array would yield NaN and corrupt all later assignments.
            if len(members) > 0:
                clusters[cluster] = dist(members, axis=0)

        last_clusters = nearest_clusters

    return clusters


def parse_anno(annotation_path):
    """
    Read the annotation JSON file and return the size of every bounding box.

    The file is expected to hold a list of objects, each with a "bbox" entry
    of the form [x_min, y_min, x_max, y_max].

    param:
        annotation_path: path of the JSON annotation file
    return:
        numpy float array of shape (n, 2) holding (width, height) per box
    raises:
        ValueError: if a box has a non-positive width or height
    """
    with open(annotation_path, 'r', encoding='utf-8') as f:
        anno = json.load(f)

    result = []
    for line in anno:
        bbox = line['bbox']
        x_min, y_min, x_max, y_max = bbox[0], bbox[1], bbox[2], bbox[3]
        # Size of the bounding box.
        width = x_max - x_min
        height = y_max - y_min
        # Explicit check instead of `assert`, which is stripped under -O.
        if width <= 0 or height <= 0:
            raise ValueError(
                f'bbox must have positive width and height, got {bbox}'
            )
        result.append([width, height])

    # reshape keeps the (n, 2) shape even when there are no annotations.
    return np.asarray(result, dtype=float).reshape(-1, 2)


def get_kmeans(anno, cluster_num=9, seed=None):
    """
    Cluster the box sizes into anchors and report how well they fit.

    param:
        anno: numpy array of shape (n, 2) holding (width, height) per box
        cluster_num: number of anchors to produce
        seed: optional seed forwarded to kmeans for reproducible results
    return:
        (anchors, ave_iou): anchors is a list of [width, height] integer
        pairs sorted by area; ave_iou is the average best IoU computed with
        the un-truncated cluster centres
    """
    anchors = kmeans(anno, cluster_num, seed=seed)
    ave_iou = avg_iou(anno, anchors)

    anchors = anchors.astype('int').tolist()
    anchors = sorted(anchors, key=lambda x: x[0] * x[1])

    return anchors, ave_iou


if __name__ == '__main__':
    annotation_path = "tile_round1_train_20201231/train_annos.json"
    anno_result = parse_anno(annotation_path)
    anchors, ave_iou = get_kmeans(anno_result, 9)

    anchor_string = ', '.join(
        '{},{}'.format(anchor[0], anchor[1]) for anchor in anchors
    )

    print(f'anchors are: {anchor_string}')
    print(f'the average iou is: {ave_iou}')
由于初始聚类中心是从数据中随机选取的,每次运行得到的结果都会略有不同
参考:https://blog.csdn.net/zuliang001/article/details/90551798