首先,通过darknet53骨干网络得到大中小三种比例的特征图,图片来自https://zhuanlan.zhihu.com/p/50595699
# Build the network on input_data and keep its three returned conv outputs
# (stored as the l/m/s "bbox" feature maps used by the decode step).
self.conv_lbbox, self.conv_mbbox, self.conv_sbbox = self.__build_nework(input_data)
以小anchor为例,即下采样3次,feature map大小变为原来的1/8,FPN部分参考https://blog.csdn.net/ysh1026/article/details/113780273
# Decode the conv_sbbox output using the first anchor set and the first stride.
self.pred_sbbox = self.decode(self.conv_sbbox, self.anchors[0], self.strides[0])
def decode(self, conv_output, anchors, stride):
    """
    Decode the raw convolutional output of one detection scale.

    Grid height and width are read from separate axes of ``conv_output``,
    so rectangular feature maps are decoded as well as square ones.

    Args:
        conv_output: raw head output; it is reshaped here to
            [batch_size, grid_h, grid_w, anchor_per_scale, 5 + num_classes].
        anchors: bounding box priors for this scale; ``len(anchors)`` is the
            number of anchors per grid cell. Presumably (w, h) pairs in
            grid-cell units, since they multiply the 2-channel size term and
            the product is then scaled by ``stride`` -- confirm with caller.
        stride: multiplier applied to the grid-unit centres and sizes.

    Returns:
        tensor of shape [batch_size, grid_h, grid_w, anchor_per_scale, 5 + num_classes]
        contains (x, y, w, h, score, probability)
    """
    conv_shape = tf.shape(conv_output)
    batch_size = conv_shape[0]
    # Axis 1 is the row (y) axis and axis 2 the column (x) axis of the grid;
    # reading them separately avoids assuming a square feature map.
    grid_h = conv_shape[1]
    grid_w = conv_shape[2]
    anchor_per_scale = len(anchors)

    conv_output = tf.reshape(
        conv_output,
        (batch_size, grid_h, grid_w, anchor_per_scale, 5 + self.num_class),
    )

    # Raw (unbounded) centre offsets; the sigmoid below maps them into (0, 1),
    # i.e. a position relative to the top-left corner of the grid cell.
    conv_raw_dxdy = conv_output[:, :, :, :, 0:2]
    # Raw log-scale factors applied to the bounding box priors.
    conv_raw_dwdh = conv_output[:, :, :, :, 2:4]
    # Raw objectness (foreground) logit, comparable in role to the RPN stage
    # of Faster R-CNN; it becomes a score in (0, 1) only after the sigmoid.
    conv_raw_conf = conv_output[:, :, :, :, 4:5]
    # Raw per-class logits.
    conv_raw_prob = conv_output[:, :, :, :, 5:]

    # y[i, j] = i (row index) and x[i, j] = j (column index), both [grid_h, grid_w].
    y = tf.tile(tf.range(grid_h, dtype=tf.int32)[:, tf.newaxis], [1, grid_w])
    x = tf.tile(tf.range(grid_w, dtype=tf.int32)[tf.newaxis, :], [grid_h, 1])

    # Top-left corner (x, y) of every grid cell, repeated for each batch item
    # and each anchor so it lines up with conv_raw_dxdy.
    xy_grid = tf.concat([x[:, :, tf.newaxis], y[:, :, tf.newaxis]], axis=-1)
    xy_grid = tf.tile(
        xy_grid[tf.newaxis, :, :, tf.newaxis, :],
        [batch_size, 1, 1, anchor_per_scale, 1],
    )
    xy_grid = tf.cast(xy_grid, tf.float32)

    # Box centre: cell corner plus in-cell offset, scaled by the stride.
    pred_xy = (tf.sigmoid(conv_raw_dxdy) + xy_grid) * stride
    # Box size: the prior (anchor) rescaled by exp(raw), scaled by the stride.
    pred_wh = (tf.exp(conv_raw_dwdh) * anchors) * stride
    # Per the original notes, pred_xywh is the box drawn on the original image.
    pred_xywh = tf.concat([pred_xy, pred_wh], axis=-1)

    # Squash confidence and class logits into (0, 1).
    pred_conf = tf.sigmoid(conv_raw_conf)
    pred_prob = tf.sigmoid(conv_raw_prob)

    return tf.concat([pred_xywh, pred_conf, pred_prob], axis=-1)
代码来自https://github.com/YunYang1994/tensorflow-yolov3
——原来如此简单——