#2021SC@SDUSC
class BaseConv2DLayer:
Base class for 2D convolution layers. It supports optional batch normalization, activation, and sequence padding.
First, the parameters are defined:
filter_shape, default (0, 0, 0, 0): the filter shape must be a sequence of length 4, with elements in the order height (time), width (frequency), in_channel, out_channel. When causal_convolution is True, filter_shape[1] is the actual number of trained weights in the time dimension of the kernel.
```python
p.Define(
    'filter_shape', (0, 0, 0, 0),
    'Filter shape. Must be a sequence of length 4. Elements are in'
    ' the order of height (time), width (frequency), in_channel,'
    ' out_channel. When causal_convolution is True, filter_shape[1]'
    ' is the actual number of trained weights in the time dimension'
    ' of the kernel.')
```
The filter_output_shape property returns a one-element (standard conv) or two-element (depthwise conv) shape describing the final dims of the filter weights that are specific to this layer's output channels. This shape is needed by any algorithm that has to translate between a linear list of filter weights and their arrangement in the actual filter. Standard convolution keeps all output channels in the last dim:
```python
@property
def filter_output_shape(self):
  # Standard convolution has all output channels in the last dim.
  p = self.params
  return [p.filter_shape[-1]]
```
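For contrast, the depthwise variant reports a two-element shape. A paraphrased sketch of how a depthwise subclass such as DepthwiseConv2DLayer overrides this property (reconstructed for illustration, not a verbatim copy of the source):

```python
@property
def filter_output_shape(self):
  # Depthwise convolution keeps its output-specific weights in the
  # last two dims: [in_channel, depth_multiplier].
  p = self.params
  return list(p.filter_shape[2:])
```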
The class also defines helpers that decide whether batch-norm folded weights are effectively enabled (_is_bn_folded) and that evaluate the lower-level convolution kernel (_EvaluateConvKernel).
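As a sketch of the first helper (paraphrased for illustration; the exact conditions live in the source), _is_bn_folded answers False unless batch normalization is on and weight folding is either requested explicitly or implied by a quantization domain:

```python
@property
def _is_bn_folded(self):
  """Whether batch-norm folded weights are effectively enabled."""
  p = self.params
  if not p.batch_norm:
    return False
  return (p.bn_fold_weights or
          (p.bn_fold_weights is None and p.qdomain.default is not None))
```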
_GetWeights gets the weights and biases for the convolution. This is necessary for some operating modes where the weights are fused with batch normalization differently for training vs. eval.

Args:
- theta: a .NestedMap object containing the underlying weight values of this layer and its children layers.
- convolution_lambda: lambda which takes the convolution weights and runs the convolution.
- folded_bn_padding: padding to apply to the folded batch normalization moment computation (or None for no padding).
- cast_dtype: if not None, cast weights to the given dtype.

Returns:
- A tuple of (filter, biases).
```python
def _GetWeights(self,
                theta,
                convolution_lambda,
                folded_bn_padding,
                cast_dtype=None):
  p = self.params

  # Original weights.
  filter_w = theta.w
  filter_output_shape = self.filter_output_shape
  # TODO(miachen): remove casting once tf.nn.conv2d supports tf.float64.
  if cast_dtype:
    filter_w = tf.cast(filter_w, cast_dtype)
  if p.weight_norm:
    if len(filter_output_shape) == 1:
      # Normalize along the last dim (standard conv).
      filter_w = tf.nn.l2_normalize(filter_w, [0, 1, 2]) * tf.reshape(
          (theta.g + 1.0), [1, 1, 1, p.filter_shape[-1]])
    elif len(filter_output_shape) == 2:
      # Normalize along the last two dims (depthwise conv).
      filter_w = tf.nn.l2_normalize(filter_w, [0, 1]) * tf.reshape(
          (theta.g + 1.0), [1, 1] + filter_output_shape)
    else:
      assert False, 'Unsupported weight norm filter shape'

  # Original bias.
  if p.bias:
    b = theta.b
  else:
    b = tf.zeros([symbolic.ToStatic(self.output_channels)],
                 dtype=filter_w.dtype)

  # Pass-through if weights are not folded with batch normalization.
  if not self._is_bn_folded:
    return filter_w, b

  # If batch norm is fused with weights, then compute the weights as from
  # figure C.8 of https://arxiv.org/pdf/1712.05877.pdf for training and
  # figure C.6 for eval.
  if self.do_eval:
    # Gets current moments without updating.
    mean, variance, beta, gamma = self.bn.GetCurrentMoments(theta.bn)
  else:
    # Updates moments based on a trial run of the convolution.
    raw_conv_output = convolution_lambda(filter_w)
    mean, variance, beta, gamma = self.bn.ComputeAndUpdateMoments(
        theta.bn, raw_conv_output, folded_bn_padding)

  # Fold weights and bias. Note that this layer's bias is not used (not
  # applicable for batch norm case).
  sigma_recip = tf.math.rsqrt(variance + self.bn.epsilon)
  scale_correction = gamma * sigma_recip
  # Normal conv will have all weights in the last dim
  # ([_, _, _, output_channels]), which matches the 1D layout from
  # batch norm. Depthwise uses the last two dims so reshape
  # ([_, _, in_c, c_multiplier]).
  scale_correction = tf.reshape(scale_correction, filter_output_shape)
  filter_w = filter_w * scale_correction
  b = (beta - (gamma * mean * sigma_recip))
  return filter_w, b
```
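The folding can be sanity-checked numerically: convolving with the folded weights and adding the folded bias must reproduce batch normalization applied to the raw convolution output (the eval path, figure C.6). A minimal standalone sketch with arbitrary shapes and moment values (eps stands in for self.bn.epsilon):

```python
import numpy as np
import tensorflow as tf

eps = 1e-3
x = tf.random.normal([1, 5, 5, 3])
w = tf.random.normal([3, 3, 3, 8])
mean = tf.random.normal([8])
variance = tf.random.uniform([8], 0.5, 1.5)
gamma = tf.random.normal([8])
beta = tf.random.normal([8])

# Fold the BN scale into the filter and the BN shift into the bias.
sigma_recip = tf.math.rsqrt(variance + eps)
scale = tf.reshape(gamma * sigma_recip, [1, 1, 1, 8])  # filter_output_shape
folded = (tf.nn.conv2d(x, w * scale, strides=[1, 1, 1, 1], padding='SAME')
          + (beta - gamma * mean * sigma_recip))

# Reference: batch norm applied after the unmodified convolution.
y = tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME')
unfolded = gamma * (y - mean) * sigma_recip + beta

np.testing.assert_allclose(folded.numpy(), unfolded.numpy(), rtol=1e-4, atol=1e-4)
```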
class Conv2DLayer(BaseConv2DLayer):
Convolution layer, with optional batch normalization and activation.
class ConvNN2DLayer(BaseConv2DLayer):
Convolution layer based on tf.nn.conv2d instead of tf.nn.convolution.

tf.nn.convolution uses a different implementation for atrous convolution, wrapping the actual convolution with space_to_batch and batch_to_space. tflite conversion does not support that implementation, so a separate layer is needed for atrous (dilated) convolutions.
```python
def _EvaluateConvKernel(self, inputs, filter_w, strides, dilation_rate,
                        padding_algorithm, data_format):
  p = self.params
  return tf.nn.conv2d(
      inputs,
      filter_w,
      strides=strides,
      dilations=p.dilation_rate,
      data_format='NHWC',
      padding='SAME')
```
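The two entry points agree numerically on dilated convolution; the difference is only in how tf.nn.convolution lowers it (space_to_batch around a dense kernel), which is what breaks tflite conversion. A quick check of the numerical equivalence:

```python
import numpy as np
import tensorflow as tf

x = tf.random.normal([1, 9, 9, 4])
w = tf.random.normal([3, 3, 4, 8])

# The same atrous convolution through both APIs.
y1 = tf.nn.convolution(x, w, padding='SAME', dilations=[2, 2])
y2 = tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME', dilations=[2, 2])
np.testing.assert_allclose(y1.numpy(), y2.numpy(), rtol=1e-4, atol=1e-4)
```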
class SeparableConv2DLayer(Conv2DLayer):
Separable 2D convolution.

This class aggregates a DepthwiseConv2DLayer that feeds into the pointwise convolution defined by this layer. Since the pointwise convolution controls the output, the class is defined in terms of it and delegates to a depthwise sub-layer. The filter_shape parameter is rewritten at initialization from the form:

(h, w, cin, cout)

to:

depthwise filter: (h, w, cin, p.depth_multiplier)
pointwise filter (on this instance): (1, 1, cin * p.depth_multiplier, cout)

In this way the layer is configured as if it were a normal 2D convolution, but is internally reconfigured to be separable.
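For example, with filter_shape = (3, 3, 16, 64) and depth_multiplier = 2, the depthwise filter becomes (3, 3, 16, 2) and the pointwise filter on this instance becomes (1, 1, 32, 64). That is 3 * 3 * 16 * 2 + 1 * 1 * 32 * 64 = 2336 filter weights, versus 3 * 3 * 16 * 64 = 9216 for the equivalent standard convolution.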
```python
@classmethod
def Params(cls):
  p = super().Params()
  p.Define(
      'depth_multiplier', 1,
      'Number of depthwise convolution output channels per input channel. '
      'The total number of depthwise convolution output channels will be '
      'equal to in_channel * depth_multiplier.')
  p.Define('depthwise_tpl',
           DepthwiseConv2DLayer.Params().Set(activation='NONE'),
           'Template for the depthwise conv sub-layer.')
  return p
```
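A hypothetical configuration sketch (the name and values here are illustrative, not taken from the source):

```python
p = SeparableConv2DLayer.Params().Set(
    name='sep_conv',
    filter_shape=(3, 3, 32, 64),  # rewritten internally as described above
    filter_stride=(2, 2),         # handed off to the depthwise sub-layer
    depth_multiplier=2)
layer = p.Instantiate()
```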
```python
def __init__(self, params):
  # Rewrite the filter.
  params = params.Copy()
  h, w, cin, cout = params.filter_shape
  params.filter_shape = (1, 1, cin * params.depth_multiplier, cout)
  depthwise_filter_shape = (h, w, cin, params.depth_multiplier)

  # Dilation rate and stride go to the depthwise layer and reset ours.
  depthwise_filter_stride = params.filter_stride
  depthwise_dilation_rate = params.dilation_rate
  params.filter_stride = (1, 1)
  params.dilation_rate = (1, 1)

  super().__init__(params)
  p = self.params
  del params
```
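The snippet above ends before the depthwise sub-layer is actually constructed from depthwise_filter_shape, depthwise_filter_stride, and depthwise_dilation_rate. The decomposition itself can be verified with raw TensorFlow ops, independent of this class: a depthwise convolution followed by a 1x1 pointwise convolution matches the fused separable op. A self-contained sketch using the (3, 3, 16, 64), depth_multiplier = 2 shapes from the example above:

```python
import numpy as np
import tensorflow as tf

x = tf.random.normal([1, 8, 8, 16])
dw = tf.random.normal([3, 3, 16, 2])   # (h, w, cin, depth_multiplier)
pw = tf.random.normal([1, 1, 32, 64])  # (1, 1, cin * depth_multiplier, cout)

# Depthwise conv followed by a 1x1 pointwise conv...
y1 = tf.nn.conv2d(
    tf.nn.depthwise_conv2d(x, dw, strides=[1, 1, 1, 1], padding='SAME'),
    pw, strides=[1, 1, 1, 1], padding='SAME')
# ...equals the fused separable convolution.
y2 = tf.nn.separable_conv2d(x, dw, pw, strides=[1, 1, 1, 1], padding='SAME')
np.testing.assert_allclose(y1.numpy(), y2.numpy(), rtol=1e-4, atol=1e-4)
```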
Summary

The above analyzed some of the key convolution-layer code in layers.py.