TensorFlow 基础 API
自定义损失函数
def customize_mse(y_true, y_pred):
    """Return the mean squared error between predictions and targets.

    Args:
        y_true: Ground-truth values.
        y_pred: Predicted values; subtracted element-wise from ``y_true``.

    Returns:
        The mean of ``(y_pred - y_true) ** 2`` taken over all elements.
    """
    return tf.reduce_mean(tf.square(y_pred - y_true))


# The function object itself is handed to compile() as the loss.
model.compile(loss=customize_mse, optimizer="adam")
自定义层
# Customized layer implemented as a subclass of keras.layers.Layer.
class CustomizedDenseLayer(keras.layers.Layer):
    """Dense layer computing ``activation(x @ kernel + bias)``.

    Args:
        units: Number of output features (second dimension of the kernel).
        activation: Value passed straight to ``keras.layers.Activation``.
        **kwargs: Forwarded unchanged to ``keras.layers.Layer.__init__``
            (for example ``input_shape``).
    """

    def __init__(self, units, activation=None, **kwargs):
        # Initialise the base Layer first so its attribute tracking exists
        # before a sub-layer is assigned to ``self``.
        super().__init__(**kwargs)
        self.units = units
        self.activation = keras.layers.Activation(activation)

    def build(self, input_shape):
        """Create the trainable kernel and bias for the given input shape."""
        # The feature size is read from the last axis; for the 2-D
        # (batch, features) inputs used here this equals input_shape[1].
        self.kernel = self.add_weight(
            name="kernel",
            shape=[input_shape[-1], self.units],
            initializer='uniform',
            trainable=True,
        )
        self.bias = self.add_weight(
            name="bias",
            shape=(self.units,),
            initializer="zeros",
            trainable=True,  # whether the weight is updated during training
        )
        super().build(input_shape)

    def call(self, x):
        """Apply the affine transform followed by the activation."""
        return self.activation(x @ self.kernel + self.bias)
# Stack two custom dense layers: a 30-unit layer with SELU activation that
# declares the input shape, followed by a 1-unit layer without activation.
model = keras.models.Sequential(
    layers=[
        CustomizedDenseLayer(
            30,
            activation="selu",
            input_shape=x_train.shape[1:],
        ),
        CustomizedDenseLayer(1),
    ]
)
# Customized activation function wrapped as a layer.
# softplus(x) = log(1 + e^x), a smooth curve similar to ReLU.
customized_softplus = keras.layers.Lambda(
    function=lambda inputs: tf.nn.softplus(inputs)
)
tf.function的使用
- Feature
- 把python代码转化成图结构
- 易于将模型导出成为 GraphDef + checkpoint 或者 SavedModel
- 使得eager execution可以默认打开
- 使得1.0的代码可以通过tf.function在版本2.0上继续运行
# Plain Python function built from TensorFlow ops.
def scaled_elu(z, scale=1.0, alpha=1.0):
    """Return a scaled ELU of ``z``, element-wise.

    Computes ``z >= 0 ? scale * z : scale * alpha * tf.nn.elu(z)``.

    Args:
        z: Input tensor.
        scale: Factor applied to the whole result.
        alpha: Extra factor applied only where ``z`` is negative.
    """
    is_positive = tf.greater_equal(z, 0.0)
    # tf.where selects per element, so both branches are evaluated on all of z.
    return scale * tf.where(is_positive, z, alpha * tf.nn.elu(z))
# Wrap the Python function with tf.function to get a graph-backed callable.
scaled_elu_tf = tf.function(scaled_elu)
# IPython %timeit magics (run in a notebook/IPython session). The trailing
# comments are the timings recorded in these notes; each measurement also
# includes generating the random input tensor.
%timeit scaled_elu(tf.random.normal((1000, 1000))) # 14.8 ms ± 156 μs per loop
%timeit scaled_elu_tf(tf.random.normal((1000, 1000))) # 12.1 ms ± 35.6 μs per loop
普通python代码转成tf的图结构,速度加快
除此之外,tf.function还可以在类型上做限制
# input_signature restricts the function to 1-D int32 tensors of any length.
@tf.function(input_signature=[tf.TensorSpec([None], tf.int32, name='x')])
def cube(z):
    """Return ``z`` raised to the third power, element-wise."""
    return tf.pow(z, 3)


# The float input does not match the int32 signature, so this call is
# rejected. Run the two calls separately (e.g. in separate notebook cells).
print(cube(tf.constant([1.,2.,3.]))) # ValueError: Python inputs incompatible with input_signature
print(cube(tf.constant([1, 2, 3]))) # tf.Tensor([ 1 8 27], shape=(3,), dtype=int32)
自定义求导
def approximate_derivative(f, x, eps=1e-3):
    """Approximate f'(x) with the central difference quotient.

    Args:
        f: Function of one variable.
        x: Point at which the derivative is estimated.
        eps: Half-width of the difference interval; must be non-zero.

    Returns:
        ``(f(x + eps) - f(x - eps)) / (2 * eps)``.
    """
    return (f(x + eps) - f(x - eps)) / (2. * eps)


def equation1(x):
    """Return ``3x^2 + 2x - 1``."""
    return 3. * x**2 + 2. * x - 1


print(approximate_derivative(equation1, 1.)) # 7.999999999999119


def equation2(x1, x2):
    """Return ``(x1 + 5) * x2^2``."""
    return (x1 + 5) * (x2 ** 2)


def approximate_gradient(f, x1, x2, eps=1e-3):
    """Approximate both partial derivatives of ``f`` at ``(x1, x2)``.

    Args:
        f: Function of two variables.
        x1: First coordinate of the evaluation point.
        x2: Second coordinate of the evaluation point.
        eps: Half-width passed on to ``approximate_derivative``.

    Returns:
        Tuple ``(df/dx1, df/dx2)``.
    """
    # Differentiate the function that was passed in, holding the other
    # argument fixed, rather than a hard-coded equation.
    dg_x1 = approximate_derivative(lambda x: f(x, x2), x1, eps)
    dg_x2 = approximate_derivative(lambda x: f(x1, x), x2, eps)
    return dg_x1, dg_x2


print(approximate_gradient(equation2, 2., 3.)) # (8.999999999993236, 41.999999999994486)
这是在普通python代码上,实现一元和多元函数的求导。
下面是如何用tensorflow去完成相同的功能
x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
# persistent=True lets gradient() be called more than once on the same tape;
# a non-persistent tape can only be used for a single gradient() call.
with tf.GradientTape(persistent=True) as tape2:
    z2 = equation2(x1, x2)
dz2_x1 = tape2.gradient(z2, x1)
print(dz2_x1) # tf.Tensor(9.0, shape=(), dtype=float32)
dz2_x2 = tape2.gradient(z2, x2)
print(dz2_x2) # tf.Tensor(42.0, shape=(), dtype=float32)
# Drop the reference to the persistent tape once it is no longer needed.
del tape2
# A single gradient() call can return the derivatives with respect to
# several variables at once, so a non-persistent tape is enough here.
with tf.GradientTape() as tape3:
    z3 = equation2(x1, x2)
dz3_x1x2 = tape3.gradient(z3, [x1, x2])
# [<tf.Tensor: shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: shape=(), dtype=float32, numpy=42.0>]
x5 = tf.Variable(5.)
# Both targets must be computed inside the tape context to be recorded.
with tf.GradientTape() as tape5:
    z5 = 3 * x5
    z6 = x5 ** 2
# With a list of targets the per-target gradients are summed:
# d(3x)/dx + d(x^2)/dx = 3 + 2 * 5 = 13.
dz5z6_x5 = tape5.gradient([z5, z6], x5) # tf.Tensor(13.0, shape=(), dtype=float32)
如何求二阶导
x6 = tf.Variable(2.0)
x7 = tf.Variable(3.0)
# Compute the second derivatives with two nested tapes.
with tf.GradientTape(persistent=True) as outter_tape:
    with tf.GradientTape(persistent=True) as inner_tape:
        z7 = equation2(x6, x7)
    # The first-order gradient is taken inside the outer tape's context so
    # that the outer tape records it and can differentiate it again.
    first_derivates = inner_tape.gradient(z7, [x6, x7])
# One row per first derivative: its gradient with respect to [x6, x7].
second_derivate = [
    outter_tape.gradient(first_derivate, [x6, x7])
    for first_derivate in first_derivates
]
print(second_derivate)
# Drop the references to the persistent tapes once they are no longer needed.
del inner_tape
del outter_tape
梯度下降原理
# Manual gradient descent on equation1, starting from x = 0.
learning_rate = 0.1
x = tf.Variable(0.0)
for _ in range(100):
    # A fresh tape per step records the forward pass for this iteration.
    with tf.GradientTape() as tape:
        z = equation1(x)
    dz_dx = tape.gradient(z, x)
    # In-place update: x <- x - learning_rate * dz/dx.
    x.assign_sub(learning_rate * dz_dx)
print(x)
## Same loop, with a Keras optimizer applying the update step.
learning_rate = 0.1
x = tf.Variable(0.0)
# `learning_rate` is the supported keyword; `lr` is a deprecated alias.
optimizer = keras.optimizers.SGD(learning_rate=learning_rate)
for _ in range(100):
    with tf.GradientTape() as tape:
        z = equation1(x)
    dz_dx = tape.gradient(z, x)
    # apply_gradients expects an iterable of (gradient, variable) pairs.
    optimizer.apply_gradients([(dz_dx, x)])
print(x)