1. Code implementation
from __future__ import print_function
import theano
import theano.tensor as T
import numpy as np
import matplotlib.pyplot as plt


class Layer(object):
    def __init__(self, inputs, in_size, out_size, activation_function=None):
        self.W = theano.shared(np.random.normal(0, 1, (in_size, out_size)))
        self.b = theano.shared(np.zeros((out_size, )) + 0.1)
        self.Wx_plus_b = T.dot(inputs, self.W) + self.b
        self.activation_function = activation_function
        if activation_function is None:
            self.outputs = self.Wx_plus_b
        else:
            self.outputs = self.activation_function(self.Wx_plus_b)


# Make up some fake data
x_data = np.linspace(-1, 1, 300)[:, np.newaxis]
noise = np.random.normal(0, 0.05, x_data.shape)
y_data = np.square(x_data) - 0.5 + noise    # y = x^2 - 0.5

# show the fake data
plt.scatter(x_data, y_data)
plt.show()

# determine the inputs dtype
x = T.dmatrix("x")
y = T.dmatrix("y")

# add layers
l1 = Layer(x, 1, 10, T.nnet.relu)
l2 = Layer(l1.outputs, 10, 1, None)

# compute the cost
cost = T.mean(T.square(l2.outputs - y))

# compute the gradients
gW1, gb1, gW2, gb2 = T.grad(cost, [l1.W, l1.b, l2.W, l2.b])

# apply gradient descent
learning_rate = 0.05
train = theano.function(
    inputs=[x, y],
    outputs=cost,
    updates=[(l1.W, l1.W - learning_rate * gW1),
             (l1.b, l1.b - learning_rate * gb1),
             (l2.W, l2.W - learning_rate * gW2),
             (l2.b, l2.b - learning_rate * gb2)])

# prediction
predict = theano.function(inputs=[x], outputs=l2.outputs)

for i in range(1000):
    # training
    err = train(x_data, y_data)
    if i % 50 == 0:
        print(err)
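Each (shared_variable, new_expression) pair in the updates list above performs one step of plain gradient descent, W <- W - learning_rate * dcost/dW, every time train() is called. As a minimal side sketch of that mechanism on a single toy parameter (the names w, loss, gw and step are hypothetical, not part of the listing above):

import numpy as np
import theano
import theano.tensor as T

w = theano.shared(np.array(2.0))      # a single shared parameter
loss = T.sqr(w - 5.0)                 # toy loss with its minimum at w = 5
gw = T.grad(loss, w)                  # symbolic gradient d(loss)/dw
step = theano.function([], loss, updates=[(w, w - 0.1 * gw)])

for _ in range(5):
    print(step())                     # loss shrinks toward 0 as w moves toward 5

The training loop in the listing calls train(x_data, y_data) 1000 times and prints the cost every 50 steps, so 20 values appear below.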
Result:
1.77825942078
0.0307547174779
0.0145354962126
0.0111276391112
0.0098326475625
0.00913968526182
0.00870222509
0.00832267806176
0.00788557725943
0.00737921234676
0.00684759006112
0.0063416352651
0.00589114798344
0.005512661812
0.00522628405891
0.00498177806607
0.00477628310217
0.00460285349102
0.00445516762566
0.00432311158005
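The cost drops from about 1.78 to about 0.004 over the 1000 training steps, so the network has fit the quadratic reasonably well. The compiled predict function is never called in the listing; as a minimal follow-up sketch (a hypothetical continuation that reuses predict, x_data, y_data and plt from above), the learned curve can be overlaid on the training data:

# run a forward pass on the training inputs
prediction_value = predict(x_data)

plt.scatter(x_data, y_data)                      # the noisy samples
plt.plot(x_data, prediction_value, 'r-', lw=3)   # the network's fit of y = x^2 - 0.5
plt.show()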