1. Code Implementation
from __future__ import print_function
import theano
import theano.tensor as T
import numpy as np
import matplotlib.pyplot as plt


class Layer(object):
    def __init__(self, inputs, in_size, out_size, activation_function=None):
        self.W = theano.shared(np.random.normal(0, 1, (in_size, out_size)))
        self.b = theano.shared(np.zeros((out_size, )) + 0.1)
        self.Wx_plus_b = T.dot(inputs, self.W) + self.b
        self.activation_function = activation_function
        if activation_function is None:
            self.outputs = self.Wx_plus_b
        else:
            self.outputs = self.activation_function(self.Wx_plus_b)


# Make up some fake data
x_data = np.linspace(-1, 1, 300)[:, np.newaxis]
noise = np.random.normal(0, 0.05, x_data.shape)
y_data = np.square(x_data) - 0.5 + noise  # y = x^2 - 0.5

# show the fake data
plt.scatter(x_data, y_data)
plt.show()

# determine the inputs dtype
x = T.dmatrix("x")
y = T.dmatrix("y")

# add layers
l1 = Layer(x, 1, 10, T.nnet.relu)
l2 = Layer(l1.outputs, 10, 1, None)

# compute the cost
cost = T.mean(T.square(l2.outputs - y))

# compute the gradients
gW1, gb1, gW2, gb2 = T.grad(cost, [l1.W, l1.b, l2.W, l2.b])

# apply gradient descent
learning_rate = 0.05
train = theano.function(
    inputs=[x, y],
    outputs=cost,
    updates=[(l1.W, l1.W - learning_rate * gW1),
             (l1.b, l1.b - learning_rate * gb1),
             (l2.W, l2.W - learning_rate * gW2),
             (l2.b, l2.b - learning_rate * gb2)])

# prediction
predict = theano.function(inputs=[x], outputs=l2.outputs)

for i in range(1000):
    # training
    err = train(x_data, y_data)
    if i % 50 == 0:
        print(err)
Result (the cost, printed every 50 of the 1000 training steps):
1.77825942078
0.0307547174779
0.0145354962126
0.0111276391112
0.0098326475625
0.00913968526182
0.00870222509
0.00832267806176
0.00788557725943
0.00737921234676
0.00684759006112
0.0063416352651
0.00589114798344
0.005512661812
0.00522628405891
0.00498177806607
0.00477628310217
0.00460285349102
0.00445516762566
0.00432311158005
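The listing compiles a predict function but never calls it. As a minimal follow-up sketch (assuming the training loop above has already run in the same session), the learned curve can be drawn on top of the noisy training data:

# hypothetical follow-up: visualize the fit with the compiled predict function
prediction_value = predict(x_data)              # forward pass through l1 -> l2
plt.scatter(x_data, y_data)                     # the noisy training points
plt.plot(x_data, prediction_value, 'r-', lw=3)  # the fitted curve, roughly y = x^2 - 0.5
plt.show()

Since predict shares l1.W, l1.b, l2.W, and l2.b with train, it automatically uses the parameter values produced by the gradient-descent updates.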