Theano 实例:卷积神经网络

In [1]:

  1. import theano
  2. import theano.tensor as T
  3. from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
  4. import numpy as np
  5. from load import mnist
  6.  
  # Shared random stream; dropout() draws its binomial masks from it.
  srng = RandomStreams()
  1. Using gpu device 1: Tesla C2075 (CNMeM is disabled)

从前一节导入有用的函数:

In [2]:

  def floatX(X):
      """Convert ``X`` to a NumPy array whose dtype is ``theano.config.floatX``."""
      target_dtype = theano.config.floatX
      return np.asarray(X, dtype=target_dtype)
  3.  
  def init_weights(shape):
      """Create a Theano shared variable with the given shape.

      The initial values are standard-normal samples scaled by 0.01 and
      converted with ``floatX``.
      """
      initial_values = np.random.randn(*shape) * 0.01
      return theano.shared(floatX(initial_values))
  6.  
  def rectify(X):
      """Rectified linear unit: element-wise maximum of ``X`` and zero."""
      zero = 0.
      return T.maximum(X, zero)
  9.  
  def softmax(X):
      """Row-wise softmax of a 2-D symbolic tensor.

      Each row's maximum is subtracted before exponentiating; this leaves
      the result unchanged but keeps the exponentials from overflowing.
      """
      row_max = X.max(axis=1).dimshuffle(0, 'x')
      exps = T.exp(X - row_max)
      row_sum = exps.sum(axis=1).dimshuffle(0, 'x')
      return exps / row_sum
  13.  
  def dropout(X, p=0.):
      """Apply dropout with drop probability ``p`` to ``X``.

      When ``p`` is greater than zero, ``X`` is multiplied by a binomial
      mask drawn from ``srng`` (each element kept with probability
      ``1 - p``) and then divided by ``1 - p``.  Otherwise ``X`` is
      returned unchanged.
      """
      if not p > 0:
          return X

      keep_prob = 1 - p
      mask = srng.binomial(X.shape, p=keep_prob, dtype=theano.config.floatX)
      return X * mask / keep_prob
  20.  
  def RMSprop(cost, params, lr=0.001, rho=0.9, epsilon=1e-6):
      """Build the RMSprop update list for ``params`` with respect to ``cost``.

      For every parameter a shared accumulator (initialised to zeros with
      the parameter's shape) keeps a running average of the squared
      gradient with decay ``rho``.  The gradient is divided by
      ``sqrt(accumulator + epsilon)`` before the step of size ``lr``.

      Returns a list of ``(shared_variable, new_value)`` pairs, two per
      parameter: the accumulator update followed by the parameter update.
      """
      gradients = T.grad(cost=cost, wrt=params)
      updates = []
      for param, grad in zip(params, gradients):
          running_avg = theano.shared(param.get_value() * 0.)
          new_avg = rho * running_avg + (1 - rho) * grad ** 2
          scaled_grad = grad / T.sqrt(new_avg + epsilon)
          updates.extend([
              (running_avg, new_avg),
              (param, param - lr * scaled_grad),
          ])
      return updates

与前一节不同,我们使用卷积神经网络来实现这次的模型,为此,我们需要导入 2 维的卷积和池化函数:

In [3]:

  1. from theano.tensor.nnet.conv import conv2d
  2. from theano.tensor.signal.downsample import max_pool_2d

conv2d 函数接受两个输入:

  • 对应输入的 4D 张量,其形状如下:

[mini-batch size, number of feature maps at layer m-1, image height, image width]

  • 对应参数矩阵的 4D 张量,其形状如下:

[number of feature maps at layer m, number of feature maps at layer m-1, filter height, filter width]

为了对图像使用卷积,我们需要将图像转化为原始的 28 × 28 大小,同时添加一维表示图像的通道数(黑白图像为 1):

In [4]:

  # Load MNIST with one-hot encoded labels.
  trX, teX, trY, teY = mnist(onehot=True)

  # Give every image an explicit channel axis: (N, 1, 28, 28).
  # The -1 lets NumPy infer N from the array size.
  trX, teX = (images.reshape(-1, 1, 28, 28) for images in (trX, teX))

注意,在 reshape 方法中传入 -1,表示该维的大小将根据数组总大小和其他维度自动计算。

模型首先进行三层卷积加池化操作,然后在第三层的输出之后接一个全连接层,最后在全连接层之后加上一个 softmax 层:

In [5]:

  def model(X, w, w2, w3, w4, p_drop_conv, p_drop_hidden, w_out=None):
      """Build the symbolic graph of the convolutional network.

      Three convolution + max-pooling stages are followed by one fully
      connected layer and a softmax output layer.

      Parameters:
          X: 4-D input tensor (batch, channels, height, width).
          w, w2, w3: 4-D filter tensors of the three convolution layers.
          w4: weight matrix of the fully connected layer.
          p_drop_conv: dropout probability applied after each pooling stage.
          p_drop_hidden: dropout probability applied after the fully
              connected layer.
          w_out: weight matrix of the softmax layer.  When omitted, the
              module-level ``w_o`` is used, which keeps existing call
              sites working.

      Returns:
          The tuple ``(l1, l2, l3, l4, pyx)``: the outputs of the three
          convolutional stages, the fully connected layer and the softmax.

      The shape comments below assume a mini-batch of 128 MNIST images and
      the weight shapes defined in this file.
      """
      if w_out is None:
          # Backward-compatible fallback to the global output weights.
          w_out = w_o

      # X: 128 * 1 * 28 * 28
      # w: 32 * 1 * 3 * 3
      # full mode
      # l1a: 128 * 32 * (28 + 3 - 1) * (28 + 3 - 1)
      l1a = rectify(conv2d(X, w, border_mode='full'))
      # l1a: 128 * 32 * 30 * 30
      # ignore_border False
      # l1: 128 * 32 * (30 / 2) * (30 / 2)
      l1 = max_pool_2d(l1a, (2, 2), ignore_border=False)
      l1 = dropout(l1, p_drop_conv)

      # l1: 128 * 32 * 15 * 15
      # w2: 64 * 32 * 3 * 3
      # valid mode
      # l2a: 128 * 64 * (15 - 3 + 1) * (15 - 3 + 1)
      l2a = rectify(conv2d(l1, w2))
      # l2a: 128 * 64 * 13 * 13
      # l2: 128 * 64 * (13 / 2 + 1) * (13 / 2 + 1)
      l2 = max_pool_2d(l2a, (2, 2), ignore_border=False)
      l2 = dropout(l2, p_drop_conv)

      # l2: 128 * 64 * 7 * 7
      # w3: 128 * 64 * 3 * 3
      # l3a: 128 * 128 * (7 - 3 + 1) * (7 - 3 + 1)
      l3a = rectify(conv2d(l2, w3))
      # l3a: 128 * 128 * 5 * 5
      # l3b: 128 * 128 * (5 / 2 + 1) * (5 / 2 + 1)
      l3b = max_pool_2d(l3a, (2, 2), ignore_border=False)
      # l3b: 128 * 128 * 3 * 3
      # l3: 128 * (128 * 3 * 3)
      l3 = T.flatten(l3b, outdim=2)
      l3 = dropout(l3, p_drop_conv)

      # l3: 128 * (128 * 3 * 3)
      # w4: (128 * 3 * 3) * 625
      # l4: 128 * 625
      l4 = rectify(T.dot(l3, w4))
      l4 = dropout(l4, p_drop_hidden)

      # l4: 128 * 625
      # w_out: 625 * 10
      # pyx: 128 * 10
      pyx = softmax(T.dot(l4, w_out))
      return l1, l2, l3, l4, pyx

定义符号变量:

In [6]:

  # Symbolic inputs: a float32 4-D tensor for the images and a float32
  # matrix for the one-hot labels.
  X = T.ftensor4()
  Y = T.fmatrix()

  # Filters of the three convolution layers, created in the same order
  # as before so the random draws are unchanged.
  w, w2, w3 = (init_weights(shape) for shape in
               [(32, 1, 3, 3), (64, 32, 3, 3), (128, 64, 3, 3)])
  # Fully connected layer and softmax output layer.
  w4 = init_weights((128 * 3 * 3, 625))
  w_o = init_weights((625, 10))

使用带 dropout 的模型进行训练:

In [7]:

  # Training graph: dropout of 0.2 after the pooling stages and 0.5 after
  # the fully connected layer.
  noise_l1, noise_l2, noise_l3, noise_l4, noise_py_x = model(
      X, w, w2, w3, w4, p_drop_conv=0.2, p_drop_hidden=0.5)

使用不带 dropout 的模型进行预测:

In [8]:

  # Prediction graph: same weights, dropout disabled.
  l1, l2, l3, l4, py_x = model(
      X, w, w2, w3, w4, p_drop_conv=0., p_drop_hidden=0.)
  # Predicted class = index of the largest softmax output in each row.
  y_x = py_x.argmax(axis=1)

定义损失函数和迭代规则:

In [9]:

  # Mean categorical cross-entropy between the dropout model's output and
  # the targets.
  cost = T.nnet.categorical_crossentropy(noise_py_x, Y).mean()
  # All trainable weights, optimised with RMSprop.
  params = [w, w2, w3, w4, w_o]
  updates = RMSprop(cost=cost, params=params, lr=0.001)

开始训练:

In [10]:

  # Compile the training step (returns the cost and applies the RMSprop
  # updates) and the prediction function.
  train = theano.function(inputs=[X, Y], outputs=cost, updates=updates,
                          allow_input_downcast=True)
  predict = theano.function(inputs=[X], outputs=y_x,
                            allow_input_downcast=True)

  batch_size = 128
  for i in range(50):
      # Walk over every start offset so the final, possibly shorter,
      # mini-batch is trained on too.  Zipping two ranges stopped one
      # batch early and never used the tail of the training set.
      for start in range(0, len(trX), batch_size):
          end = start + batch_size
          # NOTE: this rebinds ``cost`` to the numeric cost of the batch;
          # the compiled functions above are not affected.
          cost = train(trX[start:end], trY[start:end])
      # Test-set accuracy after each epoch.
      accuracy = np.mean(np.argmax(teY, axis=1) == predict(teX))
      # print() with a single argument behaves the same on Python 2 and 3.
      print("iter {:03d}, {:.3f}".format(i + 1, accuracy))
  1. iter 001, 0.917
  2. iter 002, 0.974
  3. iter 003, 0.983
  4. iter 004, 0.984
  5. iter 005, 0.987
  6. iter 006, 0.989
  7. iter 007, 0.991
  8. iter 008, 0.993
  9. iter 009, 0.991
  10. iter 010, 0.992
  11. iter 011, 0.993
  12. iter 012, 0.992
  13. iter 013, 0.992
  14. iter 014, 0.992
  15. iter 015, 0.993
  16. iter 016, 0.992
  17. iter 017, 0.994
  18. iter 018, 0.993
  19. iter 019, 0.993
  20. iter 020, 0.994
  21. iter 021, 0.993
  22. iter 022, 0.993
  23. iter 023, 0.993
  24. iter 024, 0.992
  25. iter 025, 0.994
  26. iter 026, 0.993
  27. iter 027, 0.994
  28. iter 028, 0.993
  29. iter 029, 0.993
  30. iter 030, 0.994
  31. iter 031, 0.994
  32. iter 032, 0.993
  33. iter 033, 0.994
  34. iter 034, 0.994
  35. iter 035, 0.994
  36. iter 036, 0.994
  37. iter 037, 0.994
  38. iter 038, 0.993
  39. iter 039, 0.994
  40. iter 040, 0.994
  41. iter 041, 0.994
  42. iter 042, 0.994
  43. iter 043, 0.995
  44. iter 044, 0.994
  45. iter 045, 0.994
  46. iter 046, 0.994
  47. iter 047, 0.995
  48. iter 048, 0.994
  49. iter 049, 0.994
  50. iter 050, 0.995

原文: https://nbviewer.jupyter.org/github/lijin-THU/notes-python/blob/master/09-theano/09.14-convolutional-net-on-mnist.ipynb