You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

googlenet.py 8.6 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206
  1. # -*- coding: utf-8 -*-
  2. # ---
  3. # jupyter:
  4. # jupytext_format_version: '1.2'
  5. # kernelspec:
  6. # display_name: Python 3
  7. # language: python
  8. # name: python3
  9. # language_info:
  10. # codemirror_mode:
  11. # name: ipython
  12. # version: 3
  13. # file_extension: .py
  14. # mimetype: text/x-python
  15. # name: python
  16. # nbconvert_exporter: python
  17. # pygments_lexer: ipython3
  18. # version: 3.5.2
  19. # ---
  20. # # GoogLeNet
  21. # 前面我们讲的 VGG 是 2014 年 ImageNet 比赛的亚军,那么冠军是谁呢?就是我们马上要讲的 GoogLeNet,这是 Google 的研究人员提出的网络结构,在当时取得了非常大的影响,因为网络的结构变得前所未有,它颠覆了大家对卷积网络的串联的印象和固定做法,采用了一种非常有效的 inception 模块,得到了比 VGG 更深的网络结构,但是却比 VGG 的参数更少,因为其去掉了后面的全连接层,所以参数大大减少,同时有了很高的计算效率。
  22. #
  23. # ![](https://ws2.sinaimg.cn/large/006tNc79ly1fmprhdocouj30qb08vac3.jpg)
  24. #
  25. # 这是 googlenet 的网络示意图,下面我们介绍一下其作为创新的 inception 模块。
  26. # ## Inception 模块
  27. # 在上面的网络中,我们看到了多个四个并行卷积的层,这些四个卷积并行的层就是 inception 模块,可视化如下
  28. #
  29. # ![](https://ws4.sinaimg.cn/large/006tNc79gy1fmprivb2hxj30dn09dwef.jpg)
  30. #
  31. # 一个 inception 模块的四个并行线路如下:
  32. # 1.一个 1 x 1 的卷积,一个小的感受野进行卷积提取特征
  33. # 2.一个 1 x 1 的卷积加上一个 3 x 3 的卷积,1 x 1 的卷积降低输入的特征通道,减少参数计算量,然后接一个 3 x 3 的卷积做一个较大感受野的卷积
  34. # 3.一个 1 x 1 的卷积加上一个 5 x 5 的卷积,作用和第二个一样
  35. # 4.一个 3 x 3 的最大池化加上 1 x 1 的卷积,最大池化改变输入的特征排列,1 x 1 的卷积进行特征提取
  36. #
  37. # 最后将四个并行线路得到的特征在通道这个维度上拼接在一起,下面我们可以实现一下
  38. # + {"ExecuteTime": {"end_time": "2017-12-22T12:51:05.427292Z", "start_time": "2017-12-22T12:51:04.924747Z"}}
  39. import sys
  40. sys.path.append('..')
  41. import numpy as np
  42. import torch
  43. from torch import nn
  44. from torch.autograd import Variable
  45. from torchvision.datasets import CIFAR10
  46. # + {"ExecuteTime": {"end_time": "2017-12-22T12:51:08.890890Z", "start_time": "2017-12-22T12:51:08.876313Z"}}
  47. # 定义一个卷积加一个 relu 激活函数和一个 batchnorm 作为一个基本的层结构
  48. def conv_relu(in_channel, out_channel, kernel, stride=1, padding=0):
  49. layer = nn.Sequential(
  50. nn.Conv2d(in_channel, out_channel, kernel, stride, padding),
  51. nn.BatchNorm2d(out_channel, eps=1e-3),
  52. nn.ReLU(True)
  53. )
  54. return layer
  55. # + {"ExecuteTime": {"end_time": "2017-12-22T12:51:09.671474Z", "start_time": "2017-12-22T12:51:09.587337Z"}}
  56. class inception(nn.Module):
  57. def __init__(self, in_channel, out1_1, out2_1, out2_3, out3_1, out3_5, out4_1):
  58. super(inception, self).__init__()
  59. # 第一条线路
  60. self.branch1x1 = conv_relu(in_channel, out1_1, 1)
  61. # 第二条线路
  62. self.branch3x3 = nn.Sequential(
  63. conv_relu(in_channel, out2_1, 1),
  64. conv_relu(out2_1, out2_3, 3, padding=1)
  65. )
  66. # 第三条线路
  67. self.branch5x5 = nn.Sequential(
  68. conv_relu(in_channel, out3_1, 1),
  69. conv_relu(out3_1, out3_5, 5, padding=2)
  70. )
  71. # 第四条线路
  72. self.branch_pool = nn.Sequential(
  73. nn.MaxPool2d(3, stride=1, padding=1),
  74. conv_relu(in_channel, out4_1, 1)
  75. )
  76. def forward(self, x):
  77. f1 = self.branch1x1(x)
  78. f2 = self.branch3x3(x)
  79. f3 = self.branch5x5(x)
  80. f4 = self.branch_pool(x)
  81. output = torch.cat((f1, f2, f3, f4), dim=1)
  82. return output
  83. # + {"ExecuteTime": {"end_time": "2017-12-22T12:51:10.948630Z", "start_time": "2017-12-22T12:51:10.757903Z"}}
  84. test_net = inception(3, 64, 48, 64, 64, 96, 32)
  85. test_x = Variable(torch.zeros(1, 3, 96, 96))
  86. print('input shape: {} x {} x {}'.format(test_x.shape[1], test_x.shape[2], test_x.shape[3]))
  87. test_y = test_net(test_x)
  88. print('output shape: {} x {} x {}'.format(test_y.shape[1], test_y.shape[2], test_y.shape[3]))
  89. # -
  90. # 可以看到输入经过了 inception 模块之后,大小没有变化,通道的维度变多了
  91. # 下面我们定义 GoogLeNet,GoogLeNet 可以看作是很多个 inception 模块的串联,注意,原论文中使用了多个输出来解决梯度消失的问题,这里我们只定义一个简单版本的 GoogLeNet,简化为一个输出
  92. # + {"ExecuteTime": {"end_time": "2017-12-22T12:51:13.149380Z", "start_time": "2017-12-22T12:51:12.934110Z"}}
  93. class googlenet(nn.Module):
  94. def __init__(self, in_channel, num_classes, verbose=False):
  95. super(googlenet, self).__init__()
  96. self.verbose = verbose
  97. self.block1 = nn.Sequential(
  98. conv_relu(in_channel, out_channel=64, kernel=7, stride=2, padding=3),
  99. nn.MaxPool2d(3, 2)
  100. )
  101. self.block2 = nn.Sequential(
  102. conv_relu(64, 64, kernel=1),
  103. conv_relu(64, 192, kernel=3, padding=1),
  104. nn.MaxPool2d(3, 2)
  105. )
  106. self.block3 = nn.Sequential(
  107. inception(192, 64, 96, 128, 16, 32, 32),
  108. inception(256, 128, 128, 192, 32, 96, 64),
  109. nn.MaxPool2d(3, 2)
  110. )
  111. self.block4 = nn.Sequential(
  112. inception(480, 192, 96, 208, 16, 48, 64),
  113. inception(512, 160, 112, 224, 24, 64, 64),
  114. inception(512, 128, 128, 256, 24, 64, 64),
  115. inception(512, 112, 144, 288, 32, 64, 64),
  116. inception(528, 256, 160, 320, 32, 128, 128),
  117. nn.MaxPool2d(3, 2)
  118. )
  119. self.block5 = nn.Sequential(
  120. inception(832, 256, 160, 320, 32, 128, 128),
  121. inception(832, 384, 182, 384, 48, 128, 128),
  122. nn.AvgPool2d(2)
  123. )
  124. self.classifier = nn.Linear(1024, num_classes)
  125. def forward(self, x):
  126. x = self.block1(x)
  127. if self.verbose:
  128. print('block 1 output: {}'.format(x.shape))
  129. x = self.block2(x)
  130. if self.verbose:
  131. print('block 2 output: {}'.format(x.shape))
  132. x = self.block3(x)
  133. if self.verbose:
  134. print('block 3 output: {}'.format(x.shape))
  135. x = self.block4(x)
  136. if self.verbose:
  137. print('block 4 output: {}'.format(x.shape))
  138. x = self.block5(x)
  139. if self.verbose:
  140. print('block 5 output: {}'.format(x.shape))
  141. x = x.view(x.shape[0], -1)
  142. x = self.classifier(x)
  143. return x
  144. # + {"ExecuteTime": {"end_time": "2017-12-22T12:51:13.614936Z", "start_time": "2017-12-22T12:51:13.428383Z"}}
  145. test_net = googlenet(3, 10, True)
  146. test_x = Variable(torch.zeros(1, 3, 96, 96))
  147. test_y = test_net(test_x)
  148. print('output: {}'.format(test_y.shape))
  149. # -
  150. # 可以看到输入的尺寸不断减小,通道的维度不断增加
  151. # + {"ExecuteTime": {"end_time": "2017-12-22T12:51:16.387778Z", "start_time": "2017-12-22T12:51:15.121350Z"}}
  152. from utils import train
  153. def data_tf(x):
  154. x = x.resize((96, 96), 2) # 将图片放大到 96 x 96
  155. x = np.array(x, dtype='float32') / 255
  156. x = (x - 0.5) / 0.5 # 标准化,这个技巧之后会讲到
  157. x = x.transpose((2, 0, 1)) # 将 channel 放到第一维,只是 pytorch 要求的输入方式
  158. x = torch.from_numpy(x)
  159. return x
  160. train_set = CIFAR10('./data', train=True, transform=data_tf)
  161. train_data = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)
  162. test_set = CIFAR10('./data', train=False, transform=data_tf)
  163. test_data = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=False)
  164. net = googlenet(3, 10)
  165. optimizer = torch.optim.SGD(net.parameters(), lr=0.01)
  166. criterion = nn.CrossEntropyLoss()
  167. # + {"ExecuteTime": {"end_time": "2017-12-22T13:17:25.310685Z", "start_time": "2017-12-22T12:51:16.389607Z"}}
  168. train(net, train_data, test_data, 20, optimizer, criterion)
  169. # -
  170. # GoogLeNet 加入了更加结构化的 Inception 块使得我们能够使用更大的通道,更多的层,同时也控制了计算量。
  171. #
  172. # **小练习:GoogLeNet 有很多后续的版本,尝试看看论文,看看有什么不同,实现一下:
  173. # v1:最早的版本
  174. # v2:加入 batch normalization 加快训练
  175. # v3:对 inception 模块做了调整
  176. # v4:基于 ResNet 加入了 残差连接 **

机器学习越来越多地应用到飞行器、机器人等领域,其目的是利用计算机实现类似人类的智能,从而实现装备的智能化与无人化。本课程旨在引导学生掌握机器学习的基本知识、典型方法与技术,通过具体的应用案例激发学生对该学科的兴趣,鼓励学生能够从人工智能的角度来分析、解决飞行器、机器人所面临的问题和挑战。本课程主要内容包括 Python 编程基础,机器学习模型,无监督学习、监督学习、深度学习基础知识与实现,并学习如何利用机器学习解决实际问题,从而全面提升自身的综合能力。