# -*- coding: utf-8 -*-
# ---
# jupyter:
#   jupytext_format_version: '1.2'
#   kernelspec:
#     display_name: Python 3
#     language: python
#     name: python3
#   language_info:
#     codemirror_mode:
#       name: ipython
#       version: 3
#     file_extension: .py
#     mimetype: text/x-python
#     name: python
#     nbconvert_exporter: python
#     pygments_lexer: ipython3
#     version: 3.5.2
# ---
# # ResNet
# While everyone was still marveling at GoogLeNet's inception modules, researchers at Microsoft Research Asia were already designing ResNet, a deeper but structurally simpler network, and with it they won the 2015 ImageNet competition by a large margin.
#
# ResNet effectively addresses the difficulty of training deep neural networks, making it possible to train convolutional networks with as many as 1000 layers. Deep networks are hard to train because of vanishing gradients: the farther a layer is from the loss function, the smaller its gradient becomes during backpropagation and the harder the layer is to update, and the problem gets worse as depth increases. Two common remedies existed before:
#
# 1. Layer-wise training: first train the shallower layers, then keep adding layers. This does not work particularly well and is cumbersome.
#
# 2. Use wider layers, i.e. more output channels, instead of adding depth. This structure often does not perform well either.
#
# ResNet solves the vanishing-gradient problem by introducing cross-layer (skip) connections.
#
# ![](https://ws1.sinaimg.cn/large/006tNc79ly1fmptq2snv9j30j808t74a.jpg)
# The figure above compares plain connections with cross-layer residual connections. With plain connections, gradients from the upper layers must propagate back one layer at a time; with a residual connection there is effectively a shorter path through which the gradient can flow back, avoiding vanishing gradients.
#
# Suppose a layer's input is x and the desired output is H(x). If we pass the input x directly through to the output as an initial result, that identity part behaves like a shallower network and is easier to train; whatever it has not yet learned can be fitted by a deeper branch F(x), which makes training easier. What we ultimately want the branch to fit is F(x) = H(x) - x, which is exactly a residual structure.
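#
# Written as an equation, the block computes
#
# $$H(x) = F(x) + x, \qquad \frac{\partial H}{\partial x} = \frac{\partial F}{\partial x} + I,$$
#
# so the identity term gives the gradient a direct path back to earlier layers no matter how small $\partial F / \partial x$ becomes.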
#
# A residual network is just a stack of residual blocks like this one. Let's implement a residual block below.
# + {"ExecuteTime": {"end_time": "2017-12-22T12:56:06.772059Z", "start_time": "2017-12-22T12:56:06.766027Z"}}
import sys
sys.path.append('..')
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F
from torch.autograd import Variable
from torchvision.datasets import CIFAR10
# + {"ExecuteTime": {"end_time": "2017-12-22T12:47:49.222432Z", "start_time": "2017-12-22T12:47:49.217940Z"}}
def conv3x3(in_channel, out_channel, stride=1):
    # 3x3 convolution; padding=1 keeps the spatial size unchanged when stride=1
    return nn.Conv2d(in_channel, out_channel, 3, stride=stride, padding=1, bias=False)
# + {"ExecuteTime": {"end_time": "2017-12-22T13:14:02.429145Z", "start_time": "2017-12-22T13:14:02.383322Z"}}
class residual_block(nn.Module):
    def __init__(self, in_channel, out_channel, same_shape=True):
        super(residual_block, self).__init__()
        self.same_shape = same_shape
        stride = 1 if self.same_shape else 2
        self.conv1 = conv3x3(in_channel, out_channel, stride=stride)
        self.bn1 = nn.BatchNorm2d(out_channel)
        self.conv2 = conv3x3(out_channel, out_channel)
        self.bn2 = nn.BatchNorm2d(out_channel)
        if not self.same_shape:
            # 1x1 convolution matches the input to the output's shape for the addition
            self.conv3 = nn.Conv2d(in_channel, out_channel, 1, stride=stride)

    def forward(self, x):
        out = self.conv1(x)
        out = F.relu(self.bn1(out), True)
        out = self.conv2(out)
        out = F.relu(self.bn2(out), True)
        if not self.same_shape:
            x = self.conv3(x)
        return F.relu(x + out, True)
# -
# Let's check the input and output shapes of a residual block.
# + {"ExecuteTime": {"end_time": "2017-12-22T13:14:05.793185Z", "start_time": "2017-12-22T13:14:05.763382Z"}}
# input and output have the same shape
test_net = residual_block(32, 32)
test_x = Variable(torch.zeros(1, 32, 96, 96))
print('input: {}'.format(test_x.shape))
test_y = test_net(test_x)
print('output: {}'.format(test_y.shape))
# + {"ExecuteTime": {"end_time": "2017-12-22T13:14:11.929120Z", "start_time": "2017-12-22T13:14:11.914604Z"}}
# input and output shapes differ: stride 2 halves the spatial size and conv3 matches the channels
test_net = residual_block(3, 32, False)
test_x = Variable(torch.zeros(1, 3, 96, 96))
print('input: {}'.format(test_x.shape))
test_y = test_net(test_x)
print('output: {}'.format(test_y.shape))
# -
# Now let's implement a ResNet, which is simply a stack of residual blocks.
# + {"ExecuteTime": {"end_time": "2017-12-22T13:27:46.099404Z", "start_time": "2017-12-22T13:27:45.986235Z"}}
class resnet(nn.Module):
    def __init__(self, in_channel, num_classes, verbose=False):
        super(resnet, self).__init__()
        self.verbose = verbose
        self.block1 = nn.Conv2d(in_channel, 64, 7, 2)
        self.block2 = nn.Sequential(
            nn.MaxPool2d(3, 2),
            residual_block(64, 64),
            residual_block(64, 64)
        )
        self.block3 = nn.Sequential(
            residual_block(64, 128, False),
            residual_block(128, 128)
        )
        self.block4 = nn.Sequential(
            residual_block(128, 256, False),
            residual_block(256, 256)
        )
        self.block5 = nn.Sequential(
            residual_block(256, 512, False),
            residual_block(512, 512),
            nn.AvgPool2d(3)
        )
        self.classifier = nn.Linear(512, num_classes)

    def forward(self, x):
        x = self.block1(x)
        if self.verbose:
            print('block 1 output: {}'.format(x.shape))
        x = self.block2(x)
        if self.verbose:
            print('block 2 output: {}'.format(x.shape))
        x = self.block3(x)
        if self.verbose:
            print('block 3 output: {}'.format(x.shape))
        x = self.block4(x)
        if self.verbose:
            print('block 4 output: {}'.format(x.shape))
        x = self.block5(x)
        if self.verbose:
            print('block 5 output: {}'.format(x.shape))
        x = x.view(x.shape[0], -1)  # flatten for the classifier
        x = self.classifier(x)
        return x
# -
# Let's print the output size after each block.
# + {"ExecuteTime": {"end_time": "2017-12-22T13:28:00.597030Z", "start_time": "2017-12-22T13:28:00.417746Z"}}
test_net = resnet(3, 10, True)
test_x = Variable(torch.zeros(1, 3, 96, 96))
test_y = test_net(test_x)
print('output: {}'.format(test_y.shape))
# + {"ExecuteTime": {"end_time": "2017-12-22T13:29:01.484172Z", "start_time": "2017-12-22T13:29:00.095952Z"}}
from utils import train

def data_tf(x):
    x = x.resize((96, 96), 2) # enlarge the image to 96 x 96
    x = np.array(x, dtype='float32') / 255
    x = (x - 0.5) / 0.5 # normalize to [-1, 1]; this trick will be covered later
    x = x.transpose((2, 0, 1)) # put the channel axis first, as PyTorch expects
    x = torch.from_numpy(x)
    return x

train_set = CIFAR10('./data', train=True, transform=data_tf)
train_data = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)
test_set = CIFAR10('./data', train=False, transform=data_tf)
test_data = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=False)

net = resnet(3, 10)
optimizer = torch.optim.SGD(net.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()
# + {"ExecuteTime": {"end_time": "2017-12-22T13:45:00.783186Z", "start_time": "2017-12-22T13:29:09.214453Z"}}
train(net, train_data, test_data, 20, optimizer, criterion)
# -
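# The `train` function imported from `utils` is part of the course code and is not shown in this file. As a rough sketch of what such a helper might do (`train_sketch` is a hypothetical name; the real `utils.train` may also track accuracy and move data to the GPU), matching the call signature used above:

# +
def train_sketch(net, train_data, test_data, num_epochs, optimizer, criterion):
    # hypothetical stand-in for utils.train, for illustration only
    for epoch in range(num_epochs):
        net.train()
        train_loss = 0
        for im, label in train_data:
            im, label = Variable(im), Variable(label)
            out = net(im)                 # forward pass
            loss = criterion(out, label)
            optimizer.zero_grad()         # clear stale gradients
            loss.backward()               # backpropagate
            optimizer.step()              # update the weights
            train_loss += loss.data[0]
        net.eval()
        test_loss = 0
        for im, label in test_data:
            im = Variable(im, volatile=True)        # old-style inference mode
            label = Variable(label, volatile=True)
            out = net(im)
            test_loss += criterion(out, label).data[0]
        print('epoch {}: train loss {:.4f}, test loss {:.4f}'.format(
            epoch, train_loss / len(train_data), test_loss / len(test_data)))
# -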
# ResNet's cross-layer connections make it possible to train very deep convolutional networks. At the same time, its simple, uniform convolutional configuration makes the network easy to extend.
#
# **Exercises:**
#
# 1. Try the bottleneck structure proposed in the paper (a sketch follows below).
#
# 2. Try changing the conv -> bn -> relu order to bn -> relu -> conv and see whether accuracy improves (also sketched below).
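#
# As a starting point for exercise 1, here is a minimal sketch of a bottleneck block in the style of this notebook. The channel reduction factor of 4 follows the ResNet paper, but names like `bottleneck_block` and `mid_channel` and the other details are our own assumptions, not the course's reference solution:

# +
class bottleneck_block(nn.Module):
    # 1x1 conv reduces channels, 3x3 conv processes them, 1x1 conv expands back
    def __init__(self, in_channel, out_channel, same_shape=True):
        super(bottleneck_block, self).__init__()
        stride = 1 if same_shape else 2
        mid_channel = out_channel // 4  # bottleneck width, as in the paper
        self.conv1 = nn.Conv2d(in_channel, mid_channel, 1, bias=False)
        self.bn1 = nn.BatchNorm2d(mid_channel)
        self.conv2 = conv3x3(mid_channel, mid_channel, stride=stride)
        self.bn2 = nn.BatchNorm2d(mid_channel)
        self.conv3 = nn.Conv2d(mid_channel, out_channel, 1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channel)
        # project the input whenever its shape no longer matches the output
        self.projection = (not same_shape) or (in_channel != out_channel)
        if self.projection:
            self.conv4 = nn.Conv2d(in_channel, out_channel, 1, stride=stride)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)), True)
        out = F.relu(self.bn2(self.conv2(out)), True)
        out = self.bn3(self.conv3(out))
        if self.projection:
            x = self.conv4(x)
        return F.relu(x + out, True)
# -

# For exercise 2, the bn -> relu -> conv ordering (the "pre-activation" block from the follow-up paper, Identity Mappings in Deep Residual Networks) can be sketched by reordering `residual_block` as below; note there is no relu after the addition:

# +
class pre_act_block(nn.Module):
    # same layers as residual_block, but bn and relu come before each conv
    def __init__(self, in_channel, out_channel, same_shape=True):
        super(pre_act_block, self).__init__()
        self.same_shape = same_shape
        stride = 1 if self.same_shape else 2
        self.bn1 = nn.BatchNorm2d(in_channel)
        self.conv1 = conv3x3(in_channel, out_channel, stride=stride)
        self.bn2 = nn.BatchNorm2d(out_channel)
        self.conv2 = conv3x3(out_channel, out_channel)
        if not self.same_shape:
            self.conv3 = nn.Conv2d(in_channel, out_channel, 1, stride=stride)

    def forward(self, x):
        out = self.conv1(F.relu(self.bn1(x), True))
        out = self.conv2(F.relu(self.bn2(out), True))
        if not self.same_shape:
            x = self.conv3(x)
        return x + out
# -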
