# -*- coding: utf-8 -*-
# ---
# jupyter:
#   jupytext_format_version: '1.2'
#   kernelspec:
#     display_name: Python 3
#     language: python
#     name: python3
#   language_info:
#     codemirror_mode:
#       name: ipython
#       version: 3
#     file_extension: .py
#     mimetype: text/x-python
#     name: python
#     nbconvert_exporter: python
#     pygments_lexer: ipython3
#     version: 3.5.2
# ---
# # Learning Rate Decay
# For optimization methods based on first-order gradients, the updates are fairly large at the start, so the initial learning rate can be set relatively high. But once the training loss has dropped to a certain level, a learning rate that is too large makes the loss oscillate back and forth instead of decreasing further, for example
#
# ![](https://ws4.sinaimg.cn/large/006tNc79ly1fmrvdlncomj30bf0aywet.jpg)
# At that point the learning rate needs to be decayed so that the loss can keep going down. Learning rate decay resolves this conflict: as training proceeds, the learning rate is gradually reduced.
#
# Learning rate decay is very convenient in pytorch with `torch.optim.lr_scheduler`; see the [documentation](http://pytorch.org/docs/0.3.0/optim.html#how-to-adjust-learning-rate) for more information.
#
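# For reference, here is a minimal, self-contained sketch of one built-in scheduler, `StepLR`. The toy model and the `step_size`/`gamma` values are made up for illustration and are not part of this notebook's experiment:
# +
import torch
from torch import nn

toy = nn.Linear(4, 2)  # hypothetical tiny model, just for the sketch
opt = torch.optim.SGD(toy.parameters(), lr=0.1)
# multiply the learning rate by gamma every step_size epochs
scheduler = torch.optim.lr_scheduler.StepLR(opt, step_size=20, gamma=0.1)
for epoch in range(30):
    scheduler.step()
print(opt.param_groups[0]['lr'])  # ~0.01 after the decay at epoch 20
# -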
# But I recommend the way shown below for learning rate decay; it is more intuitive. Let's go through a concrete example.
# + {"ExecuteTime": {"start_time": "2017-12-24T08:45:33.834665Z", "end_time": "2017-12-24T08:45:34.293625Z"}}
import sys
sys.path.append('..')

import numpy as np
import torch
from torch import nn
import torch.nn.functional as F
from torch.autograd import Variable
from torchvision.datasets import CIFAR10
from utils import resnet
from torchvision import transforms as tfs
from datetime import datetime

# + {"ExecuteTime": {"start_time": "2017-12-24T08:45:35.063610Z", "end_time": "2017-12-24T08:45:35.195093Z"}}
net = resnet(3, 10)
optimizer = torch.optim.SGD(net.parameters(), lr=0.01, weight_decay=1e-4)
# -
# Now the model and the optimizer are defined. Through `optimizer.param_groups` we can access every parameter group and its attributes. What is a parameter group? We can split the model's parameters into several groups and define a separate learning rate for each one. This is a more advanced use; without any special setup there is only a single parameter group.
#
# Each parameter group is a dict with many attributes, such as the learning rate, the weight decay and so on; we can inspect them:
# + {"ExecuteTime": {"start_time": "2017-12-24T08:22:59.187178Z", "end_time": "2017-12-24T08:22:59.192905Z"}}
print('learning rate: {}'.format(optimizer.param_groups[0]['lr']))
print('weight decay: {}'.format(optimizer.param_groups[0]['weight_decay']))
# -
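# If you do want several parameter groups, you can pass the optimizer a list of dicts, one per group, each with its own learning rate. The toy two-layer model below is only an illustrative sketch, not part of this notebook's experiment:
# +
toy_net = nn.Sequential(nn.Linear(10, 20), nn.Linear(20, 2))
group_optimizer = torch.optim.SGD([
    {'params': toy_net[0].parameters(), 'lr': 0.01},  # first layer: small lr
    {'params': toy_net[1].parameters(), 'lr': 0.1},   # second layer: large lr
], lr=0.01, weight_decay=1e-4)
for i, group in enumerate(group_optimizer.param_groups):
    print('group {}: lr = {}'.format(i, group['lr']))
# -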
# So we can change the learning rate during training simply by modifying this attribute:
# + {"ExecuteTime": {"start_time": "2017-12-24T08:25:04.762612Z", "end_time": "2017-12-24T08:25:04.767090Z"}}
optimizer.param_groups[0]['lr'] = 1e-5
# -
# To handle the case of multiple parameter groups, we can use a loop:
# + {"ExecuteTime": {"start_time": "2017-12-24T08:26:05.136955Z", "end_time": "2017-12-24T08:26:05.142183Z"}}
for param_group in optimizer.param_groups:
    param_group['lr'] = 1e-1
# -
# That is the whole method; it is very simple, and we can change the learning rate at any point in training.
#
# Next, let's look concretely at what learning rate decay buys us.
# + {"ExecuteTime": {"start_time": "2017-12-24T08:45:40.803993Z", "end_time": "2017-12-24T08:45:40.809459Z"}}
def set_learning_rate(optimizer, lr):
    # set the learning rate of every parameter group
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
# + {"ExecuteTime": {"start_time": "2017-12-24T08:45:46.738002Z", "end_time": "2017-12-24T08:45:48.006789Z"}}
# use data augmentation
def train_tf(x):
    im_aug = tfs.Compose([
        tfs.Resize(120),
        tfs.RandomHorizontalFlip(),
        tfs.RandomCrop(96),
        tfs.ColorJitter(brightness=0.5, contrast=0.5, hue=0.5),
        tfs.ToTensor(),
        tfs.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])
    x = im_aug(x)
    return x

def test_tf(x):
    im_aug = tfs.Compose([
        tfs.Resize(96),
        tfs.ToTensor(),
        tfs.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])
    x = im_aug(x)
    return x

train_set = CIFAR10('./data', train=True, transform=train_tf)
train_data = torch.utils.data.DataLoader(train_set, batch_size=256, shuffle=True, num_workers=4)
valid_set = CIFAR10('./data', train=False, transform=test_tf)
valid_data = torch.utils.data.DataLoader(valid_set, batch_size=256, shuffle=False, num_workers=4)

net = resnet(3, 10)
optimizer = torch.optim.SGD(net.parameters(), lr=0.1, weight_decay=1e-4)
criterion = nn.CrossEntropyLoss()
# + {"ExecuteTime": {"start_time": "2017-12-24T08:45:48.556187Z", "end_time": "2017-12-24T08:59:49.656832Z"}}
train_losses = []
valid_losses = []

if torch.cuda.is_available():
    net = net.cuda()
prev_time = datetime.now()
for epoch in range(30):
    if epoch == 20:
        set_learning_rate(optimizer, 0.01)  # at epoch 20, lower the learning rate to 0.01
    train_loss = 0
    net = net.train()
    for im, label in train_data:
        if torch.cuda.is_available():
            im = Variable(im.cuda())  # (bs, 3, h, w)
            label = Variable(label.cuda())  # (bs,)
        else:
            im = Variable(im)
            label = Variable(label)
        # forward
        output = net(im)
        loss = criterion(output, label)
        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.data[0]
    cur_time = datetime.now()
    h, remainder = divmod((cur_time - prev_time).seconds, 3600)
    m, s = divmod(remainder, 60)
    time_str = "Time %02d:%02d:%02d" % (h, m, s)
    valid_loss = 0
    valid_acc = 0
    net = net.eval()
    for im, label in valid_data:
        if torch.cuda.is_available():
            im = Variable(im.cuda(), volatile=True)
            label = Variable(label.cuda(), volatile=True)
        else:
            im = Variable(im, volatile=True)
            label = Variable(label, volatile=True)
        output = net(im)
        loss = criterion(output, label)
        valid_loss += loss.data[0]
    epoch_str = (
        "Epoch %d. Train Loss: %f, Valid Loss: %f, "
        % (epoch, train_loss / len(train_data), valid_loss / len(valid_data)))
    prev_time = cur_time
    train_losses.append(train_loss / len(train_data))
    valid_losses.append(valid_loss / len(valid_data))
    print(epoch_str + time_str)
# -
# Now let's plot the loss curves.
# + {"ExecuteTime": {"start_time": "2017-12-24T09:01:37.439613Z", "end_time": "2017-12-24T09:01:37.676274Z"}}
import matplotlib.pyplot as plt
# %matplotlib inline
# + {"ExecuteTime": {"start_time": "2017-12-24T09:02:37.244995Z", "end_time": "2017-12-24T09:02:37.432883Z"}}
plt.plot(train_losses, label='train')
plt.plot(valid_losses, label='valid')
plt.xlabel('epoch')
plt.legend(loc='best')
# -
# Here we trained for only 30 epochs and decayed the learning rate at epoch 20; you can see that at epoch 20 both the train loss and the valid loss drop sharply.
#
# Of course this is only a demonstration. In practice, the model should be trained thoroughly before the learning rate is decayed, say for 80 or 100 epochs; decaying then gives a better result, and sometimes the learning rate even needs to be decayed several times.
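# As a minimal sketch of such a multi-step schedule (the milestone epochs 80 and 120 and the decay factor are illustrative values, not tuned ones), each decay can be applied with the same `set_learning_rate` helper from above:
# +
def multi_step_lr(optimizer, epoch, base_lr=0.1, milestones=(80, 120), gamma=0.1):
    # divide the learning rate by 10 at every milestone epoch passed so far
    lr = base_lr * gamma ** sum(epoch >= m for m in milestones)
    set_learning_rate(optimizer, lr)
    return lr

print(multi_step_lr(optimizer, 79))   # 0.1
print(multi_step_lr(optimizer, 80))   # ~0.01
print(multi_step_lr(optimizer, 125))  # ~0.001
# -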
