
test_optimizer.py 8.3 kB

# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import numpy as np

import megengine.functional as F
from megengine import Parameter, optimizer
from megengine.jit import trace
from megengine.module import Linear, Module
from megengine.tensor import TensorDict, tensor
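# Small two-layer MLP test network; the optimizer checks below use the
# single-parameter Simple module instead.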
class MLP(Module):
    def __init__(self):
        super().__init__()
        self.dense0 = Linear(28, 50)
        self.dense1 = Linear(50, 20)

    def forward(self, x):
        x = self.dense0(x)
        x = F.relu(x)
        x = self.dense1(x)
        return x
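# Module with a single scalar parameter (y = a * x), so the expected
# parameter update can be computed exactly with NumPy.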
class Simple(Module):
    def __init__(self):
        super().__init__()
        self.a = Parameter(1.23, dtype=np.float32)

    def forward(self, x):
        x = x * self.a
        return x
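# Runs a few training steps with the requested optimizer, first in eager mode
# and then under trace() (imperative and symbolic), and after every step
# compares the updated parameters against the NumPy reference implemented by
# check_class. With update_lr=True, the learning rate is bumped after the
# first iteration to check that param_groups changes take effect.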
def _test_optimizer(opt_str, test_case, check_class, update_lr=False):
    iter_num = 3
    net = Simple()
    opt = getattr(optimizer, opt_str)(net.parameters(), **test_case)
    check_func = check_class(net, **test_case)
    step = 0
    data_shape = (2, 28)

    for i in range(iter_num):
        if update_lr and i == 1:  # change learning rate
            for group in opt.param_groups:
                group["lr"] += 0.01
            check_func.lr += 0.01
        data = tensor(np.random.random(data_shape).astype(np.float32))
        opt.zero_grad()
        with opt.record():
            pred = net(data)
            loss = pred.sum()
            opt.backward(loss)
        ori_params = TensorDict()
        for param in net.parameters():
            ori_params[param] = np.copy(param.numpy())
        opt.step()
        step += 1
        check_func(ori_params, net.parameters(), step)

    # static graph
    for symbolic in (False, True):

        @trace(symbolic=symbolic)
        def train_func(data, *, opt=None):
            opt.zero_grad()
            with opt.record():
                pred = net(data)
                loss = pred.sum()
                opt.backward(loss)
            opt.step()

        # reset net and opt
        net = Simple()
        opt = getattr(optimizer, opt_str)(net.parameters(), **test_case)
        check_func = check_class(net, **test_case)
        step = 0
        for i in range(iter_num):
            if update_lr and i == 1:  # change learning rate
                for group in opt.param_groups:
                    group["lr"] += 0.01
                check_func.lr += 0.01
            ori_params = TensorDict()
            for param in net.parameters():
                ori_params[param] = np.copy(param.numpy())
            train_func(np.random.random(data_shape).astype(np.float32), opt=opt)
            step += 1
            check_func(ori_params, net.parameters(), step)
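# SGD reference implemented by CheckValue below:
#   v     <- grad + momentum * v          (only when momentum is given)
#   param <- param - lr * v               (or param - lr * grad without momentum)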
def test_sgd():
    class CheckValue:
        def __init__(self, net, **kwarg):
            self.slots = TensorDict()
            for param in net.parameters():
                self.slots[param] = np.zeros(param.shape).astype(np.float32)
            for k, v in kwarg.items():
                setattr(self, k, v)

        def __call__(self, ori_params, new_params, step):
            for param in new_params:
                grad = param.grad.numpy()
                if hasattr(self, "momentum"):
                    self.slots[param] = grad + self.slots[param] * self.momentum
                    delta = -self.lr * self.slots[param]
                else:
                    delta = -self.lr * grad
                np.testing.assert_almost_equal(param.numpy(), ori_params[param] + delta)

    cases = [
        {"momentum": 0.9, "lr": 0.01},  # SGD with momentum
        {"lr": 0.01},  # simple SGD
        {"weight_decay": 0.1, "lr": 0.01},  # with weight_decay
    ]
    for case in cases:
        _test_optimizer("SGD", case, CheckValue)
        _test_optimizer("SGD", case, CheckValue, update_lr=True)
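# Adam reference implemented by CheckValue below (with bias correction):
#   m     <- beta1 * m + (1 - beta1) * grad
#   v     <- beta2 * v + (1 - beta2) * grad^2
#   param <- param - lr * (m / (1 - beta1^t)) / (sqrt(v / (1 - beta2^t)) + eps)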
def test_adam():
    class CheckValue:
        def __init__(self, net, **kwarg):
            self.m_slots = TensorDict()
            self.v_slots = TensorDict()
            for param in net.parameters():
                self.m_slots[param] = np.zeros(param.shape).astype(np.float32)
                self.v_slots[param] = np.zeros(param.shape).astype(np.float32)
            for k, v in kwarg.items():
                setattr(self, k, v)

        def __call__(self, ori_params, new_params, step):
            for param in new_params:
                grad = param.grad.numpy()
                m = self.m_slots[param]
                v = self.v_slots[param]
                m *= self.betas[0]
                m += (1 - self.betas[0]) * grad
                v *= self.betas[1]
                v += (1 - self.betas[1]) * grad * grad
                delta = (m / (1 - self.betas[0] ** step)) / (
                    np.sqrt(v / (1 - self.betas[1] ** step)) + self.eps
                )
                np.testing.assert_almost_equal(
                    param.numpy(), ori_params[param] - self.lr * delta
                )

    cases = [
        {"betas": (0.8, 0.9), "eps": 1e-04, "lr": 0.01},
        {
            "betas": (0.8, 0.9),
            "eps": 1e-04,
            "lr": 0.01,
            "weight_decay": 0.1,
        },  # with weight_decay
    ]
    for case in cases:
        _test_optimizer("Adam", case, CheckValue)
        _test_optimizer("Adam", case, CheckValue, update_lr=True)
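# Adagrad reference implemented by CheckValue below:
#   s     <- s + grad^2
#   param <- param - (lr / (1 + (t - 1) * lr_decay)) * grad / sqrt(s + eps)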
def test_adagrad():
    class CheckValue:
        def __init__(self, net, **kwarg):
            self.s_slots = TensorDict()
            for param in net.parameters():
                self.s_slots[param] = np.zeros(param.shape).astype(np.float32)
            for k, v in kwarg.items():
                setattr(self, k, v)

        def __call__(self, ori_params, new_params, step):
            for param in new_params:
                grad = param.grad.numpy()
                self.s_slots[param] += grad ** 2
                delta = grad / (self.s_slots[param] + self.eps) ** 0.5
                delta *= -(self.lr / (1 + (step - 1) * self.lr_decay))
                np.testing.assert_almost_equal(param.numpy(), ori_params[param] + delta)

    cases = [
        {"lr": 0.01, "eps": 1e-06, "lr_decay": 0.01},
        {"lr": 0.01, "eps": 1e-06, "lr_decay": 0.0},  # without lr_decay
        {
            "lr": 0.01,
            "eps": 1e-06,
            "lr_decay": 0.01,
            "weight_decay": 0.1,
        },  # with weight_decay
    ]
    for case in cases:
        _test_optimizer("Adagrad", case, CheckValue)
        _test_optimizer("Adagrad", case, CheckValue, update_lr=True)
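# Adadelta reference implemented by CheckValue below:
#   s     <- rho * s + (1 - rho) * grad^2
#   delta <- grad * sqrt(a + eps) / sqrt(s + eps)
#   a     <- rho * a + (1 - rho) * delta^2
#   param <- param - lr * delta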
def test_adadelta():
    class CheckValue:
        def __init__(self, net, **kwarg):
            self.s_slots = TensorDict()
            self.a_slots = TensorDict()
            for param in net.parameters():
                self.s_slots[param] = np.zeros(param.shape).astype(np.float32)
                self.a_slots[param] = np.zeros(param.shape).astype(np.float32)
            for k, v in kwarg.items():
                setattr(self, k, v)

        def __call__(self, ori_params, new_params, step):
            for param in new_params:
                grad = param.grad.numpy()
                self.s_slots[param] = self.s_slots[param] * self.rho + grad ** 2 * (
                    1 - self.rho
                )
                delta = (
                    grad
                    * ((self.a_slots[param] + self.eps) ** 0.5)
                    / (self.s_slots[param] + self.eps) ** 0.5
                )
                self.a_slots[param] = self.a_slots[param] * self.rho + delta ** 2 * (
                    1 - self.rho
                )
                delta *= -self.lr
                np.testing.assert_almost_equal(param.numpy(), ori_params[param] + delta)

    cases = [
        {"lr": 1.0, "eps": 1e-06, "rho": 0.9},
        {"lr": 1.0, "eps": 1e-06, "rho": 0.9, "weight_decay": 0.9},  # with weight_decay
    ]
    for case in cases:
        _test_optimizer("Adadelta", case, CheckValue)
        _test_optimizer("Adadelta", case, CheckValue, update_lr=True)
