
test_clip_grad.py 2.4 kB

# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import numpy as np

import megengine as mge
import megengine.autodiff as ad
import megengine.functional as F
import megengine.module as M
import megengine.optimizer as optim


class Net(M.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = M.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = M.BatchNorm2d(64)
        self.avgpool = M.AvgPool2d(kernel_size=5, stride=5, padding=0)
        self.fc = M.Linear(64, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = F.relu(x)
        # The two pooling stages reduce the 112x112 feature map to 1x1,
        # so flatten yields a (N, 64) tensor for the linear layer.
        x = self.avgpool(x)
        x = F.avg_pool2d(x, 22)
        x = F.flatten(x, 1)
        x = self.fc(x)
        return x


def save_grad_value(net):
    # Keep a NumPy copy of every gradient so it can be compared
    # against the clipped gradient afterwards.
    for param in net.parameters():
        param.grad_backup = param.grad.numpy().copy()


def test_clip_grad_norm():
    net = Net()
    x = mge.tensor(np.random.randn(10, 3, 224, 224))
    gm = ad.GradManager().attach(net.parameters())
    opt = optim.SGD(net.parameters(), 1e-3, momentum=0.9)
    with gm:
        loss = net(x).sum()
        gm.backward(loss)
    save_grad_value(net)
    max_norm = 1.0
    # clip_grad_norm returns the pre-clipping global norm, so every
    # gradient should end up scaled by max_norm / original_norm.
    original_norm = optim.clip_grad_norm(net.parameters(), max_norm=max_norm, ord=2)
    scale = max_norm / original_norm
    for param in net.parameters():
        np.testing.assert_almost_equal(param.grad.numpy(), param.grad_backup * scale)
    opt.step().clear_grad()


def test_clip_grad_value():
    net = Net()
    x = np.random.randn(10, 3, 224, 224).astype("float32")
    gm = ad.GradManager().attach(net.parameters())
    opt = optim.SGD(net.parameters(), 1e-3, momentum=0.9)
    with gm:
        y = net(mge.tensor(x))
        y = y.mean()
        gm.backward(y)
    save_grad_value(net)
    max_val = 5
    min_val = -2
    # clip_grad_value clamps each gradient element into [min_val, max_val].
    optim.clip_grad_value(net.parameters(), lower=min_val, upper=max_val)
    for param in net.parameters():
        np.testing.assert_almost_equal(
            param.grad.numpy(),
            np.maximum(np.minimum(param.grad_backup, max_val), min_val),
        )
    opt.step().clear_grad()
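
Both tests compare against a reference computed directly from the backed-up gradients. The norm test relies on clip_grad_norm computing a single global norm over all gradients and rescaling them by max_norm / total_norm when that norm exceeds the threshold, which is why one uniform scale factor works for every parameter. A minimal NumPy sketch of that behavior (clip_grad_norm_ref is a hypothetical helper for illustration, not MegEngine's actual implementation):

import numpy as np

def clip_grad_norm_ref(grads, max_norm, ord=2):
    # Global norm over the concatenation of all gradients.
    total_norm = np.linalg.norm(
        np.concatenate([g.ravel() for g in grads]), ord=ord
    )
    if total_norm > max_norm:
        scale = max_norm / total_norm
        for g in grads:
            g *= scale  # rescale in place, mirroring what the API does to tensors
    return total_norm

The file itself runs directly under pytest, e.g. pytest test_clip_grad.py.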

The MegEngine installation package ships with the CUDA environment needed to run code on a GPU, so there is no separate CPU or GPU build. To run GPU programs, make sure the machine has a GPU device and the driver is installed. If you would like to try deep-learning development on a cloud GPU platform, visit MegStudio.
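
A quick way to confirm the installed package can actually see a GPU is shown below (a minimal sketch; is_cuda_available and set_default_device are part of MegEngine's public API, but treat the exact calls as an assumption for your installed version):

import megengine as mge

# Fall back to CPU when no CUDA device is visible; the same package works for both.
if mge.is_cuda_available():
    mge.set_default_device("gpu0")  # run subsequent tensors/ops on the first GPU
else:
    mge.set_default_device("cpu0")  # pure-CPU execution

print(mge.get_default_device())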