You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

test_param_pack.py 1.7 kB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162
  1. # -*- coding: utf-8 -*-
  2. import platform
  3. import numpy as np
  4. import pytest
  5. import megengine
  6. import megengine.autodiff as ad
  7. import megengine.distributed as dist
  8. import megengine.optimizer as optimizer
  9. from megengine import Parameter, tensor
  10. from megengine.module import Module
  11. from megengine.optimizer import SGD
  12. class Simple(Module):
  13. def __init__(self, param_shape):
  14. super().__init__()
  15. self.params = [
  16. Parameter(np.ones(param_shape), dtype=np.float32) for i in range(10)
  17. ]
  18. def forward(self, x):
  19. for p in self.params:
  20. x = x * p
  21. return x
  22. @pytest.mark.require_ngpu(2)
  23. @pytest.mark.isolated_distributed
  24. @pytest.mark.parametrize(
  25. "threshold", [0, 128, None], ids=["no_pack", "small_pack", "large_pack"]
  26. )
  27. @pytest.mark.parametrize("param_shape", [(16,), (128, 256), (2, 1024, 1024)])
  28. def test_param_pack(param_shape, threshold, n_iters=100):
  29. data = np.ones(param_shape, dtype="float32")
  30. @dist.launcher(n_gpus=2)
  31. def worker():
  32. net = Simple(param_shape)
  33. opt = SGD(net.parameters(), lr=0.1)
  34. allreduce_cb = dist.make_allreduce_cb("MEAN", dist.WORLD)
  35. if threshold is not None:
  36. allreduce_cb._param_pack_thd = threshold
  37. gm = ad.GradManager().attach(net.parameters(), callbacks=[allreduce_cb])
  38. def run():
  39. opt.clear_grad()
  40. with gm:
  41. x = tensor(data)
  42. loss = net(x)
  43. loss = loss.sum()
  44. gm.backward(loss)
  45. for i in range(n_iters):
  46. run()
  47. for p in net.params:
  48. np.testing.assert_equal(p.grad.numpy(), np.ones_like(p.grad.numpy()))
  49. worker()