import numpy as np
import pytest

import megengine as mge
from megengine.amp import GradScaler
from megengine.autodiff import GradManager
from megengine.jit import trace


@pytest.mark.parametrize(
    "is_trace", [False, True],
)
def test_grad_scaler(is_trace):
    gm = GradManager()
    scaler = GradScaler()

    def f(idx, data, calc):
        x = mge.tensor(data, no_cache=True)
        y = mge.tensor(data, no_cache=True)

        if is_trace:
            calc = trace(calc)

        gm.attach([x, y])
        with gm:
            loss = calc(x, y)
            # unscale_grad=False keeps the gradients in their scaled form
            scaler.backward(gm, loss, unscale_grad=False)
        # every calc below has d(loss)/dx == 2, so the scaled gradient
        # equals 2 * scale_factor
        np.testing.assert_equal(x.grad.numpy(), 2 * scaler.scale_factor)
        scaler.unscale(filter(lambda t: t.grad is not None, gm.attached_tensors()))
        # scaler.unscale(gm.attached_tensors())
        np.testing.assert_equal(x.grad.numpy(), 2)

    def double_variables(x, y):
        z = x + 2 * y
        loss = 2 * z + 1
        return loss

    def single_variable(x, y):
        z = x + 1
        loss = 2 * z + 1
        return loss

    # gradients must not share storage, since unscale may modify them in place
    def double_variables_with_same_grad(x, y):
        z = x + y
        loss = 2 * z + 1
        return loss

    for data in [np.random.random((1, 2, 3, 4)), 1.0]:
        for calc in [
            double_variables,
            single_variable,
            double_variables_with_same_grad,
        ]:
            for idx in range(3):
                f(idx, data, calc)
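
# For context, a minimal sketch of how GradScaler is typically combined with
# GradManager in a training loop. This is illustrative only and not part of
# the test: `model`, `loss_fn`, `optimizer`, and `dataloader` are hypothetical
# names, and `scaler.backward` is assumed to unscale gradients by default
# (the test above opts out of that with unscale_grad=False).
#
#   gm = GradManager().attach(model.parameters())
#   scaler = GradScaler()
#   for inputs, labels in dataloader:
#       with gm:
#           loss = loss_fn(model(inputs), labels)
#           # scales the loss, runs backward, then unscales the gradients
#           scaler.backward(gm, loss)
#       optimizer.step().clear_grad()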