
fix(mge/imperative): update tests to new optimizer api

GitOrigin-RevId: 3d06e3db3c
Tag: v1.0.0-rc1
Megvii Engine Team, 4 years ago
Parent commit: e283663a02
19 changed files with 127 additions and 116 deletions
  1. imperative/python/megengine/autodiff/grad_manager.py (+1, -0)
  2. imperative/python/megengine/core/autodiff/grad.py (+7, -0)
  3. imperative/python/megengine/distributed/functional.py (+1, -1)
  4. imperative/python/megengine/optimizer/adadelta.py (+2, -13)
  5. imperative/python/megengine/optimizer/adagrad.py (+2, -12)
  6. imperative/python/megengine/optimizer/adam.py (+2, -13)
  7. imperative/python/megengine/optimizer/sgd.py (+2, -2)
  8. imperative/python/test/integration/test_advance_indexing.py (+9, -6)
  9. imperative/python/test/integration/test_ai.py (+5, -3)
 10. imperative/python/test/integration/test_bn.py (+13, -9)
 11. imperative/python/test/integration/test_converge.py (+5, -3)
 12. imperative/python/test/integration/test_correctness.py (+10, -7)
 13. imperative/python/test/integration/test_detach.py (+5, -3)
 14. imperative/python/test/integration/test_dp_correctness.py (+19, -12)
 15. imperative/python/test/integration/test_hello_world.py (+5, -3)
 16. imperative/python/test/integration/test_optimizer.py (+5, -3)
 17. imperative/python/test/integration/test_save_load.py (+7, -5)
 18. imperative/python/test/integration/test_sgd_momentum.py (+8, -6)
 19. imperative/python/test/unit/test_function.py (+19, -15)
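Taken together, the test changes follow one mechanical migration: recording and backward no longer live on the optimizer but on a GradManager from megengine.autodiff, and Optimizer.zero_grad() is renamed to clear_grad(). A minimal sketch of the new training step, assuming a module net and input data as in the tests below:

    import megengine.autodiff as ad
    import megengine.optimizer as optimizer

    gm = ad.GradManager().register(net.parameters())
    optim = optimizer.SGD(net.parameters(), lr=1.0)

    optim.clear_grad()             # formerly optim.zero_grad()
    with gm.record():              # formerly with optim.record()
        loss = net(data).sum()
        gm.backward(loss)          # formerly optim.backward(loss)
    optim.step()                   # parameter update is unchanged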

imperative/python/megengine/autodiff/grad_manager.py (+1, -0)

@@ -29,6 +29,7 @@ class GradManager:

     def register_after_backward_callback(self, callback):
         self._after_backward_callback.append(callback)
+        return self

     def backward(self, ys, dys=None):
         global backwarding_grad_manager
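Returning self makes callback registration chainable, in the same fluent style the tests already use for register(). A hypothetical sketch (the callback signature is not shown in this diff, so the no-argument callable is an assumption):

    gm = (
        ad.GradManager()
        .register(net.parameters())
        .register_after_backward_callback(lambda: None)  # assumed no-arg callable
    )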


imperative/python/megengine/core/autodiff/grad.py (+7, -0)

@@ -177,6 +177,13 @@ class Grad:
         dys = aslist(dys)
         assert len(ys) == len(dys)

+        ids = [i for i, y in enumerate(ys) if self in y._extra_data.keys()]
+        if len(ids) == 0:
+            return
+
+        ys = [y for i, y in enumerate(ys) if i in ids]
+        dys = [dy for i, dy in enumerate(dys) if i in ids]
+
         # ys is changed to a list of VariableNode which contains more information
         # such as OpNode, callback, etc.
         ys = [i._extra_data[self].node for i in ys]
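The inserted filter drops any entries of ys that this Grad instance never tracked and returns early when none remain, so the _extra_data[self] lookups below can no longer fail. A plausible consequence, sketched under the assumption that GradManager.backward delegates to this method and that untracked outputs previously raised:

    gm = ad.GradManager().register(net.parameters())
    loss = net(data).sum()  # computed outside gm.record(): loss is untracked
    gm.backward(loss)       # with this change, a silent no-op rather than an error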


imperative/python/megengine/distributed/functional.py (+1, -1)

@@ -20,8 +20,8 @@ from ..core.autodiff.grad import (
 from ..core.ops.builtin import CollectiveComm, Copy, RemoteRecv, RemoteSend
 from ..core.tensor.core import apply
 from ..core.tensor.tensor import Tensor, tensor_apply
-from ..tensor import tensor
 from ..device import get_default_device
+from ..tensor import tensor
 from .group import WORLD, Group, get_backend, get_client, get_mm_server_addr, get_rank

 __all__ = [


imperative/python/megengine/optimizer/adadelta.py (+2, -13)

@@ -11,7 +11,7 @@ from typing import Iterable, Union
 import numpy as np

 from ..functional import sqrt
-from ..tensor_nn import Buffer, Parameter
+from ..tensor_nn import Parameter
 from .optimizer import Optimizer

@@ -63,16 +63,7 @@ class Adadelta(Optimizer):

         for param in param_group["params"]:

-            if param.__wrapped__ in self._grad_skip:
-                self._grad_skip.remove(param.__wrapped__)
-                continue
-
-            if not isinstance(param.grad, Buffer):
-                raise TypeError(
-                    "grad must be a Buffer, maybe you forget to call backward()?"
-                )
-
-            if not param.requires_grad:
+            if not param.requires_grad or "grad" not in param.__dict__:
                 continue

             states = self._state[param]
@@ -91,5 +82,3 @@ class Adadelta(Optimizer):
             acc_delta = rho * acc_delta + (1 - rho) * delta ** 2
             states["square_avg"]._reset(square_avg)
             states["acc_delta"]._reset(acc_delta)
-
-        assert len(self._grad_skip) == 0

imperative/python/megengine/optimizer/adagrad.py (+2, -12)

@@ -11,7 +11,7 @@ from typing import Iterable, Union
 import numpy as np

 from ..functional import sqrt
-from ..tensor_nn import Buffer, Parameter
+from ..tensor_nn import Parameter
 from .optimizer import Optimizer

@@ -62,16 +62,7 @@ class Adagrad(Optimizer):

         for param in param_group["params"]:

-            if param.__wrapped__ in self._grad_skip:
-                self._grad_skip.remove(param.__wrapped__)
-                continue
-
-            if not isinstance(param.grad, Buffer):
-                raise TypeError(
-                    "grad must be a Buffer, maybe you forget to call backward()?"
-                )
-
-            if not param.requires_grad:
+            if not param.requires_grad or "grad" not in param.__dict__:
                 continue

             states = self._state[param]
@@ -87,4 +78,3 @@ class Adagrad(Optimizer):
             clr = lr / (1 + (step - 1) * lr_decay)

             param -= clr * delta
-        assert len(self._grad_skip) == 0

imperative/python/megengine/optimizer/adam.py (+2, -13)

@@ -8,7 +8,7 @@
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 from typing import Iterable, Tuple, Union

-from ..tensor_nn import Buffer, Parameter
+from ..tensor_nn import Parameter
 from .optimizer import Optimizer

@@ -59,18 +59,9 @@ class Adam(Optimizer):

         for param in param_group["params"]:

-            if param.__wrapped__ in self._grad_skip:
-                self._grad_skip.remove(param.__wrapped__)
+            if not param.requires_grad or "grad" not in param.__dict__:
                 continue

-            if not param.requires_grad:
-                continue
-
-            if not isinstance(param.grad, Buffer):
-                raise TypeError(
-                    "grad must be a Buffer, maybe you forget to call backward()?"
-                )
-
             grad = param.grad
             if weight_decay != 0.0:
                 grad += param * weight_decay
@@ -91,5 +82,3 @@ class Adam(Optimizer):
             # not inplace change, need to update underlying tensor handler in state
             states["exp_avg"]._reset(exp_avg)
             states["exp_avg_sq"]._reset(exp_avg_sq)
-
-        assert len(self._grad_skip) == 0

imperative/python/megengine/optimizer/sgd.py (+2, -2)

@@ -8,7 +8,7 @@
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 from typing import Iterable, Union

-from ..tensor_nn import Buffer, Parameter
+from ..tensor_nn import Parameter
 from .optimizer import Optimizer

@@ -52,7 +52,7 @@ class SGD(Optimizer):
             momentum = param_group["momentum"]

             for param in param_group["params"]:
-                if not param.requires_grad:
+                if not param.requires_grad or "grad" not in param.__dict__:
                     continue

                 grad = param.grad
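All four optimizers now share the same per-parameter guard: a parameter is skipped when it does not require grad or when no grad attribute has been populated by a backward pass. This replaces both the _grad_skip bookkeeping and the Buffer type check that previously raised a TypeError. The attribute test is a plain instance-dict lookup:

    # skip untouched parameters instead of raising
    if not param.requires_grad or "grad" not in param.__dict__:
        continue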


imperative/python/test/integration/test_advance_indexing.py (+9, -6)

@@ -9,6 +9,7 @@
 import numpy as np

 import megengine
+import megengine.autodiff as ad
 import megengine.optimizer as optimizer
 from megengine import Parameter, tensor
 from megengine.module import Module
@@ -37,8 +38,9 @@ class Simple2(Module):
 def test_advance_indexing():
     net = Simple()

+    gm = ad.GradManager().register(net.parameters())
     optim = optimizer.SGD(net.parameters(), lr=1.0)
-    optim.zero_grad()
+    optim.clear_grad()

     dshape = (10, 10)
     raw_data = np.arange(100).reshape(dshape).astype(np.float32)
@@ -46,9 +48,9 @@ def test_advance_indexing():
     data = tensor(raw_data)
     mask = tensor(raw_mask)
     answer = 1.0 - raw_data[raw_mask].sum()
-    with optim.record():
+    with gm.record():
         loss = net(data, mask).sum()
-        optim.backward(loss)
+        gm.backward(loss)
     optim.step()
     np.testing.assert_almost_equal(net.a.numpy(), np.array([answer]).astype(np.float32))
@@ -56,15 +58,16 @@ def test_advance_indexing():
 def test_advance_indexing_with_subtensor():
     net = Simple2()

+    gm = ad.GradManager().register(net.parameters())
     optim = optimizer.SGD(net.parameters(), lr=1.0)
-    optim.zero_grad()
+    optim.clear_grad()

     dshape = (2, 3, 4, 3, 4, 2)
     raw_data = np.arange(576).reshape(dshape).astype(np.float32)
     data = tensor(raw_data)
     answer = 1.0 - raw_data[1, ..., :, 0:4:2, 0:2].sum()
-    with optim.record():
+    with gm.record():
         loss = net(data).sum()
-        optim.backward(loss)
+        gm.backward(loss)
     optim.step()
     np.testing.assert_almost_equal(net.a.numpy(), np.array([answer]).astype(np.float32))

imperative/python/test/integration/test_ai.py (+5, -3)

@@ -9,6 +9,7 @@
 import numpy as np

 import megengine
+import megengine.autodiff as ad
 import megengine.optimizer as optimizer
 from megengine import Parameter, tensor
 from megengine.module import Module
@@ -27,14 +28,15 @@ class Simple(Module):
 def test_ai():
     net = Simple()

+    gm = ad.GradManager().register(net.parameters())
     optim = optimizer.SGD(net.parameters(), lr=1.0)
-    optim.zero_grad()
+    optim.clear_grad()

     dshape = (10, 10)
     data = tensor(np.ones(dshape).astype(np.float32))
-    with optim.record():
+    with gm.record():
         loss = net(data).sum()
-        optim.backward(loss)
+        gm.backward(loss)
     optim.step()
     np.testing.assert_almost_equal(
         net.a.numpy(), np.array([1.0 - dshape[0]]).astype(np.float32)


imperative/python/test/integration/test_bn.py (+13, -9)

@@ -10,6 +10,7 @@ import numpy as np
 import pytest

 import megengine
+import megengine.autodiff as ad
 import megengine.optimizer as optimizer
 from megengine import Parameter, tensor
 from megengine.module import BatchNorm2d
@@ -24,13 +25,14 @@ def test_frozen_bn():
     saved_wt = m.weight.numpy()
     saved_bias = m.bias.numpy()

+    gm = ad.GradManager().register(m.parameters())
     optim = optimizer.SGD(m.parameters(), lr=1.0)
-    optim.zero_grad()
+    optim.clear_grad()

     data = np.random.random((6, nchannel, 2, 2)).astype("float32")
-    with optim.record():
+    with gm.record():
         loss = m(data).mean()
-        optim.backward(loss)
+        gm.backward(loss)
     optim.step()

     np.testing.assert_equal(m.running_var.numpy(), saved_var)
@@ -44,13 +46,14 @@ def test_bn_no_track_stat():
     nchannel = 3
     m = BatchNorm2d(nchannel, track_running_stats=False)

+    gm = ad.GradManager().register(m.parameters())
     optim = optimizer.SGD(m.parameters(), lr=1.0)
-    optim.zero_grad()
+    optim.clear_grad()

     data = np.random.random((6, nchannel, 2, 2)).astype("float32")
-    with optim.record():
+    with gm.record():
         loss = m(data).sum()
-        optim.backward(loss)
+        gm.backward(loss)
     optim.step()

@@ -65,13 +68,14 @@ def test_bn_no_track_stat2():
     saved_mean = m.running_mean.numpy()
     assert saved_mean is not None

+    gm = ad.GradManager().register(m.parameters())
     optim = optimizer.SGD(m.parameters(), lr=1.0)
-    optim.zero_grad()
+    optim.clear_grad()

     data = np.random.random((6, nchannel, 2, 2)).astype("float32")
-    with optim.record():
+    with gm.record():
         loss = m(data).sum()
-        optim.backward(loss)
+        gm.backward(loss)
     optim.step()

     np.testing.assert_equal(m.running_var.numpy(), saved_var)


imperative/python/test/integration/test_converge.py (+5, -3)

@@ -12,6 +12,7 @@ import numpy as np
 import pytest

 import megengine as mge
+import megengine.autodiff as ad
 import megengine.functional as F
 from megengine import Tensor
 from megengine.module import Linear, Module
@@ -76,12 +77,13 @@ def test_training_converge():
     opt = SGD(
         net.parameters(requires_grad=True), lr=0.01, momentum=0.9, weight_decay=5e-4
     )
+    gm = ad.GradManager().register(net.parameters())

     def train(data, label):
-        with opt.record():
+        with gm.record():
             pred = net(data)
             loss = F.cross_entropy_with_softmax(pred, label)
-            opt.backward(loss)
+            gm.backward(loss)
         return loss

     def infer(data):
@@ -93,7 +95,7 @@ def test_training_converge():
     for data, label in itertools.islice(train_dataset, 2000):
         data = Tensor(data, dtype=np.float32)
         label = Tensor(label, dtype=np.int32)
-        opt.zero_grad()
+        opt.clear_grad()
         loss = train(data, label)
         opt.step()
         losses.append(loss.numpy())


imperative/python/test/integration/test_correctness.py (+10, -7)

@@ -15,6 +15,7 @@ import numpy as np
 import pytest

 import megengine as mge
+import megengine.autodiff as ad
 import megengine.functional as F
 from megengine import jit
 from megengine.core._trace_option import set_tensor_shape
@@ -89,11 +90,11 @@ class MnistNet(Module):
         return x

-def train(data, label, net, opt):
-    with opt.record():
+def train(data, label, net, opt, gm):
+    with gm.record():
         pred = net(data)
         loss = F.cross_entropy_with_softmax(pred, label)
-        opt.backward(loss)
+        gm.backward(loss)
     return loss

@@ -116,12 +117,13 @@ def update_model(model_path):
     net.load_state_dict(checkpoint["net_init"])
     lr = checkpoint["sgd_lr"]
     opt = SGD(net.parameters(), lr=lr)
+    gm = ad.GradManager().register(net.parameters())

     data = Tensor(checkpoint["data"], dtype=np.float32)
     label = Tensor(checkpoint["label"], dtype=np.int32)

-    opt.zero_grad()
-    loss = train(data, label, net=net, opt=opt)
+    opt.clear_grad()
+    loss = train(data, label, net, opt, gm)
     opt.step()

     xpu_name = get_xpu_name()
@@ -150,6 +152,7 @@ def run_train(
     net.load_state_dict(checkpoint["net_init"])
     lr = checkpoint["sgd_lr"]
     opt = SGD(net.parameters(), lr=lr)
+    gm = ad.GradManager().register(net.parameters())

     data = Tensor(checkpoint["data"], dtype=np.float32)
     label = Tensor(checkpoint["label"], dtype=np.int32)
@@ -165,8 +168,8 @@ def run_train(
         sublinear_memory_config=sublinear_memory_config,
    )

-    opt.zero_grad()
-    loss = train_func(data, label, net=net, opt=opt)
+    opt.clear_grad()
+    loss = train_func(data, label, net, opt, gm)
     opt.step()

     assertTensorClose(loss.numpy(), checkpoint["loss"], max_err=max_err)
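Since run_train also appears to wrap train with jit.trace (note the sublinear_memory_config argument above), the GradManager is threaded through as an explicit positional argument rather than captured as a closure, keeping the traced and eager call sites identical:

    opt.clear_grad()
    loss = train_func(data, label, net, opt, gm)  # train_func may be train or a traced variant
    opt.step()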


imperative/python/test/integration/test_detach.py (+5, -3)

@@ -9,6 +9,7 @@
 import numpy as np

 import megengine
+import megengine.autodiff as ad
 import megengine.optimizer as optimizer
 from megengine import Parameter, tensor
 from megengine.module import Module
@@ -30,13 +31,14 @@ def test_detach():
     net = Simple()

     optim = optimizer.SGD(net.parameters(), lr=1.0)
-    optim.zero_grad()
+    optim.clear_grad()
+    gm = ad.GradManager().register(net.parameters())

     dshape = (10, 10)
     data = tensor(np.ones(dshape).astype(np.float32))
-    with optim.record():
+    with gm.record():
         loss = net(data).sum()
-        optim.backward(loss)
+        gm.backward(loss)
     optim.step()
     np.testing.assert_equal(net.a.numpy(), np.array([1.0]).astype(np.float32))
     np.testing.assert_equal(


imperative/python/test/integration/test_dp_correctness.py (+19, -12)

@@ -18,6 +18,7 @@ import numpy as np
 import pytest

 import megengine as mge
+import megengine.autodiff as ad
 import megengine.distributed as dist
 import megengine.functional as F
 from megengine.device import get_default_device, set_default_device
@@ -94,11 +95,13 @@ class MnistNet(Module):
         return x

-def train(data, label, net, opt):
-    with opt.record():
+def train(data, label, net, opt, gm):
+    opt.clear_grad()
+    with gm.record():
         pred = net(data)
         loss = F.cross_entropy_with_softmax(pred, label)
-        opt.backward(loss)
+        gm.backward(loss)
+    opt.step()
     return loss

@@ -111,7 +114,7 @@ def update_model(model_path):

     .. code-block:: python

-        from test_correctness import update_model
+        from test_dp_correctness import update_model
         update_model('mnist_model_with_test.mge') # for gpu
         update_model('mnist_model_with_test_cpu.mge') # for cpu

@@ -122,6 +125,11 @@ def update_model(model_path):
     lr = checkpoint["sgd_lr"]
     opt = SGD(net.parameters(), lr=lr)

+    gm = ad.GradManager()
+    gm.register(
+        net.parameters(), callbacks=[dist.make_allreduce_cb("MEAN", dist.WORLD)]
+    )
+
     data = Tensor(checkpoint["data"], dtype=np.float32)
     label = Tensor(checkpoint["label"], dtype=np.int32)

@@ -158,24 +166,23 @@ def run_test(

 def worker(rank, max_err):
     dist.init_process_group("localhost", port, p_num, rank, rank)
-    set_default_device(device="gpu{}".format(dist.get_rank()))
     net = MnistNet(has_bn=True)
     net.load_state_dict(checkpoint["net_init"])
     lr = checkpoint["sgd_lr"]
-    opt = SGD(net.parameters(), reduce_method="mean", lr=lr)
+    opt = SGD(net.parameters(), lr=lr)
+
+    gm = ad.GradManager()
+    gm.register(
+        net.parameters(), callbacks=[dist.make_allreduce_cb("MEAN", dist.WORLD)]
+    )

     # use same data and label for all gpu's
     # such that the result does not depend on number of gpu
     data_train = Tensor(data)
     label_train = Tensor(label)

-    train_func = train
-
-    opt.zero_grad()
-    loss = train_func(data_train, label_train, net=net, opt=opt)
-    opt.step()
+    loss = train(data_train, label_train, net, opt, gm)

-    print("{} loss {}".format(get_default_device(), loss.numpy()[0]))
     assertTensorClose(loss.numpy(), checkpoint["loss"], max_err=max_err)

     if dist.get_rank():
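In the data-parallel tests the gradient averaging moves out of the optimizer (the reduce_method="mean" argument disappears from the SGD constructor) and into a GradManager callback, presumably running the all-reduce during backward rather than inside step(). The registration pattern used in both update_model and worker:

    gm = ad.GradManager()
    gm.register(
        net.parameters(), callbacks=[dist.make_allreduce_cb("MEAN", dist.WORLD)]
    )
    loss = train(data_train, label_train, net, opt, gm)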


imperative/python/test/integration/test_hello_world.py (+5, -3)

@@ -12,6 +12,7 @@ import numpy as np
 import pytest

 import megengine
+import megengine.autodiff as ad
 import megengine.optimizer as optimizer
 from megengine import Parameter, tensor
 from megengine.module import Module
@@ -31,12 +32,13 @@ def test_hello_world():
     net = Simple()

     optim = optimizer.SGD(net.parameters(), lr=1.0)
-    optim.zero_grad()
+    optim.clear_grad()
+    gm = ad.GradManager().register(net.parameters())

     data = tensor([2.34])
-    with optim.record():
+    with gm.record():
         loss = net(data)
-        optim.backward(loss)
+        gm.backward(loss)
     optim.step()
     np.testing.assert_almost_equal(
         net.a.numpy(), np.array([1.23 - 2.34]).astype(np.float32)

imperative/python/test/integration/test_optimizer.py (+5, -3)

@@ -8,6 +8,7 @@
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 import numpy as np

+import megengine.autodiff as ad
 import megengine.functional as F
 from megengine import Parameter, optimizer
 from megengine.jit import trace
@@ -43,6 +44,7 @@ def _test_optimizer(opt_str, test_case, check_class, update_lr=False):
     net = Simple()
     opt = getattr(optimizer, opt_str)(net.parameters(), **test_case)
     check_func = check_class(net, **test_case)
+    gm = ad.GradManager().register(net.parameters())

     step = 0
     data_shape = (2, 28)
@@ -54,11 +56,11 @@ def _test_optimizer(opt_str, test_case, check_class, update_lr=False):
             check_func.lr += 0.01
         data = tensor(np.random.random(data_shape).astype(np.float32))

-        opt.zero_grad()
-        with opt.record():
+        opt.clear_grad()
+        with gm.record():
             pred = net(data)
             loss = pred.sum()
-            opt.backward(loss)
+            gm.backward(loss)

         ori_params = TensorDict()
         for param in net.parameters():

imperative/python/test/integration/test_save_load.py (+7, -5)

@@ -1,6 +1,7 @@
 import numpy as np

 import megengine as mge
+import megengine.autodiff as ad
 import megengine.optimizer as optimizer
 from megengine import Parameter, tensor
 from megengine.core.tensor.raw_tensor import RawTensor
@@ -21,13 +22,14 @@ def test_save_load():
     net = Simple()

     optim = optimizer.SGD(net.parameters(), lr=1.0, momentum=0.9)
-    optim.zero_grad()
+    optim.clear_grad()
+    gm = ad.GradManager().register(net.parameters())

     data = tensor([2.34])

-    with optim.record():
+    with gm.record():
         loss = net(data)
-        optim.backward(loss)
+        gm.backward(loss)

     optim.step()

@@ -53,9 +55,9 @@ def test_save_load():
     optim.load_state_dict(checkpoint["opt_state"])
     print("load done")

-    with optim.record():
+    with gm.record():
         loss = net([1.23])
-        optim.backward(loss)
+        gm.backward(loss)

     optim.step()
     # Restore device


imperative/python/test/integration/test_sgd_momentum.py (+8, -6)

@@ -9,6 +9,7 @@
 import numpy as np

 import megengine
+import megengine.autodiff as ad
 import megengine.optimizer as optimizer
 from megengine import Parameter, tensor
 from megengine.jit import trace
@@ -29,14 +30,15 @@ def test_sgd_momentum():
     net = Simple()

     optim = optimizer.SGD(net.parameters(), lr=1.0, momentum=0.9)
-    optim.zero_grad()
+    optim.clear_grad()
+    gm = ad.GradManager().register(net.parameters())

     data = tensor([2.34])

     # do a step of train
-    with optim.record():
+    with gm.record():
         loss = net(data)
-        optim.backward(loss)
+        gm.backward(loss)
     optim.step()

     np.testing.assert_almost_equal(optim._state[net.a]["momentum_buffer"].numpy(), 2.34)
@@ -48,10 +50,10 @@ def test_sgd_momentum():
     np.testing.assert_almost_equal(optim._state[net.a]["momentum_buffer"].numpy(), 2.34)

     # do a step of train
-    optim.zero_grad()
-    with optim.record():
+    optim.clear_grad()
+    with gm.record():
         loss = net(data)
-        optim.backward(loss)
+        gm.backward(loss)
     optim.step()

     np.testing.assert_almost_equal(loss.numpy(), 2.34 * (1.23 - 2.34), 5)


imperative/python/test/unit/test_function.py (+19, -15)

@@ -9,6 +9,7 @@ import copy

 import numpy as np

+import megengine.autodiff as ad
 import megengine.functional as F
 import megengine.optimizer as optimizer
 from megengine import Parameter
@@ -41,13 +42,14 @@ def test_single_input():
         return x

     net = Simple(av)
-    optim = optimizer.SGD(net.parameters(), lr=1.0)
-    optim.zero_grad()
+    gm = ad.GradManager().register(net.parameters())
+    opt = optimizer.SGD(net.parameters(), lr=1.0)

-    with optim.record():
+    opt.clear_grad()
+    with gm.record():
         loss = net()
-        optim.backward(loss.sum())
-    optim.step()
+        gm.backward(loss.sum())
+    opt.step()

     np.testing.assert_almost_equal(loss.numpy(), (av * 10))
     np.testing.assert_almost_equal(net.a.numpy(), (av - 10))
@@ -79,13 +81,14 @@ def test_multi_input():
         return x

     net = Simple(av, bv)
-    optim = optimizer.SGD(net.parameters(), lr=1.0)
-    optim.zero_grad()
+    gm = ad.GradManager().register(net.parameters())
+    opt = optimizer.SGD(net.parameters(), lr=1.0)

-    with optim.record():
+    opt.clear_grad()
+    with gm.record():
         loss = net()
-        optim.backward(loss.sum())
-    optim.step()
+        gm.backward(loss.sum())
+    opt.step()

     np.testing.assert_almost_equal(loss.numpy(), (av * bv))
     np.testing.assert_almost_equal(net.a.numpy(), (av - 2 * bv))
@@ -118,13 +121,14 @@ def test_multi_output():
         return x + y

     net = Simple(av, bv)
-    optim = optimizer.SGD(net.parameters(), lr=1.0)
-    optim.zero_grad()
+    gm = ad.GradManager().register(net.parameters())
+    opt = optimizer.SGD(net.parameters(), lr=1.0)

-    with optim.record():
+    opt.clear_grad()
+    with gm.record():
         loss = net()
-        optim.backward(loss.sum())
-    optim.step()
+        gm.backward(loss.sum())
+    opt.step()

     np.testing.assert_almost_equal(loss.numpy(), (av * bv + av + bv), decimal=6)
     np.testing.assert_almost_equal(net.a.numpy(), (av - bv - 1), decimal=6)

