GitOrigin-RevId: 482ee62652
tags/v1.0.0-rc1
@@ -40,7 +40,7 @@ __all__ = [
 ]
-@apply.add
+@apply.register()
 def _(op: RemoteSend, *args: Tensor):
     ret = tensor_apply(op, *args)
@@ -133,7 +133,7 @@ def update_model(model_path):
     data = Tensor(checkpoint["data"], dtype=np.float32)
     label = Tensor(checkpoint["label"], dtype=np.int32)
-    opt.zero_grad()
+    opt.clear_grad()
     loss = train(data, label, net=net, opt=opt)
     opt.step()
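
Note on the hunk above: the optimizer method that resets accumulated gradients is renamed from zero_grad() to clear_grad(); the rest of the update step is unchanged. A minimal before/after sketch, assuming an existing MegEngine optimizer bound to the name opt (the name is taken from the diff; everything else about the step is illustrative):

    # opt.zero_grad()   # old spelling, removed in this patch
    opt.clear_grad()     # new spelling used throughout this patch
    # ... forward, backward, then opt.step() as before
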
@@ -73,17 +73,18 @@ def _test_optimizer(opt_str, test_case, check_class, update_lr=False):
     for symbolic in (False, True):
 
         @trace(symbolic=symbolic)
-        def train_func(data, *, opt=None):
-            opt.zero_grad()
-            with opt.record():
+        def train_func(data, *, opt=None, gm=None):
+            opt.clear_grad()
+            with gm.record():
                 pred = net(data)
                 loss = pred.sum()
-                opt.backward(loss)
+                gm.backward(loss)
             opt.step()
 
         # reset net and opt
         net = Simple()
         opt = getattr(optimizer, opt_str)(net.parameters(), **test_case)
+        gm = ad.GradManager().register(net.parameters())
         check_func = check_class(net, **test_case)
         step = 0
         for i in range(iter_num):
@@ -96,7 +97,7 @@ def _test_optimizer(opt_str, test_case, check_class, update_lr=False):
             for param in net.parameters():
                 ori_params[param] = np.copy(param.numpy())
-            train_func(np.random.random(data_shape).astype(np.float32), opt=opt)
+            train_func(np.random.random(data_shape).astype(np.float32), opt=opt, gm=gm)
             step += 1
             check_func(ori_params, net.parameters(), step)
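
The two hunks above move _test_optimizer from optimizer-managed gradient recording to megengine.autodiff.GradManager: parameters are registered once, recording and backward go through the GradManager, and the optimizer keeps clear_grad() and step(). A minimal self-contained sketch of the resulting training step, assuming a placeholder M.Linear module and SGD settings that are illustrative rather than part of this patch:

    import numpy as np

    import megengine.autodiff as ad
    import megengine.module as M
    import megengine.optimizer as optim
    from megengine import tensor

    net = M.Linear(4, 1)                              # placeholder module for the sketch
    opt = optim.SGD(net.parameters(), lr=0.01)
    gm = ad.GradManager().register(net.parameters())  # register parameters once, as in the hunk above

    def train_step(data):
        opt.clear_grad()          # renamed from zero_grad()
        with gm.record():         # recording now belongs to GradManager, not the optimizer
            loss = net(data).sum()
            gm.backward(loss)     # replaces opt.backward(loss)
        opt.step()                # the parameter update stays on the optimizer
        return loss

    loss = train_step(tensor(np.random.random((8, 4)).astype("float32")))
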
@@ -67,23 +67,24 @@ def test_sgd_momentum_trace():
     for symbolic in (True, False):
 
         @trace(symbolic=symbolic)
-        def train_func(data, *, model=None, optim=None):
-            optim.zero_grad()
-            with optim.record():
+        def train_func(data, *, model=None, optim=None, gm=None):
+            optim.clear_grad()
+            with gm.record():
                 loss = net(data)
-                optim.backward(loss)
+                gm.backward(loss)
             optim.step()
             return loss
 
         @trace(symbolic=symbolic)
-        def eval_func(data, *, model=None, optim=None):
+        def eval_func(data, *, model=None, optim=None, gm=None):
             loss = net(data)
             return loss
 
         net = Simple()
         optim = optimizer.SGD(net.parameters(), lr=1.0, momentum=0.9)
+        gm = ad.GradManager().register(net.parameters())
         data = tensor([2.34])
-        train_func(data, model=net, optim=optim)
+        train_func(data, model=net, optim=optim, gm=gm)
         np.testing.assert_almost_equal(
             optim._state[net.a]["momentum_buffer"].numpy(), 2.34
         )
@@ -97,7 +98,7 @@ def test_sgd_momentum_trace():
         )
         # do a step of train
-        train_func(data, model=net, optim=optim)
+        train_func(data, model=net, optim=optim, gm=gm)
         np.testing.assert_almost_equal(loss.numpy(), 2.34 * (1.23 - 2.34), 5)
         np.testing.assert_almost_equal(
             optim._state[net.a]["momentum_buffer"].numpy(), 0.9 * 2.34 + 2.34
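
The asserted values above follow from the standard SGD momentum update buf = momentum * buf + grad. A small worked sketch of that arithmetic, assuming (as the asserted numbers suggest, though it is not shown in these hunks) that Simple() computes a * x with a initialized to 1.23, lr=1.0 and momentum=0.9:

    momentum, lr = 0.9, 1.0
    a, x = 1.23, 2.34

    grad = x                      # d(a * x) / da
    buf = grad                    # first step: momentum_buffer == 2.34
    a -= lr * buf                 # a becomes 1.23 - 2.34

    loss = a * x                  # eval after one step: 2.34 * (1.23 - 2.34)
    buf = momentum * buf + grad   # second step: 0.9 * 2.34 + 2.34
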
@@ -17,6 +17,7 @@ import megengine.functional as F
 import megengine.module as M
 import megengine.optimizer as optim
 from megengine import tensor
+from megengine.autodiff import GradManager
 from megengine.jit import trace
@@ -61,17 +62,18 @@ def test_xornet_trace_dump():
 def test_xornet_trace_dump():
     net = XORNet()
     opt = optim.SGD(net.parameters(requires_grad=True), lr=0.01, momentum=0.9)
+    gm = GradManager().register(net.parameters(requires_grad=True))
     batch_size = 64
     train_dataset = minibatch_generator(batch_size)
     val_dataset = minibatch_generator(batch_size)
 
     @trace
     def train_fun(data, label):
-        with opt.record():
+        with gm.record():
             net.train()
             pred = net(data)
             loss = F.cross_entropy_with_softmax(pred, label)
-            opt.backward(loss)
+            gm.backward(loss)
         return pred, loss
 
     @trace
@@ -95,7 +97,7 @@ def test_xornet_trace_dump():
             break
         data = tensor(minibatch["data"])
         label = tensor(minibatch["label"])
-        opt.zero_grad()
+        opt.clear_grad()
         _, loss = train_fun(data, label)
         train_loss.append((step, loss.numpy()))
         if step % 50 == 0:
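
In the traced XOR test, the GradManager and optimizer are created once and captured by the traced function as closure variables rather than passed as arguments; only clear_grad() runs per iteration outside the trace. A condensed sketch of that shape, reusing names from the hunks above (net, opt, gm, train_dataset and minibatch are assumed from the surrounding test, and the placement of opt.step() is illustrative because it is not shown in these hunks):

    @trace
    def train_fun(data, label):
        with gm.record():                    # gm is the GradManager created next to opt
            pred = net(data)
            loss = F.cross_entropy_with_softmax(pred, label)
            gm.backward(loss)
        return pred, loss

    for step, minibatch in enumerate(train_dataset):
        data = tensor(minibatch["data"])
        label = tensor(minibatch["label"])
        opt.clear_grad()                     # gradients cleared outside the traced call
        _, loss = train_fun(data, label)
        opt.step()                           # illustrative placement; not shown in the hunks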