
refactor(mge): polish api

- refactor(mge): add support for optimizer.step().clear_grad() idiom

- refactor(mge): rename some methods of GradManager

- refactor(mge): remove tensor_nn and TensorDict

- refactor(mge): remove Buffer

- refactor(mge): remove requires_grad flag

- refactor(mge): add a default grad=None attribute to Tensor

- refactor(mge): deprecation for 1.0

GitOrigin-RevId: 3b723d9387
Tag: v1.0.0-rc1
Author: Megvii Engine Team
Parent commit: 10e942d952
55 changed files with 246 additions and 471 deletions
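Taken together, the bullets above change the typical training loop as sketched below. This is a rough illustration inferred from the diffs in this commit; the `Simple` model and the random data are made-up placeholders, not part of the change set.

    import numpy as np

    import megengine as mge
    import megengine.autodiff as ad
    import megengine.functional as F
    import megengine.optimizer as optim
    from megengine.module import Linear, Module

    class Simple(Module):  # placeholder model, not part of this commit
        def __init__(self):
            super().__init__()
            self.fc = Linear(28, 10)

        def forward(self, x):
            return self.fc(x)

    net = Simple()
    # GradManager.register() is renamed to attach(); attach() returns self
    gm = ad.GradManager().attach(net.parameters())
    # parameters() no longer takes a requires_grad filter
    sgd = optim.SGD(net.parameters(), lr=0.01)

    data = mge.tensor(np.random.random((2, 28)).astype("float32"))
    label = mge.tensor(np.zeros(2).astype("int32"))

    with gm:                      # GradManager is now a plain context manager
        loss = F.cross_entropy_with_softmax(net(data), label)
        gm.backward(loss)         # fills param.grad (which now defaults to None)
    sgd.step().clear_grad()       # step() returns self, enabling the new idiom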
  1. +1 -2 imperative/python/megengine/__init__.py
  2. +14 -8 imperative/python/megengine/autodiff/grad_manager.py
  3. +1 -1 imperative/python/megengine/core/ops/_internal/misc_ops.py
  4. +2 -2 imperative/python/megengine/core/tensor/megbrain_graph.py
  5. +4 -2 imperative/python/megengine/core/tensor/tensor_wrapper.py
  6. +1 -1 imperative/python/megengine/core/tensor/utils.py
  7. +2 -2 imperative/python/megengine/functional/math.py
  8. +1 -1 imperative/python/megengine/functional/types.py
  9. +4 -4 imperative/python/megengine/jit/tracing.py
  10. +0 -1 imperative/python/megengine/module/__init__.py
  11. +1 -1 imperative/python/megengine/module/activation.py
  12. +3 -3 imperative/python/megengine/module/batchnorm.py
  13. +1 -1 imperative/python/megengine/module/conv.py
  14. +9 -3 imperative/python/megengine/module/embedding.py
  15. +3 -3 imperative/python/megengine/module/init.py
  16. +1 -1 imperative/python/megengine/module/linear.py
  17. +31 -31 imperative/python/megengine/module/module.py
  18. +0 -156 imperative/python/megengine/module/parampack.py
  19. +1 -1 imperative/python/megengine/module/quantized/conv.py
  20. +1 -1 imperative/python/megengine/module/quantized/conv_bn.py
  21. +1 -1 imperative/python/megengine/module/quantized/linear.py
  22. +2 -2 imperative/python/megengine/optimizer/adadelta.py
  23. +2 -2 imperative/python/megengine/optimizer/adagrad.py
  24. +2 -2 imperative/python/megengine/optimizer/adam.py
  25. +11 -17 imperative/python/megengine/optimizer/optimizer.py
  26. +2 -2 imperative/python/megengine/optimizer/sgd.py
  27. +1 -2 imperative/python/megengine/quantization/fake_quant.py
  28. +14 -14 imperative/python/megengine/quantization/observer.py
  29. +22 -51 imperative/python/megengine/tensor.py
  30. +0 -20 imperative/python/megengine/tensor_nn.py
  31. +1 -0 imperative/python/megengine/utils/deprecation.py
  32. +1 -1 imperative/python/megengine/utils/types.py
  33. +1 -0 imperative/python/requires.txt
  34. +4 -4 imperative/python/test/integration/test_advance_indexing.py
  35. +2 -2 imperative/python/test/integration/test_ai.py
  36. +6 -6 imperative/python/test/integration/test_bn.py
  37. +3 -5 imperative/python/test/integration/test_converge.py
  38. +3 -3 imperative/python/test/integration/test_correctness.py
  39. +2 -2 imperative/python/test/integration/test_detach.py
  40. +3 -5 imperative/python/test/integration/test_dp_correctness.py
  41. +2 -2 imperative/python/test/integration/test_hello_world.py
  42. +13 -13 imperative/python/test/integration/test_optimizer.py
  43. +3 -3 imperative/python/test/integration/test_save_load.py
  44. +5 -5 imperative/python/test/integration/test_sgd_momentum.py
  45. +3 -3 imperative/python/test/integration/test_trace_dump.py
  46. +4 -4 imperative/python/test/unit/functional/test_functional.py
  47. +1 -1 imperative/python/test/unit/functional/test_tensor.py
  48. +14 -33 imperative/python/test/unit/module/test_batchnorm.py
  49. +5 -5 imperative/python/test/unit/module/test_external.py
  50. +5 -8 imperative/python/test/unit/module/test_module.py
  51. +3 -3 imperative/python/test/unit/module/test_tensor.py
  52. +14 -14 imperative/python/test/unit/test_function.py
  53. +1 -1 imperative/python/test/unit/test_indexing_op.py
  54. +8 -9 imperative/python/test/unit/test_serialization.py
  55. +1 -1 sdk/xor-deploy/xornet.py

imperative/python/megengine/__init__.py (+1, -2)

@@ -74,8 +74,7 @@ from .core._imperative_rt.utils import _set_fork_exec_path_for_timed_func
 from .device import *
 from .logger import enable_debug_log, get_logger, set_log_file, set_log_level
 from .serialization import load, save
-from .tensor import Tensor, tensor
-from .tensor_nn import Buffer, Parameter
+from .tensor import Parameter, Tensor, tensor
 from .version import __version__


 _set_fork_exec_path_for_timed_func(


imperative/python/megengine/autodiff/grad_manager.py (+14, -8)

@@ -22,7 +22,7 @@ class GradManager:
         self._after_backward_callback = []
         self._gradients = dict()

-    def register(self, params, callbacks=None):
+    def attach(self, params, callbacks=None):
         if callbacks is None:
             callbacks = []
         if isinstance(callbacks, Callable):
@@ -62,7 +62,7 @@
                 if isinstance(grad, Future):
                     grad = grad.get()
                 param = self._param_dict[p]
-                if getattr(param, "grad", None) is None:
+                if param.grad is None:
                     param.grad = grad
                 else:
                     param.grad += grad
@@ -70,9 +70,9 @@
             self._stop_record()
             backwarding_grad_manager = cache

-    def __enter__(self):
+    def record(self):
         if self._recording:
-            return self
+            raise RuntimeError("already recording")
         grad = Grad()
         self._recording = True
         self._grad = grad
@@ -88,16 +88,22 @@

            grad.wrt(param_wrapper, callback=callback)
        grad.__enter__()
-        return self

-    def __exit__(self, exc_type, exc_val, exc_tb):
+    def release(self):
+        if not self._recording:
+            raise RuntimeError("not recording")
        self._stop_record()

-    record = __enter__
-
    def _stop_record(self):
        if self._grad is not None:
            self._grad.__exit__(None, None, None)
        self._recording = False
        self._grad = None
        self._gradients = dict()
+
+    def __enter__(self):
+        self.record()
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self._stop_record()
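For call sites that cannot use a `with` block, the renamed methods can be driven explicitly. A rough sketch, assuming a `net` and `data` like those in the tests further down:

    gm = ad.GradManager().attach(net.parameters())

    gm.record()                # was __enter__; raises RuntimeError if already recording
    loss = net(data).sum()
    gm.backward(loss)          # accumulates into param.grad and stops recording

    gm.record()
    loss = net(data).sum()
    gm.release()               # was __exit__; discards the recording without backward

    with gm:                   # the context-manager form is now sugar over record()/release()
        loss = net(data).sum()
        gm.backward(loss)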

imperative/python/megengine/core/ops/_internal/misc_ops.py (+1, -1)

@@ -70,7 +70,7 @@ class Dimshuffle(PodOpVisitor):
         return bytes(ctypes.c_uint32(0)) + bytes(self)

     def __init__(self, pattern, ndim=0):
-        assert isinstance(pattern, collections.Iterable)
+        assert isinstance(pattern, collections.abc.Iterable)
         assert len(pattern) <= TensorShape.MAX_NDIM
         pattern_array = Dimshuffle.Pattern.Pattern_Array()
         for idx, v in enumerate(pattern):
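The `collections.Iterable`/`Sequence`/`Mapping` aliases used here and in the files below were deprecated since Python 3.3 and are removed in Python 3.10, hence the move to `collections.abc` across the code base. A minimal, stand-alone illustration (not code from this commit):

    import collections.abc

    def normalize_pattern(pattern):
        # mirrors the kind of check Dimshuffle performs: accept any iterable of axes
        if isinstance(pattern, collections.abc.Iterable):
            return tuple(pattern)
        return (pattern,)

    assert normalize_pattern([0, 2, 1]) == (0, 2, 1)
    assert normalize_pattern(1) == (1,)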


imperative/python/megengine/core/tensor/megbrain_graph.py (+2, -2)

@@ -231,13 +231,13 @@ class OpNode:


 def _wrap(x):
-    if isinstance(x, collections.Sequence):
+    if isinstance(x, collections.abc.Sequence):
         return type(x)(map(_wrap, x))
     return x.graph._wrap(x)


 def _unwrap(x):
-    if isinstance(x, collections.Sequence):
+    if isinstance(x, collections.abc.Sequence):
         return type(x)(map(_unwrap, x))
     return x._node




imperative/python/megengine/core/tensor/tensor_wrapper.py (+4, -2)

@@ -166,7 +166,7 @@ def _reduce(mode):
             op = builtin.Reduce(mode=mode, axis=0)
             (result,) = apply(op, data)
-        elif isinstance(axis, collections.Iterable):
+        elif isinstance(axis, collections.abc.Iterable):
             axis = list(axis)
             axis.sort(reverse=True)

@@ -204,7 +204,9 @@ def _todo(*_):

 def _expand_args(args):
     if len(args) == 1:
-        if isinstance(args[0], (collections.Sequence, TensorBase, TensorWrapperBase)):
+        if isinstance(
+            args[0], (collections.abc.Sequence, TensorBase, TensorWrapperBase)
+        ):
             args = args[0]
     return args




imperative/python/megengine/core/tensor/utils.py (+1, -1)

@@ -143,7 +143,7 @@ def astensor1d(x, *reference, dtype=None, device=None):
         (x,) = Const(x, dtype=dtype, device=device)(*reference)
         return x

-    if not isinstance(x, collections.Sequence):
+    if not isinstance(x, collections.abc.Sequence):
         raise TypeError

     if any(isinstance(i, (TensorBase, TensorWrapperBase)) for i in x):


imperative/python/megengine/functional/math.py (+2, -2)

@@ -432,7 +432,7 @@ def argmin(
     [0]

     """
-    if isinstance(axis, collections.Iterable):
+    if isinstance(axis, collections.abc.Iterable):
         axis = list(axis)
         axis.sort(reverse=True)

@@ -486,7 +486,7 @@ def argmax(
     [5]

     """
-    if isinstance(axis, collections.Iterable):
+    if isinstance(axis, collections.abc.Iterable):
         axis = list(axis)
         axis.sort(reverse=True)




imperative/python/megengine/functional/types.py (+1, -1)

@@ -15,7 +15,7 @@ def get_ndtuple(value, *, n, allow_zero=True):
    :type allow_zero: bool
    :param allow_zero: whether to allow zero tuple value"""
-    if not isinstance(value, collections.Iterable):
+    if not isinstance(value, collections.abc.Iterable):
         value = int(value)
         value = tuple([value for i in range(n)])
     else:


imperative/python/megengine/jit/tracing.py (+4, -4)

@@ -502,7 +502,7 @@ class trace:
             raise TypeError(
                 "cannot specify output_names when output is already in dict format"
             )
-        if output_names and not isinstance(output_names, collections.Sequence):
+        if output_names and not isinstance(output_names, collections.abc.Sequence):
             output_names = (output_names,)
         if output_names and len(output_names) != len(self._output_bindings):
             raise ValueError(
@@ -510,7 +510,7 @@
                     len(self._output_bindings)
                 )
             )
-        if arg_names and not isinstance(arg_names, collections.Sequence):
+        if arg_names and not isinstance(arg_names, collections.abc.Sequence):
             arg_names = (arg_names,)
         if arg_names and len(arg_names) != len(self._arg_bindings):
             raise ValueError(
@@ -646,9 +646,9 @@

     def _process_outputs(self, outputs):
         output_names = None
-        if isinstance(outputs, collections.Mapping):
+        if isinstance(outputs, collections.abc.Mapping):
             output_names, outputs = zip(*sorted(outputs.items()))
-        elif not isinstance(outputs, collections.Sequence):
+        elif not isinstance(outputs, collections.abc.Sequence):
             outputs = (outputs,)

         if not self._untraced:


imperative/python/megengine/module/__init__.py (+0, -1)

@@ -18,7 +18,6 @@ from .embedding import Embedding
 from .identity import Identity
 from .linear import Linear
 from .module import Module
-from .parampack import ParamPack
 from .pooling import AvgPool2d, MaxPool2d
 from .quant_dequant import DequantStub, QuantStub
 from .sequential import Sequential

imperative/python/megengine/module/activation.py (+1, -1)

@@ -9,7 +9,7 @@
 import numpy as np

 from ..functional import leaky_relu, prelu, relu, sigmoid, softmax
-from ..tensor_nn import Parameter
+from ..tensor import Parameter
 from .module import Module






imperative/python/megengine/module/batchnorm.py (+3, -3)

@@ -12,7 +12,7 @@ import numpy as np

 from ..distributed.group import WORLD, Group
 from ..functional import batch_norm2d, sync_batch_norm
-from ..tensor_nn import Buffer, Parameter, Tensor
+from ..tensor import Parameter, Tensor
 from . import init
 from .module import Module

@@ -45,8 +45,8 @@ class _BatchNorm(Module):
         tshape = (1, self.num_features, 1, 1)

         if self.track_running_stats:
-            self.running_mean = Buffer(np.zeros(tshape, dtype=np.float32))
-            self.running_var = Buffer(np.ones(tshape, dtype=np.float32))
+            self.running_mean = Tensor(np.zeros(tshape, dtype=np.float32))
+            self.running_var = Tensor(np.ones(tshape, dtype=np.float32))
         else:
             self.running_mean = None
             self.running_var = None


imperative/python/megengine/module/conv.py (+1, -1)

@@ -13,7 +13,7 @@ import numpy as np
 from ..core.ops._internal import param_defs as P
 from ..functional import conv2d, conv_transpose2d, local_conv2d, relu
 from ..functional.types import _pair, _pair_nonzero
-from ..tensor_nn import Parameter
+from ..tensor import Parameter
 from . import init
 from .module import Module




imperative/python/megengine/module/embedding.py (+9, -3)

@@ -11,7 +11,7 @@ from typing import Optional
 import numpy as np

 from ..functional import embedding as embedding_func
-from ..tensor_nn import Parameter
+from ..tensor import Parameter
 from . import init
 from .module import Module

@@ -72,6 +72,7 @@ class Embedding(Module):
         max_norm: Optional[float] = None,
         norm_type: Optional[float] = None,
         initial_weight: Parameter = None,
+        freeze: bool = False,
     ):
         super().__init__()
         if padding_idx is not None:
@@ -83,6 +84,7 @@ class Embedding(Module):
         self.norm_type = norm_type
         self.num_embeddings = num_embeddings
         self.embedding_dim = embedding_dim
+        self.freeze = freeze
         if initial_weight is None:
             self.weight = Parameter(
                 np.random.uniform(
@@ -101,7 +103,11 @@ class Embedding(Module):
             init.normal_(self.weight)

     def forward(self, inputs):
-        return embedding_func(inputs, self.weight)
+        if self.freeze:
+            weight = self.weight.detach()
+        else:
+            weight = self.weight
+        return embedding_func(inputs, weight)

     @classmethod
     def from_pretrained(
@@ -166,6 +172,6 @@ class Embedding(Module):
             padding_idx=padding_idx,
             max_norm=max_norm,
             norm_type=norm_type,
+            freeze=freeze,
         )
-        embedding.weight.requires_grad = not freeze
         return embedding
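Freezing is now expressed through `detach()` inside `forward` rather than by flipping `requires_grad` on the weight. A hedged sketch of `from_pretrained`: the 4x3 matrix is made up, and the exact positional signature of `from_pretrained` is assumed from the hunk above rather than shown in it.

    import numpy as np
    from megengine import Parameter
    from megengine.module import Embedding

    pretrained = Parameter(np.arange(12, dtype="float32").reshape(4, 3))
    emb = Embedding.from_pretrained(pretrained, freeze=True)
    # the weight stays a Parameter; gradients are cut by the detach() in forward()
    assert emb.freeze is True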

imperative/python/megengine/module/init.py (+3, -3)

@@ -23,7 +23,7 @@ def fill_(tensor: Tensor, val: Union[float, int]) -> None:
     :param tensor: An n-dimentional tensor to be initialized
     :param val: The value to be filled throughout the tensor
     """
-    tensor.set_value(full(shape=tensor.shape, value=val, dtype=tensor.dtype))
+    tensor._reset(full(shape=tensor.shape, value=val, dtype=tensor.dtype))


 def zeros_(tensor: Tensor) -> None:
@@ -50,7 +50,7 @@ def uniform_(tensor: Tensor, a: float = 0.0, b: float = 1.0) -> None:
     :param a: Lower bound of the sampling interval
     :param b: Upper bound of the sampling interval
     """
-    tensor.set_value(uniform(tensor.shape, low=a, high=b).astype(tensor.dtype))
+    tensor._reset(uniform(tensor.shape, low=a, high=b).astype(tensor.dtype))


 def normal_(tensor: Tensor, mean: float = 0.0, std: float = 1.0) -> None:
@@ -61,7 +61,7 @@ def normal_(tensor: Tensor, mean: float = 0.0, std: float = 1.0) -> None:
     :param mean: The mean of the normal distribution
     :param std: The standard deviation of the normal distribution
     """
-    tensor.set_value(gaussian(tensor.shape, mean=mean, std=std).astype(tensor.dtype))
+    tensor._reset(gaussian(tensor.shape, mean=mean, std=std).astype(tensor.dtype))


 def calculate_gain(


imperative/python/megengine/module/linear.py (+1, -1)

@@ -8,7 +8,7 @@
 import numpy as np

 from ..functional import linear
-from ..tensor_nn import Parameter
+from ..tensor import Parameter
 from . import init
 from .module import Module




imperative/python/megengine/module/module.py (+31, -31)

@@ -5,6 +5,7 @@
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+import warnings
 from abc import ABCMeta, abstractmethod
 from collections import OrderedDict
 from typing import Any, Callable, Iterable, Optional, Set, Tuple, Union
@@ -14,8 +15,8 @@ import numpy as np
 from ..core.tensor.dtype import is_quantize
 from ..core.tensor.utils import make_shape_tuple
 from ..logger import get_logger
-from ..tensor import Tensor
-from ..tensor_nn import Buffer, Parameter
+from ..tensor import Parameter, Tensor
+from ..utils.deprecation import deprecated
 from ..utils.hook import HookHandler

 logger = get_logger(__name__)
@@ -48,7 +49,7 @@ def _is_parameter(obj):


 def _is_buffer(obj):
-    return isinstance(obj, Buffer)
+    return isinstance(obj, Tensor) and not isinstance(obj, Parameter)


 def _is_module(obj):
@@ -163,49 +164,43 @@
             seen=seen,
         )

-    def parameters(
-        self, requires_grad: Optional[bool] = None, recursive: bool = True, **kwargs
-    ) -> Iterable[Parameter]:
+    def parameters(self, recursive: bool = True, **kwargs) -> Iterable[Parameter]:
         r"""Returns an iterable for the :class:`~.Parameter` of the module.

-        :param requires_grad: Limitation over the :attr:`~.Parameter.requires_grad`
-            attribute of returned :class:`.Parameter`. ``None`` for no limitation.
         :param recursive: If ``True``, returns all :class:`~.Parameter` within this
             module, else only returns :class:`~.Parameter` that are direct attributes
             of this module.
         """

+        if "requires_grad" in kwargs:
+            del kwargs["requires_grad"]
+            warnings.warn("passing requires_grad has no effect currently")
+
         def predicate(obj) -> bool:
-            return _is_parameter(obj) and (
-                requires_grad is None or obj.requires_grad == requires_grad
-            )
+            return _is_parameter(obj)

         yield from self._flatten(
             with_key=False, predicate=predicate, recursive=recursive, **kwargs
         )

     def named_parameters(
-        self,
-        requires_grad: Optional[bool] = None,
-        prefix: Optional[str] = None,
-        recursive: bool = True,
-        **kwargs
+        self, prefix: Optional[str] = None, recursive: bool = True, **kwargs
     ) -> Iterable[Tuple[str, Parameter]]:
         """Returns an iterable for key :class:`~.Parameter` pairs of the module, where
         ``key`` is the dotted path from this module to the :class:`~.Parameter` .

-        :param requires_grad: Limitation over the :attr:`~.Parameter.requires_grad`
-            attribute of returned :class:`~.Parameter` . ``None`` for no limitation.
         :param prefix: The prefix prepended to the keys.
         :param recursive: If ``True``, returns all :class:`~.Parameter` within this
             module, else only returns :class:`~.Parameter` that are direct attributes
             of this module.
         """

+        if "requires_grad" in kwargs:
+            del kwargs["requires_grad"]
+            warnings.warn("passing requires_grad has no effect currently")
+
         def predicate(obj) -> bool:
-            return _is_parameter(obj) and (
-                requires_grad is None or obj.requires_grad == requires_grad
-            )
+            return _is_parameter(obj)

         yield from self._flatten(
             with_key=True,
@@ -215,11 +210,13 @@
             **kwargs,
         )

-    def buffers(self, recursive: bool = True, **kwargs) -> Iterable[Buffer]:
-        """Returns an iterable for the :class:`~.Buffer` of the module.
+    def buffers(self, recursive: bool = True, **kwargs) -> Iterable[Tensor]:
+        """Returns an iterable for the buffers of the module.

-        :param recursive: If ``True``, returns all :class:`~.Buffer` within this
-            module, else only returns :class:`~.Buffer` that are direct attributes
+        Buffer is defined to be :class:`~.Tensor` excluding :class:`~.Parameter`.
+
+        :param recursive: If ``True``, returns all buffers within this
+            module, else only returns buffers that are direct attributes
             of this module.
         """
         yield from self._flatten(
@@ -228,13 +225,15 @@

     def named_buffers(
         self, prefix: Optional[str] = None, recursive: bool = True, **kwargs
-    ) -> Iterable[Tuple[str, Buffer]]:
-        """Returns an iterable for key :class:`~.Buffer` pairs of the module, where
-        ``key`` is the dotted path from this module to the :class:`~.Buffer` .
+    ) -> Iterable[Tuple[str, Tensor]]:
+        """Returns an iterable for key buffer pairs of the module, where
+        ``key`` is the dotted path from this module to the buffer.
+
+        Buffer is defined to be :class:`~.Tensor` excluding :class:`~.Parameter`.

         :param prefix: The prefix prepended to the keys.
-        :param recursive: If ``True``, returns all :class:`~.Buffer` within this
-            module, else only returns :class:`~.Buffer` that are direct attributes
+        :param recursive: If ``True``, returns all buffers within this
+            module, else only returns buffers that are direct attributes
             of this module.
         """
         yield from self._flatten(
@@ -297,6 +296,7 @@
         for it in self.modules():
             fn(it)

+    @deprecated(version="1.0")
     def zero_grad(self) -> None:
         """Set all parameters' grads to zero
         """
@@ -505,7 +505,7 @@
                 # scale/zero_points maybe invalid, use pretrained dtype instead.
                 if is_quantize(to_be_load.dtype) and is_quantize(var.dtype):
                     var = var.astype(to_be_load.dtype)
-                var.set_value(to_be_load)
+                var._reset(to_be_load)
                 loaded.append(k)

         return set(loaded), set(skipped)
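The iteration API keeps its shape; only the `requires_grad` filter is gone (passing it as a keyword now only emits a warning). A small sketch with a made-up module:

    import numpy as np
    import megengine as mge
    from megengine.module import Module

    class WithBuffer(Module):  # made-up module for illustration
        def __init__(self):
            super().__init__()
            self.weight = mge.Parameter(np.ones((3, 3), dtype="float32"))
            # any Tensor attribute that is not a Parameter now counts as a buffer
            self.scale = mge.Tensor(np.ones((1,), dtype="float32"))

        def forward(self, x):
            return (x * self.scale) @ self.weight

    m = WithBuffer()
    assert [k for k, _ in m.named_parameters()] == ["weight"]
    assert [k for k, _ in m.named_buffers()] == ["scale"]
    list(m.parameters(requires_grad=True))   # no longer filters; only warns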

imperative/python/megengine/module/parampack.py (+0, -156, file deleted)

@@ -1,156 +0,0 @@
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import collections
from typing import Callable, Iterable, Optional, Tuple

import numpy as np

from ..tensor_nn import Parameter, Tensor
from .module import Module


class ParamPack(Module):
r"""Pack module's parameters by gathering their memory to continuous address.
Using (device, dtype, requires_grad) as key, for example ('gpu0', float32, True),
parameters with same key will be packed togather.
It helps a lot for multimachine training by speeding up allreduce gradients.

:param model: the module you want to pack parameters.
:param nr_ignore_first: how many parameters will be unpacked at first.
:param max_size_per_group: upper bound of packed parameters' size in MB.
:param max_nr_params_per_group: upper bound of the number of parameters of each group.

"""

def __init__(
self,
model: Module,
nr_ignore_first: int = 8,
max_size_per_group: int = 10,
max_nr_params_per_group: int = 100,
group_func: Callable = lambda name, param: 0,
):
super().__init__()
self._model = model
self._nr_ignore_first = nr_ignore_first
self._max_size_per_group = max_size_per_group
self._max_nr_params_per_group = max_nr_params_per_group
self._group_func = group_func
self._grouped_params = []
self._packed_params = []

params = model.named_parameters()
self._pack_params(params)

def parameters(self, requires_grad: Optional[bool] = None) -> Iterable[Parameter]:
for param in self._packed_params:
if requires_grad is None or param.requires_grad == requires_grad:
yield param

def named_parameters(
self, requires_grad: Optional[bool] = None
) -> Iterable[Tuple[str, Parameter]]:
for idx, param in enumerate(self._packed_params):
if requires_grad is None or param.requires_grad == requires_grad:
yield "packed_param_" + str(idx), param

def _pack_params(self, params: Iterable[Tuple[str, Parameter]]):
groups = collections.defaultdict(list)
ignored = 0
param_id = 0
for name, param in params:
if self._nr_ignore_first > ignored:
ignored += 1
self._grouped_params.append([{"shape": param.shape, "id": param_id}])
param.pack_group_key = self._group_func(name, param)
self._packed_params.append(param)
else:
key = (
param.dtype,
param.device,
param.requires_grad,
self._group_func(name, param),
)
groups[key].append({"tensor": param, "id": param_id})
param_id += 1
for (dtype, device, requires_grad, group_key) in groups.keys():
dtype_sz = np.dtype(dtype).itemsize
align = device.mem_align
if align < dtype_sz:
align = 1
else:
assert align % dtype_sz == 0
align //= dtype_sz

group = groups[(dtype, device, requires_grad, group_key)]
while group:
aligned_pos = []
offset = 0
params = []
idx = 0
while idx < len(group):
param = group[idx]
assert param["tensor"].device == device
padding = (align - (offset & (align - 1))) & (align - 1)
offset += padding
aligned_pos.append(offset)
params.append(param)
offset += int(np.prod(param["tensor"].shape))
idx += 1

if (
offset * dtype_sz >= self._max_size_per_group * 1024 * 1024
or idx >= self._max_nr_params_per_group
):
break
group = group[idx:]
if idx == 1:
# ignore param packs with only one item
params[0]["tensor"].pack_group_key = group_key
self._packed_params.append(params[0]["tensor"])
self._grouped_params.append(
[{"shape": params[0]["tensor"].shape, "id": params[0]["id"]}]
)
continue

packed_value = np.zeros((offset,), dtype=dtype)
for param, pos in zip(params, aligned_pos):
val = param["tensor"].numpy()
packed_value[pos : pos + val.size] = val.flatten()
new_param = Parameter(
value=packed_value,
device=device,
dtype=dtype,
requires_grad=requires_grad,
)
new_param.pack_group_key = group_key
self._packed_params.append(new_param)
self._grouped_params.append(
[{"shape": i["tensor"].shape, "id": i["id"]} for i in params]
)

def forward(self, *args, **kwargs):
replace_param = dict()
for i in range(len(self._packed_params)):
packed_param = self._packed_params[i]
grouped_params = self._grouped_params[i]
if len(grouped_params) == 1:
continue
split = param_pack_split(
packed_param._symvar, [i["shape"] for i in grouped_params]
)
split = [
Parameter(Tensor(i, requires_grad=packed_param.requires_grad))
for i in split
]
for j in range(len(split)):
replace_param[grouped_params[j]["id"]] = split[j]
self._model.replace_param(replace_param, 0)

return self._model.forward(*args, **kwargs)

imperative/python/megengine/module/quantized/conv.py (+1, -1)

@@ -12,7 +12,7 @@ import numpy as np
 from ... import module as Float
 from ...core.tensor import dtype
 from ...functional import conv_bias_activation
-from ...tensor_nn import Parameter
+from ...tensor import Parameter
 from ..qat import conv as QAT
 from .module import QuantizedModule




imperative/python/megengine/module/quantized/conv_bn.py (+1, -1)

@@ -5,7 +5,7 @@
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-from ...tensor_nn import Parameter
+from ...tensor import Parameter
 from ..qat import conv_bn as QAT
 from .conv import Conv2d




imperative/python/megengine/module/quantized/linear.py (+1, -1)

@@ -9,7 +9,7 @@ import numpy as np

 from ... import functional as F
 from ...core.tensor import dtype
-from ...tensor_nn import Parameter
+from ...tensor import Parameter
 from ..qat import linear as QAT
 from .module import QuantizedModule




imperative/python/megengine/optimizer/adadelta.py (+2, -2)

@@ -11,7 +11,7 @@ from typing import Iterable, Union
 import numpy as np

 from ..functional import sqrt
-from ..tensor_nn import Parameter
+from ..tensor import Parameter
 from .optimizer import Optimizer

@@ -63,7 +63,7 @@ class Adadelta(Optimizer):

         for param in param_group["params"]:

-            if not param.requires_grad or "grad" not in param.__dict__:
+            if param.grad is None:
                 continue

             states = self._state[param]


imperative/python/megengine/optimizer/adagrad.py (+2, -2)

@@ -11,7 +11,7 @@ from typing import Iterable, Union
 import numpy as np

 from ..functional import sqrt
-from ..tensor_nn import Parameter
+from ..tensor import Parameter
 from .optimizer import Optimizer

@@ -62,7 +62,7 @@ class Adagrad(Optimizer):

         for param in param_group["params"]:

-            if not param.requires_grad or "grad" not in param.__dict__:
+            if param.grad is None:
                 continue

             states = self._state[param]


imperative/python/megengine/optimizer/adam.py (+2, -2)

@@ -8,7 +8,7 @@
 # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 from typing import Iterable, Tuple, Union

-from ..tensor_nn import Parameter
+from ..tensor import Parameter
 from .optimizer import Optimizer

@@ -59,7 +59,7 @@ class Adam(Optimizer):

         for param in param_group["params"]:

-            if not param.requires_grad or "grad" not in param.__dict__:
+            if param.grad is None:
                 continue

             grad = param.grad


imperative/python/megengine/optimizer/optimizer.py (+11, -17)

@@ -7,7 +7,7 @@
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 from abc import ABCMeta, abstractmethod
-from collections import Iterable
+from collections.abc import Iterable
 from contextlib import contextmanager
 from typing import Dict
 from typing import Iterable as Iter
@@ -15,8 +15,7 @@ from typing import Union

 import numpy as np

-from ..tensor import Tensor, TensorDict
-from ..tensor_nn import Buffer, Parameter
+from ..tensor import Parameter, Tensor


 class _RequiredParameter:
@@ -37,7 +36,7 @@ class Optimizer(metaclass=ABCMeta):
     def __init__(  # pylint: disable=too-many-branches
         self, params: Union[Iter[Parameter], dict], defaults: dict,
     ):
-        self._state = TensorDict()
+        self._state = dict()
         self._defaults = defaults

         if isinstance(params, (Parameter, dict)):
@@ -93,10 +92,6 @@
                     "optimizer can only optimize Parameters, but one of the params is "
                     + type(param)
                 )
-            if not param.requires_grad:
-                raise ValueError(
-                    "optimizer can only optimize Parameters with requires_grad=True"
-                )

         for name, default in self._defaults.items():
             if default is required and name not in param_group:
@@ -122,7 +117,7 @@
             initializer = np.zeros(param.shape, dtype=np.float32)
         state_dict = self._state.setdefault(param, {})
         assert state_name not in state_dict
-        state = Buffer(initializer)
+        state = Tensor(initializer)
         state_dict[state_name] = state

     @abstractmethod
@@ -140,7 +135,7 @@
                 params.append(param)
         return params

-    def step(self, clear_grad=False):
+    def step(self):
         r"""Performs a single optimization step.

         """
@@ -152,8 +147,7 @@
                     "Please use a list instead."
                 )
             self._updates(group)
-        if clear_grad:
-            self.clear_grad()
+        return self

     def clear_grad(self):
         r"""Clear the grad buffer.
@@ -161,8 +155,7 @@
         """
         for param_group in self.param_groups:
             for param in param_group["params"]:
-                if getattr(param, "grad", None) is not None:
-                    param.grad = None
+                param.grad = None

     def state_dict(self) -> Dict:
         r"""Export the optimizer state.
@@ -171,7 +164,7 @@
         """
         param_groups = []
         state = dict()
-        param2id = TensorDict()
+        param2id = dict()

         cur_id = 0
         for group in self.param_groups:
@@ -213,8 +206,9 @@
                 p = param_new
             self._state[p] = state["state"][param_saved].copy()
             for k, v in self._state[p].items():
-                if isinstance(v, Buffer):
-                    self._state[p][k] = Buffer(v.numpy())
+                if isinstance(v, Tensor):
+                    # TODO: maybe a more efficient way?
+                    self._state[p][k] = Tensor(v.numpy())

             if set(group_new.keys()) != set(group_saved.keys()):
                 raise ValueError(
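Because `step()` now returns `self` and `clear_grad()` unconditionally sets every `param.grad` back to `None`, the two calls chain into the idiom named in the commit message. A minimal sketch:

    import numpy as np
    import megengine.autodiff as ad
    import megengine.optimizer as optim
    from megengine import Parameter

    w = Parameter(np.ones((3,), dtype="float32"))
    gm = ad.GradManager().attach([w])
    sgd = optim.SGD([w], lr=0.1)

    with gm:
        loss = (w * w).sum()
        gm.backward(loss)
    assert w.grad is not None

    sgd.step().clear_grad()     # was: sgd.step(clear_grad=True)
    assert w.grad is None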


imperative/python/megengine/optimizer/sgd.py (+2, -2)

@@ -8,7 +8,7 @@
 # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 from typing import Iterable, Union

-from ..tensor_nn import Parameter
+from ..tensor import Parameter
 from .optimizer import Optimizer

@@ -52,7 +52,7 @@ class SGD(Optimizer):
         momentum = param_group["momentum"]

         for param in param_group["params"]:
-            if not param.requires_grad or "grad" not in param.__dict__:
+            if param.grad is None:
                 continue

             grad = param.grad


imperative/python/megengine/quantization/fake_quant.py (+1, -2)

@@ -14,8 +14,7 @@ from .. import functional as F
 from ..core.tensor.dtype import _metadata_dict, get_quantized_dtype
 from ..core.tensor.function import Function
 from ..module import Module
-from ..tensor import Tensor
-from ..tensor_nn import Parameter
+from ..tensor import Parameter, Tensor
 from .utils import QuantMode, fake_quant_tensor, get_qparam_dict






imperative/python/megengine/quantization/observer.py (+14, -14)

@@ -13,7 +13,7 @@ import numpy as np
 from .. import functional as F
 from ..core.tensor.dtype import _metadata_dict, get_quantized_dtype
 from ..module import Module
-from ..tensor_nn import Buffer
+from ..tensor import Tensor
 from .utils import QuantMode, Round, get_qparam_dict

@@ -82,8 +82,8 @@ class MinMaxObserver(Observer):
     ):
         super().__init__(dtype, narrow_range)
         self.mode = mode
-        self.min_val = Buffer(np.finfo(np.float32).max, dtype=np.float32)
-        self.max_val = Buffer(np.finfo(np.float32).min, dtype=np.float32)
+        self.min_val = Tensor(np.finfo(np.float32).max, dtype=np.float32)
+        self.max_val = Tensor(np.finfo(np.float32).min, dtype=np.float32)
         self.scale_limit = eps

     def _calculate_qparams(self, inp_min_val, inp_max_val):
@@ -118,8 +118,8 @@
         # stop gradient
         x = x_orig.detach()
         # find max and min
-        self.min_val.set_value(F.minimum(self.min_val, x.min()))
-        self.max_val.set_value(F.maximum(self.max_val, x.max()))
+        self.min_val._reset(F.minimum(self.min_val, x.min()))
+        self.max_val._reset(F.maximum(self.max_val, x.max()))
         return x_orig

@@ -133,22 +133,22 @@ class ExponentialMovingAverageObserver(MinMaxObserver):
         narrow_range: bool = False,
     ):
         super().__init__(mode, eps, dtype, narrow_range)
-        self.momentum = Buffer(momentum)
-        self.runtime_momentum = Buffer(0.0)
+        self.momentum = Tensor(momentum)
+        self.runtime_momentum = Tensor(0.0)

     def set_momentum(self, momentum):
-        self.momentum.set_value(momentum)
+        self.momentum._reset(momentum)

     def forward(self, x_orig):
         if self.enabled:
             # stop gradient
             x = x_orig.detach()
             # Exponential Moving Average
-            self.min_val.set_value(
+            self.min_val._reset(
                 self.min_val * self.runtime_momentum
                 + (1 - self.runtime_momentum) * x.min()
             )
-            self.max_val.set_value(
+            self.max_val._reset(
                 self.max_val * self.runtime_momentum
                 + (1 - self.runtime_momentum) * x.max()
             )
@@ -171,7 +171,7 @@ class HistogramObserver(MinMaxObserver):
         self.bins = bins
         self.upsample_rate = upsample_rate
         self.dst_nbins = _metadata_dict[dtype].qmax - _metadata_dict[dtype].qmin + 1
-        self.histogram = Buffer([-1] + [0.0] * (bins - 1))
+        self.histogram = Tensor([-1] + [0.0] * (bins - 1))

     def _non_linear_param_search(self):
         r"""Non-linear parameter search.
@@ -395,9 +395,9 @@
             self.bins,
         )

-        self.histogram.set_value(new_histogram)
-        self.min_val.set_value(new_min)
-        self.max_val.set_value(new_max)
+        self.histogram._reset(new_histogram)
+        self.min_val._reset(new_min)
+        self.max_val._reset(new_max)

     def forward(self, x_orig):
         self.sideeffect_forward(x_orig)


imperative/python/megengine/tensor.py (+22, -51)

@@ -14,10 +14,11 @@ from .core import Tensor as _Tensor
 from .core.ops.builtin import Copy
 from .core.tensor.core import apply
 from .device import get_default_device
+from .utils.deprecation import deprecated


 class Tensor(_Tensor):
-    requires_grad = False
+    grad = None
     dmap_callback = None

     def __init__(self, data, dtype=None, device=None):
@@ -26,15 +27,32 @@ class Tensor(_Tensor):
         self.q_dict = {"mode": None, "scale": None, "zero_point": None}
         super().__init__(data, dtype=dtype, device=device)

+    @deprecated(version="1.0", reason="no need to reuse an existing tensor since 1.0")
     def set_value(self, value):
         self._reset(value)

+    @deprecated(version="1.0", reason="use *= 0 instead")
     def reset_zero(self):
         self *= 0

     def to(self, cn):
         return apply(Copy(comp_node=cn), self)[0]

+    @property
+    def requires_grad(self):
+        raise AttributeError("requires_grad is reserved for future use")
+
+    @requires_grad.setter
+    def requires_grad(self, value):
+        raise AttributeError("requires_grad is reserved for future use")
+
+    @requires_grad.deleter
+    def requires_grad(self):
+        raise AttributeError("requires_grad is reserved for future use")
+
+    def __hash__(self):
+        return id(self)
+
     def __getstate__(self):
         r""" __getstate__ will be called for pickle serialization or deep copy
         """
@@ -73,53 +91,6 @@ class Tensor(_Tensor):
 tensor = Tensor


-class Dict(collections.MutableMapping):
-    def __init__(self, *args, key=None, **kwargs):
-        self.data = {}
-        if key:
-            self.keyfn = key
-        for i in args:
-            self.update(i)
-        self.update(**kwargs)
-
-    @staticmethod
-    def keyfn(key):  # pylint: disable=method-hidden
-        return key
-
-    def __getitem__(self, key):
-        _, v = self.data[self.keyfn(key)]
-        return v
-
-    def __setitem__(self, key, value):
-        self.data[self.keyfn(key)] = key, value
-
-    def __delitem__(self, key):
-        del self.data[self.keyfn(key)]
-
-    def __iter__(self):
-        for _, (k, _) in self.data.items():
-            yield k
-
-    def __len__(self):
-        return len(self.data)
-
-
-class TensorDict(Dict):  # pylint: disable=too-many-ancestors
-    class keyfn:
-        def __new__(cls, x: Tensor):
-            if not isinstance(x, Tensor):
-                return x
-            return super().__new__(cls)
-
-        def __init__(self, x: Tensor):
-            self._data = x  # do not save id directly to make pickle work
-
-        def __hash__(self):
-            return id(self._data)
-
-        def __eq__(self, other):
-            # pylint: disable=undefined-variable
-            return isinstance(other, __class__) and id(self._data) == id(other._data)
-
-    def __init__(self, *args):
-        super().__init__(*args)
+class Parameter(Tensor):
+    r"""A kind of Tensor that is to be considered a module parameter.
+    """

imperative/python/megengine/tensor_nn.py (+0, -20, file deleted)

@@ -1,20 +0,0 @@
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from . import Tensor, tensor


class Buffer(Tensor):
r"""A kind of Tensor with ``requires_grad=False``.
"""


class Parameter(Tensor):
r"""A kind of Tensor that is to be considered a module parameter.
"""
requires_grad = True

imperative/python/megengine/utils/deprecation.py (+1, -0, new file)

@@ -0,0 +1 @@
+from deprecated.sphinx import deprecated
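The new helper simply re-exports `deprecated` from the Deprecated package (hence the new entry in requires.txt). In this commit it is applied like the following, taken from the hunks above:

    from megengine.utils.deprecation import deprecated

    @deprecated(version="1.0", reason="use *= 0 instead")
    def reset_zero(self):
        self *= 0

Calling a wrapped function emits a DeprecationWarning, and the version/reason are rendered into the Sphinx docstring of the decorated function.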

imperative/python/megengine/utils/types.py (+1, -1)

@@ -15,7 +15,7 @@ def get_ndtuple(value, *, n, allow_zero=True):
    :type allow_zero: bool
    :param allow_zero: whether to allow zero tuple value"""
-    if not isinstance(value, collections.Iterable):
+    if not isinstance(value, collections.abc.Iterable):
         value = int(value)
         value = tuple([value for i in range(n)])
     else:


imperative/python/requires.txt (+1, -0)

@@ -5,3 +5,4 @@ requests
 tabulate
 tqdm
 redispy
+deprecated

imperative/python/test/integration/test_advance_indexing.py (+4, -4)

@@ -38,7 +38,7 @@ class Simple2(Module):
 def test_advance_indexing():
     net = Simple()

-    gm = ad.GradManager().register(net.parameters())
+    gm = ad.GradManager().attach(net.parameters())
     optim = optimizer.SGD(net.parameters(), lr=1.0)
     optim.clear_grad()

@@ -48,7 +48,7 @@ def test_advance_indexing():
     data = tensor(raw_data)
     mask = tensor(raw_mask)
     answer = 1.0 - raw_data[raw_mask].sum()
-    with gm.record():
+    with gm:
         loss = net(data, mask).sum()
         gm.backward(loss)
     optim.step()
@@ -58,7 +58,7 @@
 def test_advance_indexing_with_subtensor():
     net = Simple2()

-    gm = ad.GradManager().register(net.parameters())
+    gm = ad.GradManager().attach(net.parameters())
     optim = optimizer.SGD(net.parameters(), lr=1.0)
     optim.clear_grad()

@@ -66,7 +66,7 @@ def test_advance_indexing_with_subtensor():
     raw_data = np.arange(576).reshape(dshape).astype(np.float32)
     data = tensor(raw_data)
     answer = 1.0 - raw_data[1, ..., :, 0:4:2, 0:2].sum()
-    with gm.record():
+    with gm:
         loss = net(data).sum()
         gm.backward(loss)
     optim.step()


imperative/python/test/integration/test_ai.py (+2, -2)

@@ -28,13 +28,13 @@ class Simple(Module):
 def test_ai():
     net = Simple()

-    gm = ad.GradManager().register(net.parameters())
+    gm = ad.GradManager().attach(net.parameters())
     optim = optimizer.SGD(net.parameters(), lr=1.0)
     optim.clear_grad()

     dshape = (10, 10)
     data = tensor(np.ones(dshape).astype(np.float32))
-    with gm.record():
+    with gm:
         loss = net(data).sum()
         gm.backward(loss)
     optim.step()


imperative/python/test/integration/test_bn.py (+6, -6)

@@ -25,12 +25,12 @@ def test_frozen_bn():
     saved_wt = m.weight.numpy()
     saved_bias = m.bias.numpy()

-    gm = ad.GradManager().register(m.parameters())
+    gm = ad.GradManager().attach(m.parameters())
     optim = optimizer.SGD(m.parameters(), lr=1.0)
     optim.clear_grad()

     data = np.random.random((6, nchannel, 2, 2)).astype("float32")
-    with gm.record():
+    with gm:
         loss = m(data).mean()
         gm.backward(loss)
     optim.step()
@@ -46,12 +46,12 @@ def test_bn_no_track_stat():
     nchannel = 3
     m = BatchNorm2d(nchannel, track_running_stats=False)

-    gm = ad.GradManager().register(m.parameters())
+    gm = ad.GradManager().attach(m.parameters())
     optim = optimizer.SGD(m.parameters(), lr=1.0)
     optim.clear_grad()

     data = np.random.random((6, nchannel, 2, 2)).astype("float32")
-    with gm.record():
+    with gm:
         loss = m(data).sum()
         gm.backward(loss)
     optim.step()
@@ -68,12 +68,12 @@ def test_bn_no_track_stat2():
     saved_mean = m.running_mean.numpy()
     assert saved_mean is not None

-    gm = ad.GradManager().register(m.parameters())
+    gm = ad.GradManager().attach(m.parameters())
     optim = optimizer.SGD(m.parameters(), lr=1.0)
     optim.clear_grad()

     data = np.random.random((6, nchannel, 2, 2)).astype("float32")
-    with gm.record():
+    with gm:
         loss = m(data).sum()
         gm.backward(loss)
     optim.step()


imperative/python/test/integration/test_converge.py (+3, -5)

@@ -74,13 +74,11 @@ class XORNet(Module):

 def test_training_converge():
     net = XORNet()
-    opt = SGD(
-        net.parameters(requires_grad=True), lr=0.01, momentum=0.9, weight_decay=5e-4
-    )
-    gm = ad.GradManager().register(net.parameters())
+    opt = SGD(net.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
+    gm = ad.GradManager().attach(net.parameters())

     def train(data, label):
-        with gm.record():
+        with gm:
             pred = net(data)
             loss = F.cross_entropy_with_softmax(pred, label)
             gm.backward(loss)


imperative/python/test/integration/test_correctness.py (+3, -3)

@@ -91,7 +91,7 @@ class MnistNet(Module):


 def train(data, label, net, opt, gm):
-    with gm.record():
+    with gm:
         pred = net(data)
         loss = F.cross_entropy_with_softmax(pred, label)
         gm.backward(loss)
@@ -117,7 +117,7 @@ def update_model(model_path):
     net.load_state_dict(checkpoint["net_init"])
     lr = checkpoint["sgd_lr"]
     opt = SGD(net.parameters(), lr=lr)
-    gm = ad.GradManager().register(net.parameters())
+    gm = ad.GradManager().attach(net.parameters())

     data = Tensor(checkpoint["data"], dtype=np.float32)
     label = Tensor(checkpoint["label"], dtype=np.int32)
@@ -152,7 +152,7 @@ def run_train(
     net.load_state_dict(checkpoint["net_init"])
     lr = checkpoint["sgd_lr"]
     opt = SGD(net.parameters(), lr=lr)
-    gm = ad.GradManager().register(net.parameters())
+    gm = ad.GradManager().attach(net.parameters())

     data = Tensor(checkpoint["data"], dtype=np.float32)
     label = Tensor(checkpoint["label"], dtype=np.int32)


imperative/python/test/integration/test_detach.py (+2, -2)

@@ -32,11 +32,11 @@ def test_detach():

     optim = optimizer.SGD(net.parameters(), lr=1.0)
     optim.clear_grad()
-    gm = ad.GradManager().register(net.parameters())
+    gm = ad.GradManager().attach(net.parameters())

     dshape = (10, 10)
     data = tensor(np.ones(dshape).astype(np.float32))
-    with gm.record():
+    with gm:
         loss = net(data).sum()
         gm.backward(loss)
     optim.step()


imperative/python/test/integration/test_dp_correctness.py (+3, -5)

@@ -97,7 +97,7 @@ class MnistNet(Module):

 def train(data, label, net, opt, gm):
     opt.clear_grad()
-    with gm.record():
+    with gm:
         pred = net(data)
         loss = F.cross_entropy_with_softmax(pred, label)
         gm.backward(loss)
@@ -125,8 +125,7 @@ def update_model(model_path):
     lr = checkpoint["sgd_lr"]
     opt = SGD(net.parameters(), lr=lr)

-    gm = ad.GradManager()
-    gm.register(
+    gm = ad.GradManager().attach(
         net.parameters(), callbacks=[dist.make_allreduce_cb("MEAN", dist.WORLD)]
     )

@@ -171,8 +170,7 @@ def run_test(
     lr = checkpoint["sgd_lr"]
     opt = SGD(net.parameters(), lr=lr)

-    gm = ad.GradManager()
-    gm.register(
+    gm = ad.GradManager().attach(
         net.parameters(), callbacks=[dist.make_allreduce_cb("MEAN", dist.WORLD)]
     )



+ 2
- 2
imperative/python/test/integration/test_hello_world.py

@@ -33,10 +33,10 @@ def test_hello_world():
     optim = optimizer.SGD(net.parameters(), lr=1.0)
     optim.clear_grad()
-    gm = ad.GradManager().register(net.parameters())
+    gm = ad.GradManager().attach(net.parameters())

     data = tensor([2.34])
-    with gm.record():
+    with gm:
         loss = net(data)
         gm.backward(loss)
     optim.step()
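Taken together, the hunks above capture the post-refactor training idiom used throughout these tests: GradManager.attach() replaces register(), and the manager itself is entered as the recording context instead of gm.record(). A minimal sketch, assuming only the calls visible in these diffs (GradManager.attach, the with gm: context, gm.backward, SGD's clear_grad/step) and the import aliases the tests already use; the Linear(28, 1) module and the (2, 28) input are illustrative:

    import numpy as np
    import megengine.autodiff as ad
    import megengine.optimizer as optimizer
    from megengine import tensor
    from megengine.module import Linear

    net = Linear(28, 1)
    gm = ad.GradManager().attach(net.parameters())  # formerly .register()
    opt = optimizer.SGD(net.parameters(), lr=0.01)

    data = tensor(np.random.random((2, 28)).astype(np.float32))
    opt.clear_grad()
    with gm:                                        # formerly gm.record()
        loss = net(data).sum()
        gm.backward(loss)
    opt.step()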


+ 13
- 13
imperative/python/test/integration/test_optimizer.py

@@ -13,7 +13,7 @@ import megengine.functional as F
 from megengine import Parameter, optimizer
 from megengine.jit import trace
 from megengine.module import Linear, Module
-from megengine.tensor import TensorDict, tensor
+from megengine.tensor import tensor


 class MLP(Module):
@@ -44,7 +44,7 @@ def _test_optimizer(opt_str, test_case, check_class, update_lr=False):
     net = Simple()
     opt = getattr(optimizer, opt_str)(net.parameters(), **test_case)
     check_func = check_class(net, **test_case)
-    gm = ad.GradManager().register(net.parameters())
+    gm = ad.GradManager().attach(net.parameters())

     step = 0
     data_shape = (2, 28)
@@ -57,12 +57,12 @@ def _test_optimizer(opt_str, test_case, check_class, update_lr=False):
         data = tensor(np.random.random(data_shape).astype(np.float32))

         opt.clear_grad()
-        with gm.record():
+        with gm:
             pred = net(data)
             loss = pred.sum()
             gm.backward(loss)

-        ori_params = TensorDict()
+        ori_params = {}
         for param in net.parameters():
             ori_params[param] = np.copy(param.numpy())
         opt.step()
@@ -75,7 +75,7 @@ def _test_optimizer(opt_str, test_case, check_class, update_lr=False):
         @trace(symbolic=symbolic)
         def train_func(data, *, opt=None, gm=None):
             opt.clear_grad()
-            with gm.record():
+            with gm:
                 pred = net(data)
                 loss = pred.sum()
                 gm.backward(loss)
@@ -84,7 +84,7 @@ def _test_optimizer(opt_str, test_case, check_class, update_lr=False):
         # reset net and opt
         net = Simple()
         opt = getattr(optimizer, opt_str)(net.parameters(), **test_case)
-        gm = ad.GradManager().register(net.parameters())
+        gm = ad.GradManager().attach(net.parameters())
         check_func = check_class(net, **test_case)
         step = 0
         for i in range(iter_num):
@@ -93,7 +93,7 @@ def _test_optimizer(opt_str, test_case, check_class, update_lr=False):
                     group["lr"] += 0.01
                 check_func.lr += 0.01

-            ori_params = TensorDict()
+            ori_params = {}
             for param in net.parameters():
                 ori_params[param] = np.copy(param.numpy())

@@ -105,7 +105,7 @@ def _test_optimizer(opt_str, test_case, check_class, update_lr=False):
 def test_sgd():
     class CheckValue:
         def __init__(self, net, **kwarg):
-            self.slots = TensorDict()
+            self.slots = {}
             for param in net.parameters():
                 self.slots[param] = np.zeros(param.shape).astype(np.float32)
             for k, v in kwarg.items():
@@ -134,8 +134,8 @@ def test_sgd():
 def test_adam():
     class CheckValue:
         def __init__(self, net, **kwarg):
-            self.m_slots = TensorDict()
-            self.v_slots = TensorDict()
+            self.m_slots = {}
+            self.v_slots = {}
             for param in net.parameters():
                 self.m_slots[param] = np.zeros(param.shape).astype(np.float32)
                 self.v_slots[param] = np.zeros(param.shape).astype(np.float32)
@@ -175,7 +175,7 @@ def test_adam():
 def test_adagrad():
     class CheckValue:
         def __init__(self, net, **kwarg):
-            self.s_slots = TensorDict()
+            self.s_slots = {}
             for param in net.parameters():
                 self.s_slots[param] = np.zeros(param.shape).astype(np.float32)
             for k, v in kwarg.items():
@@ -207,8 +207,8 @@ def test_adagrad():
 def test_adadelta():
     class CheckValue:
         def __init__(self, net, **kwarg):
-            self.s_slots = TensorDict()
-            self.a_slots = TensorDict()
+            self.s_slots = {}
+            self.a_slots = {}
             for param in net.parameters():
                 self.s_slots[param] = np.zeros(param.shape).astype(np.float32)
                 self.a_slots[param] = np.zeros(param.shape).astype(np.float32)
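With TensorDict removed, the optimizer tests above keep their per-parameter bookkeeping in ordinary dicts keyed by the parameter objects themselves. A minimal sketch of that pattern, where net stands in for any module from these tests:

    import numpy as np

    # snapshot every parameter's current value, keyed by the Parameter object
    ori_params = {}
    for param in net.parameters():
        ori_params[param] = np.copy(param.numpy())
    # ... after opt.step(), compare param.numpy() against ori_params[param]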


+ 3
- 3
imperative/python/test/integration/test_save_load.py

@@ -23,11 +23,11 @@ def test_save_load():
     optim = optimizer.SGD(net.parameters(), lr=1.0, momentum=0.9)
     optim.clear_grad()
-    gm = ad.GradManager().register(net.parameters())
+    gm = ad.GradManager().attach(net.parameters())

     data = tensor([2.34])

-    with gm.record():
+    with gm:
         loss = net(data)
         gm.backward(loss)
@@ -55,7 +55,7 @@ def test_save_load():
     optim.load_state_dict(checkpoint["opt_state"])
     print("load done")

-    with gm.record():
+    with gm:
         loss = net([1.23])
         gm.backward(loss)




+ 5
- 5
imperative/python/test/integration/test_sgd_momentum.py

@@ -31,12 +31,12 @@ def test_sgd_momentum():
     optim = optimizer.SGD(net.parameters(), lr=1.0, momentum=0.9)
     optim.clear_grad()
-    gm = ad.GradManager().register(net.parameters())
+    gm = ad.GradManager().attach(net.parameters())

     data = tensor([2.34])

     # do a step of train
-    with gm.record():
+    with gm:
         loss = net(data)
         gm.backward(loss)
     optim.step()
@@ -51,7 +51,7 @@ def test_sgd_momentum():

     # do a step of train
     optim.clear_grad()
-    with gm.record():
+    with gm:
         loss = net(data)
         gm.backward(loss)
     optim.step()
@@ -69,7 +69,7 @@ def test_sgd_momentum_trace():
         @trace(symbolic=symbolic)
         def train_func(data, *, model=None, optim=None, gm=None):
             optim.clear_grad()
-            with gm.record():
+            with gm:
                 loss = net(data)
                 gm.backward(loss)
             optim.step()
@@ -82,7 +82,7 @@ def test_sgd_momentum_trace():

         net = Simple()
         optim = optimizer.SGD(net.parameters(), lr=1.0, momentum=0.9)
-        gm = ad.GradManager().register(net.parameters())
+        gm = ad.GradManager().attach(net.parameters())
         data = tensor([2.34])
         train_func(data, model=net, optim=optim, gm=gm)
         np.testing.assert_almost_equal(


+ 3
- 3
imperative/python/test/integration/test_trace_dump.py

@@ -61,15 +61,15 @@ class XORNet(M.Module):

 def test_xornet_trace_dump():
     net = XORNet()
-    opt = optim.SGD(net.parameters(requires_grad=True), lr=0.01, momentum=0.9)
-    gm = GradManager().register(net.parameters(requires_grad=True))
+    opt = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
+    gm = GradManager().attach(net.parameters())
     batch_size = 64
     train_dataset = minibatch_generator(batch_size)
     val_dataset = minibatch_generator(batch_size)

     @trace
     def train_fun(data, label):
-        with gm.record():
+        with gm:
             net.train()
             pred = net(data)
             loss = F.cross_entropy_with_softmax(pred, label)


+ 4
- 4
imperative/python/test/unit/functional/test_functional.py

@@ -14,7 +14,7 @@ import pytest
 import megengine.core.ops.builtin as builtin
 import megengine.core.tensor.dtype as dtype
 import megengine.functional as F
-from megengine import Buffer, Parameter, is_cuda_available, tensor
+from megengine import Parameter, Tensor, is_cuda_available, tensor
 from megengine.core._trace_option import use_tensor_shape
 from megengine.core.autodiff.grad import Grad
 from megengine.core.tensor.utils import make_shape_tuple
@@ -330,7 +330,7 @@ def test_roi_pooling():
 def test_add_update():
     shape = (2, 3)
     v = np.random.random(shape).astype(np.float32)
-    b = Buffer(v)
+    b = Tensor(v)

     u = F.add_update(b, 1)
     assertTensorClose(u.numpy(), v + 1)
@@ -347,7 +347,7 @@ def test_add_update():

 def test_add_update_params():
     b = np.random.random((2, 3)).astype(np.float32)
-    y = Buffer(b)
+    y = Tensor(b)

     # @jit.trace
     def f(x):
@@ -355,7 +355,7 @@ def test_add_update_params():

     f(np.zeros((2, 3)).astype(np.float32))

-    z = Buffer(np.zeros((2, 3)).astype(np.float32))
+    z = Tensor(np.zeros((2, 3)).astype(np.float32))
     F.add_update(y, z, beta=0.1)

     res = f(np.ones((2, 3)).astype(np.float32))


+ 1
- 1
imperative/python/test/unit/functional/test_tensor.py

@@ -12,7 +12,7 @@ import numpy as np
 import pytest

 import megengine.functional as F
-from megengine import Buffer, Parameter, is_cuda_available, tensor
+from megengine import tensor
 from megengine.core._trace_option import use_tensor_shape
 from megengine.core.tensor.utils import astensor1d
 from megengine.distributed.helper import get_device_count_by_fork


+ 14
- 33
imperative/python/test/unit/module/test_batchnorm.py

@@ -14,10 +14,9 @@ import pytest

 import megengine as mge
 import megengine.distributed as dist
-from megengine import tensor
+from megengine import Tensor
 from megengine.core._trace_option import use_tensor_shape
 from megengine.module import BatchNorm1d, BatchNorm2d, SyncBatchNorm
-from megengine.tensor import Tensor
 from megengine.test import assertTensorClose


@@ -45,10 +44,8 @@ def test_syncbn():
             return
         dist.init_process_group("localhost", port, nr_ranks, rank, rank)
         bn = SyncBatchNorm(nr_chan, momentum=momentum, eps=eps)
-        data_tensor = tensor([])
         for i in range(steps):
-            data_tensor.set_value(data[i])
-            yv = bn(data_tensor)
+            yv = bn(Tensor(data[i]))

         assertTensorClose(yv_expect, yv.numpy(), max_err=5e-6)
         assertTensorClose(running_mean, bn.running_mean.numpy(), max_err=5e-6)
@@ -105,7 +102,6 @@ def test_batchnorm():
     bn = BatchNorm1d(nr_chan, momentum=momentum)
     running_mean = np.zeros((1, nr_chan, 1), dtype=np.float32)
     running_var = np.ones((1, nr_chan, 1), dtype=np.float32)
-    data = tensor([])
     for i in range(3):
         xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
         mean = np.mean(np.mean(xv, axis=0, keepdims=True), axis=2, keepdims=True)
@@ -120,8 +116,7 @@ def test_batchnorm():
         running_mean = running_mean * momentum + mean * (1 - momentum)
         running_var = running_var * momentum + var_unbiased * (1 - momentum)

-        data.set_value(xv)
-        yv = bn(data)
+        yv = bn(Tensor(xv))
         yv_expect = (xv - mean) / sd

         assertTensorClose(yv_expect, yv.numpy(), max_err=5e-6)
@@ -137,7 +132,7 @@ def test_batchnorm():
     var_backup = bn.running_var.numpy()
     bn.training = False
     xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
-    data.set_value(xv)
+    data = Tensor(xv)
     yv1 = bn(data)
     yv2 = bn(data)
     assertTensorClose(yv1.numpy(), yv2.numpy(), max_err=0)
@@ -161,7 +156,6 @@ def test_syncbn1d():
     bn = SyncBatchNorm(nr_chan, momentum=momentum)
     running_mean = np.zeros((1, nr_chan, 1), dtype=np.float32)
     running_var = np.ones((1, nr_chan, 1), dtype=np.float32)
-    data = tensor([])
     for i in range(3):
         xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
         mean = np.mean(np.mean(xv, axis=0, keepdims=True), axis=2, keepdims=True)
@@ -176,8 +170,7 @@ def test_syncbn1d():
         running_mean = running_mean * momentum + mean * (1 - momentum)
         running_var = running_var * momentum + var_unbiased * (1 - momentum)

-        data.set_value(xv)
-        yv = bn(data)
+        yv = bn(Tensor(xv))
         yv_expect = (xv - mean) / sd

         assertTensorClose(yv_expect, yv.numpy(), max_err=5e-6)
@@ -193,7 +186,7 @@ def test_syncbn1d():
     var_backup = bn.running_var.numpy()
     bn.training = False
     xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
-    data.set_value(xv)
+    data = Tensor(xv)
     yv1 = bn(data)
     yv2 = bn(data)
     assertTensorClose(yv1.numpy(), yv2.numpy(), max_err=0)
@@ -210,7 +203,6 @@ def test_batchnorm2d():
     bn = BatchNorm2d(nr_chan, momentum=momentum)
     running_mean = np.zeros((1, nr_chan, 1, 1), dtype=np.float32)
     running_var = np.ones((1, nr_chan, 1, 1), dtype=np.float32)
-    data = tensor([])
     for i in range(3):
         xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
         xv_transposed = np.transpose(xv, [0, 2, 3, 1]).reshape(
@@ -226,8 +218,7 @@ def test_batchnorm2d():
         running_mean = running_mean * momentum + mean * (1 - momentum)
         running_var = running_var * momentum + var_unbiased * (1 - momentum)

-        data.set_value(xv)
-        yv = bn(data)
+        yv = bn(Tensor(xv))
         yv_expect = (xv - mean) / sd

         assertTensorClose(yv_expect, yv.numpy(), max_err=5e-6)
@@ -239,7 +230,7 @@ def test_batchnorm2d():
     var_backup = bn.running_var.numpy()
     bn.training = False
     xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
-    data.set_value(xv)
+    data = Tensor(xv)
     yv1 = bn(data)
     yv2 = bn(data)
     assertTensorClose(yv1.numpy(), yv2.numpy(), max_err=0)
@@ -263,7 +254,6 @@ def test_syncbn2d():
     bn = SyncBatchNorm(nr_chan, momentum=momentum)
     running_mean = np.zeros((1, nr_chan, 1, 1), dtype=np.float32)
     running_var = np.ones((1, nr_chan, 1, 1), dtype=np.float32)
-    data = tensor([])
     for i in range(3):
         xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
         xv_transposed = np.transpose(xv, [0, 2, 3, 1]).reshape(
@@ -279,8 +269,7 @@ def test_syncbn2d():
         running_mean = running_mean * momentum + mean * (1 - momentum)
         running_var = running_var * momentum + var_unbiased * (1 - momentum)

-        data.set_value(xv)
-        yv = bn(data)
+        yv = bn(Tensor(xv))
         yv_expect = (xv - mean) / sd

         assertTensorClose(yv_expect, yv.numpy(), max_err=5e-6)
@@ -292,7 +281,7 @@ def test_syncbn2d():
     var_backup = bn.running_var.numpy()
     bn.training = False
     xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
-    data.set_value(xv)
+    data = Tensor(xv)
     yv1 = bn(data)
     yv2 = bn(data)
     assertTensorClose(yv1.numpy(), yv2.numpy(), max_err=0)
@@ -306,7 +295,6 @@ def test_batchnorm_no_stats():
     nr_chan = 8
     data_shape = (3, nr_chan, 4)
     bn = BatchNorm1d(8, track_running_stats=False)
-    data = tensor([])
     for i in range(4):
         if i == 2:
             bn.training = False
@@ -320,8 +308,7 @@ def test_batchnorm_no_stats():
         ).reshape((1, nr_chan, 1))
         sd = np.sqrt(var + bn.eps)

-        data.set_value(xv)
-        yv = bn(data)
+        yv = bn(Tensor(xv))
         yv_expect = (xv - mean) / sd

         assertTensorClose(yv_expect, yv.numpy(), max_err=5e-6)
@@ -338,7 +325,6 @@ def test_syncbn_no_stats():
     nr_chan = 8
     data_shape = (3, nr_chan, 4)
     bn = SyncBatchNorm(8, track_running_stats=False)
-    data = tensor([])
     for i in range(4):
         if i == 2:
             bn.training = False
@@ -352,8 +338,7 @@ def test_syncbn_no_stats():
         ).reshape((1, nr_chan, 1))
         sd = np.sqrt(var + bn.eps)

-        data.set_value(xv)
-        yv = bn(data)
+        yv = bn(Tensor(xv))
         yv_expect = (xv - mean) / sd

         assertTensorClose(yv_expect, yv.numpy(), max_err=5e-6)
@@ -363,7 +348,6 @@ def test_batchnorm2d_no_stats():
     nr_chan = 8
     data_shape = (3, nr_chan, 16, 16)
     bn = BatchNorm2d(8, track_running_stats=False)
-    data = tensor([])
     for i in range(4):
         if i == 2:
             bn.training = False
@@ -376,8 +360,7 @@ def test_batchnorm2d_no_stats():
         var = np.var(xv_transposed, axis=0).reshape((1, nr_chan, 1, 1))
         sd = np.sqrt(var + bn.eps)

-        data.set_value(xv)
-        yv = bn(data)
+        yv = bn(Tensor(xv))
         yv_expect = (xv - mean) / sd

         assertTensorClose(yv_expect, yv.numpy(), max_err=5e-6)
@@ -394,7 +377,6 @@ def test_syncbn2d_no_stats():
     nr_chan = 8
     data_shape = (3, nr_chan, 16, 16)
     bn = SyncBatchNorm(8, track_running_stats=False)
-    data = tensor([])
     for i in range(4):
         if i == 2:
             bn.training = False
@@ -407,8 +389,7 @@ def test_syncbn2d_no_stats():
         var = np.var(xv_transposed, axis=0).reshape((1, nr_chan, 1, 1))
         sd = np.sqrt(var + bn.eps)

-        data.set_value(xv)
-        yv = bn(data)
+        yv = bn(Tensor(xv))
         yv_expect = (xv - mean) / sd

         assertTensorClose(yv_expect, yv.numpy(), max_err=5e-6)
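The batch-norm tests above drop the preallocated empty tensor plus set_value pattern; each iteration now wraps the fresh NumPy batch directly. A minimal sketch of the replacement pattern, with the module choice and shapes borrowed from these same tests for illustration:

    import numpy as np
    from megengine import Tensor
    from megengine.module import BatchNorm2d

    bn = BatchNorm2d(8)
    for _ in range(3):
        xv = np.random.normal(loc=2.3, size=(3, 8, 16, 16)).astype(np.float32)
        yv = bn(Tensor(xv))  # build a new Tensor per step instead of data.set_value(xv)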

+ 5
- 5
imperative/python/test/unit/module/test_external.py

@@ -12,7 +12,7 @@ import numpy as np
 import pytest

 import megengine as mge
-from megengine import tensor
+from megengine import Tensor
 from megengine.module import Module


@@ -35,12 +35,12 @@ def test_cambricon_module():
     with open(model, "rb") as f:
         data = f.read()
     m = MyModule(data)
-    inputs = []
-    inputs.append(tensor(data=[], dtype=np.float16, device="cambricon0"))
-    inputs[0].set_value(np.random.normal(size=(1, 64, 32, 32)).astype(np.float16))
+    inp = Tensor(
+        np.random.normal((1, 64, 32, 32)).astype(np.float16), device="cambricon0"
+    )

     def inference(inps):
         pred = m(inps)
         return pred

-    pred = inference(inputs)
+    pred = inference([inp])

+ 5
- 8
imperative/python/test/unit/module/test_module.py

@@ -16,7 +16,7 @@ import pytest

 import megengine as mge
 import megengine.functional as F
-from megengine import Buffer, Parameter, Tensor, tensor
+from megengine import Parameter, Tensor, tensor
 from megengine.module import (
     BatchNorm1d,
     BatchNorm2d,
@@ -196,7 +196,7 @@ class MyModule(Module):
         self.i = self.InnerModule()
         self.bn = BatchNorm2d(4)
         self.param = Parameter(np.ones(1, dtype=np.float32))
-        self.buff = Buffer(np.ones(1, dtype=np.float32))
+        self.buff = Tensor(np.ones(1, dtype=np.float32))

     def forward(self, x):
         x = self.i(x)
@@ -464,8 +464,7 @@ def test_sequential_named_children():

 def test_state_dict():
     data_shape = (2, 28)
-    data = tensor([])
-    data.set_value(np.random.random(data_shape))
+    data = tensor(np.random.random(data_shape))
     mlp = MLP()
     pred0 = mlp(data)

@@ -542,8 +541,7 @@ def test_shared_param():

 def test_pickle_module():
     data_shape = (2, 28)
-    data = tensor([])
-    data.set_value(np.random.random(data_shape))
+    data = tensor(np.random.random(data_shape))
     mlp = MLP()
     # pickle before forward
     with BytesIO() as fout:
@@ -568,8 +566,7 @@ def test_pickle_module():
 @pytest.mark.skip(reason="under development")
 def test_dump_model():
     data_shape = (2, 28)
-    data = tensor([])
-    data.set_value(np.random.random(data_shape))
+    data = Tensor(np.random.random(data_shape))
     mlp = MLP()
     pred = mlp(data)
     f = tempfile.NamedTemporaryFile(delete=False)


+ 3
- 3
imperative/python/test/unit/module/test_tensor.py

@@ -13,7 +13,7 @@ import pytest

 import megengine as mge
 import megengine.functional as F
-from megengine import Buffer, Parameter
+from megengine import Parameter, Tensor
 from megengine.module import Conv2d
 from megengine.test import assertTensorClose

@@ -33,7 +33,7 @@ def test_set_value():

 @pytest.mark.skip(reason="fill unsupported")
 def test_fill():
-    a = Buffer(np.zeros((2, 3), dtype=np.float32))
+    a = Tensor(np.zeros((2, 3), dtype=np.float32))
     a.fill(3)
     assertTensorClose(a.numpy(), np.full((2, 3), 3, dtype=np.float32))
     a.fill(124.568)
@@ -80,7 +80,7 @@ def test_fill():
 # def test_shape_warning():
 #     with Graph() as cg:
 #         cg.set_option("eager_evaluation", False)
-#         b = Buffer(np.ones((2, 3)).astype(np.float32))
+#         b = Tensor(np.ones((2, 3)).astype(np.float32))
 #         with pytest.warns(None) as record:
 #             print(b.shape)
 #             if len(record) != 0:


+ 14
- 14
imperative/python/test/unit/test_function.py

@@ -42,11 +42,11 @@ def test_single_input():
            return x

     net = Simple(av)
-    gm = ad.GradManager().register(net.parameters())
+    gm = ad.GradManager().attach(net.parameters())
     opt = optimizer.SGD(net.parameters(), lr=1.0)

     opt.clear_grad()
-    with gm.record():
+    with gm:
         loss = net()
         gm.backward(loss.sum())
     opt.step()
@@ -81,11 +81,11 @@ def test_multi_input():
            return x

     net = Simple(av, bv)
-    gm = ad.GradManager().register(net.parameters())
+    gm = ad.GradManager().attach(net.parameters())
     opt = optimizer.SGD(net.parameters(), lr=1.0)

     opt.clear_grad()
-    with gm.record():
+    with gm:
         loss = net()
         gm.backward(loss.sum())
     opt.step()
@@ -121,11 +121,11 @@ def test_multi_output():
            return x + y

     net = Simple(av, bv)
-    gm = ad.GradManager().register(net.parameters())
+    gm = ad.GradManager().attach(net.parameters())
     opt = optimizer.SGD(net.parameters(), lr=1.0)

     opt.clear_grad()
-    with gm.record():
+    with gm:
         loss = net()
         gm.backward(loss.sum())
     opt.step()
@@ -163,9 +163,9 @@ def test_skip_invalid_grad():

     net = Simple(av, bv)
     optim = optimizer.SGD(net.parameters(), lr=1.0)
-    gm = ad.GradManager().register(net.parameters())
+    gm = ad.GradManager().attach(net.parameters())
     optim.clear_grad()
-    with gm.record():
+    with gm:
         loss = net().sum()
         gm.backward(loss)
     optim.step()
@@ -198,10 +198,10 @@ def test_ste():
     av = np.random.random(data_shape).astype(np.float32)
     net = Simple(av)
     optim = optimizer.SGD(net.parameters(), lr=1.0)
-    gm = ad.GradManager().register(net.parameters())
+    gm = ad.GradManager().attach(net.parameters())
     optim.clear_grad()

-    with gm.record():
+    with gm:
         loss = net()
         gm.backward(loss.sum())
     optim.step()
@@ -256,9 +256,9 @@ def test_none_in_out_grad():
     b = tensor(np.array([2.0], dtype=np.float32))
     net = Simple(a, b)
     optim = optimizer.SGD(net.parameters(), lr=1.0)
-    gm = ad.GradManager().register(net.parameters())
+    gm = ad.GradManager().attach(net.parameters())
     optim.clear_grad()
-    with gm.record():
+    with gm:
         loss, _ = net()
         gm.backward(loss)
     optim.step()
@@ -293,10 +293,10 @@ def test_zero_grad():
     a = tensor(np.array([1.0], dtype=np.float32))
     net = Simple(a)
     optim = optimizer.SGD(net.parameters(), lr=1.0)
-    gm = ad.GradManager().register(net.parameters())
+    gm = ad.GradManager().attach(net.parameters())
     optim.clear_grad()

-    with gm.record():
+    with gm:
         loss = net()
         gm.backward(loss.sum())
     optim.step()


+ 1
- 1
imperative/python/test/unit/test_indexing_op.py

@@ -38,7 +38,7 @@ def cvt_to_shape_desc(val, inpvar, config=None):
     if isinstance(val, RawTensor):
         return as_tensor(val, device)

-    if not isinstance(val, collections.Iterable):
+    if not isinstance(val, collections.abc.Iterable):
         val = [val]

     components = []


+ 8
- 9
imperative/python/test/unit/test_serialization.py

@@ -12,19 +12,18 @@ from tempfile import TemporaryFile
 import numpy as np

 import megengine as mge
-from megengine import Buffer, Parameter, tensor
+from megengine import Parameter, Tensor


 def test_tensor_serialization():
     def tensor_eq(a, b):
         assert a.dtype == b.dtype
         assert a.device == b.device
-        assert a.requires_grad == b.requires_grad
         np.testing.assert_equal(a.numpy(), b.numpy())

     with TemporaryFile() as f:
         data = np.random.randint(low=0, high=7, size=[233])
-        a = tensor(data, device="xpux", dtype=np.int32)
+        a = Tensor(data, device="xpux", dtype=np.int32)
         pickle.dump(a, f)
         f.seek(0)
         b = pickle.load(f)
@@ -39,19 +38,19 @@ def test_tensor_serialization():
         np.testing.assert_equal(a.numpy(), b.numpy())

     with TemporaryFile() as f:
-        a = Buffer(np.random.random(size=(2, 233)).astype(np.float32))
+        a = Tensor(np.random.random(size=(2, 233)).astype(np.float32))
         pickle.dump(a, f)
         f.seek(0)
         b = pickle.load(f)
-        assert isinstance(b, Buffer)
+        assert type(b) is Tensor
         np.testing.assert_equal(a.numpy(), b.numpy())

     with TemporaryFile() as f:
-        a = Buffer(np.random.random(size=(2, 233)).astype(np.float32))
+        a = Tensor(np.random.random(size=(2, 233)).astype(np.float32))
         mge.save(a, f)
         f.seek(0)
         b = mge.load(f, map_location="cpux")
-        assert isinstance(b, Buffer)
+        assert type(b) is Tensor
         assert "cpu" in str(b.device)
         np.testing.assert_equal(a.numpy(), b.numpy())
@@ -59,12 +58,12 @@ def test_tensor_serialization():
         if mge.is_cuda_available():
             device_org = mge.get_default_device()
             mge.set_default_device("gpu0")
-            a = Buffer(np.random.random(size=(2, 233)).astype(np.float32))
+            a = Tensor(np.random.random(size=(2, 233)).astype(np.float32))
             mge.save(a, f)
             f.seek(0)
             mge.set_default_device("cpux")
             b = mge.load(f, map_location={"gpu0": "cpu0"})
-            assert isinstance(b, Buffer)
+            assert type(b) is Tensor
             assert "cpu0" in str(b.device)
             np.testing.assert_equal(a.numpy(), b.numpy())
             mge.set_default_device(device_org)

+ 1
- 1
sdk/xor-deploy/xornet.py

@@ -66,7 +66,7 @@ def main():
     mge.set_default_device("cpux")

     net = XORNet()
-    opt = optim.SGD(net.parameters(requires_grad=True), lr=0.01, momentum=0.9)
+    opt = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
     batch_size = 64
     train_dataset = minibatch_generator(batch_size)
     val_dataset = minibatch_generator(batch_size)

