@@ -11,38 +11,37 @@ from ..core.tensor import amp | |||
class autocast: | |||
r""" | |||
A class to control autocast mode for amp as a context manager or a decorator. | |||
r"""A class to control autocast mode for amp as a context manager or a decorator. | |||
:param enabled: Whether autocast mode is enabled. | |||
:param low_prec_dtype: Set amp autocast mode's lower precision dtype. It will change | |||
the target dtype in tensor casting for better speed and memory. Default: float16. | |||
:param high_prec_dtype: Set amp autocast mode's higher precision dtype. It will | |||
change the target dtype in tensor casting for better precision. Default: float32. | |||
Args: | |||
enabled: Whether autocast mode is enabled. | |||
low_prec_dtype: Set amp autocast mode's lower precision dtype. It will change | |||
the target dtype in tensor casting for better speed and memory. Default: float16. | |||
high_prec_dtype: Set amp autocast mode's higher precision dtype. It will | |||
change the target dtype in tensor casting for better precision. Default: float32. | |||
Examples: | |||
.. code-block:: | |||
.. code-block:: | |||
# used as decorator | |||
@autocast() | |||
def train_step(image, label): | |||
with gm: | |||
logits = model(image) | |||
loss = F.nn.cross_entropy(logits, label) | |||
gm.backward(loss) | |||
opt.step().clear_grad() | |||
return loss | |||
# used as decorator | |||
@autocast() | |||
def train_step(image, label): | |||
with gm: | |||
logits = model(image) | |||
loss = F.nn.cross_entropy(logits, label) | |||
gm.backward(loss) | |||
opt.step().clear_grad() | |||
return loss | |||
# used as context manager | |||
def train_step(image, label): | |||
with autocast(): | |||
with gm: | |||
logits = model(image) | |||
loss = F.nn.cross_entropy(logits, label) | |||
gm.backward(loss) | |||
opt.step().clear_grad() | |||
return loss | |||
# used as context manager | |||
def train_step(image, label): | |||
with autocast(): | |||
with gm: | |||
logits = model(image) | |||
loss = F.nn.cross_entropy(logits, label) | |||
gm.backward(loss) | |||
opt.step().clear_grad() | |||
return loss | |||
""" | |||
def __init__( | |||
@@ -16,50 +16,51 @@ from ..tensor import Tensor | |||
class GradScaler: | |||
r""" | |||
A helper class that performs grad scaling to prevent data overflow in
r"""A helper class that performs grad scaling to prevent data overflow in
:class:`~.autocast` mode. | |||
:param init_scale: Initial scale factor. | |||
:param growth_factor: Factor that the scale is multiplied by in actual | |||
:meth:`update` stage. If growth_factor is 0, scale_factor will not update. | |||
:param backoff_factor: Factor that the scale is multiplied by when encountering | |||
overflow grad. | |||
:param growth_interval: The interval between two scale update stages. | |||
Example:: | |||
gm = GradManager() | |||
opt = ... | |||
scaler = GradScaler() | |||
gm.attach(model.parameters()) | |||
@autocast() | |||
def train_step(image, label): | |||
with gm: | |||
logits = model(image) | |||
loss = F.nn.cross_entropy(logits, label) | |||
scaler.backward(gm, loss) | |||
opt.step().clear_grad() | |||
return loss | |||
If need more flexible usage, could split ``scaler.backward`` into three lines: | |||
.. code-block:: | |||
@autocast() | |||
def train_step(image, label): | |||
with gm: | |||
logits = model(image) | |||
loss = F.nn.cross_entropy(logits, label) | |||
gm.backward(loss, dy=megengine.tensor(scaler.scale_factor)) | |||
scaler.unscale(gm.attached_tensors()) | |||
scaler.update() | |||
opt.step().clear_grad() | |||
return loss | |||
This is useful when need to accumulate grads for multi batches. | |||
Args: | |||
init_scale: Initial scale factor. | |||
growth_factor: Factor by which the scale is multiplied during the actual
:meth:`update` stage. If ``growth_factor`` is 0, ``scale_factor`` will not be updated.
backoff_factor: Factor by which the scale is multiplied when an overflow
gradient is encountered.
growth_interval: The interval between two scale update stages. | |||
Example: | |||
.. code-block:: | |||
gm = GradManager() | |||
opt = ... | |||
scaler = GradScaler() | |||
gm.attach(model.parameters()) | |||
@autocast() | |||
def train_step(image, label): | |||
with gm: | |||
logits = model(image) | |||
loss = F.nn.cross_entropy(logits, label) | |||
scaler.backward(gm, loss) | |||
opt.step().clear_grad() | |||
return loss | |||
If more flexible usage is needed, ``scaler.backward`` can be split into three lines:
.. code-block:: | |||
@autocast() | |||
def train_step(image, label): | |||
with gm: | |||
logits = model(image) | |||
loss = F.nn.cross_entropy(logits, label) | |||
gm.backward(loss, dy=megengine.tensor(scaler.scale_factor)) | |||
scaler.unscale(gm.attached_tensors()) | |||
scaler.update() | |||
opt.step().clear_grad() | |||
return loss | |||
This is useful when gradients need to be accumulated over multiple batches.
""" | |||
def __init__( | |||
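For intuition about ``growth_factor``, ``backoff_factor`` and ``growth_interval``, here is a self-contained sketch of the kind of loss-scale update rule described above. It is illustrative only and not MegEngine's actual implementation; all names are local to the example.

.. code-block:: python

    import math

    class ToyScaler:
        """Illustrative loss-scale bookkeeping, mirroring the Args above."""

        def __init__(self, init_scale=16.0, growth_factor=2.0,
                     backoff_factor=0.5, growth_interval=2000):
            self.scale_factor = float(init_scale)
            self.growth_factor = growth_factor
            self.backoff_factor = backoff_factor
            self.growth_interval = growth_interval
            self._found_inf = False
            self._growth_step = 0

        def check_grads(self, grads):
            # Flag an overflow if any (unscaled) grad value is non-finite.
            self._found_inf = any(not math.isfinite(g) for g in grads)

        def update(self):
            if self.growth_interval == 0:
                return
            if self._found_inf:
                # Overflow: back off immediately and restart the growth streak.
                self.scale_factor *= self.backoff_factor
                self._growth_step = 0
            else:
                self._growth_step += 1
                if self._growth_step >= self.growth_interval and self.growth_factor != 0:
                    # A long overflow-free streak: grow the scale again.
                    self.scale_factor *= self.growth_factor
                    self._growth_step = 0
            self._found_inf = False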
@@ -86,18 +87,18 @@ class GradScaler: | |||
unscale_grad: bool = True, | |||
update_scale: bool = "if_unscale_grad" | |||
): | |||
r""" | |||
A wrapper of GradManager's :meth:`~.GradManager.backward`, used to scale | |||
r"""A wrapper of GradManager's :meth:`~.GradManager.backward`, used to scale | |||
``y``'s grad and unscale parameters' grads. | |||
:param gm: The to be wrapped GradManager. | |||
:param y: Same as GradManager backward's ``y``. | |||
:param dy: Same as GradManager backward's ``dy``. Will be multiplied | |||
by ``scale_factor``. | |||
:param unscale_grad: Whether do :meth:`unscale` at the same time. Could be | |||
``False`` if needs to accumulate grads. | |||
:param update_scale: Same as :meth:`unscale`'s ``update``. Will be ignored | |||
if ``unscale_grad`` is ``False``. | |||
Args: | |||
gm: The GradManager to be wrapped.
y: Same as GradManager backward's ``y``. | |||
dy: Same as GradManager backward's ``dy``. Will be multiplied | |||
by ``scale_factor``. | |||
unscale_grad: Whether to do :meth:`unscale` at the same time. Could be
``False`` if grads need to be accumulated.
update_scale: Same as :meth:`unscale`'s ``update``. Will be ignored | |||
if ``unscale_grad`` is ``False``. | |||
""" | |||
# These checks should be consistent with GradManager's | |||
if y is None: | |||
@@ -121,11 +122,11 @@ class GradScaler: | |||
self.update() | |||
def unscale(self, grad_tensors: Iterable[Tensor]): | |||
r""" | |||
Unscale all ``grad_tensors``'s grad. | |||
r"""Unscale all ``grad_tensors``'s grad. | |||
:param grad_tensors: Tensors needed to unscale grads. Should be all tensors | |||
that are affected by ``target`` tensor in GradManager's backward. | |||
Args: | |||
grad_tensors: Tensors whose grads need to be unscaled. Should be all tensors
that are affected by the ``target`` tensor in GradManager's backward.
""" | |||
# use float64 for better precision | |||
inv_scale = Tensor(1.0 / self.scale_factor) | |||
@@ -151,7 +152,8 @@ class GradScaler: | |||
def update(self, new_scale: float = None): | |||
r"""Update the scale factor according to whether encountered overflow grad. | |||
If ``new_scale`` is provided, internal update mechanism will be ignored.""" | |||
If ``new_scale`` is provided, internal update mechanism will be ignored. | |||
""" | |||
if self.growth_interval == 0: | |||
return | |||
@@ -32,8 +32,7 @@ _global_priority = 0 | |||
class GradManager: | |||
r""" | |||
GradManager computes gradients or more generally, vector-Jacobian product, by reverse mode | |||
r"""GradManager computes gradients or more generally, vector-Jacobian product, by reverse mode | |||
automatic differentiation (a.k.a. back propagation). | |||
Reverse mode autodiff normally reuses many intermediate tensors for best computation efficiency. | |||
@@ -120,7 +119,6 @@ class GradManager: | |||
gm = GradManager() | |||
gm.attach(model.parameters(), callback=dist.make_allreduce_cb("MEAN")) | |||
""" | |||
def __init__(self): | |||
@@ -136,8 +134,7 @@ class GradManager: | |||
return [spec.tensor() for spec in self._attach_specs.values()] | |||
def attach(self, tensors: Iterable[Tensor], callbacks=None): | |||
r""" | |||
Instruct GradManager to track operations on tensors, so that gradients with respect | |||
r"""Instruct GradManager to track operations on tensors, so that gradients with respect | |||
to those tensors could be evaluated later. | |||
:meth:`attach` also accepts a list of callbacks, which will be called with the tensor and | |||
@@ -188,8 +185,9 @@ class GradManager: | |||
multiple uses of a GradManager, which is unrelated to whether resources are timely
released within a single use.
:param tensors: tensor or list of tensors to track | |||
:param callbacks: callback or list of callbacks | |||
Args: | |||
tensors: tensor or list of tensors to track | |||
callbacks: callback or list of callbacks | |||
""" | |||
if callbacks is None: | |||
callbacks = [] | |||
@@ -234,8 +232,7 @@ class GradManager: | |||
y: Union[Tensor, List[Tensor]] = None, | |||
dy: Union[Tensor, List[Tensor]] = None, | |||
): | |||
r""" | |||
Compute gradients (or vector-Jacobian product) for all attached tensors, accumulate to | |||
r"""Compute gradients (or vector-Jacobian product) for all attached tensors, accumulate to | |||
corresponding .grad attribute, and release resources along the way. | |||
:meth:`backward` computes the vector-Jacobian product :math:`dx_j = \sum_{i} dy_i J_{ij}` | |||
@@ -257,8 +254,9 @@ class GradManager: | |||
process of this call. When the call successfully finishes, the GradManager will be put back | |||
to an inactive state. | |||
:param y: tensor or list of tensors | |||
:param dy: tensor or list of tensors. Defaults to 1 if y is scalar | |||
Args: | |||
y: tensor or list of tensors | |||
dy: tensor or list of tensors. Defaults to 1 if y is scalar | |||
""" | |||
push_scope("backward") | |||
set_option("record_computing_path", 0) | |||
@@ -310,8 +308,7 @@ class GradManager: | |||
pop_scope("backward") | |||
def record(self): | |||
r""" | |||
Start recording operations | |||
r"""Start recording operations | |||
After this call, you will be able to call :meth:`backward`. | |||
""" | |||
@@ -342,8 +339,7 @@ class GradManager: | |||
self._grad.wrt(tensor, callback=callback) | |||
def release(self): | |||
r""" | |||
Stop recording operations and release resources kept for gradient computation | |||
r"""Stop recording operations and release resources kept for gradient computation | |||
After this call, you will not be able to call :meth:`backward`. | |||
""" | |||
@@ -15,16 +15,12 @@ if os.environ.get("MEGENGINE_USE_SYMBOLIC_SHAPE"): | |||
def use_symbolic_shape() -> bool: | |||
""" | |||
Returns whether tensor.shape returns a tensor instead of a tuple | |||
""" | |||
r"""Returns whether tensor.shape returns a tensor instead of a tuple""" | |||
return _use_symbolic_shape | |||
def set_symbolic_shape(option: bool): | |||
""" Sets whether tensor.shape returns a tensor instead of a tuple | |||
""" | |||
r"""Sets whether tensor.shape returns a tensor instead of a tuple""" | |||
global _use_symbolic_shape | |||
_org = _use_symbolic_shape | |||
_use_symbolic_shape = option | |||
@@ -88,67 +88,56 @@ class Grad: | |||
class Function(ops.PyOpBase): | |||
""" | |||
Defines a block of operations with customizable differentiation. | |||
r"""Defines a block of operations with customizable differentiation. | |||
The computation should be defined in ``forward`` method, with gradient | |||
computation defined in ``backward`` method. | |||
Each instance of ``Function`` should be used only once during the forward pass.
Examples: | |||
.. code-block:: | |||
class Sigmoid(Function): | |||
def forward(self, x): | |||
y = 1 / (1 + F.exp(-x)) | |||
self.y = y | |||
return y | |||
def backward(self, dy): | |||
y = self.y | |||
return dy * y * (1-y) | |||
.. code-block:: | |||
class Sigmoid(Function): | |||
def forward(self, x): | |||
y = 1 / (1 + F.exp(-x)) | |||
self.y = y | |||
return y | |||
def backward(self, dy): | |||
y = self.y
return dy * y * (1-y)
""" | |||
def forward(self, *args, **kwargs): | |||
""" | |||
Applies operations to ``inputs`` and returns results. It must be overriden by all subclasses. | |||
:param input: input tensors. | |||
:return: a tuple of Tensor or a single Tensor. | |||
.. note:: | |||
This method should return a tuple of Tensor or a single Tensor representing the output | |||
of the function. | |||
.. note:: | |||
positional arguments should all be Tensor | |||
r"""Applies operations to ``inputs`` and returns results. It must be overriden by all subclasses. | |||
Args: | |||
input: input tensors. | |||
Returns: | |||
a tuple of Tensor or a single Tensor. | |||
Note: | |||
* This method should return a tuple of Tensor or a single Tensor representing the output | |||
of the function. | |||
* positional arguments should all be Tensor | |||
""" | |||
raise NotImplementedError | |||
def backward(self, *output_grads): | |||
""" | |||
Compute the gradient of the forward function. It must be overriden by all subclasses. | |||
:param output_grads: gradients of outputs that are returned by :meth:`forward`. | |||
.. note:: | |||
In case when some tensors of outputs are not related to loss function, the corresponding | |||
values in ``output_grads`` would be ``None``. | |||
.. note:: | |||
This method should return a tuple which containing the gradients of all inputs, in the same order | |||
as the ``inputs`` argument of :meth:`forward` . A ``Tensor`` could be returned | |||
instead if there is only one input. If users want to stop the propagation of some gradients, | |||
the corresponding returned values should be set ``None`` . | |||
r"""Compute the gradient of the forward function. It must be overriden by all subclasses. | |||
Args: | |||
output_grads: gradients of outputs that are returned by :meth:`forward`. | |||
Note: | |||
* In case some output tensors are not related to the loss function, the corresponding
values in ``output_grads`` would be ``None``.
* This method should return a tuple containing the gradients of all inputs, in the same order
as the ``inputs`` argument of :meth:`forward`. A ``Tensor`` could be returned
instead if there is only one input. If users want to stop the propagation of some gradients,
the corresponding returned values should be set to ``None``.
""" | |||
raise NotImplementedError | |||
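As a usage sketch for the ``Sigmoid`` example above (the import path for ``Function`` is assumed to be ``megengine.autodiff``; adjust it to match your version):

.. code-block:: python

    import megengine as mge
    import megengine.functional as F
    from megengine.autodiff import Function, GradManager

    class Sigmoid(Function):
        def forward(self, x):
            y = 1 / (1 + F.exp(-x))
            self.y = y
            return y

        def backward(self, dy):
            y = self.y
            return dy * y * (1 - y)

    x = mge.tensor([0.0, 1.0, -1.0])
    gm = GradManager()
    gm.attach([x])
    with gm:
        y = Sigmoid()(x)          # each Function instance is used once per forward
        gm.backward(y.sum())
    print(x.grad.numpy())         # equals y * (1 - y) evaluated at x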
@@ -12,16 +12,14 @@ _low_prec_dtype = "float16" | |||
@property | |||
def enabled(mod): | |||
r""" | |||
Get or set amp autocast mode enabled or not. | |||
r"""Get or set amp autocast mode enabled or not. | |||
Examples: | |||
.. code-block:: | |||
.. code-block:: | |||
import megengine as mge | |||
mge.amp.enabled = True | |||
import megengine as mge | |||
mge.amp.enabled = True | |||
""" | |||
return _enabled | |||
@@ -34,17 +32,15 @@ def enabled(mod, enabled: bool): | |||
@property | |||
def high_prec_dtype(mod): | |||
r""" | |||
Get or set amp autocast mode's higher precision dtype. It will change the | |||
r"""Get or set amp autocast mode's higher precision dtype. It will change the | |||
target dtype in tensor casting for better precision. Default: float32. | |||
Examples: | |||
.. code-block:: | |||
.. code-block:: | |||
import megengine as mge | |||
mge.amp.high_prec_dtype = "float32" | |||
import megengine as mge | |||
mge.amp.high_prec_dtype = "float32" | |||
""" | |||
return _high_prec_dtype | |||
@@ -57,17 +53,15 @@ def high_prec_dtype(mod, dtype: str): | |||
@property | |||
def low_prec_dtype(mod): | |||
r""" | |||
Get or set amp autocast mode's lower precision dtype. It will change the | |||
r"""Get or set amp autocast mode's lower precision dtype. It will change the | |||
target dtype in tensor casting for better speed and memory. Default: float16. | |||
Examples: | |||
.. code-block:: | |||
.. code-block:: | |||
import megengine as mge | |||
mge.amp.low_prec_dtype = "float16" | |||
import megengine as mge | |||
mge.amp.low_prec_dtype = "float16" | |||
""" | |||
return _low_prec_dtype | |||
@@ -389,9 +389,7 @@ class ArrayMethodMixin(abc.ABC): | |||
@property | |||
def ndim(self): | |||
r""" | |||
Returns the number of dimensions of self :class:`~.Tensor`. | |||
""" | |||
r"""Returns the number of dimensions of self :class:`~.Tensor`.""" | |||
shape = self._tuple_shape | |||
if shape is None: | |||
raise ValueError("unkown ndim") | |||
@@ -399,8 +397,7 @@ class ArrayMethodMixin(abc.ABC): | |||
@property | |||
def size(self): | |||
r""" | |||
Returns the size of the self :class:`~.Tensor`. | |||
r"""Returns the size of the self :class:`~.Tensor`. | |||
The returned value is a subclass of :class:`tuple`. | |||
""" | |||
shape = self.shape | |||
@@ -410,14 +407,11 @@ class ArrayMethodMixin(abc.ABC): | |||
@property | |||
def T(self): | |||
r""" | |||
alias of :attr:`~.Tensor.transpose`. | |||
""" | |||
r"""alias of :attr:`~.Tensor.transpose`.""" | |||
return self.transpose() | |||
def item(self, *args): | |||
r""" | |||
Returns the value of this :class:`~.Tensor` as a standard Python :class:`numbers.Number`. | |||
r"""Returns the value of this :class:`~.Tensor` as a standard Python :class:`numbers.Number`. | |||
This only works for tensors with one element. For other cases, see :meth:`~.tolist`. | |||
""" | |||
if not args: | |||
@@ -427,8 +421,7 @@ class ArrayMethodMixin(abc.ABC): | |||
return self[args].item() | |||
def tolist(self): | |||
r""" | |||
Returns the tensor as a (nested) list. | |||
r"""Returns the tensor as a (nested) list. | |||
For scalars, a standard Python number is returned, just like with :meth:`~.item`. | |||
Tensors are automatically moved to the CPU first if necessary. | |||
@@ -437,16 +430,13 @@ class ArrayMethodMixin(abc.ABC): | |||
return self.numpy().tolist() | |||
def astype(self, dtype): | |||
r""" | |||
Returns a :class:`Tensor` with the same data and number of elements | |||
r"""Returns a :class:`Tensor` with the same data and number of elements | |||
with the specified :attr:`~.Tensor.dtype`. | |||
""" | |||
return astype(self, dtype) | |||
def reshape(self, *args): | |||
r""" | |||
See :func:`~.reshape`. | |||
""" | |||
r"""See :func:`~.reshape`.""" | |||
return _reshape(self, _expand_args(args)) | |||
# FIXME: remove this method | |||
@@ -454,9 +444,7 @@ class ArrayMethodMixin(abc.ABC): | |||
return _broadcast(self, _expand_args(args)) | |||
def transpose(self, *args): | |||
r""" | |||
See :func:`~.transpose`. | |||
""" | |||
r"""See :func:`~.transpose`.""" | |||
if self.ndim == 0: | |||
assert ( | |||
len(args) == 0 | |||
@@ -469,172 +457,170 @@ class ArrayMethodMixin(abc.ABC): | |||
return _transpose(self, _expand_args(args)) | |||
def flatten(self): | |||
r""" | |||
See :func:`~.flatten`. | |||
""" | |||
r"""See :func:`~.flatten`.""" | |||
return self.reshape(-1) | |||
def sum(self, axis=None, keepdims: bool = False): | |||
r""" | |||
Returns the sum of each row of the input tensor in the given dimension ``axis``. | |||
r"""Returns the sum of each row of the input tensor in the given dimension ``axis``. | |||
If ``axis`` is a list of axes, reduce over all of them.
If ``keepdims`` is ``True``, the shape of output tensor is the same as the input tensor, | |||
except in the dimension(s) ``axis`` where it is of size 1. | |||
Otherwise, ``axis`` is squeezed (see :func:`~.squeeze`). | |||
:param axis: the dimension or dimensions to reduce. | |||
:param keepdims: whether the output tensor has ndim retained or not. | |||
:return: output tensor. | |||
Args: | |||
axis: the dimension or dimensions to reduce. | |||
keepdims: whether the output tensor has ndim retained or not. | |||
Examples: | |||
.. testcode:: | |||
Returns: | |||
output tensor. | |||
from megengine import tensor | |||
a = tensor([False, True, True, False]) | |||
b = tensor([1.0, 2.0, 3.0, 4.0]) | |||
print(a.sum().numpy()) | |||
print(b.sum().numpy()) | |||
Examples: | |||
.. testcode:: | |||
Outputs: | |||
from megengine import tensor | |||
a = tensor([False, True, True, False]) | |||
b = tensor([1.0, 2.0, 3.0, 4.0]) | |||
print(a.sum().numpy()) | |||
print(b.sum().numpy()) | |||
.. testoutput:: | |||
Outputs: | |||
2 | |||
10.0 | |||
.. testoutput:: | |||
2 | |||
10.0 | |||
""" | |||
return _reduce("sum")(self, axis, keepdims) | |||
def prod(self, axis=None, keepdims: bool = False): | |||
r""" | |||
Returns the product of each row of the input tensor in the given dimension ``axis``. | |||
r"""Returns the product of each row of the input tensor in the given dimension ``axis``. | |||
If ``axis`` is a list of axes, reduce over all of them.
If ``keepdims`` is ``True``, the shape of output tensor is the same as the input tensor, | |||
except in the dimension(s) ``axis`` where it is of size 1. | |||
Otherwise, ``axis`` is squeezed (see :func:`~.squeeze`). | |||
:param axis: the dimension or dimensions to reduce. | |||
:param keepdims: whether the output tensor has ndim retained or not. | |||
:return: output tensor. | |||
Examples: | |||
Args: | |||
axis: the dimension or dimensions to reduce. | |||
keepdims: whether the output tensor has ndim retained or not. | |||
.. testcode:: | |||
Returns: | |||
output tensor. | |||
from megengine import tensor | |||
a = tensor([False, True, True, False]) | |||
b = tensor([1.0, 2.0, 3.0, 4.0]) | |||
print(a.prod().numpy()) | |||
print(b.prod().numpy()) | |||
Examples: | |||
.. testcode:: | |||
Outputs: | |||
from megengine import tensor | |||
a = tensor([False, True, True, False]) | |||
b = tensor([1.0, 2.0, 3.0, 4.0]) | |||
print(a.prod().numpy()) | |||
print(b.prod().numpy()) | |||
.. testoutput:: | |||
Outputs: | |||
0 | |||
24.0 | |||
.. testoutput:: | |||
0 | |||
24.0 | |||
""" | |||
return _reduce("product")(self, axis, keepdims) | |||
def min(self, axis=None, keepdims: bool = False): | |||
r""" | |||
Returns the min value of each row of the input tensor in the given dimension ``axis``. | |||
r"""Returns the min value of each row of the input tensor in the given dimension ``axis``. | |||
If ``axis`` is a list of axes, reduce over all of them.
If ``keepdims`` is ``True``, the shape of output tensor is the same as the input tensor, | |||
except in the dimension(s) ``axis`` where it is of size 1. | |||
Otherwise, ``axis`` is squeezed (see :func:`~.squeeze`). | |||
:param axis: the dimension or dimensions to reduce. | |||
:param keepdims: whether the output tensor has ndim retained or not. | |||
:return: output tensor. | |||
Examples: | |||
Args: | |||
axis: the dimension or dimensions to reduce. | |||
keepdims: whether the output tensor has ndim retained or not. | |||
.. testcode:: | |||
Returns: | |||
output tensor. | |||
from megengine import tensor | |||
a = tensor([False, True, True, False]) | |||
b = tensor([1.0, 2.0, 3.0, 4.0]) | |||
print(a.min().numpy()) | |||
print(b.min().numpy()) | |||
Examples: | |||
.. testcode:: | |||
Outputs: | |||
from megengine import tensor | |||
a = tensor([False, True, True, False]) | |||
b = tensor([1.0, 2.0, 3.0, 4.0]) | |||
print(a.min().numpy()) | |||
print(b.min().numpy()) | |||
.. testoutput:: | |||
Outputs: | |||
False | |||
1.0 | |||
.. testoutput:: | |||
False | |||
1.0 | |||
""" | |||
return _reduce("min")(self, axis, keepdims) | |||
def max(self, axis=None, keepdims: bool = False): | |||
r""" | |||
Returns the max value of each row of the input tensor in the given dimension ``axis``. | |||
r"""Returns the max value of each row of the input tensor in the given dimension ``axis``. | |||
If ``axis`` is a list of axes, reduce over all of them.
If ``keepdims`` is ``True``, the shape of output tensor is the same as the input tensor, | |||
except in the dimension(s) ``axis`` where it is of size 1. | |||
Otherwise, ``axis`` is squeezed (see :func:`~.squeeze`). | |||
:param axis: the dimension or dimensions to reduce. | |||
:param keepdims: whether the output tensor has ndim retained or not. | |||
:return: output tensor. | |||
Examples: | |||
Args: | |||
axis: the dimension or dimensions to reduce. | |||
keepdims: whether the output tensor has ndim retained or not. | |||
.. testcode:: | |||
Returns: | |||
output tensor. | |||
from megengine import tensor | |||
a = tensor([False, True, True, False]) | |||
b = tensor([1.0, 2.0, 3.0, 4.0]) | |||
print(a.max().numpy()) | |||
print(b.max().numpy()) | |||
Examples: | |||
.. testcode:: | |||
Outputs: | |||
from megengine import tensor | |||
a = tensor([False, True, True, False]) | |||
b = tensor([1.0, 2.0, 3.0, 4.0]) | |||
print(a.max().numpy()) | |||
print(b.max().numpy()) | |||
.. testoutput:: | |||
Outputs: | |||
True | |||
4.0 | |||
.. testoutput:: | |||
True | |||
4.0 | |||
""" | |||
return _reduce("max")(self, axis, keepdims) | |||
def mean(self, axis=None, keepdims: bool = False): | |||
r""" | |||
Returns the mean value of each row of the input tensor in the given dimension ``axis``. | |||
r"""Returns the mean value of each row of the input tensor in the given dimension ``axis``. | |||
If ``axis`` is a list of axes, reduce over all of them.
If ``keepdims`` is ``True``, the shape of output tensor is the same as the input tensor, | |||
except in the dimension(s) ``axis`` where it is of size 1. | |||
Otherwise, ``axis`` is squeezed (see :func:`~.squeeze`). | |||
:param axis: the dimension or dimensions to reduce. | |||
:param keepdims: whether the output tensor has ndim retained or not. | |||
:return: output tensor. | |||
Args: | |||
axis: the dimension or dimensions to reduce. | |||
keepdims: whether the output tensor has ndim retained or not. | |||
Examples: | |||
Returns: | |||
output tensor. | |||
.. testcode:: | |||
from megengine import tensor | |||
a = tensor([False, True, True, False]) | |||
b = tensor([1.0, 2.0, 3.0, 4.0]) | |||
print(a.mean().numpy()) | |||
print(b.mean().numpy()) | |||
Examples: | |||
.. testcode:: | |||
Outputs: | |||
from megengine import tensor | |||
a = tensor([False, True, True, False]) | |||
b = tensor([1.0, 2.0, 3.0, 4.0]) | |||
print(a.mean().numpy()) | |||
print(b.mean().numpy()) | |||
.. testoutput:: | |||
Outputs: | |||
0.5 | |||
2.5 | |||
.. testoutput:: | |||
0.5 | |||
2.5 | |||
""" | |||
return _reduce("mean")(self, axis, keepdims) |
@@ -47,17 +47,17 @@ class QuantDtypeMeta( | |||
["name", "cname", "np_dtype_str", "qmin", "qmax", "is_unsigned"], | |||
) | |||
): | |||
r""" | |||
Store metadata for quantize dtype. Could be used to create custom quant dtype | |||
r"""Store metadata for quantize dtype. Could be used to create custom quant dtype | |||
for QAT when the network doesn't need to be converted for inference, but only
to export network metadata for third-party platform inference. | |||
:param name: a unique name string. | |||
:param cname: used in :func:`~.create_quantized_dtype` for model dump and inference. | |||
:param np_dtype_str: used in :func:`~.create_quantized_dtype` to generate ``np.dtype``. | |||
:param qmin: a int number indicating quant dtype's lowerbound. | |||
:param qmax: a int number indicating quant dtype's upperbound. | |||
:param is_unsigned: a helper value that could be inference from np_dtype_str. | |||
Args: | |||
name: a unique name string. | |||
cname: used in :func:`~.create_quantized_dtype` for model dump and inference. | |||
np_dtype_str: used in :func:`~.create_quantized_dtype` to generate ``np.dtype``. | |||
qmin: an int number indicating the quant dtype's lower bound.
qmax: an int number indicating the quant dtype's upper bound.
is_unsigned: a helper value that could be inferred from np_dtype_str.
""" | |||
def __new__( | |||
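A hedged sketch of defining a custom dtype meta as described above (the 6-bit values and field choices are hypothetical; ``cname`` is left ``None`` because such a dtype is only meant for exporting metadata, not for :func:`~.create_quantized_dtype`):

.. code-block:: python

    from megengine.core.tensor.dtype import QuantDtypeMeta

    # Hypothetical 6-bit signed quant dtype used only to export metadata for
    # a third-party platform; it cannot be dumped for MegEngine inference.
    qint6_meta = QuantDtypeMeta(
        name="qint6",
        cname=None,
        np_dtype_str="int8",   # values are stored in an int8 container
        qmin=-32,
        qmax=31,
        is_unsigned=False,
    )
    print(qint6_meta.qmin, qint6_meta.qmax, qint6_meta.is_unsigned)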
@@ -77,7 +77,7 @@ class QuantDtypeMeta( | |||
return self | |||
def __deepcopy__(self, _): | |||
""" | |||
r""" | |||
Ignore deepcopy so that a dtype meta can be treated as singleton, for more | |||
strict check in :meth:`~.FakeQuantize.fake_quant_forward`. | |||
""" | |||
@@ -113,17 +113,17 @@ def _check_zero_point(zp: int, dtype_meta: QuantDtypeMeta): | |||
def create_quantized_dtype( | |||
dtype_meta: QuantDtypeMeta, scale: float, zp: Union[int, None] | |||
): | |||
r""" | |||
Get quantized dtype with metadata attribute according to _metadata_dict. | |||
r"""Get quantized dtype with metadata attribute according to _metadata_dict. | |||
Note that unsigned dtype must have ``zero_point`` and signed dtype must | |||
not have ``zero_point``, to be consistent with tensors generated by calling
compiled function from `CompGraph.compile(inputs, outspec)`. | |||
:param dtype_meta: a QuantDtypeMeta indicating which dtype to return. the | |||
``cname`` attribute cannot be ``None``. | |||
:param scale: a number for scale to store in dtype's metadata | |||
:param zp: a number for zero_point to store in dtype's metadata | |||
Args: | |||
dtype_meta: a QuantDtypeMeta indicating which dtype to return. The
``cname`` attribute cannot be ``None``.
scale: a number for scale to store in dtype's metadata | |||
zp: a number for zero_point to store in dtype's metadata | |||
""" | |||
if dtype_meta.cname is None: | |||
raise ValueError("dtype {} without cname attr is not supported.") | |||
@@ -152,8 +152,7 @@ def create_quantized_dtype( | |||
def quint8(scale, zero_point): | |||
""" | |||
Construct a quantized unsigned int8 data type with ``scale`` (float) and
r"""Construct a quantized unsigned int8 data type with ``scale`` (float) and
``zero_point`` (uint8). The real value represented by a quint8 data type is | |||
float_val = scale * (uint8_val - zero_point) | |||
""" | |||
@@ -161,24 +160,21 @@ def quint8(scale, zero_point): | |||
def qint8(scale): | |||
""" | |||
Construct a quantized int8 data type with ``scale`` (float). The real value | |||
r"""Construct a quantized int8 data type with ``scale`` (float). The real value | |||
represented by a qint8 data type is float_val = scale * int8_val | |||
""" | |||
return create_quantized_dtype(_builtin_quant_dtypes["qint8"], scale, None) | |||
def qint32(scale): | |||
""" | |||
Construct a quantized int32 data type with ``scale`` (float). The real value | |||
r"""Construct a quantized int32 data type with ``scale`` (float). The real value | |||
represented by a qint32 data type is float_val = scale * int32_val | |||
""" | |||
return create_quantized_dtype(_builtin_quant_dtypes["qint32"], scale, None) | |||
def quint4(scale, zero_point): | |||
""" | |||
Construct a quantized unsigned int4 data type with ``scale`` (float) and
r"""Construct a quantized unsigned int4 data type with ``scale`` (float) and
``zero_point`` (uint8). The real value represented by a quint4 data type is | |||
float_val = scale * (uint4_val - zero_point) | |||
""" | |||
@@ -186,8 +182,7 @@ def quint4(scale, zero_point): | |||
def qint4(scale): | |||
""" | |||
Construct a quantized int4 data type with ``scale`` (float). The real value | |||
r"""Construct a quantized int4 data type with ``scale`` (float). The real value | |||
represented by a qint4 data type is float_val = scale * int4_val | |||
""" | |||
return create_quantized_dtype(_builtin_quant_dtypes["qint4"], scale, None) | |||
@@ -244,95 +239,95 @@ def _convert_from_quantized_dtype(arr: np.ndarray, dtype_meta: QuantDtypeMeta): | |||
def convert_to_quint8(arr: np.ndarray, q: np.dtype): | |||
""" | |||
Quantize a float NumPy ndarray into a quint8 one with specified params. | |||
r"""Quantize a float NumPy ndarray into a quint8 one with specified params. | |||
:param arr: Input ndarray. | |||
:param q: Target data type, should be a quint8. | |||
Args: | |||
arr: Input ndarray. | |||
q: Target data type, should be a quint8. | |||
""" | |||
return _convert_to_quantized_dtype(arr, q, _builtin_quant_dtypes["quint8"]) | |||
def convert_from_quint8(arr: np.ndarray): | |||
""" | |||
Dequantize a quint8 NumPy ndarray into a float one. | |||
r"""Dequantize a quint8 NumPy ndarray into a float one. | |||
:param arr: Input ndarray. | |||
Args: | |||
arr: Input ndarray. | |||
""" | |||
return _convert_from_quantized_dtype(arr, _builtin_quant_dtypes["quint8"]) | |||
def convert_to_qint8(arr: np.ndarray, q: np.dtype): | |||
""" | |||
Quantize a float NumPy ndarray into a qint8 one with specified params. | |||
r"""Quantize a float NumPy ndarray into a qint8 one with specified params. | |||
:param arr: Input ndarray. | |||
:param q: Target data type, should be a qint8. | |||
Args: | |||
arr: Input ndarray. | |||
q: Target data type, should be a qint8. | |||
""" | |||
return _convert_to_quantized_dtype(arr, q, _builtin_quant_dtypes["qint8"]) | |||
def convert_from_qint8(arr: np.ndarray): | |||
""" | |||
Dequantize a qint8 NumPy ndarray into a float one. | |||
r"""Dequantize a qint8 NumPy ndarray into a float one. | |||
:param arr: Input ndarray. | |||
Args: | |||
arr: Input ndarray. | |||
""" | |||
return _convert_from_quantized_dtype(arr, _builtin_quant_dtypes["qint8"]) | |||
def convert_to_qint32(arr: np.ndarray, q: np.dtype): | |||
""" | |||
Quantize a float NumPy ndarray into a qint32 one with specified params. | |||
r"""Quantize a float NumPy ndarray into a qint32 one with specified params. | |||
:param arr: Input ndarray. | |||
:param q: Target data type, should be a qint8. | |||
Args: | |||
arr: Input ndarray. | |||
q: Target data type, should be a qint32.
""" | |||
return _convert_to_quantized_dtype(arr, q, _builtin_quant_dtypes["qint32"]) | |||
def convert_from_qint32(arr): | |||
""" | |||
Dequantize a qint32 NumPy ndarray into a float one. | |||
r"""Dequantize a qint32 NumPy ndarray into a float one. | |||
:param arr: Input ndarray. | |||
Args: | |||
arr: Input ndarray. | |||
""" | |||
return _convert_from_quantized_dtype(arr, _builtin_quant_dtypes["qint32"]) | |||
def convert_to_quint4(arr: np.ndarray, q: np.dtype): | |||
""" | |||
Quantize a float NumPy ndarray into a quint4 one with specified params. | |||
r"""Quantize a float NumPy ndarray into a quint4 one with specified params. | |||
:param arr: Input ndarray. | |||
:param q: Target data type, should be a quint4. | |||
Args: | |||
arr: Input ndarray. | |||
q: Target data type, should be a quint4. | |||
""" | |||
return _convert_to_quantized_dtype(arr, q, _builtin_quant_dtypes["quint4"]) | |||
def convert_from_quint4(arr: np.ndarray): | |||
""" | |||
Dequantize a quint4 NumPy ndarray into a float one. | |||
r"""Dequantize a quint4 NumPy ndarray into a float one. | |||
:param arr: Input ndarray. | |||
Args: | |||
arr: Input ndarray. | |||
""" | |||
return _convert_from_quantized_dtype(arr, _builtin_quant_dtypes["quint4"]) | |||
def convert_to_qint4(arr: np.ndarray, q: np.dtype): | |||
""" | |||
Quantize a float NumPy ndarray into a qint4 one with specified params. | |||
r"""Quantize a float NumPy ndarray into a qint4 one with specified params. | |||
:param arr: Input ndarray. | |||
:param q: Target data type, should be a qint4. | |||
Args: | |||
arr: Input ndarray. | |||
q: Target data type, should be a qint4. | |||
""" | |||
return _convert_to_quantized_dtype(arr, q, _builtin_quant_dtypes["qint4"]) | |||
def convert_from_qint4(arr: np.ndarray): | |||
""" | |||
Dequantize a qint4 NumPy ndarray into a float one. | |||
r"""Dequantize a qint4 NumPy ndarray into a float one. | |||
:param arr: Input ndarray. | |||
Args: | |||
arr: Input ndarray. | |||
""" | |||
return _convert_from_quantized_dtype(arr, _builtin_quant_dtypes["qint4"]) |
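A quantize/dequantize round-trip sketch for the helpers above, illustrating ``float_val = scale * (uint8_val - zero_point)`` (the import path is assumed to be this module, ``megengine.core.tensor.dtype``):

.. code-block:: python

    import numpy as np
    from megengine.core.tensor.dtype import (
        convert_from_quint8,
        convert_to_quint8,
        quint8,
    )

    q = quint8(scale=0.1, zero_point=128)
    arr = np.array([-1.0, 0.0, 0.55], dtype=np.float32)

    q_arr = convert_to_quint8(arr, q)     # uint8 storage, roughly [118, 128, 134]
    f_arr = convert_from_quint8(q_arr)    # back to float, roughly [-1.0, 0.0, 0.6]
    print(q_arr, f_arr)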
@@ -24,11 +24,11 @@ from .core import TensorBase | |||
def set_priority_to_id(dest_vars): | |||
""" | |||
For all oprs in the subgraph constructed by dest_vars, | |||
r"""For all oprs in the subgraph constructed by dest_vars, | |||
sets its priority to its id if its original priority is zero.
:param dest_vars: target vars representing the graph. | |||
Args: | |||
dest_vars: target vars representing the graph. | |||
""" | |||
dest_vec = [] | |||
for i in dest_vars: | |||
@@ -220,54 +220,50 @@ class OpNode: | |||
def optimize_for_inference(dest_vars, **kwargs): | |||
r""" | |||
Applies optimize_for_inference pass for computing graph. | |||
:param dest_vars: list of output vars in the computing graph | |||
:Keyword Arguments: | |||
* enable_io16xc32 -- | |||
whether to use float16 for I/O between oprs and use | |||
float32 as internal computation precision. Note the output var would be | |||
changed to float16. | |||
* enable_ioc16 -- | |||
whether to use float16 for both I/O and computation | |||
precision. | |||
* enable_hwcd4 -- | |||
whether to use NHWCD4 data layout. This is faster on some | |||
OpenCL backend. | |||
* enable_nchw88 -- | |||
whether to use NCHW88 data layout, currently | |||
used in X86 AVX backend. | |||
* enable_nchw44 -- | |||
whether to use NCHW44 data layout, currently | |||
used in arm backend. | |||
* enable_nchw44_dot -- | |||
whether to use NCHW44_dot data layout, currently | |||
used in armv8.2+dotprod backend. | |||
* enable_nchw4 -- | |||
whether to use NCHW4 data layout, currently | |||
used in nvidia backend(based on cudnn). | |||
* enable_nchw32 -- | |||
whether to use NCHW32 data layout, currently | |||
used in nvidia backend with tensorcore(based on cudnn). | |||
* enable_chwn4 -- | |||
whether to use CHWN4 data layout, currently | |||
used in nvidia backend with tensorcore. | |||
* enable_nchw64 -- | |||
whether to use NCHW64 data layout, used for fast int4 | |||
support on Nvidia GPU. | |||
* enable_fuse_conv_bias_nonlinearity: whether to fuse conv+bias+nonlinearty | |||
into one opr. | |||
* enable_fuse_conv_bias_with_z: whether to fuse conv_bias with z | |||
input for inference on nvidia backend(this optimization pass will | |||
result in mismatch of the precision of output of training and | |||
inference) | |||
* enable_fuse_preprocess: whether to fuse astype\pad channel\dimshuffle and | |||
etc opr from h2d opr. | |||
r"""Applies optimize_for_inference pass for computing graph. | |||
Args: | |||
dest_vars: list of output vars in the computing graph | |||
Keyword Arguments: | |||
* enable_io16xc32 -- | |||
whether to use float16 for I/O between oprs and use | |||
float32 as internal computation precision. Note the output var would be | |||
changed to float16. | |||
* enable_ioc16 -- | |||
whether to use float16 for both I/O and computation | |||
precision. | |||
* enable_hwcd4 -- | |||
whether to use NHWCD4 data layout. This is faster on some | |||
OpenCL backend. | |||
* enable_nchw88 -- | |||
whether to use NCHW88 data layout, currently | |||
used in X86 AVX backend. | |||
* enable_nchw44 -- | |||
whether to use NCHW44 data layout, currently | |||
used in arm backend. | |||
* enable_nchw44_dot -- | |||
whether to use NCHW44_dot data layout, currently | |||
used in armv8.2+dotprod backend. | |||
* enable_nchw4 -- | |||
whether to use NCHW4 data layout, currently | |||
used in nvidia backend(based on cudnn). | |||
* enable_nchw32 -- | |||
whether to use NCHW32 data layout, currently | |||
used in nvidia backend with tensorcore(based on cudnn). | |||
* enable_chwn4 -- | |||
whether to use CHWN4 data layout, currently | |||
used in nvidia backend with tensorcore. | |||
* enable_nchw64 -- | |||
whether to use NCHW64 data layout, used for fast int4 | |||
support on Nvidia GPU. | |||
* enable_fuse_conv_bias_nonlinearity: whether to fuse conv+bias+nonlinearity
into one opr. | |||
* enable_fuse_conv_bias_with_z: whether to fuse conv_bias with z
input for inference on nvidia backend (this optimization pass will
result in a mismatch between the output precision of training and
inference)
""" | |||
inference_options = GraphOptimizeOptions() | |||
inference_optimize_layout_transform_map = { | |||
@@ -305,11 +301,13 @@ def optimize_for_inference(dest_vars, **kwargs): | |||
def deserialize_infer_option(x: int) -> Dict[str, bool]: | |||
r""" | |||
Deserialize optimize options generated by ``imperative_rt.GraphOptimizeOptions``.
r"""Deserialize optimize options generated by ``imperative_rt.GraphOptimizeOptions``.
:param x: inference options represented by int. | |||
:return: inference options represented by dict. | |||
Args: | |||
x: inference options represented by int. | |||
Returns: | |||
inference options represented by dict. | |||
""" | |||
inference_options = GraphOptimizeOptions.deserialize(x) | |||
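A heavily hedged sketch of driving the options above from Python (``model.mge`` is a placeholder path, and the exact return form of ``optimize_for_inference`` is assumed to be the optimized output vars):

.. code-block:: python

    from megengine.core.tensor import megbrain_graph as G

    ret = G.load_graph("model.mge")              # CompGraphLoadResult namedtuple
    out_vars = ret.output_vars_list

    # Ask for float16 I/O with float32 internal computation.
    optimized = G.optimize_for_inference(out_vars, enable_io16xc32=True)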
@@ -346,13 +344,12 @@ def deserialize_infer_option(x: int) -> Dict[str, bool]: | |||
def modify_opr_algo_strategy_inplace(dest_vars, strategy: str): | |||
""" | |||
C++ graph version of :func:`~.set_execution_strategy`. Used to modify a
r"""C++ graph version of :func:`~.set_execution_strategy`. Used to modify a
dumped graph's fast-run strategy in place.
:param dest_vars: list of output vars in the computing graph. | |||
:param strategy: fast-run algorithms strategy. | |||
Args: | |||
dest_vars: list of output vars in the computing graph. | |||
strategy: fast-run algorithms strategy. | |||
""" | |||
dest_vars = _unwrap(dest_vars) | |||
_imperative_rt.modify_opr_algo_strategy_inplace(dest_vars, strategy) | |||
@@ -383,39 +380,40 @@ def dump_graph( | |||
append_json=False, | |||
metadata=None | |||
) -> Tuple[bytes, CompGraphDumpResult]: | |||
""" | |||
serialize the computing graph of `output_vars` and get byte result. | |||
:param output_vars: output variables which are the graph's end point. | |||
.. note:: | |||
The underlying C++ API only accepts a var list. If a dict is given, | |||
the vars would be renamed to the given names. | |||
:param keep_var_name: level for keeping variable names: | |||
* 0: none of the names are kept | |||
* 1: (default)keep names of output vars | |||
* 2: keep names of all (output and internal) vars | |||
:param keep_opr_name: whether to keep operator names. | |||
:param keep_param_name: whether to keep param names, so param values can be | |||
easily manipulated after loading model | |||
:param keep_opr_priority: whether to keep priority setting for operators | |||
:param strip_info_file: a string for path or a file handler. if is not None, | |||
then the dump information for code strip would be written to ``strip_info_file`` | |||
:param append_json: will be check when `strip_info_file` is not None. if set | |||
true, the information for code strip will be append to strip_info_file. | |||
if set false, will rewrite strip_info_file | |||
:return: dump result as byte string, and an instance of namedtuple | |||
r"""serialize the computing graph of `output_vars` and get byte result. | |||
Args: | |||
output_vars: output variables which are the graph's end point. | |||
keep_var_name: level for keeping variable names: | |||
* 0: none of the names are kept | |||
* 1: (default)keep names of output vars | |||
* 2: keep names of all (output and internal) vars | |||
keep_opr_name: whether to keep operator names. | |||
keep_param_name: whether to keep param names, so param values can be | |||
easily manipulated after loading model | |||
keep_opr_priority: whether to keep priority setting for operators | |||
strip_info_file: a path string or a file handler. If it is not None,
then the dump information for code strip would be written to ``strip_info_file``
append_json: will be checked when ``strip_info_file`` is not None. If set
to true, the information for code strip will be appended to ``strip_info_file``;
if set to false, ``strip_info_file`` will be rewritten.
Note: | |||
The underlying C++ API only accepts a var list. If a dict is given, | |||
the vars would be renamed to the given names. | |||
Returns: | |||
dump result as byte string, and an instance of namedtuple | |||
:class:`CompGraphDumpResult`, whose fields are: | |||
* ``nr_opr`` number of operators dumped | |||
* ``tot_bytes`` total bytes for the whole graph | |||
* ``tensor_value_bytes`` bytes consumed for dumping tensor values | |||
* ``inputs`` names of input tensors | |||
* ``params`` list of names of dumped params | |||
* ``outputs`` names of output vars | |||
* ``nr_opr`` number of operators dumped | |||
* ``tot_bytes`` total bytes for the whole graph | |||
* ``tensor_value_bytes`` bytes consumed for dumping tensor values | |||
* ``inputs`` names of input tensors | |||
* ``params`` list of names of dumped params | |||
* ``outputs`` names of output vars | |||
""" | |||
if isinstance(output_vars, dict): | |||
used_vars = set() | |||
@@ -483,17 +481,19 @@ CompGraphLoadResult = collections.namedtuple( | |||
def load_graph(fpath) -> CompGraphLoadResult: | |||
""" | |||
Load a serialized computing graph from file. | |||
r"""Load a serialized computing graph from file. | |||
Args: | |||
fpath: Path or Handle of the input file | |||
:param fpath: Path or Handle of the input file | |||
:return: An instance of namedtuple :class:`CompGraphLoadResult`, | |||
Returns: | |||
An instance of namedtuple :class:`CompGraphLoadResult`, | |||
whose fields are: | |||
* ``graph`` loaded CompGraph | |||
* ``output_vars_dict`` A Python dict, mapping name to output SymbolVar | |||
* ``output_vars_list`` A Python list, containing output vars in the | |||
order passed to serialize_comp_graph_to_file | |||
* ``graph`` loaded CompGraph | |||
* ``output_vars_dict`` A Python dict, mapping name to output SymbolVar | |||
* ``output_vars_list`` A Python list, containing output vars in the | |||
order passed to serialize_comp_graph_to_file | |||
""" | |||
output_vars_map = [] | |||
output_vars_list = [] | |||
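A round-trip sketch for ``dump_graph`` and ``load_graph`` (``model.mge`` is a placeholder file produced by an earlier dump):

.. code-block:: python

    from megengine.core.tensor import megbrain_graph as G

    ret = G.load_graph("model.mge")                    # CompGraphLoadResult
    print(list(ret.output_vars_dict.keys()))           # name -> SymbolVar

    # Re-serialize, keeping only the output var names (keep_var_name=1, the default).
    data, info = G.dump_graph(ret.output_vars_list, keep_var_name=1)
    print(info.nr_opr, info.tot_bytes, info.outputs)
    with open("model_redump.mge", "wb") as f:
        f.write(data)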
@@ -24,12 +24,12 @@ _enable_convert_inputs = True | |||
def get_convert_inputs(): | |||
""" get the curerent state of `_enable_convert_inputs` """ | |||
r"""get the curerent state of `_enable_convert_inputs`""" | |||
return _enable_convert_inputs | |||
def set_convert_inputs(flag): | |||
""" This function is a temporary workaround for reducing the overhead of operator | |||
r"""This function is a temporary workaround for reducing the overhead of operator | |||
invocations. The function `convert_inputs` is disabled if the global state | |||
`_enable_convert_inputs` is set to `False`, otherwise enabled. This function is for | |||
internal use only, and should be removed when the tensor-like system is refactored. | |||
@@ -137,11 +137,11 @@ def setscalar(x): | |||
def astensor1d(x, *reference, dtype=None, device=None): | |||
""" | |||
Convert something to 1D tensor. Support following types | |||
* sequence of scalar literal / tensor | |||
* numpy array | |||
* tensor (returned as is, regardless of dtype and device) | |||
"""Convert something to 1D tensor. Support following types | |||
* sequence of scalar literal / tensor | |||
* numpy array | |||
* tensor (returned as is, regardless of dtype and device) | |||
""" | |||
try: | |||
ndim = x.ndim | |||
@@ -33,16 +33,11 @@ default_collate_err_msg_format = ( | |||
class Collator: | |||
r""" | |||
Used for merging a list of samples to form a mini-batch of Tensor(s). Used when using batched loading from a dataset. | |||
r"""Used for merging a list of samples to form a mini-batch of Tensor(s). Used when using batched loading from a dataset. | |||
Modified from https://github.com/pytorch/pytorch/blob/master/torch/utils/data/_utils/collate.py | |||
""" | |||
def apply(self, inputs): | |||
""" | |||
:param inputs: sequence_N(tuple(CHW, C, CK)). | |||
:return: tuple(NCHW, NC, NCK). | |||
""" | |||
elem = inputs[0] | |||
elem_type = type(elem) | |||
if ( | |||
@@ -44,28 +44,28 @@ def raise_timeout_error(): | |||
class DataLoader: | |||
r"""Provides a convenient way to iterate on a given dataset. | |||
DataLoader combines a dataset with | |||
:class:`~.Sampler`, :class:`~.Transform` and :class:`~.Collator`, | |||
making it flexible to get minibatches continually from a dataset.
:param dataset: dataset from which to load the minibatch. | |||
:param sampler: defines the strategy to sample data from the dataset. | |||
:param transform: defined the transforming strategy for a sampled batch. | |||
Default: None | |||
:param collator: defined the merging strategy for a transformed batch. | |||
Default: None | |||
:param num_workers: the number of sub-process to load, transform and collate | |||
the batch. ``0`` means using single-process. Default: 0 | |||
:param timeout: if positive, means the timeout value(second) for collecting a | |||
batch from workers. Default: 0 | |||
:param timeout_event: callback function triggered by timeout, default to raise | |||
runtime error. | |||
:param divide: define the paralleling strategy in multi-processing mode. | |||
``True`` means one batch is divided into :attr:`num_workers` pieces, and | |||
the workers will process these pieces parallelly. ``False`` means | |||
different sub-process will process different batch. Default: False | |||
Args: | |||
dataset: dataset from which to load the minibatch. | |||
sampler: defines the strategy to sample data from the dataset. | |||
transform: defines the transforming strategy for a sampled batch.
Default: None
collator: defines the merging strategy for a transformed batch.
Default: None
num_workers: the number of sub-process to load, transform and collate | |||
the batch. ``0`` means using single-process. Default: 0 | |||
timeout: if positive, means the timeout value(second) for collecting a | |||
batch from workers. Default: 0 | |||
timeout_event: callback function triggered by timeout, default to raise | |||
runtime error. | |||
divide: defines the parallelism strategy in multi-processing mode.
``True`` means one batch is divided into :attr:`num_workers` pieces, and
the workers will process these pieces in parallel. ``False`` means
different sub-processes will process different batches. Default: False
""" | |||
__initialized = False | |||
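A minimal sketch wiring a dataset, sampler and DataLoader together (array shapes and the batch size are arbitrary):

.. code-block:: python

    import numpy as np
    from megengine.data import DataLoader
    from megengine.data.dataset import ArrayDataset
    from megengine.data.sampler import SequentialSampler

    images = np.random.rand(100, 3, 32, 32).astype("float32")
    labels = np.random.randint(0, 10, size=(100,)).astype("int32")

    dataset = ArrayDataset(images, labels)
    sampler = SequentialSampler(dataset, batch_size=8, drop_last=False)
    dataloader = DataLoader(dataset, sampler=sampler, num_workers=0)

    for batch_images, batch_labels in dataloader:
        print(batch_images.shape, batch_labels.shape)   # (8, 3, 32, 32) (8,)
        break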
@@ -11,8 +11,7 @@ from typing import Tuple | |||
class Dataset(ABC): | |||
r""" | |||
An abstract base class for all datasets. | |||
r"""An abstract base class for all datasets. | |||
__getitem__ and __len__ methods are additionally needed.
""" | |||
@@ -31,8 +30,7 @@ class Dataset(ABC): | |||
class StreamDataset(Dataset): | |||
r""" | |||
An abstract class for stream data. | |||
r"""An abstract class for stream data. | |||
__iter__ method is additionally needed.
""" | |||
@@ -53,10 +51,9 @@ class StreamDataset(Dataset): | |||
class ArrayDataset(Dataset): | |||
r""" | |||
ArrayDataset is a dataset for numpy array data. | |||
r"""ArrayDataset is a dataset for numpy array data. | |||
One or more numpy arrays are needed to initiate the dataset. | |||
One or more numpy arrays are needed to initiate the dataset.
And the dimensions representing the sample number are expected to be the same.
""" | |||
@@ -21,8 +21,7 @@ logger = get_logger(__name__) | |||
class CIFAR10(VisionDataset): | |||
r""" :class:`~.Dataset` for CIFAR10 meta data. | |||
""" | |||
r""":class:`~.Dataset` for CIFAR10 meta data.""" | |||
url_path = "http://www.cs.utoronto.ca/~kriz/" | |||
raw_file_name = "cifar-10-python.tar.gz" | |||
@@ -138,8 +137,7 @@ class CIFAR10(VisionDataset): | |||
class CIFAR100(CIFAR10): | |||
r""" :class:`~.Dataset` for CIFAR100 meta data. | |||
""" | |||
r""":class:`~.Dataset` for CIFAR100 meta data.""" | |||
url_path = "http://www.cs.utoronto.ca/~kriz/" | |||
raw_file_name = "cifar-100-python.tar.gz" | |||
@@ -23,9 +23,7 @@ from .meta_vision import VisionDataset | |||
class Cityscapes(VisionDataset): | |||
r""" | |||
`Cityscapes <http://www.cityscapes-dataset.com/>`_ Dataset. | |||
""" | |||
r"""`Cityscapes <http://www.cityscapes-dataset.com/>`_ Dataset.""" | |||
supported_order = ( | |||
"image", | |||
@@ -46,9 +46,7 @@ def has_valid_annotation(anno, order): | |||
class COCO(VisionDataset): | |||
r""" | |||
`MS COCO <http://cocodataset.org/#home>`_ Dataset. | |||
""" | |||
r"""`MS COCO <http://cocodataset.org/#home>`_ Dataset.""" | |||
supported_order = ( | |||
"image", | |||
@@ -26,22 +26,21 @@ from .utils import is_img | |||
class ImageFolder(VisionDataset): | |||
r""" | |||
ImageFolder is a class for loading image data and labels from an organized folder.
r"""ImageFolder is a class for loading image data and labels from an organized folder.
The folder is expected to be organized as followed: root/cls/xxx.img_ext | |||
Labels are indices of sorted classes in the root directory. | |||
:param root: root directory of an image folder. | |||
:param loader: a function used to load image from path, | |||
if ``None``, default function that loads | |||
images with PIL will be called. | |||
:param check_valid_func: a function used to check if files in folder are | |||
expected image files, if ``None``, default function | |||
that checks file extensions will be called. | |||
:param class_name: if ``True``, return class name instead of class index. | |||
Args: | |||
root: root directory of an image folder. | |||
loader: a function used to load image from path, | |||
if ``None``, default function that loads | |||
images with PIL will be called. | |||
check_valid_func: a function used to check if files in folder are | |||
expected image files, if ``None``, default function | |||
that checks file extensions will be called. | |||
class_name: if ``True``, return class name instead of class index. | |||
""" | |||
def __init__(self, root: str, check_valid_func=None, class_name: bool = False): | |||
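A usage sketch for the layout described above (``/path/to/root`` is a placeholder directory organized as ``root/cls/xxx.img_ext``; the sample is assumed to be an (image, label) pair):

.. code-block:: python

    from megengine.data.dataset import ImageFolder

    dataset = ImageFolder("/path/to/root", class_name=False)
    image, label = dataset[0]        # image array and the index of its sorted class
    print(len(dataset), label)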
@@ -30,11 +30,10 @@ logger = get_logger(__name__) | |||
class ImageNet(ImageFolder): | |||
r""" | |||
Load ImageNet from raw files or folder. Expected folder looks like: | |||
.. code-block:: bash | |||
r"""Load ImageNet from raw files or folder. Expected folder looks like: | |||
.. code-block:: shell | |||
${root}/ | |||
| [REQUIRED TAR FILES] | |||
|- ILSVRC2012_img_train.tar | |||
@@ -45,22 +44,8 @@ class ImageNet(ImageFolder): | |||
|- val/cls/xxx.${img_ext} | |||
|- ILSVRC2012_devkit_t12/data/meta.mat | |||
|- ILSVRC2012_devkit_t12/data/ILSVRC2012_validation_ground_truth.txt | |||
If the image folders don't exist, raw tar files are required to get extracted and processed. | |||
""" | |||
raw_file_meta = { | |||
"train": ("ILSVRC2012_img_train.tar", "1d675b47d978889d74fa0da5fadfb00e"), | |||
"val": ("ILSVRC2012_img_val.tar", "29b22e2961454d5413ddabcf34fc5622"), | |||
"devkit": ("ILSVRC2012_devkit_t12.tar.gz", "fa75699e90414af021442c21a62c3abf"), | |||
} # ImageNet raw files | |||
default_train_dir = "train" | |||
default_val_dir = "val" | |||
default_devkit_dir = "ILSVRC2012_devkit_t12" | |||
def __init__(self, root: str = None, train: bool = True, **kwargs): | |||
r""" | |||
Initialization: | |||
* if ``root`` contains ``self.target_folder`` depending on ``train``: | |||
@@ -77,10 +62,22 @@ class ImageNet(ImageFolder): | |||
* raise error. | |||
:param root: root directory of imagenet data, if root is ``None``, use default_dataset_root. | |||
:param train: if ``True``, load the train split, otherwise load the validation split. | |||
""" | |||
Args: | |||
root: root directory of imagenet data, if root is ``None``, use default_dataset_root. | |||
train: if ``True``, load the train split, otherwise load the validation split. | |||
""" | |||
raw_file_meta = { | |||
"train": ("ILSVRC2012_img_train.tar", "1d675b47d978889d74fa0da5fadfb00e"), | |||
"val": ("ILSVRC2012_img_val.tar", "29b22e2961454d5413ddabcf34fc5622"), | |||
"devkit": ("ILSVRC2012_devkit_t12.tar.gz", "fa75699e90414af021442c21a62c3abf"), | |||
} # ImageNet raw files | |||
default_train_dir = "train" | |||
default_val_dir = "val" | |||
default_devkit_dir = "ILSVRC2012_devkit_t12" | |||
def __init__(self, root: str = None, train: bool = True, **kwargs): | |||
# process the root path | |||
if root is None: | |||
self.root = self._default_root | |||
@@ -22,8 +22,7 @@ logger = get_logger(__name__) | |||
class MNIST(VisionDataset): | |||
r""" :class:`~.Dataset` for MNIST meta data. | |||
""" | |||
r""":class:`~.Dataset` for MNIST meta data.""" | |||
url_path = "http://yann.lecun.com/exdb/mnist/" | |||
""" | |||
@@ -23,9 +23,7 @@ from .meta_vision import VisionDataset | |||
class Objects365(VisionDataset): | |||
r""" | |||
`Objects365 <https://www.objects365.org/overview.html>`_ Dataset. | |||
""" | |||
r"""`Objects365 <https://www.objects365.org/overview.html>`_ Dataset.""" | |||
supported_order = ( | |||
"image", | |||
@@ -24,9 +24,7 @@ from .meta_vision import VisionDataset | |||
class PascalVOC(VisionDataset): | |||
r""" | |||
`Pascal VOC <http://host.robots.ox.ac.uk/pascal/VOC/>`_ Dataset. | |||
""" | |||
r"""`Pascal VOC <http://host.robots.ox.ac.uk/pascal/VOC/>`_ Dataset.""" | |||
supported_order = ( | |||
"image", | |||
@@ -17,9 +17,7 @@ import megengine.distributed as dist | |||
class Sampler(ABC): | |||
r""" | |||
An abstract base class for all Sampler | |||
""" | |||
r"""An abstract base class for all Sampler""" | |||
@abstractmethod | |||
def __init__(self): | |||
@@ -27,19 +25,19 @@ class Sampler(ABC): | |||
class MapSampler(Sampler): | |||
r""" | |||
Sampler for map dataset. | |||
:param dataset: dataset to sample from. | |||
:param batch_size: batch size for batch method. | |||
:param drop_last: set ``True`` to drop the last incomplete batch, | |||
if the dataset size is not divisible by the batch size. If ``False`` and | |||
the size of dataset is not divisible by the batch_size, then the last batch will | |||
be smaller. Default: False | |||
:param num_samples: number of samples assigned to one rank. | |||
:param world_size: number of ranks. | |||
:param rank: rank id, non-negative interger within 0 and ``world_size``. | |||
:param seed: seed for random operators. | |||
r"""Sampler for map dataset. | |||
Args: | |||
dataset: dataset to sample from. | |||
batch_size: batch size for batch method. | |||
drop_last: set ``True`` to drop the last incomplete batch, | |||
if the dataset size is not divisible by the batch size. If ``False`` and | |||
the size of dataset is not divisible by the batch_size, then the last batch will | |||
be smaller. Default: False | |||
num_samples: number of samples assigned to one rank. | |||
world_size: number of ranks. | |||
rank: rank id, a non-negative integer within 0 and ``world_size``. | |||
seed: seed for random operators. | |||
""" | |||
def __init__( | |||
@@ -106,14 +104,11 @@ class MapSampler(Sampler): | |||
return int(math.ceil(self.num_samples / self.batch_size)) | |||
def sample(self): | |||
""" | |||
Return a list contains all sample indices. | |||
""" | |||
r"""Return a list contains all sample indices.""" | |||
raise NotImplementedError | |||
def scatter(self, indices) -> List: | |||
r""" | |||
Scatter method is used for splitting indices into subset, each subset | |||
r"""Scatter method is used for splitting indices into subset, each subset | |||
will be assigned to a rank. Indices are evenly splitted by default. | |||
If customized indices assignment method is needed, please rewrite this method. | |||
""" | |||
@@ -130,9 +125,7 @@ class MapSampler(Sampler): | |||
return indices | |||
def batch(self) -> Iterator[List[Any]]: | |||
r""" | |||
Batch method provides a batch indices generator. | |||
""" | |||
r"""Batch method provides a batch indices generator.""" | |||
indices = list(self.sample()) | |||
# user might pass the world_size parameter without dist, | |||
@@ -150,18 +143,15 @@ class MapSampler(Sampler): | |||
class StreamSampler(Sampler): | |||
r""" | |||
Sampler for stream dataset. | |||
.. warning:: | |||
r"""Sampler for stream dataset. | |||
Warning: | |||
In the case of multiple machines, the sampler should ensure that each worker gets | |||
different data. This class cannot do that yet; please build your own | |||
dataset and sampler to achieve this goal. | |||
Usually, :meth:`~.StreamDataset.__iter__` can return a different iterator based on | |||
``rank = dist.get_rank()``, so that each worker gets different data. | |||
""" | |||
def __init__(self, batch_size=1): | |||
@@ -175,18 +165,18 @@ class StreamSampler(Sampler): | |||
class SequentialSampler(MapSampler): | |||
r""" | |||
Sample elements sequentially. | |||
:param dataset: dataset to sample from. | |||
:param batch_size: batch size for batch method. | |||
:param drop_last: set ``True`` to drop the last incomplete batch, | |||
if the dataset size is not divisible by the batch size. If ``False`` and | |||
the size of dataset is not divisible by the batch_size, then the last batch will | |||
be smaller. Default: False | |||
:param indices: indice of samples. | |||
:param world_size: number of ranks. | |||
:param rank: rank id, non-negative interger within 0 and ``world_size``. | |||
r"""Sample elements sequentially. | |||
Args: | |||
dataset: dataset to sample from. | |||
batch_size: batch size for batch method. | |||
drop_last: set ``True`` to drop the last incomplete batch, | |||
if the dataset size is not divisible by the batch size. If ``False`` and | |||
the size of dataset is not divisible by the batch_size, then the last batch will | |||
be smaller. Default: False | |||
indices: indices of samples. | |||
world_size: number of ranks. | |||
rank: rank id, a non-negative integer within 0 and ``world_size``. | |||
""" | |||
def __init__( | |||
@@ -207,9 +197,7 @@ class SequentialSampler(MapSampler): | |||
self.indices = indices | |||
def sample(self) -> Iterator[Any]: | |||
r""" | |||
Return a generator. | |||
""" | |||
r"""Return a generator.""" | |||
if self.indices is None: | |||
return iter(range(len(self.dataset))) | |||
else: | |||
@@ -217,19 +205,19 @@ class SequentialSampler(MapSampler): | |||
class RandomSampler(MapSampler): | |||
r""" | |||
Sample elements randomly without replacement. | |||
:param dataset: dataset to sample from. | |||
:param batch_size: batch size for batch method. | |||
:param drop_last: set ``True`` to drop the last incomplete batch, | |||
if the dataset size is not divisible by the batch size. If ``False`` and | |||
the size of dataset is not divisible by the batch_size, then the last batch will | |||
be smaller. Default: False | |||
:param indices: indice of samples. | |||
:param world_size: number of ranks. | |||
:param rank: rank id, non-negative interger within 0 and ``world_size``. | |||
:param seed: seed for random operators. | |||
r"""Sample elements randomly without replacement. | |||
Args: | |||
dataset: dataset to sample from. | |||
batch_size: batch size for batch method. | |||
drop_last: set ``True`` to drop the last incomplete batch, | |||
if the dataset size is not divisible by the batch size. If ``False`` and | |||
the size of dataset is not divisible by the batch_size, then the last batch will | |||
be smaller. Default: False | |||
indices: indices of samples. | |||
world_size: number of ranks. | |||
rank: rank id, a non-negative integer within 0 and ``world_size``. | |||
seed: seed for random operators. | |||
""" | |||
def __init__( | |||
@@ -258,20 +246,20 @@ class RandomSampler(MapSampler): | |||
class ReplacementSampler(MapSampler): | |||
r""" | |||
Sample elements randomly with replacement. | |||
:param dataset: dataset to sample from. | |||
:param batch_size: batch size for batch method. | |||
:param drop_last: set ``True`` to drop the last incomplete batch, | |||
if the dataset size is not divisible by the batch size. If ``False`` and | |||
the size of dataset is not divisible by the batch_size, then the last batch will | |||
be smaller. Default: False | |||
:param num_samples: number of samples assigned to one rank. | |||
:param weights: weights for sampling indices, it could be unnormalized weights. | |||
:param world_size: number of ranks. | |||
:param rank: rank id, non-negative interger within 0 and ``world_size``. | |||
:param seed: seed for random operators. | |||
r"""Sample elements randomly with replacement. | |||
Args: | |||
dataset: dataset to sample from. | |||
batch_size: batch size for batch method. | |||
drop_last: set ``True`` to drop the last incomplete batch, | |||
if the dataset size is not divisible by the batch size. If ``False`` and | |||
the size of dataset is not divisible by the batch_size, then the last batch will | |||
be smaller. Default: False | |||
num_samples: number of samples assigned to one rank. | |||
weights: weights for sampling indices; they can be unnormalized. | |||
world_size: number of ranks. | |||
rank: rank id, a non-negative integer within 0 and ``world_size``. | |||
seed: seed for random operators. | |||
""" | |||
def __init__( | |||
@@ -59,15 +59,13 @@ class _PlasmaStoreManager: | |||
class PlasmaShmQueue: | |||
def __init__(self, maxsize: int = 0): | |||
r""" | |||
Use pyarrow in-memory plasma store to implement shared memory queue. | |||
r"""Use pyarrow in-memory plasma store to implement shared memory queue. | |||
Compared to the native `multiprocessing.Queue`, `PlasmaShmQueue` avoids pickle/unpickle | |||
and communication overhead, leading to better performance in multi-process | |||
applications. | |||
:type maxsize: int | |||
:param maxsize: maximum size of the queue, `None` means no limit. (default: ``None``) | |||
Args: | |||
maxsize: maximum size of the queue; ``0`` means no limit. Default: 0 | |||
""" | |||
# Lazy start the plasma store manager | |||
@@ -11,9 +11,7 @@ from typing import Sequence, Tuple | |||
class Transform(ABC): | |||
""" | |||
Rewrite apply method in subclass. | |||
""" | |||
r"""Rewrite apply method in subclass.""" | |||
def apply_batch(self, inputs: Sequence[Tuple]): | |||
return tuple(self.apply(input) for input in inputs) | |||
@@ -15,7 +15,7 @@ import numpy as np | |||
def wrap_keepdims(func): | |||
"""Wraper to keep the dimension of input images unchanged.""" | |||
r"""Wraper to keep the dimension of input images unchanged.""" | |||
@functools.wraps(func) | |||
def wrapper(image, *args, **kwargs): | |||
@@ -33,41 +33,47 @@ def wrap_keepdims(func): | |||
@wrap_keepdims | |||
def to_gray(image): | |||
r""" | |||
Change BGR format image's color space to gray. | |||
r"""Change BGR format image's color space to gray. | |||
:param image: input BGR format image, with `(H, W, C)` shape. | |||
:return: gray format image, with `(H, W, C)` shape. | |||
Args: | |||
image: input BGR format image, with `(H, W, C)` shape. | |||
Returns: | |||
gray format image, with `(H, W, C)` shape. | |||
""" | |||
return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) | |||
@wrap_keepdims | |||
def to_bgr(image): | |||
r""" | |||
Change gray format image's color space to BGR. | |||
r"""Change gray format image's color space to BGR. | |||
Args: | |||
image: input Gray format image, with `(H, W, C)` shape. | |||
:param image: input Gray format image, with `(H, W, C)` shape. | |||
:return: BGR format image, with `(H, W, C)` shape. | |||
Returns: | |||
BGR format image, with `(H, W, C)` shape. | |||
""" | |||
return cv2.cvtColor(image, cv2.COLOR_GRAY2BGR) | |||
@wrap_keepdims | |||
def pad(input, size, value): | |||
r""" | |||
Pad input data with *value* and given *size*. | |||
:param input: input data, with `(H, W, C)` shape. | |||
:param size: padding size of input data, it could be integer or sequence. | |||
If it is an integer, the input data will be padded in four directions. | |||
If it is a sequence contains two integer, the bottom and right side | |||
of input data will be padded. | |||
If it is a sequence contains four integer, the top, bottom, left, right | |||
side of input data will be padded with given size. | |||
:param value: padding value of data, could be a sequence of int or float. | |||
If it is float value, the dtype of image will be casted to float32 also. | |||
:return: padded image. | |||
r"""Pad input data with *value* and given *size*. | |||
Args: | |||
input: input data, with `(H, W, C)` shape. | |||
size: padding size of input data, it could be integer or sequence. | |||
If it is an integer, the input data will be padded in four directions. | |||
If it is a sequence containing two integers, the bottom and right side | |||
of input data will be padded. | |||
If it is a sequence containing four integers, the top, bottom, left, right | |||
side of input data will be padded with given size. | |||
value: padding value of data, could be a sequence of int or float. | |||
If it is a float value, the dtype of the image will also be cast to float32. | |||
Returns: | |||
padded image. | |||
""" | |||
if isinstance(size, int): | |||
size = (size, size, size, size) | |||
@@ -80,32 +86,33 @@ def pad(input, size, value): | |||
@wrap_keepdims | |||
def flip(image, flipCode): | |||
r""" | |||
Accordding to the flipCode (the type of flip), flip the input image. | |||
:param image: input image, with `(H, W, C)` shape. | |||
:param flipCode: code that indicates the type of flip. | |||
r"""Accordding to the flipCode (the type of flip), flip the input image. | |||
* 1 : Flip horizontally | |||
Args: | |||
image: input image, with `(H, W, C)` shape. | |||
flipCode: code that indicates the type of flip. | |||
* 0 : Flip vertically | |||
* 1 : Flip horizontally | |||
* 0 : Flip vertically | |||
* -1: Flip horizontally and vertically | |||
* -1: Flip horizontally and vertically | |||
:return: BGR format image, with `(H, W, C)` shape. | |||
Returns: | |||
BGR format image, with `(H, W, C)` shape. | |||
""" | |||
return cv2.flip(image, flipCode=flipCode) | |||
@wrap_keepdims | |||
def resize(input, size, interpolation=cv2.INTER_LINEAR): | |||
r""" | |||
Resize the input data to given size. | |||
r"""Resize the input data to given size. | |||
Args: | |||
input: input data, could be image or masks, with `(H, W, C)` shape. | |||
size: target size of input data, with (height, width) shape. | |||
interpolation: interpolation method. | |||
:param input: input data, could be image or masks, with `(H, W, C)` shape. | |||
:param size: target size of input data, with (height, width) shape. | |||
:param interpolation: interpolation method. | |||
:return: resized data, with `(H, W, C)` shape. | |||
Returns: | |||
resized data, with `(H, W, C)` shape. | |||
""" | |||
if len(size) != 2: | |||
raise ValueError("resize needs (h, w), but got {}".format(size)) | |||
@@ -42,36 +42,36 @@ __all__ = [ | |||
class VisionTransform(Transform): | |||
r""" | |||
Base class of all transforms used in computer vision. | |||
r"""Base class of all transforms used in computer vision. | |||
Calling logic: apply_batch() -> apply() -> _apply_image() and other _apply_*() | |||
methods. If you want to implement a self-defined transform method for images, | |||
override the _apply_image method in a subclass. | |||
:param order: input type order. Input is a tuple containing different structures, | |||
order is used to specify the order of structures. For example, if your input | |||
is (image, boxes) type, then the ``order`` should be ("image", "boxes"). | |||
Current available strings and data type are describe below: | |||
* "image": input image, with shape of `(H, W, C)`. | |||
* "coords": coordinates, with shape of `(N, 2)`. | |||
* "boxes": bounding boxes, with shape of `(N, 4)`, "xyxy" format, | |||
the 1st "xy" represents top left point of a box, | |||
the 2nd "xy" represents right bottom point. | |||
* "mask": map used for segmentation, with shape of `(H, W, 1)`. | |||
* "keypoints": keypoints with shape of `(N, K, 3)`, N for number of instances, | |||
and K for number of keypoints in one instance. The first two dimensions | |||
of last axis is coordinate of keypoints and the the 3rd dimension is | |||
the label of keypoints. | |||
* "polygons": a sequence containing numpy arrays, its length is the number of instances. | |||
Each numpy array represents polygon coordinate of one instance. | |||
* "category": categories for some data type. For example, "image_category" | |||
means category of the input image and "boxes_category" means categories of | |||
bounding boxes. | |||
* "info": information for images such as image shapes and image path. | |||
You can also customize your data types only if you implement the corresponding | |||
_apply_*() methods, otherwise ``NotImplementedError`` will be raised. | |||
Args: | |||
order: input type order. Input is a tuple containing different structures, | |||
order is used to specify the order of structures. For example, if your input | |||
is (image, boxes) type, then the ``order`` should be ("image", "boxes"). | |||
Currently available strings and data types are described below: | |||
* "image": input image, with shape of `(H, W, C)`. | |||
* "coords": coordinates, with shape of `(N, 2)`. | |||
* "boxes": bounding boxes, with shape of `(N, 4)`, "xyxy" format, | |||
the 1st "xy" represents top left point of a box, | |||
the 2nd "xy" represents right bottom point. | |||
* "mask": map used for segmentation, with shape of `(H, W, 1)`. | |||
* "keypoints": keypoints with shape of `(N, K, 3)`, N for number of instances, | |||
and K for the number of keypoints in one instance. The first two dimensions | |||
of the last axis are the coordinates of the keypoints and the 3rd dimension is | |||
the label of the keypoints. | |||
* "polygons": a sequence containing numpy arrays, its length is the number of instances. | |||
Each numpy array represents the polygon coordinates of one instance. | |||
* "category": categories for some data type. For example, "image_category" | |||
means category of the input image and "boxes_category" means categories of | |||
bounding boxes. | |||
* "info": information for images such as image shapes and image path. | |||
You can also customize your own data types, as long as you implement the corresponding | |||
_apply_*() methods; otherwise ``NotImplementedError`` will be raised. | |||
""" | |||
def __init__(self, order=None): | |||
@@ -154,13 +154,13 @@ class VisionTransform(Transform): | |||
class ToMode(VisionTransform): | |||
r""" | |||
Change input data to a target mode. | |||
r"""Change input data to a target mode. | |||
For example, most transforms use HWC-mode images, | |||
while the neural network might use CHW-mode input tensors. | |||
:param mode: output mode of input. Default: "CHW" | |||
:param order: the same with :class:`VisionTransform` | |||
Args: | |||
mode: output mode of input. Default: "CHW" | |||
order: the same with :class:`VisionTransform` | |||
""" | |||
def __init__(self, mode="CHW", *, order=None): | |||
@@ -183,32 +183,31 @@ class ToMode(VisionTransform): | |||
class Compose(VisionTransform): | |||
r""" | |||
Composes several transforms together. | |||
:param transforms: list of :class:`VisionTransform` to compose. | |||
:param batch_compose: whether use shuffle_indices for batch data or not. | |||
If True, use original input sequence. | |||
Otherwise, the shuffle_indices will be used for transforms. | |||
:param shuffle_indices: indices used for random shuffle, start at 1. | |||
For example, if shuffle_indices is [(1, 3), (2, 4)], then the 1st and 3rd transform | |||
will be random shuffled, the 2nd and 4th transform will also be shuffled. | |||
:param order: the same with :class:`VisionTransform` | |||
r"""Composes several transforms together. | |||
Args: | |||
transforms: list of :class:`VisionTransform` to compose. | |||
batch_compose: whether to use shuffle_indices for batch data or not. | |||
If True, use the original input sequence. | |||
Otherwise, shuffle_indices will be used for the transforms. | |||
shuffle_indices: indices used for random shuffle, start at 1. | |||
For example, if shuffle_indices is [(1, 3), (2, 4)], then the 1st and 3rd transforms | |||
will be randomly shuffled, and the 2nd and 4th transforms will also be shuffled. | |||
order: the same with :class:`VisionTransform` | |||
Examples: | |||
.. testcode:: | |||
from megengine.data.transform import RandomHorizontalFlip, RandomVerticalFlip, CenterCrop, ToMode, Compose | |||
transform_func = Compose([ | |||
RandomHorizontalFlip(), | |||
RandomVerticalFlip(), | |||
CenterCrop(100), | |||
ToMode("CHW"), | |||
], | |||
shuffle_indices=[(1, 2, 3)] | |||
) | |||
.. testcode:: | |||
from megengine.data.transform import RandomHorizontalFlip, RandomVerticalFlip, CenterCrop, ToMode, Compose | |||
transform_func = Compose([ | |||
RandomHorizontalFlip(), | |||
RandomVerticalFlip(), | |||
CenterCrop(100), | |||
ToMode("CHW"), | |||
], | |||
shuffle_indices=[(1, 2, 3)] | |||
) | |||
""" | |||
def __init__( | |||
@@ -260,13 +259,13 @@ class Compose(VisionTransform): | |||
class TorchTransformCompose(VisionTransform): | |||
r""" | |||
Compose class used for transforms in torchvision, only support PIL image, | |||
r"""Compose class used for transforms in torchvision, only support PIL image, | |||
some transforms with tensor in torchvision are not supported, | |||
such as Normalize and ToTensor in torchvision. | |||
:param transforms: the same with ``Compose``. | |||
:param order: the same with :class:`VisionTransform`. | |||
Args: | |||
transforms: the same with ``Compose``. | |||
order: the same with :class:`VisionTransform`. | |||
""" | |||
def __init__(self, transforms, *, order=None): | |||
@@ -302,19 +301,19 @@ class TorchTransformCompose(VisionTransform): | |||
class Pad(VisionTransform): | |||
r""" | |||
Pad the input data. | |||
:param size: padding size of input image, it could be integer or sequence. | |||
If it is an integer, the input image will be padded in four directions. | |||
If it is a sequence containing two integers, the bottom and right side | |||
of image will be padded. | |||
If it is a sequence containing four integers, the top, bottom, left, right | |||
side of image will be padded with given size. | |||
:param value: padding value of image, could be a sequence of int or float. | |||
if it is float value, the dtype of image will be casted to float32 also. | |||
:param mask_value: padding value of segmentation map. | |||
:param order: the same with :class:`VisionTransform`. | |||
r"""Pad the input data. | |||
Args: | |||
size: padding size of input image, it could be integer or sequence. | |||
If it is an integer, the input image will be padded in four directions. | |||
If it is a sequence containing two integers, the bottom and right side | |||
of image will be padded. | |||
If it is a sequence containing four integers, the top, bottom, left, right | |||
side of image will be padded with given size. | |||
value: padding value of image, could be a sequence of int or float. | |||
If it is a float value, the dtype of the image will also be cast to float32. | |||
mask_value: padding value of segmentation map. | |||
order: the same with :class:`VisionTransform`. | |||
""" | |||
def __init__(self, size=0, value=0, mask_value=0, *, order=None): | |||
@@ -350,18 +349,18 @@ class Pad(VisionTransform): | |||
class Resize(VisionTransform): | |||
r""" | |||
Resize the input data. | |||
:param output_size: target size of image, with (height, width) shape. | |||
:param interpolation: interpolation method. All methods are listed below: | |||
* cv2.INTER_NEAREST – a nearest-neighbor interpolation. | |||
* cv2.INTER_LINEAR – a bilinear interpolation (used by default). | |||
* cv2.INTER_AREA – resampling using pixel area relation. | |||
* cv2.INTER_CUBIC – a bicubic interpolation over 4×4 pixel neighborhood. | |||
* cv2.INTER_LANCZOS4 – a Lanczos interpolation over 8×8 pixel neighborhood. | |||
:param order: the same with :class:`VisionTransform`. | |||
r"""Resize the input data. | |||
Args: | |||
output_size: target size of image, with (height, width) shape. | |||
interpolation: interpolation method. All methods are listed below: | |||
* cv2.INTER_NEAREST – a nearest-neighbor interpolation. | |||
* cv2.INTER_LINEAR – a bilinear interpolation (used by default). | |||
* cv2.INTER_AREA – resampling using pixel area relation. | |||
* cv2.INTER_CUBIC – a bicubic interpolation over 4×4 pixel neighborhood. | |||
* cv2.INTER_LANCZOS4 – a Lanczos interpolation over 8×8 pixel neighborhood. | |||
order: the same with :class:`VisionTransform`. | |||
""" | |||
def __init__(self, output_size, interpolation=cv2.INTER_LINEAR, *, order=None): | |||
@@ -410,9 +409,7 @@ class Resize(VisionTransform): | |||
class ShortestEdgeResize(VisionTransform): | |||
r""" | |||
Resize the input data with specified shortset edge. | |||
""" | |||
r"""Resize the input data with specified shortset edge.""" | |||
def __init__( | |||
self, | |||
@@ -481,11 +478,11 @@ class ShortestEdgeResize(VisionTransform): | |||
class RandomResize(VisionTransform): | |||
r""" | |||
Resize the input data randomly. | |||
r"""Resize the input data randomly. | |||
:param scale_range: range of scaling. | |||
:param order: the same with :class:`VisionTransform`. | |||
Args: | |||
scale_range: range of scaling. | |||
order: the same with :class:`VisionTransform`. | |||
""" | |||
def __init__(self, scale_range, interpolation=cv2.INTER_LINEAR, *, order=None): | |||
@@ -526,15 +523,15 @@ class RandomResize(VisionTransform): | |||
class RandomCrop(VisionTransform): | |||
r""" | |||
Crop the input data randomly. Before applying the crop transform, | |||
r"""Crop the input data randomly. Before applying the crop transform, | |||
pad the image first. If target size is still bigger than the size of | |||
padded image, pad the image size to target size. | |||
:param output_size: target size of output image, with (height, width) shape. | |||
:param padding_size: the same with `size` in ``Pad``. | |||
:param padding_value: the same with `value` in ``Pad``. | |||
:param order: the same with :class:`VisionTransform`. | |||
Args: | |||
output_size: target size of output image, with (height, width) shape. | |||
padding_size: the same with `size` in ``Pad``. | |||
padding_value: the same with `value` in ``Pad``. | |||
order: the same with :class:`VisionTransform`. | |||
""" | |||
def __init__( | |||
@@ -584,16 +581,16 @@ class RandomCrop(VisionTransform): | |||
class RandomResizedCrop(VisionTransform): | |||
r""" | |||
Crop the input data to random size and aspect ratio. | |||
r"""Crop the input data to random size and aspect ratio. | |||
A crop of random size (default: of 0.08 to 1.0) of the original size and a random | |||
aspect ratio (default: of 3/4 to 1.33) of the original aspect ratio is made. | |||
After applying the crop transform, the input data will be resized to the given size. | |||
:param output_size: target size of output image, with (height, width) shape. | |||
:param scale_range: range of size of the origin size cropped. Default: (0.08, 1.0) | |||
:param ratio_range: range of aspect ratio of the origin aspect ratio cropped. Default: (0.75, 1.33) | |||
:param order: the same with :class:`VisionTransform`. | |||
Args: | |||
output_size: target size of output image, with (height, width) shape. | |||
scale_range: range of size of the origin size cropped. Default: (0.08, 1.0) | |||
ratio_range: range of aspect ratio of the origin aspect ratio cropped. Default: (0.75, 1.33) | |||
order: the same with :class:`VisionTransform`. | |||
""" | |||
def __init__( | |||
@@ -674,11 +671,11 @@ class RandomResizedCrop(VisionTransform): | |||
class CenterCrop(VisionTransform): | |||
r""" | |||
Crops the given the input data at the center. | |||
r"""Crops the given the input data at the center. | |||
:param output_size: target size of output image, with (height, width) shape. | |||
:param order: the same with :class:`VisionTransform`. | |||
Args: | |||
output_size: target size of output image, with (height, width) shape. | |||
order: the same with :class:`VisionTransform`. | |||
""" | |||
def __init__(self, output_size, *, order=None): | |||
@@ -718,11 +715,11 @@ class CenterCrop(VisionTransform): | |||
class RandomHorizontalFlip(VisionTransform): | |||
r""" | |||
Horizontally flip the input data randomly with a given probability. | |||
r"""Horizontally flip the input data randomly with a given probability. | |||
:param p: probability of the input data being flipped. Default: 0.5 | |||
:param order: the same with :class:`VisionTransform`. | |||
Args: | |||
prob: probability of the input data being flipped. Default: 0.5 | |||
order: the same with :class:`VisionTransform`. | |||
""" | |||
def __init__(self, prob: float = 0.5, *, order=None): | |||
@@ -751,11 +748,11 @@ class RandomHorizontalFlip(VisionTransform): | |||
class RandomVerticalFlip(VisionTransform): | |||
r""" | |||
Vertically flip the input data randomly with a given probability. | |||
r"""Vertically flip the input data randomly with a given probability. | |||
:param p: probability of the input data being flipped. Default: 0.5 | |||
:param order: the same with :class:`VisionTransform`. | |||
Args: | |||
prob: probability of the input data being flipped. Default: 0.5 | |||
order: the same with :class:`VisionTransform`. | |||
""" | |||
def __init__(self, prob: float = 0.5, *, order=None): | |||
@@ -784,15 +781,15 @@ class RandomVerticalFlip(VisionTransform): | |||
class Normalize(VisionTransform): | |||
r""" | |||
Normalize the input data with mean and standard deviation. | |||
r"""Normalize the input data with mean and standard deviation. | |||
Given mean: ``(M1,...,Mn)`` and std: ``(S1,..,Sn)`` for ``n`` channels, | |||
this transform will normalize each channel of the input data. | |||
``output[channel] = (input[channel] - mean[channel]) / std[channel]`` | |||
:param mean: sequence of means for each channel. | |||
:param std: sequence of standard deviations for each channel. | |||
:param order: the same with :class:`VisionTransform`. | |||
Args: | |||
mean: sequence of means for each channel. | |||
std: sequence of standard deviations for each channel. | |||
order: the same with :class:`VisionTransform`. | |||
""" | |||
def __init__(self, mean=0.0, std=1.0, *, order=None): | |||
@@ -811,13 +808,13 @@ class Normalize(VisionTransform): | |||
class GaussianNoise(VisionTransform): | |||
r""" | |||
Add random gaussian noise to the input data. | |||
r"""Add random gaussian noise to the input data. | |||
Gaussian noise is generated with given mean and std. | |||
:param mean: Gaussian mean used to generate noise. | |||
:param std: Gaussian standard deviation used to generate noise. | |||
:param order: the same with :class:`VisionTransform` | |||
Args: | |||
mean: Gaussian mean used to generate noise. | |||
std: Gaussian standard deviation used to generate noise. | |||
order: the same with :class:`VisionTransform` | |||
""" | |||
def __init__(self, mean=0.0, std=1.0, *, order=None): | |||
@@ -839,12 +836,12 @@ class GaussianNoise(VisionTransform): | |||
class BrightnessTransform(VisionTransform): | |||
r""" | |||
Adjust brightness of the input data. | |||
r"""Adjust brightness of the input data. | |||
:param value: how much to adjust the brightness. Can be any | |||
non negative number. 0 gives the original image. | |||
:param order: the same with :class:`VisionTransform`. | |||
Args: | |||
value: how much to adjust the brightness. Can be any | |||
non-negative number. 0 gives the original image. | |||
order: the same with :class:`VisionTransform`. | |||
""" | |||
def __init__(self, value, *, order=None): | |||
@@ -871,12 +868,12 @@ class BrightnessTransform(VisionTransform): | |||
class ContrastTransform(VisionTransform): | |||
r""" | |||
Adjust contrast of the input data. | |||
r"""Adjust contrast of the input data. | |||
:param value: how much to adjust the contrast. Can be any | |||
non negative number. 0 gives the original image. | |||
:param order: the same with :class:`VisionTransform`. | |||
Args: | |||
value: how much to adjust the contrast. Can be any | |||
non-negative number. 0 gives the original image. | |||
order: the same with :class:`VisionTransform`. | |||
""" | |||
def __init__(self, value, *, order=None): | |||
@@ -903,12 +900,12 @@ class ContrastTransform(VisionTransform): | |||
class SaturationTransform(VisionTransform): | |||
r""" | |||
Adjust saturation of the input data. | |||
r"""Adjust saturation of the input data. | |||
:param value: how much to adjust the saturation. Can be any | |||
non negative number. 0 gives the original image. | |||
:param order: the same with :class:`VisionTransform`. | |||
Args: | |||
value: how much to adjust the saturation. Can be any | |||
non-negative number. 0 gives the original image. | |||
order: the same with :class:`VisionTransform`. | |||
""" | |||
def __init__(self, value, *, order=None): | |||
@@ -935,12 +932,12 @@ class SaturationTransform(VisionTransform): | |||
class HueTransform(VisionTransform): | |||
r""" | |||
Adjust hue of the input data. | |||
r"""Adjust hue of the input data. | |||
:param value: how much to adjust the hue. Can be any number | |||
between 0 and 0.5, 0 gives the original image. | |||
:param order: the same with :class:`VisionTransform`. | |||
Args: | |||
value: how much to adjust the hue. Can be any number | |||
between 0 and 0.5, 0 gives the original image. | |||
order: the same with :class:`VisionTransform`. | |||
""" | |||
def __init__(self, value, *, order=None): | |||
@@ -974,22 +971,22 @@ class HueTransform(VisionTransform): | |||
class ColorJitter(VisionTransform): | |||
r""" | |||
Randomly change the brightness, contrast, saturation and hue of an image. | |||
:param brightness: how much to jitter brightness. | |||
Chosen uniformly from [max(0, 1 - brightness), 1 + brightness] | |||
or the given [min, max]. Should be non negative numbers. | |||
:param contrast: how much to jitter contrast. | |||
Chosen uniformly from [max(0, 1 - contrast), 1 + contrast] | |||
or the given [min, max]. Should be non negative numbers. | |||
:param saturation: how much to jitter saturation. | |||
Chosen uniformly from [max(0, 1 - saturation), 1 + saturation] | |||
or the given [min, max]. Should be non negative numbers. | |||
:param hue: how much to jitter hue. | |||
Chosen uniformly from [-hue, hue] or the given [min, max]. | |||
Should have 0<= hue <= 0.5 or -0.5 <= min <= max <= 0.5. | |||
:param order: the same with :class:`VisionTransform`. | |||
r"""Randomly change the brightness, contrast, saturation and hue of an image. | |||
Args: | |||
brightness: how much to jitter brightness. | |||
Chosen uniformly from [max(0, 1 - brightness), 1 + brightness] | |||
or the given [min, max]. Should be non-negative numbers. | |||
contrast: how much to jitter contrast. | |||
Chosen uniformly from [max(0, 1 - contrast), 1 + contrast] | |||
or the given [min, max]. Should be non-negative numbers. | |||
saturation: how much to jitter saturation. | |||
Chosen uniformly from [max(0, 1 - saturation), 1 + saturation] | |||
or the given [min, max]. Should be non-negative numbers. | |||
hue: how much to jitter hue. | |||
Chosen uniformly from [-hue, hue] or the given [min, max]. | |||
Should have 0 <= hue <= 0.5 or -0.5 <= min <= max <= 0.5. | |||
order: the same with :class:`VisionTransform`. | |||
""" | |||
def __init__(self, brightness=0, contrast=0, saturation=0, hue=0, *, order=None): | |||
@@ -1014,11 +1011,10 @@ class ColorJitter(VisionTransform): | |||
class Lighting(VisionTransform): | |||
r""" | |||
Apply AlexNet-Style "lighting" augmentation to input data. | |||
r"""Apply AlexNet-Style "lighting" augmentation to input data. | |||
Input images are assumed to have 'RGB' channel order. | |||
The degree of color jittering is randomly sampled via a normal distribution, | |||
with standard deviation given by the scale parameter. | |||
""" | |||
@@ -54,10 +54,10 @@ _device_type_set = {"cpu", "gpu", "xpu", "rocm"} | |||
def get_device_count(device_type: str) -> int: | |||
""" | |||
Gets number of devices installed on this system. | |||
r"""Gets number of devices installed on this system. | |||
:param device_type: device type, one of 'gpu' or 'cpu' | |||
Args: | |||
device_type: device type, one of 'cpu', 'gpu', 'xpu' or 'rocm'. | |||
""" | |||
assert device_type in _device_type_set, "device must be one of {}".format( | |||
_device_type_set | |||
@@ -67,73 +67,59 @@ def get_device_count(device_type: str) -> int: | |||
def is_cuda_available() -> bool: | |||
""" | |||
Returns whether cuda device is available on this system. | |||
""" | |||
r"""Returns whether cuda device is available on this system.""" | |||
t = _str2device_type("gpu") | |||
return CompNode._get_device_count(t, False) > 0 | |||
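A short sketch combining the availability check with ``get_device_count``; re-export of these helpers at the top-level ``megengine`` package is an assumption.

.. code-block::

    import megengine as mge

    if mge.is_cuda_available():
        print("visible GPUs:", mge.get_device_count("gpu"))
    else:
        print("CPU devices visible to MegEngine:", mge.get_device_count("cpu"))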
def is_cambricon_available() -> bool: | |||
""" | |||
Returns whether cambricon device is available on this system. | |||
""" | |||
r"""Returns whether cambricon device is available on this system.""" | |||
t = _str2device_type("cambricon") | |||
return CompNode._get_device_count(t, False) > 0 | |||
def is_atlas_available() -> bool: | |||
""" | |||
Returns whether atlas device is available on this system. | |||
""" | |||
r"""Returns whether atlas device is available on this system.""" | |||
t = _str2device_type("atlas") | |||
return CompNode._get_device_count(t, False) > 0 | |||
def is_rocm_available() -> bool: | |||
"""Returns whether rocm device is available on this system. | |||
""" | |||
r"""Returns whether rocm device is available on this system.""" | |||
t = _str2device_type("rocm") | |||
return CompNode._get_device_count(t, False) > 0 | |||
def set_default_device(device: str = "xpux"): | |||
r""" | |||
Sets default computing node. | |||
:param device: default device type. The type can be 'cpu0', 'cpu1', etc., | |||
or 'gpu0', 'gpu1', etc., to specify the particular cpu or gpu to use. | |||
'cpux' and 'gpux' can also be used to specify any number of cpu or gpu devices. | |||
'multithread' device type is avaliable when inference, which implements | |||
multi-threading parallelism at the operator level. For example, | |||
'multithread4' will compute with 4 threads. | |||
The default value is 'xpux' to specify any device available. The priority of using gpu is higher when both gpu and cpu are available. | |||
It can also be set by environment variable `MGE_DEFAULT_DEVICE`. | |||
r"""Sets default computing node. | |||
Args: | |||
device: default device type. | |||
Note: | |||
* The type can be 'cpu0', 'cpu1', etc., or 'gpu0', 'gpu1', etc., | |||
to specify the particular CPU or GPU to use. | |||
* 'cpux' and 'gpux' can also be used to specify any number of CPU or GPU devices. | |||
* The default value is 'xpux' to specify any device available. | |||
* The priority of using GPU is higher when both GPU and CPU are available. | |||
* 'multithread' device type is available for inference, | |||
which implements multi-threading parallelism at the operator level. | |||
For example, 'multithread4' will compute with 4 threads. | |||
* It can also be set by environment variable ``MGE_DEFAULT_DEVICE``. | |||
""" | |||
assert _valid_device(device), "Invalid device name {}".format(device) | |||
CompNode._set_default_device(device) | |||
def get_default_device() -> str: | |||
r""" | |||
Gets default computing node. | |||
r"""Gets default computing node. | |||
It returns the value set by :func:`~.set_default_device`. | |||
""" | |||
return CompNode._get_default_device() | |||
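A minimal sketch of the default-device round trip described above; top-level re-exports are assumed.

.. code-block::

    import megengine as mge

    mge.set_default_device("cpu0")   # pin newly created tensors to the first CPU device
    print(mge.get_default_device())  # -> "cpu0"

    mge.set_default_device("xpux")   # back to "any available device"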
def get_mem_status_bytes(device: Optional[str] = None): | |||
r""" | |||
Get total and free memory on the computing device in bytes. | |||
""" | |||
r"""Get total and free memory on the computing device in bytes.""" | |||
if device is None: | |||
device = get_default_device() | |||
tot, free = CompNode(device).get_mem_status_bytes | |||
@@ -150,15 +136,17 @@ def set_prealloc_config( | |||
growth_factor=2.0, | |||
device_type=DeviceType.CUDA, | |||
): | |||
""" | |||
Specifies how to pre-allocate from raw device allocator. | |||
:param alignment: specifies the alignment in bytes. | |||
:param min_req: min request size in bytes. | |||
:param max_overhead: max overhead above required size in bytes. | |||
:param growth_factor: `request size / cur allocated` | |||
:param device_type: the device type | |||
r"""Specifies how to pre-allocate from raw device allocator. | |||
Args: | |||
alignment: specifies the alignment in bytes. | |||
min_req: min request size in bytes. | |||
max_overhead: max overhead above required size in bytes. | |||
growth_factor: `request size / cur allocated` | |||
device_type: the device type | |||
""" | |||
assert alignment > 0 | |||
assert min_req > 0 | |||
@@ -31,17 +31,15 @@ from .server import Client, Server | |||
@mproperty | |||
def backend(mod): | |||
r""" | |||
Get or set backend of collective communication. | |||
r"""Get or set backend of collective communication. | |||
Available backends are ['nccl', 'shm', 'rccl'] | |||
Examples: | |||
.. code-block:: | |||
import megengine.distributed as dist | |||
dist.backend = "nccl" | |||
.. code-block:: | |||
import megengine.distributed as dist | |||
dist.backend = "nccl" | |||
""" | |||
assert group._sd, "please call init_process_group first" | |||
return group._sd.backend | |||
@@ -50,7 +50,7 @@ def _backend(): | |||
def collective_comm(inp, mode, group, device): | |||
"""Helper function for applying collective communication functions.""" | |||
r"""Helper function for applying collective communication functions.""" | |||
assert isinstance(group, Group) | |||
if group is None: | |||
return inp | |||
@@ -158,8 +158,7 @@ class _ReduceSum(Function): | |||
def reduce_sum( | |||
inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = None, | |||
) -> Tensor: | |||
r""" | |||
Reduce tensor data across the specified group by sum. | |||
r"""Reduce tensor data across the specified group by sum. | |||
Only root process will receive the final result. | |||
Args: | |||
@@ -176,22 +175,20 @@ def reduce_sum( | |||
Reduced tensor if in root process, None in other processes. | |||
Examples: | |||
.. code-block:: | |||
input = Tensor([rank]) | |||
# Rank 0 # input: Tensor([0]) | |||
# Rank 1 # input: Tensor([1]) | |||
output = reduce_sum(input) | |||
# Rank 0 # output: Tensor([1]) | |||
# Rank 1 # output: None | |||
input = Tensor([rank]) | |||
group = Group([1, 0]) # first rank is root | |||
output = reduce_sum(input, group) | |||
# Rank 0 # output: None | |||
# Rank 1 # output: Tensor([1]) | |||
.. code-block:: | |||
input = Tensor([rank]) | |||
# Rank 0 # input: Tensor([0]) | |||
# Rank 1 # input: Tensor([1]) | |||
output = reduce_sum(input) | |||
# Rank 0 # output: Tensor([1]) | |||
# Rank 1 # output: None | |||
input = Tensor([rank]) | |||
group = Group([1, 0]) # first rank is root | |||
output = reduce_sum(input, group) | |||
# Rank 0 # output: None | |||
# Rank 1 # output: Tensor([1]) | |||
""" | |||
op = _ReduceSum(group, device) | |||
(out,) = apply(op, inp) | |||
@@ -222,8 +219,7 @@ class _Broadcast(Function): | |||
def broadcast( | |||
inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = None, | |||
) -> Tensor: | |||
r""" | |||
Broadcast tensor data from root process to others. | |||
r"""Broadcast tensor data from root process to others. | |||
Args: | |||
inp: Input tensor. | |||
@@ -240,21 +236,20 @@ def broadcast( | |||
Examples: | |||
.. code-block:: | |||
input = Tensor([rank]) | |||
# Rank 0 # input: Tensor([0]) | |||
# Rank 1 # input: Tensor([1]) | |||
output = broadcast(input) | |||
# Rank 0 # output: Tensor([0]) | |||
# Rank 1 # output: Tensor([0]) | |||
.. code-block:: | |||
input = Tensor([rank]) | |||
group = Group([1, 0]) # first rank is root | |||
output = broadcast(input, group) | |||
# Rank 0 # output: Tensor([1]) | |||
# Rank 1 # output: Tensor([1]) | |||
input = Tensor([rank]) | |||
# Rank 0 # input: Tensor([0]) | |||
# Rank 1 # input: Tensor([1]) | |||
output = broadcast(input) | |||
# Rank 0 # output: Tensor([0]) | |||
# Rank 1 # output: Tensor([0]) | |||
input = Tensor([rank]) | |||
group = Group([1, 0]) # first rank is root | |||
output = broadcast(input, group) | |||
# Rank 0 # output: Tensor([1]) | |||
# Rank 1 # output: Tensor([1]) | |||
""" | |||
shape, dtype = _bcast_shape_dtype(group, inp) | |||
if group.rank != 0: | |||
@@ -278,8 +273,7 @@ def _bcast_param( | |||
def all_gather( | |||
inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = None, axis=0, | |||
) -> Tensor: | |||
r""" | |||
Gather tensors across the specified group and concat them at first dimension. | |||
r"""Gather tensors across the specified group and concat them at first dimension. | |||
Args: | |||
inp: Input tensor. | |||
@@ -298,21 +292,20 @@ def all_gather( | |||
Examples: | |||
.. code-block:: | |||
input = Tensor([rank]) | |||
# Rank 0 # input: Tensor([0]) | |||
# Rank 1 # input: Tensor([1]) | |||
output = all_gather(input) | |||
# Rank 0 # output: Tensor([0 1]) | |||
# Rank 1 # output: Tensor([0 1]) | |||
.. code-block:: | |||
input = Tensor([rank]) | |||
group = Group([1, 0]) | |||
output = all_gather(input, group) | |||
# Rank 0 # output: Tensor([1 0]) | |||
# Rank 1 # output: Tensor([1 0]) | |||
input = Tensor([rank]) | |||
# Rank 0 # input: Tensor([0]) | |||
# Rank 1 # input: Tensor([1]) | |||
output = all_gather(input) | |||
# Rank 0 # output: Tensor([0 1]) | |||
# Rank 1 # output: Tensor([0 1]) | |||
input = Tensor([rank]) | |||
group = Group([1, 0]) | |||
output = all_gather(input, group) | |||
# Rank 0 # output: Tensor([1 0]) | |||
# Rank 1 # output: Tensor([1 0]) | |||
""" | |||
mode = CollectiveComm.Mode.ALL_GATHER | |||
out = collective_comm(inp, mode, group, device) | |||
@@ -338,8 +331,7 @@ def all_gather( | |||
def reduce_scatter_sum( | |||
inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = None, axis=0 | |||
) -> Tensor: | |||
r""" | |||
Reduce tensors across the specified group by sum and split them at first dimension. | |||
r"""Reduce tensors across the specified group by sum and split them at first dimension. | |||
Args: | |||
inp: Input tensor. | |||
@@ -358,21 +350,20 @@ def reduce_scatter_sum( | |||
Examples: | |||
.. code-block:: | |||
input = Tensor([0 1]) | |||
# Rank 0 # input: Tensor([0 1]) | |||
# Rank 1 # input: Tensor([0 1]) | |||
output = reduce_scatter_sum(input) | |||
# Rank 0 # output: Tensor([0]) | |||
# Rank 1 # output: Tensor([2]) | |||
.. code-block:: | |||
input = Tensor([0 1]) | |||
group = Group([1, 0]) | |||
output = reduce_scatter_sum(input, group) | |||
# Rank 0 # output: Tensor([2]) | |||
# Rank 1 # output: Tensor([0]) | |||
input = Tensor([0 1]) | |||
# Rank 0 # input: Tensor([0 1]) | |||
# Rank 1 # input: Tensor([0 1]) | |||
output = reduce_scatter_sum(input) | |||
# Rank 0 # output: Tensor([0]) | |||
# Rank 1 # output: Tensor([2]) | |||
input = Tensor([0 1]) | |||
group = Group([1, 0]) | |||
output = reduce_scatter_sum(input, group) | |||
# Rank 0 # output: Tensor([2]) | |||
# Rank 1 # output: Tensor([0]) | |||
""" | |||
group_size = group.size if group is not None else 1 | |||
assert ( | |||
@@ -398,8 +389,7 @@ def reduce_scatter_sum( | |||
def all_reduce_sum( | |||
inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = None, | |||
) -> Tensor: | |||
r""" | |||
Reduce tensors across the specified group by sum. | |||
r"""Reduce tensors across the specified group by sum. | |||
Args: | |||
inp: Input tensor. | |||
@@ -416,15 +406,14 @@ def all_reduce_sum( | |||
Examples: | |||
.. code-block:: | |||
input = Tensor(rank) | |||
# Rank 0 # input: Tensor(0) | |||
# Rank 1 # input: Tensor(1) | |||
output = all_reduce_sum(input) | |||
# Rank 0 # output: Tensor(1) | |||
# Rank 1 # output: Tensor(1) | |||
.. code-block:: | |||
input = Tensor(rank) | |||
# Rank 0 # input: Tensor(0) | |||
# Rank 1 # input: Tensor(1) | |||
output = all_reduce_sum(input) | |||
# Rank 0 # output: Tensor(1) | |||
# Rank 1 # output: Tensor(1) | |||
""" | |||
mode = CollectiveComm.Mode.ALL_REDUCE_SUM | |||
return collective_comm(inp, mode, group, device) | |||
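For completeness, a hedged sketch of how these collectives are usually driven from a single script; ``dist.launcher`` and its ``n_gpus`` argument are assumptions about the surrounding distributed package.

.. code-block::

    import megengine as mge
    import megengine.distributed as dist
    import megengine.functional.distributed as fdist

    @dist.launcher(n_gpus=2)          # assumed helper that spawns one process per rank
    def worker():
        rank = dist.get_rank()
        x = mge.tensor([float(rank)])
        total = fdist.all_reduce_sum(x)   # with 2 ranks both end up with Tensor([1.])
        print(rank, total.numpy())

    worker()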
@@ -433,8 +422,7 @@ def all_reduce_sum( | |||
def all_reduce_max( | |||
inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = None, | |||
) -> Tensor: | |||
r""" | |||
Reduce tensors across the specified group by max. | |||
r"""Reduce tensors across the specified group by max. | |||
Args: | |||
inp: Input tensor. | |||
@@ -451,15 +439,14 @@ def all_reduce_max( | |||
Examples: | |||
.. code-block:: | |||
input = Tensor(rank) | |||
# Rank 0 # input: Tensor(0) | |||
# Rank 1 # input: Tensor(1) | |||
output = all_reduce_max(input) | |||
# Rank 0 # output: Tensor(1) | |||
# Rank 1 # output: Tensor(1) | |||
.. code-block:: | |||
input = Tensor(rank) | |||
# Rank 0 # input: Tensor(0) | |||
# Rank 1 # input: Tensor(1) | |||
output = all_reduce_max(input) | |||
# Rank 0 # output: Tensor(1) | |||
# Rank 1 # output: Tensor(1) | |||
""" | |||
mode = CollectiveComm.Mode.ALL_REDUCE_MAX | |||
return collective_comm(inp, mode, group, device) | |||
@@ -468,8 +455,7 @@ def all_reduce_max( | |||
def all_reduce_min( | |||
inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = None, | |||
) -> Tensor: | |||
r""" | |||
Reduce tensors across the specified group by min. | |||
r"""Reduce tensors across the specified group by min. | |||
Args: | |||
inp: Input tensor. | |||
@@ -486,15 +472,14 @@ def all_reduce_min( | |||
Examples: | |||
.. code-block:: | |||
input = Tensor(rank) | |||
# Rank 0 # input: Tensor(0) | |||
# Rank 1 # input: Tensor(1) | |||
output = all_reduce_min(input) | |||
# Rank 0 # output: Tensor(0) | |||
# Rank 1 # output: Tensor(0) | |||
.. code-block:: | |||
input = Tensor(rank) | |||
# Rank 0 # input: Tensor(0) | |||
# Rank 1 # input: Tensor(1) | |||
output = all_reduce_min(input) | |||
# Rank 0 # output: Tensor(0) | |||
# Rank 1 # output: Tensor(0) | |||
""" | |||
mode = CollectiveComm.Mode.ALL_REDUCE_MIN | |||
return collective_comm(inp, mode, group, device) | |||
@@ -520,8 +505,7 @@ class _Gather(Function): | |||
def gather( | |||
inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = None, axis=0, | |||
) -> Tensor: | |||
r""" | |||
Gather tensors across the specified group. | |||
r"""Gather tensors across the specified group. | |||
Only root process will receive the final result. | |||
Args: | |||
@@ -534,27 +518,23 @@ def gather( | |||
Specify "gpu0:1" to execute this operator on diffrent cuda stream, | |||
1 is stream id, and default stream id is 0. | |||
axis: The concat axis for collective_comm result | |||
The default axis is 0 | |||
Returns: | |||
Result tensor if in root process, None if in other process | |||
Examples: | |||
.. code-block:: | |||
input = Tensor([rank]) | |||
# Rank 0 # input: Tensor([0]) | |||
# Rank 1 # input: Tensor([1]) | |||
output = gather(input) | |||
# Rank 0 # output: Tensor([0 1]) | |||
# Rank 1 # output: None | |||
.. code-block:: | |||
input = Tensor([rank]) | |||
group = Group([1, 0]) # first rank is root | |||
output = gather(input, group) | |||
# Rank 0 # output: None | |||
# Rank 1 # output: Tensor([1 0]) | |||
input = Tensor([rank]) | |||
# Rank 0 # input: Tensor([0]) | |||
# Rank 1 # input: Tensor([1]) | |||
output = gather(input) | |||
# Rank 0 # output: Tensor([0 1]) | |||
# Rank 1 # output: None | |||
input = Tensor([rank]) | |||
group = Group([1, 0]) # first rank is root | |||
output = gather(input, group) | |||
# Rank 0 # output: None | |||
# Rank 1 # output: Tensor([1 0]) | |||
""" | |||
assert ( | |||
axis < inp.ndim | |||
@@ -607,8 +587,7 @@ class _Scatter(Function): | |||
def scatter( | |||
inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = None, axis=0, | |||
) -> Tensor: | |||
r""" | |||
Split tensor in root process at first dimension. | |||
r"""Split tensor in root process at first dimension. | |||
Args: | |||
inp: Input tensor. | |||
@@ -627,21 +606,20 @@ def scatter( | |||
Examples: | |||
.. code-block:: | |||
input = Tensor([0 1]) + rank*2 | |||
# Rank 0 # input: Tensor([0 1]) | |||
# Rank 1 # input: Tensor([2 3]) | |||
output = scatter(input) | |||
# Rank 0 # output: Tensor([0]) | |||
# Rank 1 # output: Tensor([1]) | |||
.. code-block:: | |||
input = Tensor([0 1]) + rank*2 | |||
group = Group([1, 0]) # first rank is root | |||
output = scatter(input, group) | |||
# Rank 0 # output: Tensor([3]) | |||
# Rank 1 # output: Tensor([2]) | |||
input = Tensor([0 1]) + rank*2 | |||
# Rank 0 # input: Tensor([0 1]) | |||
# Rank 1 # input: Tensor([2 3]) | |||
output = scatter(input) | |||
# Rank 0 # output: Tensor([0]) | |||
# Rank 1 # output: Tensor([1]) | |||
input = Tensor([0 1]) + rank*2 | |||
group = Group([1, 0]) # first rank is root | |||
output = scatter(input, group) | |||
# Rank 0 # output: Tensor([3]) | |||
# Rank 1 # output: Tensor([2]) | |||
""" | |||
shape, dtype = _bcast_shape_dtype(group, inp) | |||
if group.rank != 0: | |||
@@ -680,8 +658,7 @@ def all_to_all( | |||
split_axis: int = 0, | |||
concat_axis: int = 0, | |||
) -> Tensor: | |||
r""" | |||
Each process scatter input tensor to all processes and return gathered tensor. | |||
r"""Each process scatter input tensor to all processes and return gathered tensor. | |||
Args: | |||
inp: Input tensor. | |||
@@ -694,29 +671,26 @@ def all_to_all( | |||
1 is stream id, and default stream id is 0. | |||
split_axis: The axis along which collective_comm will split the data; | |||
the default axis is 0. | |||
concat_axis: The axis along which collective_comm will concat the data; | |||
the default axis is 0. | |||
Returns: | |||
Result tensor. | |||
Examples: | |||
.. code-block:: | |||
input = Tensor([0 1]) + rank*2 | |||
# Rank 0 # input: Tensor([0 1]) | |||
# Rank 1 # input: Tensor([2 3]) | |||
output = all_to_all(input) | |||
# Rank 0 # output: Tensor([0 2]) | |||
# Rank 1 # output: Tensor([1 3]) | |||
.. code-block:: | |||
input = Tensor([0 1]) + rank*2 | |||
group = Group([1, 0]) | |||
output = all_to_all(input, group) | |||
# Rank 0 # output: Tensor([0 3]) | |||
# Rank 1 # output: Tensor([2 1]) | |||
input = Tensor([0 1]) + rank*2 | |||
# Rank 0 # input: Tensor([0 1]) | |||
# Rank 1 # input: Tensor([2 3]) | |||
output = all_to_all(input) | |||
# Rank 0 # output: Tensor([0 2]) | |||
# Rank 1 # output: Tensor([1 3]) | |||
input = Tensor([0 1]) + rank*2 | |||
group = Group([1, 0]) | |||
output = all_to_all(input, group) | |||
# Rank 0 # output: Tensor([0 3]) | |||
# Rank 1 # output: Tensor([2 1]) | |||
""" | |||
group_size = group.size if group is not None else 1 | |||
assert ( | |||
@@ -805,8 +779,7 @@ class _RemoteRecv(Function): | |||
def remote_send(inp: Tensor, dest_rank: int): | |||
r""" | |||
Send tensor to another process. | |||
r"""Send tensor to another process. | |||
Args: | |||
inp: Tensor to send. | |||
@@ -816,17 +789,15 @@ def remote_send(inp: Tensor, dest_rank: int): | |||
None. | |||
Examples: | |||
.. code-block:: | |||
if rank == 0: | |||
data = mge.tensor(1) | |||
# Tensor(1) | |||
F.distributed.remote_send(data, 1) # return None | |||
else: | |||
data = F.distributed.remote_recv(0) | |||
# Tensor(1) | |||
.. code-block:: | |||
if rank == 0: | |||
data = mge.tensor(1) | |||
# Tensor(1) | |||
F.distributed.remote_send(data, 1) # return None | |||
else: | |||
data = F.distributed.remote_recv(0) | |||
# Tensor(1) | |||
""" | |||
group = _SendRecvGroup(get_rank(), dest_rank) | |||
_bcast_shape_dtype(group, inp) | |||
@@ -844,8 +815,7 @@ def remote_send(inp: Tensor, dest_rank: int): | |||
def remote_recv(src_rank: int, device: Optional[str] = None, inp=None) -> Tensor: | |||
r""" | |||
Receive a tensor from another process. | |||
r"""Receive a tensor from another process. | |||
Args: | |||
src_rank: Rank of source process. | |||
@@ -862,14 +832,13 @@ def remote_recv(src_rank: int, device: Optional[str] = None, inp=None) -> Tensor | |||
.. code-block:: | |||
if rank == 0: | |||
data = mge.tensor(1) | |||
# Tensor(1) | |||
F.distributed.remote_send(data, 1) # return None | |||
else: | |||
data = F.distributed.remote_recv(0) | |||
# Tensor(1) | |||
if rank == 0: | |||
data = mge.tensor(1) | |||
# Tensor(1) | |||
F.distributed.remote_send(data, 1) # return None | |||
else: | |||
data = F.distributed.remote_recv(0) | |||
# Tensor(1) | |||
""" | |||
group = _SendRecvGroup(src_rank, get_rank()) | |||
shape, dtype = _bcast_shape_dtype(group, None) | |||
@@ -36,15 +36,13 @@ _sd = None | |||
class Group: | |||
r""" | |||
Include ranked nodes running collective communication (See :mod:`~.functional.distributed`). | |||
r"""Include ranked nodes running collective communication (See :mod:`~.functional.distributed`). | |||
By default collectives operate on the default group (also called ``WORLD``) | |||
and require all processes to enter the distributed function call. | |||
By default collectives operate on the default group (also called ``WORLD``) | |||
and require all processes to enter the distributed function call. | |||
:param proc_ranks: rank list of the group, the first one is root rank. | |||
Args: | |||
proc_ranks: rank list of the group, the first one is root rank. | |||
""" | |||
def __init__(self, proc_ranks): | |||
@@ -116,15 +114,15 @@ def init_process_group( | |||
backend: Optional[str] = "auto", | |||
device_type: str = "xpu", | |||
) -> None: | |||
""" | |||
Initialize the distributed process group and specify the device used in the current process | |||
:param master_ip: ip address of the master node. | |||
:param port: port available for all processes to communicate. | |||
:param world_size: total number of processes participating in the job. | |||
:param rank: rank of the current process. | |||
:param device: the GPU device id to bind this process to. | |||
:param backend: communicator backend, currently support 'nccl' and 'shm'. | |||
r"""Initialize the distributed process group and specify the device used in the current process | |||
Args: | |||
master_ip: ip address of the master node. | |||
port: port available for all processes to communicate. | |||
world_size: total number of processes participating in the job. | |||
rank: rank of the current process. | |||
device: the GPU device id to bind this process to. | |||
backend: communicator backend, currently support 'nccl' and 'shm'. | |||
""" | |||
physical_device_type = what_is_xpu() if device_type == "xpu" else device_type | |||
if not isinstance(master_ip, str): | |||
@@ -180,10 +178,10 @@ def _set_machine_ranks(ranks) -> None: | |||
@contextmanager | |||
def override_backend(new_backend: str): | |||
""" | |||
Override distributed backend | |||
r"""Override distributed backend | |||
:param new_backend: communicator backend set in this context. | |||
Args: | |||
new_backend: communicator backend set in this context. | |||
""" | |||
global _sd | |||
assert _sd, "please call init_process_group first" | |||
@@ -196,51 +194,51 @@ def override_backend(new_backend: str): | |||
def is_distributed() -> bool: | |||
"""Return True if the distributed process group has been initialized.""" | |||
r"""Return True if the distributed process group has been initialized.""" | |||
return _sd is not None | |||
def get_rank() -> int: | |||
"""Get the rank of the current process.""" | |||
r"""Get the rank of the current process.""" | |||
return _sd.proc_rank if _sd is not None else 0 | |||
def get_world_size() -> int: | |||
"""Get the total number of processes participating in the job.""" | |||
r"""Get the total number of processes participating in the job.""" | |||
return _sd.world_size if _sd is not None else 1 | |||
def get_backend() -> str: | |||
"""Get the backend str.""" | |||
r"""Get the backend str.""" | |||
assert _sd is not None, "please call init_process_group first" | |||
return _sd.backend if _sd is not None else None | |||
def get_py_server_addr() -> Tuple[str, int]: | |||
"""Get master_ip and port of python XML RPC server.""" | |||
r"""Get master_ip and port of python XML RPC server.""" | |||
assert _sd is not None, "please call init_process_group first" | |||
return _sd.master_ip, _sd.py_server_port | |||
def get_mm_server_addr() -> Tuple[str, int]: | |||
"""Get master_ip and port of C++ mm_server.""" | |||
r"""Get master_ip and port of C++ mm_server.""" | |||
assert _sd is not None, "please call init_process_group first" | |||
return _sd.master_ip, _sd.mm_server_port | |||
def get_client() -> Client: | |||
"""Get client of python XML RPC server.""" | |||
r"""Get client of python XML RPC server.""" | |||
assert _sd is not None, "please call init_process_group first" | |||
return _sd.client | |||
def new_group(proc_ranks: List[int]) -> Group: | |||
"""Build a subgroup containing certain ranks.""" | |||
r"""Build a subgroup containing certain ranks.""" | |||
return Group(proc_ranks) | |||
def group_barrier(group: Group = WORLD) -> None: | |||
"""Block until all ranks in the group reach this barrier.""" | |||
r"""Block until all ranks in the group reach this barrier.""" | |||
# if running with single node, skip it | |||
if _sd is None: | |||
return | |||
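A short sketch of building a subgroup and synchronizing only its members (assumes a running 4-process job):

.. code-block::

    import megengine.distributed as dist

    sub = dist.new_group([0, 1])          # ranks 0 and 1; rank 0 is the root
    if dist.get_rank() in (0, 1):
        dist.group_barrier(sub)           # block until both members arrive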
@@ -28,39 +28,40 @@ from .group import WORLD, Group, group_barrier, is_distributed, override_backend | |||
def param_pack_split(inp: Tensor, offsets: list, shapes: list): | |||
r""" | |||
Splits the input tensor into a list of tensors according to the given offsets and shapes, | |||
only used for ``parampack``. | |||
r"""Splits the input tensor into a list of tensors according to the given offsets and shapes, | |||
only used for ``parampack``. | |||
:param inp: input tensor. | |||
:param offsets: offsets of outputs, length of `2 * n`, | |||
Args: | |||
inp: input tensor. | |||
offsets: offsets of outputs, length of `2 * n`, | |||
where n is the number of tensors you want to split, | |||
format `[begin0, end0, begin1, end1]`. | |||
:param shapes: tensor shapes of outputs. | |||
:return: split tensors. | |||
shapes: tensor shapes of outputs. | |||
Examples: | |||
Returns: | |||
split tensors. | |||
.. testcode:: | |||
Examples: | |||
import numpy as np | |||
from megengine import tensor | |||
from megengine.distributed.helper import param_pack_split | |||
.. testcode:: | |||
a = tensor(np.ones((10,), np.int32)) | |||
b, c = param_pack_split(a, [0, 1, 1, 10], [(1,), (3, 3)]) | |||
print(b.numpy()) | |||
print(c.numpy()) | |||
import numpy as np | |||
from megengine import tensor | |||
from megengine.distributed.helper import param_pack_split | |||
Outputs: | |||
a = tensor(np.ones((10,), np.int32)) | |||
b, c = param_pack_split(a, [0, 1, 1, 10], [(1,), (3, 3)]) | |||
print(b.numpy()) | |||
print(c.numpy()) | |||
.. testoutput:: | |||
Outputs: | |||
[1] | |||
[[1 1 1] | |||
[1 1 1] | |||
[1 1 1]] | |||
.. testoutput:: | |||
[1] | |||
[[1 1 1] | |||
[1 1 1] | |||
[1 1 1]] | |||
""" | |||
op = ParamPackSplit() | |||
op.offsets = offsets | |||
@@ -73,36 +74,37 @@ def param_pack_split(inp: Tensor, offsets: list, shapes: list): | |||
def param_pack_concat(inps: list, offsets: Tensor, offsets_val: list): | |||
r""" | |||
Returns the concatenated tensor, only used for ``parampack``. | |||
r"""Returns the concatenated tensor, only used for ``parampack``. | |||
:param inps: input tensors. | |||
:param offsets: device value of offsets. | |||
:param offsets_val: offsets of inputs, length of `2 * n`, | |||
Args: | |||
inps: input tensors. | |||
offsets: device value of offsets. | |||
offsets_val: offsets of inputs, length of `2 * n`, | |||
format `[begin0, end0, begin1, end1]`. | |||
:return: concatenated tensor. | |||
Examples: | |||
Returns: | |||
concatenated tensor. | |||
.. testcode:: | |||
Examples: | |||
import numpy as np | |||
from megengine import tensor | |||
from megengine.distributed.helper import param_pack_concat | |||
.. testcode:: | |||
a = tensor(np.ones((1,), np.int32)) | |||
b = tensor(np.ones((3, 3), np.int32)) | |||
offsets_val = [0, 1, 1, 10] | |||
offsets = tensor(offsets_val, np.int32) | |||
c = param_pack_concat([a, b], offsets, offsets_val) | |||
print(c.numpy()) | |||
import numpy as np | |||
from megengine import tensor | |||
from megengine.distributed.helper import param_pack_concat | |||
Outputs: | |||
a = tensor(np.ones((1,), np.int32)) | |||
b = tensor(np.ones((3, 3), np.int32)) | |||
offsets_val = [0, 1, 1, 10] | |||
offsets = tensor(offsets_val, np.int32) | |||
c = param_pack_concat([a, b], offsets, offsets_val) | |||
print(c.numpy()) | |||
.. testoutput:: | |||
Outputs: | |||
[1 1 1 1 1 1 1 1 1 1] | |||
.. testoutput:: | |||
[1 1 1 1 1 1 1 1 1 1] | |||
""" | |||
op = ParamPackConcat() | |||
op.offsets = offsets_val | |||
@@ -165,9 +167,9 @@ class TensorFuture(Future): | |||
def synchronized(func: Callable): | |||
r"""Decorator. Decorated function will synchronize when finished. | |||
Specifically, we use this to prevent data races during hub.load | |||
""" | |||
Decorator. Decorated function will synchronize when finished. | |||
Specifically, we use this to prevent data races during hub.load""" | |||
@functools.wraps(func) | |||
def wrapper(*args, **kwargs): | |||
@@ -199,23 +201,23 @@ get_device_count_by_fork = deprecated_func( | |||
def bcast_list_(inps: list, group: Group = WORLD): | |||
""" | |||
Broadcast tensors within the given group. | |||
r"""Broadcast tensors within the given group. | |||
:param inps: input tensors. | |||
:param group: communication group. | |||
Args: | |||
inps: input tensors. | |||
group: communication group. | |||
""" | |||
for inp in inps: | |||
inp._reset(_bcast_param(inp, group)) | |||
class AllreduceCallback: | |||
""" | |||
Allreduce Callback with tensor fusion optimization. | |||
r"""Allreduce Callback with tensor fusion optimization. | |||
:param reduce_method: the method to reduce gradients. | |||
:param group: communication group. | |||
:param backend: override distributed backend in allreduce. | |||
Args: | |||
reduce_method: the method to reduce gradients. | |||
group: communication group. | |||
backend: override distributed backend in allreduce. | |||
""" | |||
def __init__(self, reduce_method: str, group: Group = WORLD, backend: str = None): | |||
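A sketch of the usual data-parallel pattern: register the callback on a ``GradManager`` so gradients are all-reduced right after they are computed (the import path and ``model`` are assumptions):

.. code-block::

    from megengine.autodiff import GradManager
    from megengine.distributed.helper import AllreduceCallback  # path assumed

    gm = GradManager()
    # "mean" averages gradients across ranks; "sum" is the other common choice
    gm.attach(model.parameters(), callbacks=[AllreduceCallback("mean")])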
@@ -39,7 +39,7 @@ def _run_wrapped( | |||
queue: mp.Queue, | |||
machine_ranks: list, | |||
): | |||
"""Init distributed process group and run wrapped function.""" | |||
r"""Init distributed process group and run wrapped function.""" | |||
_check_device_initialized(device_type, dev) | |||
init_process_group( | |||
master_ip=master_ip, | |||
@@ -64,15 +64,16 @@ def _run_wrapped( | |||
class launcher: | |||
"""Decorator for launching multiple processes in single-machine multi-gpu training. | |||
:param func: the function you want to launch in distributed mode. | |||
:param n_gpus: how many devices each node. | |||
:param world_size: how many devices totally. | |||
:param rank_start: start number for rank. | |||
:param master_ip: ip address for master node (where the rank 0 is). | |||
:param port: server port for distributed server. | |||
:param backend: set default collective communication backend. | |||
r"""Decorator for launching multiple processes in single-machine multi-gpu training. | |||
Args: | |||
func: the function you want to launch in distributed mode. | |||
n_gpus: number of devices on each node. | |||
world_size: total number of devices across all nodes. | |||
rank_start: starting rank number for this node. | |||
master_ip: ip address of the master node (where rank 0 runs). | |||
port: server port for the distributed server. | |||
backend: set the default collective communication backend. | |||
""" | |||
def __new__(cls, *args, **kwargs): | |||
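A minimal sketch of decorating a worker function (the GPU count is illustrative; the decorator is assumed to accept keyword-only configuration):

.. code-block::

    import megengine.distributed as dist

    @dist.launcher(n_gpus=2)      # spawn one process per GPU on this machine
    def worker():
        print("running on rank", dist.get_rank())

    worker()                      # launches 2 processes, each bound to a GPU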
@@ -20,11 +20,11 @@ from ..utils.future import Future | |||
class Methods: | |||
""" | |||
Distributed Server Method. | |||
r"""Distributed Server Method. | |||
Used for exchanging information between distributed nodes. | |||
:param mm_server_port: multiple machine rpc server port. | |||
Args: | |||
mm_server_port: multiple machine rpc server port. | |||
""" | |||
def __init__(self, mm_server_port): | |||
@@ -39,19 +39,19 @@ class Methods: | |||
self.bcast_dict = {} | |||
def connect(self): | |||
"""Method for checking connection success.""" | |||
r"""Method for checking connection success.""" | |||
return True | |||
def get_mm_server_port(self): | |||
"""Get multiple machine rpc server port.""" | |||
r"""Get multiple machine rpc server port.""" | |||
return self.mm_server_port | |||
def set_is_grad(self, key, is_grad): | |||
""" | |||
Mark whether send/recv needs gradients, by key. | |||
r"""Mark whether send/recv needs gradients, by key. | |||
:param key: key to match send/recv op. | |||
:param is_grad: whether this op needs grad. | |||
Args: | |||
key: key to match send/recv op. | |||
is_grad: whether this op needs grad. | |||
""" | |||
with self.lock: | |||
future = self.dict_is_grad[key] | |||
@@ -59,10 +59,10 @@ class Methods: | |||
return True | |||
def check_is_grad(self, key): | |||
""" | |||
Check whether send/recv needs gradients. | |||
r"""Check whether send/recv needs gradients. | |||
:param key: key to match send/recv op. | |||
Args: | |||
key: key to match send/recv op. | |||
""" | |||
with self.lock: | |||
future = self.dict_is_grad[key] | |||
@@ -72,11 +72,11 @@ class Methods: | |||
return ret | |||
def set_remote_tracer(self, key, tracer_set): | |||
""" | |||
Set tracer dict for tracing send/recv op. | |||
r"""Set tracer dict for tracing send/recv op. | |||
:param key: key to match send/recv op. | |||
:param tracer_set: valid tracer set. | |||
Args: | |||
key: key to match send/recv op. | |||
tracer_set: valid tracer set. | |||
""" | |||
with self.lock: | |||
future = self.dict_remote_tracer[key] | |||
@@ -84,10 +84,10 @@ class Methods: | |||
return True | |||
def check_remote_tracer(self, key): | |||
""" | |||
Get tracer dict for send/recv op. | |||
r"""Get tracer dict for send/recv op. | |||
:param key: key to match send/recv op. | |||
Args: | |||
key: key to match send/recv op. | |||
""" | |||
with self.lock: | |||
future = self.dict_remote_tracer[key] | |||
@@ -97,11 +97,11 @@ class Methods: | |||
return ret | |||
def group_barrier(self, key, size): | |||
""" | |||
A barrier that waits for all group members. | |||
r"""A barrier that waits for all group members. | |||
:param key: group key to match each other. | |||
:param size: group size. | |||
Args: | |||
key: group key to match each other. | |||
size: group size. | |||
""" | |||
with self.lock: | |||
self.dict_barrier_counter[key] += 1 | |||
@@ -116,14 +116,14 @@ class Methods: | |||
return True | |||
def user_set(self, key, val): | |||
"""Set user defined key-value pairs across processes.""" | |||
r"""Set user defined key-value pairs across processes.""" | |||
with self.lock: | |||
future = self.user_dict[key] | |||
future.set(val) | |||
return True | |||
def user_get(self, key): | |||
"""Get user defined key-value pairs across processes.""" | |||
r"""Get user defined key-value pairs across processes.""" | |||
with self.lock: | |||
future = self.user_dict[key] | |||
return future.get() | |||
@@ -161,12 +161,12 @@ class ThreadXMLRPCServer(ThreadingMixIn, SimpleXMLRPCServer): | |||
def _start_server(py_server_port, queue): | |||
""" | |||
Start python distributed server and multiple machine server. | |||
r"""Start python distributed server and multiple machine server. | |||
:param py_server_port: python server port. | |||
:param mm_server_port: multiple machine server port. | |||
:param queue: server port will put in this queue, puts exception when process fails. | |||
Args: | |||
py_server_port: python server port. | |||
mm_server_port: multiple machine server port. | |||
queue: server port will put in this queue, puts exception when process fails. | |||
""" | |||
try: | |||
mm_server_port = create_mm_server("0.0.0.0", 0) | |||
@@ -182,11 +182,11 @@ def _start_server(py_server_port, queue): | |||
class Server: | |||
""" | |||
Distributed Server for distributed training. | |||
r"""Distributed Server for distributed training. | |||
Should be running on the master node. | |||
:param port: python server port. | |||
Args: | |||
port: python server port. | |||
""" | |||
def __init__(self, port=0): | |||
@@ -204,11 +204,11 @@ class Server: | |||
class Client: | |||
""" | |||
Distributed Client for distributed training. | |||
r"""Distributed Client for distributed training. | |||
:param master_ip: ip address of master node. | |||
:param port: port of server at master node. | |||
Args: | |||
master_ip: ip address of master node. | |||
port: port of server at master node. | |||
""" | |||
def __init__(self, master_ip, port): | |||
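A rough sketch of how the pair fits together (module path and port are assumptions; ``Server`` runs on the master node, ``Client`` on every node):

.. code-block::

    from megengine.distributed.server import Server, Client  # path assumed

    server = Server(port=23456)              # started once, on the master node
    client = Client("127.0.0.1", 23456)      # every process connects to it
    client.user_set("status", "ready")
    print(client.user_get("status"))         # -> "ready"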
@@ -218,7 +218,7 @@ class Client: | |||
self.bcast_dict = defaultdict(lambda: 0) | |||
def connect(self): | |||
"""Check connection success.""" | |||
r"""Check connection success.""" | |||
while True: | |||
try: | |||
self.proxy = ServerProxy( | |||
@@ -230,62 +230,62 @@ class Client: | |||
time.sleep(1) | |||
def get_mm_server_port(self): | |||
"""Get multiple machine server port.""" | |||
r"""Get multiple machine server port.""" | |||
return self.proxy.get_mm_server_port() | |||
def set_is_grad(self, key, is_grad): | |||
""" | |||
Mark whether send/recv needs gradients, by key. | |||
r"""Mark whether send/recv needs gradients, by key. | |||
:param key: key to match send/recv op. | |||
:param is_grad: whether this op needs grad. | |||
Args: | |||
key: key to match send/recv op. | |||
is_grad: whether this op needs grad. | |||
""" | |||
self.proxy.set_is_grad(key, is_grad) | |||
def check_is_grad(self, key): | |||
""" | |||
Check whether send/recv needs gradients. | |||
r"""Check whether send/recv needs gradients. | |||
:param key: key to match send/recv op. | |||
Args: | |||
key: key to match send/recv op. | |||
""" | |||
return self.proxy.check_is_grad(key) | |||
def set_remote_tracer(self, key, tracer_set): | |||
""" | |||
Set tracer dict for tracing send/recv op. | |||
r"""Set tracer dict for tracing send/recv op. | |||
:param key: key to match send/recv op. | |||
:param tracer_set: valid tracer set. | |||
Args: | |||
key: key to match send/recv op. | |||
tracer_set: valid tracer set. | |||
""" | |||
self.proxy.set_remote_tracer(key, tracer_set) | |||
def check_remote_tracer(self, key): | |||
""" | |||
Get tracer dict for send/recv op. | |||
r"""Get tracer dict for send/recv op. | |||
:param key: key to match send/recv op. | |||
Args: | |||
key: key to match send/recv op. | |||
""" | |||
return self.proxy.check_remote_tracer(key) | |||
def group_barrier(self, key, size): | |||
""" | |||
A barrier that waits for all group members. | |||
r"""A barrier that waits for all group members. | |||
:param key: group key to match each other. | |||
:param size: group size. | |||
Args: | |||
key: group key to match each other. | |||
size: group size. | |||
""" | |||
self.proxy.group_barrier(key, size) | |||
def user_set(self, key, val): | |||
"""Set user defined key-value pairs across processes.""" | |||
r"""Set user defined key-value pairs across processes.""" | |||
return self.proxy.user_set(key, val) | |||
def user_get(self, key): | |||
"""Get user defined key-value pairs across processes.""" | |||
r"""Get user defined key-value pairs across processes.""" | |||
return self.proxy.user_get(key) | |||
def user_pop(self, key): | |||
"""Get user defined key-value pairs and delete the resources when the get is done""" | |||
r"""Get user defined key-value pairs and delete the resources when the get is done""" | |||
return self.proxy.user_pop(key) | |||
def bcast_val(self, val, key, size): | |||
@@ -30,24 +30,20 @@ def _str2bytes(text: str) -> int: | |||
@property | |||
def eviction_threshold(mod): | |||
r""" | |||
Get or set the eviction threshold in bytes. It can also be set to a string, | |||
r"""Get or set the eviction threshold in bytes. It can also be set to a string, | |||
whose formatting supports byte(B), kilobyte(KB), megabyte(MB) and | |||
gigabyte(GB) units. | |||
.. note:: | |||
Note: | |||
When GPU memory usage exceeds this value, DTR will heuristically select | |||
and evict resident tensors until the amount of used memory falls below | |||
this threshold. | |||
Examples: | |||
.. code-block:: | |||
.. code-block:: | |||
import megengine as mge | |||
mge.dtr.eviction_threshold = "2GB" | |||
import megengine as mge | |||
mge.dtr.eviction_threshold = "2GB" | |||
""" | |||
return _eviction_threshold | |||
@@ -66,24 +62,21 @@ def eviction_threshold(mod, value: Union[int, str]): | |||
@property | |||
def evictee_minimum_size(mod): | |||
r""" | |||
Get or set the memory threshold of tensors in bytes. It can also be set to a | |||
r"""Get or set the memory threshold of tensors in bytes. It can also be set to a | |||
string, whose formatting supports byte(B), kilobyte(KB), megabyte(MB) and | |||
gigabyte(GB) units. | |||
.. note:: | |||
Note: | |||
Only tensors whose size exceeds this threshold will be added to the | |||
candidate set. A tensor that is not added to the candidate set will | |||
never be evicted during its lifetime. | |||
Examples: | |||
.. code-block:: | |||
.. code-block:: | |||
import megengine as mge | |||
mge.dtr.evictee_minimum_size = "2MB" | |||
import megengine as mge | |||
mge.dtr.evictee_minimum_size = "2MB" | |||
""" | |||
return _evictee_minimum_size | |||
@@ -102,19 +95,16 @@ def evictee_minimum_size(mod, value: Union[int, str]): | |||
@property | |||
def enable_sqrt_sampling(mod): | |||
r""" | |||
Get or set whether sqrt sampling is allowed. Sqrt sampling means that given | |||
r"""Get or set whether sqrt sampling is allowed. Sqrt sampling means that given | |||
the size of the candidate set is N, only enumerate sqrt(N) tensors. When | |||
the number of tensors is very high, enabling this optimization will speed | |||
up the training. | |||
Examples: | |||
.. code-block:: | |||
Examples: | |||
.. code-block:: | |||
import megengine as mge | |||
mge.dtr.enable_sqrt_sampling = True | |||
import megengine as mge | |||
mge.dtr.enable_sqrt_sampling = True | |||
""" | |||
return _enable_sqrt_sampling | |||
@@ -127,9 +117,7 @@ def enable_sqrt_sampling(mod, value: bool): | |||
def enable(): | |||
r""" | |||
Enable to record computing path of tensors and to perform DTR policy. | |||
""" | |||
r"""Enable to record computing path of tensors and to perform DTR policy.""" | |||
_set_defrag(True) | |||
_set_option("enable_dtr_auto_drop", 1) | |||
_set_option("enable_drop", 1) | |||
@@ -138,9 +126,7 @@ def enable(): | |||
def disable(): | |||
r""" | |||
Stop recording computing path of tensors and performing DTR policy. | |||
""" | |||
r"""Stop recording computing path of tensors and performing DTR policy.""" | |||
_set_defrag(False) | |||
_set_option("enable_dtr_auto_drop", 0) | |||
_set_option("enable_drop", 0) | |||
@@ -23,8 +23,7 @@ if os.getenv("MEGENGINE_CONV_EXECUTION_STRATEGY") != None: | |||
def get_execution_strategy() -> Strategy: | |||
""" | |||
Returns the execution strategy of :class:`~module.Conv2d` and :func:`~.matmul`. | |||
r"""Returns the execution strategy of :class:`~module.Conv2d` and :func:`~.matmul`. | |||
See :func:`~.set_execution_strategy` for possible return values. | |||
""" | |||
@@ -32,31 +31,32 @@ def get_execution_strategy() -> Strategy: | |||
def set_execution_strategy(option): | |||
""" | |||
Sets the execution strategy of :class:`~module.Conv2d` and :func:`~.matmul` | |||
r"""Sets the execution strategy of :class:`~module.Conv2d` and :func:`~.matmul` | |||
Args: | |||
option: Decides how :class:`~.module.Conv2d` and :func:`~.matmul` algorithms are chosen. | |||
Available ``Strategy`` values: | |||
:param option: Decides how :class:`~module.Conv2d` and :func:`~.matmul` algorithms are chosen. | |||
Available ``Strategy`` values: | |||
* HEURISTIC uses heuristic to choose the fastest algorithm. | |||
* PROFILE runs possible algorithms on real device to find the best one. | |||
* REPRODUCIBLE uses the algorithms that is reproducible. | |||
* OPTIMIZED uses the algorithms that is optimized. | |||
* HEURISTIC uses heuristic to choose the fastest algorithm. | |||
* PROFILE runs possible algorithms on real device to find the best one. | |||
* REPRODUCIBLE uses the algorithms that is reproducible. | |||
* OPTIMIZED uses the algorithms that is optimized. | |||
The default strategy is HEURISTIC. These options can be combined, | |||
e.g. PROFILE | REPRODUCIBLE gives an option that uses the fastest | |||
algorithm found by profiling that is also reproducible. | |||
The default strategy is HEURISTIC. These options can be combined, | |||
e.g. PROFILE | REPRODUCIBLE gives an option that uses the fastest | |||
algorithm found by profiling that is also reproducible. | |||
Available string values: | |||
Available string values: | |||
* 'HEURISTIC' uses heuristic to choose the fastest algorithm. | |||
* 'PROFILE' runs possible algorithms on real device to find the best one. | |||
* 'PROFILE_HEURISTIC' uses profiling result and heuristic to choose the fastest algorithm. | |||
* 'PROFILE_REPRODUCIBLE' uses the fastest of profiling result that is also reproducible. | |||
* 'HEURISTIC_REPRODUCIBLE' uses heuristic to choose the fastest algorithm that is also reproducible. | |||
* 'HEURISTIC' uses heuristic to choose the fastest algorithm. | |||
* 'PROFILE' runs possible algorithms on real device to find the best one. | |||
* 'PROFILE_HEURISTIC' uses profiling result and heuristic to choose the fastest algorithm. | |||
* 'PROFILE_REPRODUCIBLE' uses the fastest of profiling result that is also reproducible. | |||
* 'HEURISTIC_REPRODUCIBLE' uses heuristic to choose the fastest algorithm that is also reproducible. | |||
The default strategy is 'HEURISTIC'. | |||
The default strategy is 'HEURISTIC'. | |||
It can also be set through the environment variable 'MEGENGINE_EXECUTION_STRATEGY'. | |||
It can also be set through the environment variable 'MEGENGINE_EXECUTION_STRATEGY'. | |||
""" | |||
valid_string_option = { | |||
"REPRODUCIBLE": Strategy.REPRODUCIBLE, | |||
@@ -78,182 +78,163 @@ def _elemwise_multi_type(*args, mode, **kwargs): | |||
def add(x, y): | |||
""" | |||
Element-wise `addition`. | |||
At least one operand should be a tensor. | |||
Same for sub/mul/div/floor_div/pow/mod/atan2/equal/not_equal/less/less_equal/greater/greater_equal/maximum/minimum. | |||
:param x: input tensor. | |||
:return: computed tensor. | |||
r"""Element-wise `addition`. | |||
Examples: | |||
.. testcode:: | |||
.. testcode:: | |||
import numpy as np | |||
from megengine import tensor | |||
import megengine.functional as F | |||
import numpy as np | |||
from megengine import tensor | |||
import megengine.functional as F | |||
x = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) | |||
y = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) | |||
out = F.add(x, y) | |||
print(out.numpy()) | |||
x = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) | |||
y = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) | |||
out = F.add(x, y) | |||
print(out.numpy()) | |||
Outputs: | |||
Outputs: | |||
.. testoutput:: | |||
[[ 0. 2. 4.] | |||
[ 6. 8. 10.]] | |||
.. testoutput:: | |||
[[ 0. 2. 4.] | |||
[ 6. 8. 10.]] | |||
""" | |||
return _elwise(x, y, mode=Elemwise.Mode.ADD) | |||
def sub(x, y): | |||
"""Element-wise `subtraction`.""" | |||
r"""Element-wise `subtraction`.""" | |||
return _elwise(x, y, mode=Elemwise.Mode.SUB) | |||
def mul(x, y): | |||
"""Element-wise `multiplication`.""" | |||
r"""Element-wise `multiplication`.""" | |||
return _elwise(x, y, mode=Elemwise.Mode.MUL) | |||
def div(x, y): | |||
"""Element-wise `(x / y)`.""" | |||
r"""Element-wise `(x / y)`.""" | |||
return _elwise(x, y, mode=Elemwise.Mode.TRUE_DIV) | |||
def floor_div(x, y): | |||
"""Element-wise `floor(x / y)`.""" | |||
r"""Element-wise `floor(x / y)`.""" | |||
return _elwise(x, y, mode=Elemwise.Mode.FLOOR_DIV) | |||
def neg(x): | |||
"""Element-wise `negation`.""" | |||
r"""Element-wise `negation`.""" | |||
return _elwise(x, mode=Elemwise.Mode.NEGATE) | |||
def pow(x, y): | |||
"""Element-wise `power`.""" | |||
r"""Element-wise `power`.""" | |||
return _elwise(x, y, mode=Elemwise.Mode.POW) | |||
def mod(x, y): | |||
"""Element-wise `remainder of division`.""" | |||
r"""Element-wise `remainder of division`.""" | |||
return _elwise(x, y, mode=Elemwise.Mode.MOD) | |||
def abs(x): | |||
"""Element-wise `absolute value`.""" | |||
r"""Element-wise `absolute value`.""" | |||
return _elwise(x, mode=Elemwise.Mode.ABS) | |||
def exp(x): | |||
"""Element-wise `exponential`.""" | |||
r"""Element-wise `exponential`.""" | |||
return _elwise(x, mode=Elemwise.Mode.EXP) | |||
def expm1(x): | |||
"""Element-wise `exp(x)-1`.""" | |||
r"""Element-wise `exp(x)-1`.""" | |||
return _elwise(x, mode=Elemwise.Mode.EXPM1) | |||
def log(x): | |||
"""Element-wise `logarithm (base e)`.""" | |||
r"""Element-wise `logarithm (base e)`.""" | |||
return _elwise(x, mode=Elemwise.Mode.LOG) | |||
def log1p(x): | |||
"""Element-wise `log(x+1) (base e)`.""" | |||
r"""Element-wise `log(x+1) (base e)`.""" | |||
return _elwise(x, mode=Elemwise.Mode.LOG1P) | |||
def sqrt(x: Tensor) -> Tensor: | |||
""" | |||
Element-wise `sqrt`. | |||
Returns ``NaN`` for negative input value. | |||
:param x: input tensor. | |||
:return: computed tensor. | |||
r"""Element-wise `sqrt`. | |||
Examples: | |||
.. testcode:: | |||
.. testcode:: | |||
import numpy as np | |||
from megengine import tensor | |||
import megengine.functional as F | |||
import numpy as np | |||
from megengine import tensor | |||
import megengine.functional as F | |||
x = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) | |||
out = F.sqrt(x) | |||
print(out.numpy().round(decimals=4)) | |||
x = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) | |||
out = F.sqrt(x) | |||
print(out.numpy().round(decimals=4)) | |||
Outputs: | |||
Outputs: | |||
.. testoutput:: | |||
[[0. 1. 1.4142] | |||
[1.7321 2. 2.2361]] | |||
.. testoutput:: | |||
[[0. 1. 1.4142] | |||
[1.7321 2. 2.2361]] | |||
""" | |||
return x ** 0.5 | |||
def square(x: Tensor) -> Tensor: | |||
""" | |||
Returns a new tensor with the square of the elements of input tensor. | |||
:param inp: input tensor. | |||
:return: computed tensor. | |||
r"""Element-wise `square`. | |||
Examples: | |||
.. testcode:: | |||
.. testcode:: | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.functional as F | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.functional as F | |||
data = mge.tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) | |||
out = F.square(data) | |||
print(out.numpy().round(decimals=4)) | |||
data = mge.tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) | |||
out = F.square(data) | |||
print(out.numpy().round(decimals=4)) | |||
Outputs: | |||
Outputs: | |||
.. testoutput:: | |||
[[ 0. 1. 4.] | |||
[ 9. 16. 25.]] | |||
.. testoutput:: | |||
[[ 0. 1. 4.] | |||
[ 9. 16. 25.]] | |||
""" | |||
return x ** 2 | |||
def round(x): | |||
"""Element-wise `rounding to int`.""" | |||
r"""Element-wise `rounding to int`.""" | |||
return _elwise(x, mode=Elemwise.Mode.ROUND) | |||
def ceil(x): | |||
"""Element-wise `ceiling`.""" | |||
r"""Element-wise `ceiling`.""" | |||
return _elwise(x, mode=Elemwise.Mode.CEIL) | |||
def floor(x): | |||
"""Element-wise `floor`.""" | |||
r"""Element-wise `floor`.""" | |||
return _elwise(x, mode=Elemwise.Mode.FLOOR) | |||
def maximum(x, y): | |||
"""Element-wise `maximum of array elements`.""" | |||
r"""Element-wise `maximum of array elements`.""" | |||
return _elwise(x, y, mode=Elemwise.Mode.MAX) | |||
def minimum(x, y): | |||
"""Element-wise `minimum of array elements`.""" | |||
r"""Element-wise `minimum of array elements`.""" | |||
return _elwise(x, y, mode=Elemwise.Mode.MIN) | |||
@@ -261,62 +242,57 @@ def minimum(x, y): | |||
def cos(x): | |||
""" | |||
Element-wise `cosine`. | |||
:param x: input tensor. | |||
:return: computed tensor. | |||
r"""Element-wise `cosine`. | |||
Examples: | |||
.. testcode:: | |||
import numpy as np | |||
from megengine import tensor | |||
import megengine.functional as F | |||
.. testcode:: | |||
x = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) | |||
out = F.cos(x) | |||
print(out.numpy().round(decimals=4)) | |||
import numpy as np | |||
from megengine import tensor | |||
import megengine.functional as F | |||
Outputs: | |||
x = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) | |||
out = F.cos(x) | |||
print(out.numpy().round(decimals=4)) | |||
.. testoutput:: | |||
Outputs: | |||
[[ 1. 0.5403 -0.4161] | |||
[-0.99 -0.6536 0.2837]] | |||
.. testoutput:: | |||
[[ 1. 0.5403 -0.4161] | |||
[-0.99 -0.6536 0.2837]] | |||
""" | |||
return _elwise(x, mode=Elemwise.Mode.COS) | |||
def sin(x): | |||
"""Element-wise `sine`.""" | |||
r"""Element-wise `sine`.""" | |||
return _elwise(x, mode=Elemwise.Mode.SIN) | |||
def tan(x): | |||
"""Element-wise `tangent`.""" | |||
r"""Element-wise `tangent`.""" | |||
return sin(x) / cos(x) | |||
def acos(x): | |||
"""Element-wise `inverse cosine`.""" | |||
r"""Element-wise `inverse cosine`.""" | |||
return _elwise(x, mode=Elemwise.Mode.ACOS) | |||
def asin(x): | |||
"""Element-wise `inverse sine`.""" | |||
r"""Element-wise `inverse sine`.""" | |||
return _elwise(x, mode=Elemwise.Mode.ASIN) | |||
def atan(x): | |||
"""Element-wise `inverse tangent`.""" | |||
r"""Element-wise `inverse tangent`.""" | |||
return _elwise(x, 1, mode=Elemwise.Mode.ATAN2) | |||
def atan2(y, x): | |||
"""Element-wise `2-argument arctangent`.""" | |||
r"""Element-wise `2-argument arctangent`.""" | |||
return _elwise(y, x, mode=Elemwise.Mode.ATAN2) | |||
@@ -355,38 +331,33 @@ def atanh(x): | |||
def left_shift(x, y): | |||
""" | |||
Element-wise `bitwise binary: x << y`. | |||
r"""Element-wise `bitwise binary: x << y`. | |||
:param x: input tensor, should be int. | |||
:param y: how many bits to be left-shifted. | |||
:return: computed tensor. | |||
Examples: | |||
Examples: | |||
.. testcode:: | |||
.. testcode:: | |||
import numpy as np | |||
from megengine import tensor | |||
import megengine.functional as F | |||
import numpy as np | |||
from megengine import tensor | |||
import megengine.functional as F | |||
x = tensor(np.arange(0, 6, dtype=np.int32).reshape(2, 3)) | |||
out = F.left_shift(x, 2) | |||
print(out.numpy()) | |||
x = tensor(np.arange(0, 6, dtype=np.int32).reshape(2, 3)) | |||
out = F.left_shift(x, 2) | |||
print(out.numpy()) | |||
Outputs: | |||
Outputs: | |||
.. testoutput:: | |||
.. testoutput:: | |||
[[ 0 4 8] | |||
[12 16 20]] | |||
[[ 0 4 8] | |||
[12 16 20]] | |||
""" | |||
return _elwise(x, y, mode=Elemwise.Mode.SHL) | |||
def right_shift(x, y): | |||
"""Element-wise `bitwise binary: x >> y`.""" | |||
r"""Element-wise `bitwise binary: x >> y`.""" | |||
return _elwise(x, y, mode=Elemwise.Mode.SHR) | |||
@@ -394,22 +365,22 @@ def right_shift(x, y): | |||
def logical_and(x, y): | |||
"""Element-wise `logical and: x && y`.""" | |||
r"""Element-wise `logical and: x && y`.""" | |||
return _elwise(x, y, mode=Elemwise.Mode.AND) | |||
def logical_not(x): | |||
"""Element-wise `logical not: ~x`.""" | |||
r"""Element-wise `logical not: ~x`.""" | |||
return _elwise(x, mode=Elemwise.Mode.NOT) | |||
def logical_or(x, y): | |||
"""Element-wise `logical or: x || y`.""" | |||
r"""Element-wise `logical or: x || y`.""" | |||
return _elwise(x, y, mode=Elemwise.Mode.OR) | |||
def logical_xor(x, y): | |||
"""Element-wise `logical xor: x ^ y`.""" | |||
r"""Element-wise `logical xor: x ^ y`.""" | |||
return _elwise(x, y, mode=Elemwise.Mode.XOR) | |||
@@ -417,59 +388,53 @@ def logical_xor(x, y): | |||
def equal(x, y): | |||
""" | |||
Element-wise `(x == y)`. | |||
:param x: input tensor 1. | |||
:param y: input tensor 2. | |||
:return: computed tensor. | |||
r"""Element-wise `(x == y)`. | |||
Examples: | |||
.. testcode:: | |||
import numpy as np | |||
from megengine import tensor | |||
import megengine.functional as F | |||
.. testcode:: | |||
x = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) | |||
y = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) | |||
out = F.equal(x, y) | |||
print(out.numpy()) | |||
import numpy as np | |||
from megengine import tensor | |||
import megengine.functional as F | |||
Outputs: | |||
x = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) | |||
y = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) | |||
out = F.equal(x, y) | |||
print(out.numpy()) | |||
.. testoutput:: | |||
Outputs: | |||
[[1. 1. 1.] | |||
[1. 1. 1.]] | |||
.. testoutput:: | |||
[[1. 1. 1.] | |||
[1. 1. 1.]] | |||
""" | |||
return _elwise(x, y, mode=Elemwise.Mode.EQ) | |||
def not_equal(x, y): | |||
"""Element-wise `(x != y)`.""" | |||
r"""Element-wise `(x != y)`.""" | |||
return x != y | |||
def less(x, y): | |||
"""Element-wise `(x < y)`.""" | |||
r"""Element-wise `(x < y)`.""" | |||
return _elwise(x, y, mode=Elemwise.Mode.LT) | |||
def less_equal(x, y): | |||
"""Element-wise `(x <= y)`.""" | |||
r"""Element-wise `(x <= y)`.""" | |||
return _elwise(x, y, mode=Elemwise.Mode.LEQ) | |||
def greater(x, y): | |||
"""Element-wise `(x > y)`.""" | |||
r"""Element-wise `(x > y)`.""" | |||
return _elwise(y, x, mode=Elemwise.Mode.LT) | |||
def greater_equal(x, y): | |||
"""Element-wise `(x >= y)`.""" | |||
r"""Element-wise `(x >= y)`.""" | |||
return _elwise(y, x, mode=Elemwise.Mode.LEQ) | |||
@@ -477,43 +442,45 @@ def greater_equal(x, y): | |||
def clip(x: Tensor, lower=None, upper=None) -> Tensor: | |||
r""" | |||
Clamps all elements in input tensor into the range `[` :attr:`lower`, :attr:`upper` `]` and returns | |||
r"""Clamps all elements in input tensor into the range ``[ lower, upper ]`` and returns | |||
a resulting tensor: | |||
.. math:: | |||
y_i = \begin{cases} | |||
\text{lower} & \text{if } x_i < \text{lower} \\ | |||
x_i & \text{if } \text{lower} \leq x_i \leq \text{upper} \\ | |||
\text{upper} & \text{if } x_i > \text{upper} | |||
\end{cases} | |||
:param x: input tensor. | |||
:param lower: lower-bound of the range to be clamped to. | |||
:param upper: upper-bound of the range to be clamped to. | |||
:return: output clamped tensor. | |||
Args: | |||
x: input tensor. | |||
lower: lower-bound of the range to be clamped to. | |||
upper: upper-bound of the range to be clamped to. | |||
Examples: | |||
Returns: | |||
output clamped tensor. | |||
.. testcode:: | |||
Examples: | |||
import numpy as np | |||
from megengine import tensor | |||
import megengine.functional as F | |||
.. testcode:: | |||
a = tensor(np.arange(5).astype(np.int32)) | |||
print(F.clip(a, 2, 4).numpy()) | |||
print(F.clip(a, lower=3).numpy()) | |||
print(F.clip(a, upper=3).numpy()) | |||
import numpy as np | |||
from megengine import tensor | |||
import megengine.functional as F | |||
Outputs: | |||
a = tensor(np.arange(5).astype(np.int32)) | |||
print(F.clip(a, 2, 4).numpy()) | |||
print(F.clip(a, lower=3).numpy()) | |||
print(F.clip(a, upper=3).numpy()) | |||
.. testoutput:: | |||
Outputs: | |||
[2 2 2 3 4] | |||
[3 3 3 3 4] | |||
[0 1 2 3 3] | |||
.. testoutput:: | |||
[2 2 2 3 4] | |||
[3 3 3 3 4] | |||
[0 1 2 3 3] | |||
""" | |||
assert ( | |||
lower is not None or upper is not None | |||
@@ -23,14 +23,14 @@ def tensorrt_runtime_opr(inputs, *, data: bytes = None): | |||
def cambricon_runtime_opr(inputs, data, symbol, tensor_dim_mutable): | |||
r""" | |||
Load a serialized Cambricon model as a runtime operator in MegEngine. | |||
:param inputs: list of input tensors. | |||
:param data: the serialized Cambricon model. | |||
:param symbol: name of the function in Cambricon model. | |||
:param tensor_dim_mutable: whether the input tensors' shapes are mutable | |||
in ``cnrtModel_t``. | |||
r"""Load a serialized Cambricon model as a runtime operator in MegEngine. | |||
Args: | |||
inputs: list of input tensors. | |||
data: the serialized Cambricon model. | |||
symbol: name of the function in Cambricon model. | |||
tensor_dim_mutable: whether the input tensors' shapes are mutable | |||
in ``cnrtModel_t``. | |||
""" | |||
op = builtin.CambriconRuntime(data, len(data), symbol, tensor_dim_mutable) | |||
@@ -38,11 +38,11 @@ def cambricon_runtime_opr(inputs, data, symbol, tensor_dim_mutable): | |||
def atlas_runtime_opr(inputs, data): | |||
r""" | |||
Load a serialized Atlas model as a runtime operator in MegEngine. | |||
r"""Load a serialized Atlas model as a runtime operator in MegEngine. | |||
:param inputs: list of input tensors. | |||
:param data: the serialized Atlas model. | |||
Args: | |||
inputs: list of input tensors. | |||
data: the serialized Atlas model. | |||
""" | |||
op = builtin.AtlasRuntime(data, len(data)) | |||
@@ -26,9 +26,7 @@ __all__ = [ | |||
def _reduce_output(loss_fn): | |||
r""" | |||
Wrapper to apply canonical reductions to loss outputs. | |||
""" | |||
r"""Wrapper to apply canonical reductions to loss outputs.""" | |||
@functools.wraps(loss_fn) | |||
def reduced_loss_fn(*args, reduction="mean", **kwargs): | |||
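A simplified sketch of what such a reduction wrapper can look like (not the actual implementation):

.. code-block::

    import functools

    def _reduce_output(loss_fn):
        @functools.wraps(loss_fn)
        def reduced_loss_fn(*args, reduction="mean", **kwargs):
            loss = loss_fn(*args, **kwargs)   # element-wise loss
            if reduction == "mean":
                return loss.mean()
            if reduction == "sum":
                return loss.sum()
            return loss                        # reduction == "none"
        return reduced_loss_fn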
@@ -45,13 +43,14 @@ def _reduce_output(loss_fn): | |||
@_reduce_output | |||
def l1_loss(pred: Tensor, label: Tensor, reduction: str = "mean") -> Tensor: | |||
r""" | |||
Calculates the mean absolute error (MAE) between | |||
r"""Calculates the mean absolute error (MAE) between | |||
each element in the pred :math:`x` and label :math:`y`. | |||
The mean absolute error can be described as: | |||
.. math:: \ell(x,y) = mean\left(L \right) | |||
.. math:: | |||
\ell(x,y) = mean\left(L \right) | |||
where | |||
@@ -63,30 +62,32 @@ def l1_loss(pred: Tensor, label: Tensor, reduction: str = "mean") -> Tensor: | |||
:math:`x` and :math:`y` are tensors of arbitrary shapes with a total | |||
of :math:`N` elements each. :math:`N` is the batch size. | |||
:param pred: predicted result from model. | |||
:param label: ground truth to compare. | |||
:param reduction: the reduction to apply to the output: 'none' | 'mean' | 'sum'. Default: 'mean' | |||
:return: loss value. | |||
Args: | |||
pred: predicted result from model. | |||
label: ground truth to compare. | |||
reduction: the reduction to apply to the output: 'none' | 'mean' | 'sum'. Default: 'mean' | |||
Examples: | |||
Returns: | |||
loss value. | |||
.. testcode:: | |||
Examples: | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.functional as F | |||
.. testcode:: | |||
ipt = mge.tensor(np.array([3, 3, 3, 3]).astype(np.float32)) | |||
tgt = mge.tensor(np.array([2, 8, 6, 1]).astype(np.float32)) | |||
loss = F.nn.l1_loss(ipt, tgt) | |||
print(loss.numpy()) | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.functional as F | |||
Outputs: | |||
ipt = mge.tensor(np.array([3, 3, 3, 3]).astype(np.float32)) | |||
tgt = mge.tensor(np.array([2, 8, 6, 1]).astype(np.float32)) | |||
loss = F.nn.l1_loss(ipt, tgt) | |||
print(loss.numpy()) | |||
.. testoutput:: | |||
Outputs: | |||
2.75 | |||
.. testoutput:: | |||
2.75 | |||
""" | |||
diff = pred - label | |||
return abs(diff) | |||
@@ -94,53 +95,56 @@ def l1_loss(pred: Tensor, label: Tensor, reduction: str = "mean") -> Tensor: | |||
@_reduce_output | |||
def square_loss(pred: Tensor, label: Tensor, reduction: str = "mean") -> Tensor: | |||
r""" | |||
Calculates the mean squared error (squared L2 norm) between | |||
r"""Calculates the mean squared error (squared L2 norm) between | |||
each element in the pred :math:`x` and label :math:`y`. | |||
The mean squared error can be described as: | |||
.. math:: \ell(x, y) = mean\left( L \right) | |||
.. math:: | |||
\ell(x, y) = mean\left( L \right) | |||
where | |||
.. math:: | |||
L = \{l_1,\dots,l_N\}, \quad | |||
l_n = \left( x_n - y_n \right)^2, | |||
L = \{l_1,\dots,l_N\}, \quad | |||
l_n = \left( x_n - y_n \right)^2, | |||
:math:`x` and :math:`y` are tensors of arbitrary shapes with a total | |||
of :math:`N` elements each. :math:`N` is the batch size. | |||
:param pred: predicted result from model. | |||
:param label: ground truth to compare. | |||
:param reduction: the reduction to apply to the output: 'none' | 'mean' | 'sum'. Default: 'mean' | |||
:return: loss value. | |||
Args: | |||
pred: predicted result from model. | |||
label: ground truth to compare. | |||
reduction: the reduction to apply to the output: 'none' | 'mean' | 'sum'. Default: 'mean' | |||
Returns: | |||
loss value. | |||
Shape: | |||
- pred: :math:`(N, *)` where :math:`*` means any number of additional | |||
dimensions. | |||
- label: :math:`(N, *)`. Same shape as ``pred``. | |||
* pred: :math:`(N, *)` where :math:`*` means any number of additional | |||
dimensions. | |||
* label: :math:`(N, *)`. Same shape as ``pred``. | |||
Examples: | |||
.. testcode:: | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.functional as F | |||
.. testcode:: | |||
ipt = mge.tensor(np.array([3, 3, 3, 3]).astype(np.float32)) | |||
tgt = mge.tensor(np.array([2, 8, 6, 1]).astype(np.float32)) | |||
loss = F.nn.square_loss(ipt, tgt) | |||
print(loss.numpy()) | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.functional as F | |||
Outputs: | |||
ipt = mge.tensor(np.array([3, 3, 3, 3]).astype(np.float32)) | |||
tgt = mge.tensor(np.array([2, 8, 6, 1]).astype(np.float32)) | |||
loss = F.nn.square_loss(ipt, tgt) | |||
print(loss.numpy()) | |||
.. testoutput:: | |||
Outputs: | |||
9.75 | |||
.. testoutput:: | |||
9.75 | |||
""" | |||
diff = pred - label | |||
return diff ** 2 | |||
@@ -155,8 +159,7 @@ def cross_entropy( | |||
label_smooth: float = 0, | |||
reduction: str = "mean", | |||
) -> Tensor: | |||
r""" | |||
Computes the multi-class cross entropy loss (using logits by default). | |||
r"""Computes the multi-class cross entropy loss (using logits by default). | |||
By default (``with_logits`` is True), ``pred`` is assumed to be logits, | |||
class probabilities are given by softmax. | |||
@@ -170,35 +173,37 @@ def cross_entropy( | |||
where :math:`y^{LS}` and :math:`y` are new label distribution and origin label distribution respectively. | |||
k is the index of label distribution. :math:`\alpha` is ``label_smooth`` and :math:`K` is the number of classes. | |||
:param pred: input tensor representing the predicted probability. | |||
:param label: input tensor representing the classification label. | |||
:param axis: an axis along which softmax will be applied. Default: 1 | |||
:param with_logits: whether to apply softmax first. Default: True | |||
:param label_smooth: a label smoothing of parameter that can re-distribute target distribution. Default: 0 | |||
:param reduction: the reduction to apply to the output: 'none' | 'mean' | 'sum'. Default: 'mean' | |||
:return: loss value. | |||
Args: | |||
pred: input tensor representing the predicted probability. | |||
label: input tensor representing the classification label. | |||
axis: an axis along which softmax will be applied. Default: 1 | |||
with_logits: whether to apply softmax first. Default: True | |||
label_smooth: a label smoothing of parameter that can re-distribute target distribution. Default: 0 | |||
reduction: the reduction to apply to the output: 'none' | 'mean' | 'sum'. Default: 'mean' | |||
Examples: | |||
Returns: | |||
loss value. | |||
.. testcode:: | |||
Examples: | |||
import numpy as np | |||
from megengine import tensor | |||
import megengine.functional as F | |||
.. testcode:: | |||
data_shape = (1, 2) | |||
label_shape = (1, ) | |||
pred = tensor(np.array([0, 0], dtype=np.float32).reshape(data_shape)) | |||
label = tensor(np.ones(label_shape, dtype=np.int32)) | |||
loss = F.nn.cross_entropy(pred, label) | |||
print(loss.numpy().round(decimals=4)) | |||
import numpy as np | |||
from megengine import tensor | |||
import megengine.functional as F | |||
Outputs: | |||
data_shape = (1, 2) | |||
label_shape = (1, ) | |||
pred = tensor(np.array([0, 0], dtype=np.float32).reshape(data_shape)) | |||
label = tensor(np.ones(label_shape, dtype=np.int32)) | |||
loss = F.nn.cross_entropy(pred, label) | |||
print(loss.numpy().round(decimals=4)) | |||
.. testoutput:: | |||
Outputs: | |||
0.6931 | |||
.. testoutput:: | |||
0.6931 | |||
""" | |||
n0 = pred.ndim | |||
n1 = label.ndim | |||
@@ -226,37 +231,38 @@ def cross_entropy( | |||
def binary_cross_entropy( | |||
pred: Tensor, label: Tensor, with_logits: bool = True, reduction: str = "mean", | |||
) -> Tensor: | |||
r""" | |||
Computes the binary cross entropy loss (using logits by default). | |||
r"""Computes the binary cross entropy loss (using logits by default). | |||
By default (``with_logits`` is True), ``pred`` is assumed to be logits, | |||
class probabilities are given by sigmoid. | |||
:param pred: `(N, *)`, where `*` means any number of additional dimensions. | |||
:param label: `(N, *)`, same shape as the input. | |||
:param with_logits: bool, whether to apply sigmoid first. Default: True | |||
:param reduction: the reduction to apply to the output: 'none' | 'mean' | 'sum'. Default: 'mean' | |||
:return: loss value. | |||
Args: | |||
pred: `(N, *)`, where `*` means any number of additional dimensions. | |||
label: `(N, *)`, same shape as the input. | |||
with_logits: bool, whether to apply sigmoid first. Default: True | |||
reduction: the reduction to apply to the output: 'none' | 'mean' | 'sum'. Default: 'mean' | |||
Examples: | |||
Returns: | |||
loss value. | |||
.. testcode:: | |||
Examples: | |||
import numpy as np | |||
from megengine import tensor | |||
import megengine.functional as F | |||
.. testcode:: | |||
pred = tensor(np.array([0, 0], dtype=np.float32).reshape(1, 2)) | |||
label = tensor(np.ones((1, 2), dtype=np.float32)) | |||
loss = F.nn.binary_cross_entropy(pred, label) | |||
print(loss.numpy().round(decimals=4)) | |||
import numpy as np | |||
from megengine import tensor | |||
import megengine.functional as F | |||
Outputs: | |||
pred = tensor(np.array([0, 0], dtype=np.float32).reshape(1, 2)) | |||
label = tensor(np.ones((1, 2), dtype=np.float32)) | |||
loss = F.nn.binary_cross_entropy(pred, label) | |||
print(loss.numpy().round(decimals=4)) | |||
.. testoutput:: | |||
Outputs: | |||
0.6931 | |||
.. testoutput:: | |||
0.6931 | |||
""" | |||
if not with_logits: | |||
return -(label * log(pred) + (1 - label) * log(1 - pred)) | |||
@@ -269,37 +275,38 @@ def binary_cross_entropy( | |||
def hinge_loss( | |||
pred: Tensor, label: Tensor, norm: str = "L1", reduction: str = "mean" | |||
) -> Tensor: | |||
r""" | |||
Calculates the hinge loss, which is often used in SVM. | |||
r"""Calculates the hinge loss, which is often used in SVM. | |||
The hinge loss can be described as: | |||
.. math:: loss(x, y) = \frac{1}{N}\sum_i\sum_j(max(0, 1 - x_{ij}*y_{ij})) | |||
:param pred: input tensor representing the predicted probability, shape is `(N, C)`. | |||
:param label: input tensor representing the binary classification label, shape is `(N, C)`. | |||
:param norm: specify the norm to calculate the loss, should be "L1" or "L2". | |||
:param reduction: the reduction to apply to the output: 'none' | 'mean' | 'sum'. Default: 'mean' | |||
:return: loss value. | |||
Args: | |||
pred: input tensor representing the predicted probability, shape is `(N, C)`. | |||
label: input tensor representing the binary classification label, shape is `(N, C)`. | |||
norm: specify the norm to calculate the loss, should be "L1" or "L2". | |||
reduction: the reduction to apply to the output: 'none' | 'mean' | 'sum'. Default: 'mean' | |||
Examples: | |||
Returns: | |||
loss value. | |||
.. testcode:: | |||
Examples: | |||
from megengine import tensor | |||
import megengine.functional as F | |||
.. testcode:: | |||
pred = tensor([[0.5, -0.5, 0.1], [-0.6, 0.7, 0.8]], dtype="float32") | |||
label = tensor([[1, -1, -1], [-1, 1, 1]], dtype="float32") | |||
loss = F.nn.hinge_loss(pred, label) | |||
print(loss.numpy()) | |||
from megengine import tensor | |||
import megengine.functional as F | |||
Outputs: | |||
pred = tensor([[0.5, -0.5, 0.1], [-0.6, 0.7, 0.8]], dtype="float32") | |||
label = tensor([[1, -1, -1], [-1, 1, 1]], dtype="float32") | |||
loss = F.nn.hinge_loss(pred, label) | |||
print(loss.numpy()) | |||
.. testoutput:: | |||
Outputs: | |||
1.5 | |||
.. testoutput:: | |||
1.5 | |||
""" | |||
norm = norm.upper() | |||
assert norm in ["L1", "L2"], "norm must be L1 or L2" | |||
@@ -19,33 +19,16 @@ from .tensor import broadcast_to, transpose | |||
def topk_accuracy( | |||
logits: Tensor, target: Tensor, topk: Union[int, Iterable[int]] = 1 | |||
) -> Union[Tensor, Iterable[Tensor]]: | |||
r""" | |||
Calculates the classification accuracy given predicted logits and ground-truth labels. | |||
r"""Calculates the classification accuracy given predicted logits and ground-truth labels. | |||
:param logits: model predictions of shape `[batch_size, num_classes]`, | |||
representing the probability (likelihood) of each class. | |||
:param target: ground-truth labels, 1d tensor of int32. | |||
:param topk: specifies the topk values, could be an int or tuple of ints. Default: 1 | |||
:return: tensor(s) of classification accuracy between 0.0 and 1.0. | |||
Args: | |||
logits: model predictions of shape `[batch_size, num_classes]`, | |||
representing the probability (likelihood) of each class. | |||
target: ground-truth labels, 1d tensor of int32. | |||
topk: specifies the topk values, could be an int or tuple of ints. Default: 1 | |||
Examples: | |||
.. testcode:: | |||
import numpy as np | |||
from megengine import tensor | |||
import megengine.functional as F | |||
logits = tensor(np.arange(80, dtype=np.int32).reshape(8,10)) | |||
target = tensor(np.arange(8, dtype=np.int32)) | |||
top1, top5 = F.metric.topk_accuracy(logits, target, (1, 5)) | |||
print(top1.numpy(), top5.numpy()) | |||
Outputs: | |||
.. testoutput:: | |||
0.0 0.375 | |||
Returns: | |||
tensor(s) of classification accuracy between 0.0 and 1.0. | |||
""" | |||
if isinstance(topk, int): | |||
topk = (topk,) | |||
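A small usage sketch:

.. code-block::

    import numpy as np
    from megengine import tensor
    import megengine.functional as F

    logits = tensor(np.arange(80, dtype=np.int32).reshape(8, 10))
    target = tensor(np.arange(8, dtype=np.int32))
    top1, top5 = F.metric.topk_accuracy(logits, target, (1, 5))
    print(top1.numpy(), top5.numpy())   # 0.0 0.375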
@@ -28,32 +28,28 @@ def conv_bias_activation( | |||
conv_mode="cross_correlation", | |||
compute_mode="default", | |||
) -> Tensor: | |||
""" | |||
Convolution bias with activation operation, only for inference. | |||
:param inp: feature map of the convolution operation. | |||
:param weight: convolution kernel. | |||
:param bias: bias added to the result of convolution | |||
:param stride: stride of the 2D convolution operation. Default: 1 | |||
:param padding: size of the paddings added to the input on both sides | |||
of its spatial dimensions. Only zero-padding is supported. Default: 0 | |||
:param dilation: dilation of the 2D convolution operation. Default: 1 | |||
:param groups: number of groups into which the input and output channels are divided, | |||
so as to perform a "grouped convolution". When ``groups`` is not 1, | |||
``in_channels`` and ``out_channels`` must be divisible by ``groups``, | |||
and the shape of weight should be `(groups, out_channel // groups, | |||
in_channels // groups, height, width)`. | |||
:type conv_mode: string or :class:`Convolution.Mode`. | |||
:param conv_mode: supports 'cross_correlation' or 'convolution'. Default: | |||
'cross_correlation' | |||
:param dtype: support for ``np.dtype``, Default: np.int8 | |||
:type compute_mode: string or | |||
:class:`Convolution.ComputeMode`. | |||
:param compute_mode: when set to "default", no special requirements will be | |||
placed on the precision of intermediate results. When set to "float32", | |||
"float32" would be used for accumulator and intermediate result, | |||
but only effective when input and output are of float16 dtype. | |||
r"""Convolution bias with activation operation, only for inference. | |||
Args: | |||
inp: feature map of the convolution operation. | |||
weight: convolution kernel. | |||
bias: bias added to the result of convolution | |||
stride: stride of the 2D convolution operation. Default: 1 | |||
padding: size of the paddings added to the input on both sides | |||
of its spatial dimensions. Only zero-padding is supported. Default: 0 | |||
dilation: dilation of the 2D convolution operation. Default: 1 | |||
groups: number of groups into which the input and output channels are divided, | |||
so as to perform a "grouped convolution". When ``groups`` is not 1, | |||
``in_channels`` and ``out_channels`` must be divisible by ``groups``, | |||
and the shape of weight should be `(groups, out_channel // groups, | |||
in_channels // groups, height, width)`. | |||
conv_mode: supports 'cross_correlation' or 'convolution'. Default: | |||
'cross_correlation' | |||
dtype: support for ``np.dtype``, Default: np.int8 | |||
compute_mode: when set to "default", no special requirements will be | |||
placed on the precision of intermediate results. When set to "float32", | |||
"float32" would be used for accumulator and intermediate result, | |||
but only effective when input and output are of float16 dtype. | |||
""" | |||
ph, pw = _pair(padding) | |||
sh, sw = _pair_nonzero(stride) | |||
@@ -91,32 +87,28 @@ def batch_conv_bias_activation( | |||
conv_mode="cross_correlation", | |||
compute_mode="default", | |||
) -> Tensor: | |||
""" | |||
Batch convolution bias with activation operation, only for inference. | |||
:param inp: feature map of the convolution operation. | |||
:param weight: convolution kernel in batched way. | |||
:param bias: bias added to the result of convolution | |||
:param stride: stride of the 2D convolution operation. Default: 1 | |||
:param padding: size of the paddings added to the input on both sides | |||
of its spatial dimensions. Only zero-padding is supported. Default: 0 | |||
:param dilation: dilation of the 2D convolution operation. Default: 1 | |||
:param groups: number of groups into which the input and output channels are divided, | |||
so as to perform a "grouped convolution". When ``groups`` is not 1, | |||
``in_channels`` and ``out_channels`` must be divisible by ``groups``, | |||
and the shape of weight should be `(groups, out_channel // groups, | |||
in_channels // groups, height, width)`. | |||
:type conv_mode: string or :class:`Convolution.Mode`. | |||
:param conv_mode: supports 'cross_correlation' or 'convolution'. Default: | |||
'cross_correlation' | |||
:param dtype: support for ``np.dtype``, Default: np.int8 | |||
:type compute_mode: string or | |||
:class:`Convolution.ComputeMode`. | |||
:param compute_mode: when set to "default", no special requirements will be | |||
placed on the precision of intermediate results. When set to "float32", | |||
"float32" would be used for accumulator and intermediate result, | |||
but only effective when input and output are of float16 dtype. | |||
r"""Batch convolution bias with activation operation, only for inference. | |||
Args: | |||
inp: feature map of the convolution operation. | |||
weight: convolution kernel in batched way. | |||
bias: bias added to the result of convolution | |||
stride: stride of the 2D convolution operation. Default: 1 | |||
padding: size of the paddings added to the input on both sides | |||
of its spatial dimensions. Only zero-padding is supported. Default: 0 | |||
dilation: dilation of the 2D convolution operation. Default: 1 | |||
groups: number of groups into which the input and output channels are divided, | |||
so as to perform a "grouped convolution". When ``groups`` is not 1, | |||
``in_channels`` and ``out_channels`` must be divisible by ``groups``, | |||
and the shape of weight should be `(groups, out_channel // groups, | |||
in_channels // groups, height, width)`. | |||
conv_mode: supports 'cross_correlation' or 'convolution'. Default: | |||
'cross_correlation' | |||
dtype: support for ``np.dtype``, Default: np.int8 | |||
compute_mode: when set to "default", no special requirements will be | |||
placed on the precision of intermediate results. When set to "float32", | |||
"float32" would be used for accumulator and intermediate result, | |||
but only effective when input and output are of float16 dtype. | |||
""" | |||
ph, pw = _pair(padding) | |||
sh, sw = _pair_nonzero(stride) | |||
@@ -19,37 +19,36 @@ __all__ = ["topk_accuracy"] | |||
def _assert_equal( | |||
expect: Tensor, actual: Tensor, *, maxerr: float = 0.0001, verbose: bool = False | |||
): | |||
r""" | |||
Asserts two tensors equal and returns expected value (first input). | |||
r"""Asserts two tensors equal and returns expected value (first input). | |||
It is a variant of python assert which is symbolically traceable (similar to ``numpy.testing.assert_equal``). | |||
If we want to verify the correctness of a model, we can simply ``assert`` its states and outputs.
But sometimes we need to verify correctness on different backends for a *dumped* model
(or in a :class:`~jit.trace` context), where no Python code can be executed.
Thus we have to use :func:`~functional.utils._assert_equal` instead.
:param expect: expected tensor value | |||
:param actual: tensor to check value | |||
:param maxerr: max allowed error; error is defined as the minimal of absolute and relative error | |||
:param verbose: whether to print maxerr to stdout during opr exec | |||
:return: expected tensor | |||
Args: | |||
expect: expected tensor value | |||
actual: tensor to check value | |||
maxerr: max allowed error; error is defined as the minimal of absolute and relative error | |||
verbose: whether to print maxerr to stdout during opr exec | |||
Examples: | |||
.. testcode:: | |||
.. testcode:: | |||
import numpy as np | |||
from megengine import tensor | |||
import megengine.functional as F | |||
import numpy as np | |||
from megengine import tensor | |||
import megengine.functional as F | |||
x = tensor([1, 2, 3], np.float32) | |||
y = tensor([1, 2, 3], np.float32) | |||
print(F.utils._assert_equal(x, y, maxerr=0).numpy()) | |||
x = tensor([1, 2, 3], np.float32) | |||
y = tensor([1, 2, 3], np.float32) | |||
print(F.utils._assert_equal(x, y, maxerr=0).numpy()) | |||
Outputs: | |||
Outputs: | |||
.. testoutput:: | |||
.. testoutput:: | |||
[1. 2. 3.] | |||
[1. 2. 3.] | |||
""" | |||
err = ( | |||
abs(expect - actual) | |||
@@ -21,31 +21,32 @@ from .tensor import broadcast_to, concat, expand_dims, reshape, transpose | |||
def cvt_color(inp: Tensor, mode: str = ""): | |||
r""" | |||
Convert images from one format to another | |||
r"""Convert images from one format to another | |||
:param inp: input images. | |||
:param mode: format mode. | |||
:return: convert result. | |||
Args: | |||
inp: input images. | |||
mode: format mode. | |||
Examples: | |||
Returns: | |||
converted result.
.. testcode:: | |||
Examples: | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.functional as F | |||
.. testcode:: | |||
x = mge.tensor(np.array([[[[-0.58675045, 1.7526233, 0.10702174]]]]).astype(np.float32)) | |||
y = F.vision.cvt_color(x, mode="RGB2GRAY") | |||
print(y.numpy()) | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.functional as F | |||
Outputs: | |||
x = mge.tensor(np.array([[[[-0.58675045, 1.7526233, 0.10702174]]]]).astype(np.float32)) | |||
y = F.vision.cvt_color(x, mode="RGB2GRAY") | |||
print(y.numpy()) | |||
.. testoutput:: | |||
Outputs: | |||
[[[[0.86555195]]]] | |||
.. testoutput:: | |||
[[[[0.86555195]]]] | |||
""" | |||
mode = mode.upper() | |||
assert mode in builtin.CvtColor.Mode.__dict__, "unspport mode for cvt_color" | |||
@@ -63,37 +64,38 @@ def roi_pooling( | |||
mode: str = "max", | |||
scale: float = 1.0, | |||
) -> Tensor: | |||
""" | |||
Applies roi pooling on input feature. | |||
r"""Applies roi pooling on input feature. | |||
:param inp: tensor that represents the input feature, `(N, C, H, W)` images. | |||
:param rois: `(K, 5)` boxes. First column is the index into N. The other 4 columns are xyxy. | |||
:param output_shape: `(height, width)` of output rois feature. | |||
:param mode: "max" or "average", use max/average align just like max/average pooling. Default: "max" | |||
:param scale: scale the input boxes by this number. Default: 1.0 | |||
:return: `(K, C, output_shape[0], output_shape[1])` feature of rois. | |||
Args: | |||
inp: tensor that represents the input feature, `(N, C, H, W)` images. | |||
rois: `(K, 5)` boxes. First column is the index into N. The other 4 columns are xyxy.
output_shape: `(height, width)` of output rois feature.
mode: "max" or "average", use max/average align just like max/average pooling. Default: "max"
scale: scale the input boxes by this number. Default: 1.0 | |||
Examples: | |||
Returns: | |||
``(K, C, output_shape[0], output_shape[1])`` feature of rois.
.. testcode:: | |||
Examples: | |||
import numpy as np | |||
from megengine import tensor | |||
import megengine.functional as F | |||
.. testcode:: | |||
np.random.seed(42) | |||
inp = tensor(np.random.randn(1, 1, 128, 128)) | |||
rois = tensor(np.random.random((4, 5))) | |||
y = F.vision.roi_pooling(inp, rois, (2, 2)) | |||
print(y.numpy()[0].round(decimals=4)) | |||
import numpy as np | |||
from megengine import tensor | |||
import megengine.functional as F | |||
Outputs: | |||
np.random.seed(42) | |||
inp = tensor(np.random.randn(1, 1, 128, 128)) | |||
rois = tensor(np.random.random((4, 5))) | |||
y = F.vision.roi_pooling(inp, rois, (2, 2)) | |||
print(y.numpy()[0].round(decimals=4)) | |||
.. testoutput:: | |||
Outputs: | |||
[[[-0.1383 -0.1383] | |||
[-0.5035 -0.5035]]] | |||
.. testoutput:: | |||
[[[-0.1383 -0.1383] | |||
[-0.5035 -0.5035]]] | |||
""" | |||
assert mode.lower() in ["max", "average"], "only max/average mode is supported" | |||
if isinstance(output_shape, int): | |||
@@ -116,17 +118,17 @@ def correlation( | |||
pad_size: int = 0, | |||
is_multiply: bool = True, | |||
) -> Tensor: | |||
""" Applies correlation to inputs. | |||
:param data1: Input data1 to the correlation. format must be nchw | |||
:param data2: Input data2 to the correlation. format must be nchw | |||
:param kernel_size: (int (non-negative), optional, default=1) – kernel size for Correlation must be an odd number | |||
:param max_displacement: (int (non-negative), optional, default=1) – Max displacement of Correlation | |||
:param stride1: (int (non-negative), optional, default=1) – stride1 quantize data1 globally | |||
:param stride2: (int (non-negative), optional, default=1) – stride2 quantize data2 within the neighborhood centered around data1 | |||
:param pad_size: (int (non-negative), optional, default=0) – pad for Correlation | |||
:param is_multiply: (boolean, optional, default=True) – operation type is either multiplication or absolute difference | |||
r"""Applies correlation to inputs. | |||
Args: | |||
data1: Input data1 to the correlation. format must be nchw | |||
data2: Input data2 to the correlation. format must be nchw | |||
kernel_size: (int (non-negative), optional, default=1) – kernel size for Correlation must be an odd number
max_displacement: (int (non-negative), optional, default=1) – Max displacement of Correlation
stride1: (int (non-negative), optional, default=1) – stride1 quantize data1 globally
stride2: (int (non-negative), optional, default=1) – stride2 quantize data2 within the neighborhood centered around data1
pad_size: (int (non-negative), optional, default=0) – pad for Correlation
is_multiply: (boolean, optional, default=True) – operation type is either multiplication or absolute difference
""" | |||
op = builtin.Correlation( | |||
@@ -152,41 +154,42 @@ def roi_align( | |||
sample_points: Union[int, tuple, list] = 2, | |||
aligned: bool = True, | |||
) -> Tensor: | |||
""" | |||
Applies roi align on input feature. | |||
:param inp: tensor that represents the input feature, shape is `(N, C, H, W)`. | |||
:param rois: `(N, 5)` boxes. First column is the box index. The other 4 columns are ``xyxy``. | |||
:param output_shape: `(height, width)` shape of output rois feature. | |||
:param mode: "max" or "average", use max/average align just like max/average pooling. Default: "average" | |||
:param spatial_scale: scale the input boxes by this number. Default: 1.0 | |||
:param sample_points: number of inputs samples to take for each output sample. | |||
0 to take samples densely. Default: 2 | |||
:param aligned: wheather to align the input feature, with `aligned=True`, | |||
we first appropriately scale the ROI and then shift it by -0.5. Default: True | |||
:return: output tensor. | |||
r"""Applies roi align on input feature. | |||
Args: | |||
inp: tensor that represents the input feature, shape is `(N, C, H, W)`. | |||
rois: `(N, 5)` boxes. First column is the box index. The other 4 columns are ``xyxy``.
output_shape: `(height, width)` shape of output rois feature.
mode: "max" or "average", use max/average align just like max/average pooling. Default: "average"
spatial_scale: scale the input boxes by this number. Default: 1.0 | |||
sample_points: number of inputs samples to take for each output sample. | |||
0 to take samples densely. Default: 2 | |||
aligned: whether to align the input feature, with `aligned=True`,
we first appropriately scale the ROI and then shift it by -0.5. Default: True | |||
Returns: | |||
output tensor. | |||
Examples: | |||
.. testcode:: | |||
.. testcode:: | |||
import numpy as np | |||
from megengine import tensor | |||
import megengine.functional as F | |||
import numpy as np | |||
from megengine import tensor | |||
import megengine.functional as F | |||
np.random.seed(42) | |||
inp = tensor(np.random.randn(1, 1, 128, 128)) | |||
rois = tensor(np.random.random((4, 5))) | |||
y = F.vision.roi_align(inp, rois, (2, 2)) | |||
print(y.numpy()[0].round(decimals=4)) | |||
Outputs: | |||
np.random.seed(42) | |||
inp = tensor(np.random.randn(1, 1, 128, 128)) | |||
rois = tensor(np.random.random((4, 5))) | |||
y = F.vision.roi_align(inp, rois, (2, 2)) | |||
print(y.numpy()[0].round(decimals=4)) | |||
.. testoutput:: | |||
Outputs: | |||
[[[0.175 0.175 ] | |||
[0.1359 0.1359]]] | |||
.. testoutput:: | |||
[[[0.175 0.175 ] | |||
[0.1359 0.1359]]] | |||
""" | |||
if inp.dtype != np.float32: | |||
inp = inp.astype(np.float32) | |||
@@ -217,43 +220,43 @@ def roi_align( | |||
def nms( | |||
boxes: Tensor, scores: Tensor, iou_thresh: float, max_output: Optional[int] = None | |||
) -> Tensor: | |||
r""" | |||
Performs non-maximum suppression (NMS) on the boxes according to their intersection-over-union(IoU). | |||
r"""Performs non-maximum suppression (NMS) on the boxes according to their intersection-over-union(IoU). | |||
:param boxes: tensor of shape `(N, 4)`; the boxes to perform nms on; each box is expected to be in `(x1, y1, x2, y2)` format. | |||
:param iou_thresh: IoU threshold for overlapping. | |||
:param scores: tensor of shape `(N,)`, the score of boxes. | |||
:param max_output: the maximum number of boxes to keep; it is optional if this operator is not traced | |||
otherwise it required to be specified; if it is not specified, all boxes are kept. | |||
:return: indices of the elements that have been kept by NMS, sorted by scores. | |||
Args: | |||
boxes: tensor of shape `(N, 4)`; the boxes to perform nms on; each box is expected to be in `(x1, y1, x2, y2)` format. | |||
iou_thresh: IoU threshold for overlapping. | |||
scores: tensor of shape `(N,)`, the score of boxes. | |||
max_output: the maximum number of boxes to keep; it is optional if this operator is not traced,
otherwise it is required to be specified; if it is not specified, all boxes are kept.
.. note:: | |||
Returns: | |||
indices of the elements that have been kept by NMS, sorted by scores. | |||
max_output should be specified and should have valid positive value under tracing | |||
Note: | |||
max_output should be specified and should have valid positive value under tracing. | |||
Examples: | |||
.. testcode:: | |||
import numpy as np | |||
from megengine import tensor | |||
import megengine.functional as F | |||
.. testcode:: | |||
x = np.zeros((100,4)) | |||
np.random.seed(42) | |||
x[:,:2] = np.random.rand(100,2)*20 | |||
x[:,2:] = np.random.rand(100,2)*20 + 100 | |||
scores = tensor(np.random.rand(100)) | |||
inp = tensor(x) | |||
result = F.vision.nms(inp, scores, iou_thresh=0.7) | |||
print(result.numpy()) | |||
import numpy as np | |||
from megengine import tensor | |||
import megengine.functional as F | |||
Outputs: | |||
x = np.zeros((100,4)) | |||
np.random.seed(42) | |||
x[:,:2] = np.random.rand(100,2)*20 | |||
x[:,2:] = np.random.rand(100,2)*20 + 100 | |||
scores = tensor(np.random.rand(100)) | |||
inp = tensor(x) | |||
result = F.vision.nms(inp, scores, iou_thresh=0.7) | |||
print(result.numpy()) | |||
.. testoutput:: | |||
Outputs: | |||
[75 69] | |||
.. testoutput:: | |||
[75 69] | |||
""" | |||
assert ( | |||
boxes.ndim == 2 and boxes.shape[1] == 4 | |||
@@ -286,45 +289,46 @@ def remap( | |||
scalar: float = 0.0, | |||
interp_mode: str = "linear", | |||
) -> Tensor: | |||
r""" | |||
Applies remap transformation to batched 2D images. | |||
r"""Applies remap transformation to batched 2D images. | |||
The input images are transformed to the output images by the tensor map_xy. | |||
The output's H and W are the same as map_xy's H and W.
:param inp: input image | |||
:param map_xy: (batch, oh, ow, 2) transformation matrix | |||
:param border_mode: pixel extrapolation method. | |||
Default: "replicate". Currently also support "constant", "reflect", | |||
"reflect_101", "wrap". | |||
:param scalar: value used in case of a constant border. Default: 0 | |||
:param interp_mode: interpolation methods. | |||
Default: "linear". Currently only support "linear" mode. | |||
:return: output tensor. | |||
Args: | |||
inp: input image | |||
map_xy: (batch, oh, ow, 2) transformation matrix
border_mode: pixel extrapolation method. | |||
Default: "replicate". Currently also support "constant", "reflect", | |||
"reflect_101", "wrap". | |||
scalar: value used in case of a constant border. Default: 0 | |||
interp_mode: interpolation methods. | |||
Default: "linear". Currently only support "linear" mode. | |||
Returns: | |||
output tensor. | |||
Examples: | |||
.. testcode:: | |||
.. testcode:: | |||
import numpy as np | |||
from megengine import tensor | |||
import megengine.functional as F | |||
inp_shape = (1, 1, 4, 4) | |||
inp = tensor(np.arange(16, dtype=np.float32).reshape(inp_shape)) | |||
map_xy_shape = (1, 2, 2, 2) | |||
map_xy = tensor(np.array([[[1., 0.],[0., 1.]], | |||
[[0., 1.],[0., 1.]]], | |||
dtype=np.float32).reshape(map_xy_shape)) | |||
out = F.vision.remap(inp, map_xy) | |||
print(out.numpy()) | |||
Outputs: | |||
import numpy as np | |||
from megengine import tensor | |||
import megengine.functional as F | |||
inp_shape = (1, 1, 4, 4) | |||
inp = tensor(np.arange(16, dtype=np.float32).reshape(inp_shape)) | |||
map_xy_shape = (1, 2, 2, 2) | |||
map_xy = tensor(np.array([[[1., 0.],[0., 1.]], | |||
[[0., 1.],[0., 1.]]], | |||
dtype=np.float32).reshape(map_xy_shape)) | |||
out = F.vision.remap(inp, map_xy) | |||
print(out.numpy()) | |||
.. testoutput:: | |||
Outputs: | |||
[[[[1. 4.] | |||
[4. 4.]]]] | |||
.. testoutput:: | |||
[[[[1. 4.] | |||
[4. 4.]]]] | |||
""" | |||
op = builtin.Remap( | |||
@@ -344,27 +348,28 @@ def warp_affine( | |||
format: str = "NHWC", | |||
interp_mode: str = "linear", | |||
) -> Tensor: | |||
""" | |||
Batched affine transform on 2D images. | |||
:param inp: input image. | |||
:param mat: `(batch, 2, 3)` transformation matrix. | |||
:param out_shape: output tensor shape. | |||
:param border_mode: pixel extrapolation method. | |||
Default: "wrap". Currently "constant", "reflect", | |||
"reflect_101", "isolated", "wrap", "replicate", "transparent" are supported. | |||
:param border_val: value used in case of a constant border. Default: 0 | |||
:param format: "NHWC" as default based on historical concerns, | |||
"NCHW" is also supported. Default: "NHWC". | |||
:param interp_mode: interpolation methods. Could be "linear", "nearest", "cubic", "area". | |||
Default: "linear". | |||
:return: output tensor. | |||
.. note:: | |||
Here all available options for params are listed, | |||
however it does not mean that you can use all the combinations. | |||
On different platforms, different combinations are supported. | |||
r"""Batched affine transform on 2D images. | |||
Args: | |||
inp: input image. | |||
mat: `(batch, 2, 3)` transformation matrix.
out_shape: output tensor shape. | |||
border_mode: pixel extrapolation method. | |||
Default: "wrap". Currently "constant", "reflect", | |||
"reflect_101", "isolated", "wrap", "replicate", "transparent" are supported. | |||
border_val: value used in case of a constant border. Default: 0 | |||
format: NHWC" as default based on historical concerns, | |||
"NCHW" is also supported. Default: "NHWC". | |||
interp_mode: interpolation methods. Could be "linear", "nearest", "cubic", "area". | |||
Default: "linear". | |||
Returns: | |||
output tensor. | |||
Note: | |||
Here all available options for params are listed, | |||
however it does not mean that you can use all the combinations. | |||
On different platforms, different combinations are supported. | |||
""" | |||
op = builtin.WarpAffine( | |||
border_mode=border_mode, | |||
@@ -387,8 +392,7 @@ def warp_perspective( | |||
format: str = "NCHW", | |||
interp_mode: str = "linear", | |||
) -> Tensor: | |||
r""" | |||
Applies perspective transformation to batched 2D images. | |||
r"""Applies perspective transformation to batched 2D images. | |||
The input images are transformed to the output images by the transformation matrix: | |||
@@ -401,48 +405,49 @@ def warp_perspective( | |||
Optionally, we can set `mat_idx` to assign different transformations to the same image, | |||
otherwise the input images and transformations should be in one-to-one correspondence.
:param inp: input image. | |||
:param mat: `(batch, 3, 3)` transformation matrix. | |||
:param out_shape: `(h, w)` size of the output image. | |||
:param mat_idx: `(batch, )` image batch idx assigned to each matrix. Default: None | |||
:param border_mode: pixel extrapolation method. | |||
Default: "replicate". Currently also support "constant", "reflect", | |||
"reflect_101", "wrap". | |||
:param border_val: value used in case of a constant border. Default: 0 | |||
:param format: "NHWC" is also supported. Default: "NCHW". | |||
:param interp_mode: interpolation methods. | |||
Default: "linear". Currently only support "linear" mode. | |||
:return: output tensor. | |||
.. note:: | |||
The transformation matrix is the inverse of that used by `cv2.warpPerspective`. | |||
Args: | |||
inp: input image. | |||
mat: `(batch, 3, 3)` transformation matrix.
out_shape: `(h, w)` size of the output image.
mat_idx: `(batch, )` image batch idx assigned to each matrix. Default: None
border_mode: pixel extrapolation method. | |||
Default: "replicate". Currently also support "constant", "reflect", | |||
"reflect_101", "wrap". | |||
border_val: value used in case of a constant border. Default: 0 | |||
format: NHWC" is also supported. Default: "NCHW". | |||
interp_mode: interpolation methods. | |||
Default: "linear". Currently only support "linear" mode. | |||
Returns: | |||
output tensor. | |||
Note: | |||
The transformation matrix is the inverse of that used by `cv2.warpPerspective`. | |||
Examples: | |||
.. testcode:: | |||
import numpy as np | |||
from megengine import tensor | |||
import megengine.functional as F | |||
.. testcode:: | |||
inp_shape = (1, 1, 4, 4) | |||
x = tensor(np.arange(16, dtype=np.float32).reshape(inp_shape)) | |||
M_shape = (1, 3, 3) | |||
# M defines a translation: dst(1, 1, h, w) = rst(1, 1, h+1, w+1) | |||
M = tensor(np.array([[1., 0., 1.], | |||
[0., 1., 1.], | |||
[0., 0., 1.]], dtype=np.float32).reshape(M_shape)) | |||
out = F.vision.warp_perspective(x, M, (2, 2)) | |||
print(out.numpy()) | |||
import numpy as np | |||
from megengine import tensor | |||
import megengine.functional as F | |||
Outputs: | |||
inp_shape = (1, 1, 4, 4) | |||
x = tensor(np.arange(16, dtype=np.float32).reshape(inp_shape)) | |||
M_shape = (1, 3, 3) | |||
# M defines a translation: dst(1, 1, h, w) = rst(1, 1, h+1, w+1) | |||
M = tensor(np.array([[1., 0., 1.], | |||
[0., 1., 1.], | |||
[0., 0., 1.]], dtype=np.float32).reshape(M_shape)) | |||
out = F.vision.warp_perspective(x, M, (2, 2)) | |||
print(out.numpy()) | |||
.. testoutput:: | |||
Outputs: | |||
[[[[ 5. 6.] | |||
[ 9. 10.]]]] | |||
.. testoutput:: | |||
[[[[ 5. 6.] | |||
[ 9. 10.]]]] | |||
""" | |||
if inp.dtype == np.float32: | |||
mat = mat.astype("float32") | |||
@@ -467,48 +472,48 @@ def interpolate( | |||
mode: str = "bilinear", | |||
align_corners: Optional[bool] = None, | |||
) -> Tensor: | |||
r""" | |||
Down/up samples the input tensor to either the given size or with the given scale_factor. ``size`` can not coexist with ``scale_factor``. | |||
:param inp: input tensor. | |||
:param size: size of the output tensor. Default: None | |||
:param scale_factor: scaling factor of the output tensor. Default: None | |||
:param mode: interpolation methods, acceptable values are: | |||
"bilinear", "linear", "bicubic" and "nearest". Default: "bilinear" | |||
:param align_corners: This only has an effect when `mode` | |||
is "bilinear" or "linear". Geometrically, we consider the pixels of the input | |||
and output as squares rather than points. If set to ``True``, the input | |||
and output tensors are aligned by the center points of their corner | |||
pixels, preserving the values at the corner pixels. If set to ``False``, | |||
the input and output tensors are aligned by the corner points of their | |||
corner pixels, and the interpolation uses edge value padding for | |||
out-of-boundary values, making this operation *independent* of input size | |||
:return: output tensor. | |||
r"""Down/up samples the input tensor to either the given size or with the given scale_factor. ``size`` can not coexist with ``scale_factor``. | |||
Args: | |||
inp: input tensor. | |||
size: size of the output tensor. Default: None | |||
scale_factor: scaling factor of the output tensor. Default: None | |||
mode: interpolation methods, acceptable values are: | |||
"bilinear", "linear", "bicubic" and "nearest". Default: "bilinear" | |||
align_corners: This only has an effect when `mode` | |||
is "bilinear" or "linear". Geometrically, we consider the pixels of the input | |||
and output as squares rather than points. If set to ``True``, the input | |||
and output tensors are aligned by the center points of their corner | |||
pixels, preserving the values at the corner pixels. If set to ``False``, | |||
the input and output tensors are aligned by the corner points of their | |||
corner pixels, and the interpolation uses edge value padding for | |||
out-of-boundary values, making this operation *independent* of input size | |||
Returns: | |||
output tensor. | |||
Examples: | |||
.. testcode:: | |||
import numpy as np | |||
from megengine import tensor | |||
import megengine.functional as F | |||
.. testcode:: | |||
x = tensor(np.arange(1, 5, dtype=np.float32).reshape(1, 1, 2, 2)) | |||
out = F.vision.interpolate(x, [4, 4], align_corners=False) | |||
print(out.numpy()) | |||
out2 = F.vision.interpolate(x, scale_factor=2.) | |||
np.testing.assert_allclose(out.numpy(), out2.numpy()) | |||
import numpy as np | |||
from megengine import tensor | |||
import megengine.functional as F | |||
Outputs: | |||
x = tensor(np.arange(1, 5, dtype=np.float32).reshape(1, 1, 2, 2)) | |||
out = F.vision.interpolate(x, [4, 4], align_corners=False) | |||
print(out.numpy()) | |||
out2 = F.vision.interpolate(x, scale_factor=2.) | |||
np.testing.assert_allclose(out.numpy(), out2.numpy()) | |||
.. testoutput:: | |||
Outputs: | |||
[[[[1. 1.25 1.75 2. ] | |||
[1.5 1.75 2.25 2.5 ] | |||
[2.5 2.75 3.25 3.5 ] | |||
[3. 3.25 3.75 4. ]]]] | |||
.. testoutput:: | |||
[[[[1. 1.25 1.75 2. ] | |||
[1.5 1.75 2.25 2.5 ] | |||
[2.5 2.75 3.25 3.5 ] | |||
[3. 3.25 3.75 4. ]]]] | |||
""" | |||
mode = mode.lower() | |||
if mode not in ["bilinear", "linear", "bicubic", "nearest"]: | |||
@@ -623,15 +628,15 @@ def interpolate( | |||
def nvof(src: Tensor, precision: int = 1) -> Tensor: | |||
r""" | |||
Implements NVIDIA Optical Flow SDK. | |||
r"""Implements NVIDIA Optical Flow SDK. | |||
Args: | |||
src: input tensor with shape (n, t, h, w, c4) and uint8 dtype.
precision: 0:NV_OF_PERF_LEVEL_SLOW 1:NV_OF_PERF_LEVEL_MEDIUM 2:NV_OF_PERF_LEVEL_FAST. | |||
:src shape: input tensor with shape (n, t, h, w, c4). | |||
:src dtype: uint8. | |||
:param precision: 0:NV_OF_PERF_LEVEL_SLOW 1:NV_OF_PERF_LEVEL_MEDIUM 2:NV_OF_PERF_LEVEL_FAST. | |||
:output shape: ``(n, t-1, (h+out_grid_size-1)//out_grid_size, (w+out_grid_size-1)//out_grid_size, c2)``. | |||
By default, out_grid_size = 4. | |||
:output dtype: int16. | |||
Returns: | |||
output tensor with shape ``(n, t-1, (h+out_grid_size-1)//out_grid_size, (w+out_grid_size-1)//out_grid_size, c2)``
and dtype int16. By default, out_grid_size = 4.
.. code-block:: python | |||
@@ -643,7 +648,6 @@ def nvof(src: Tensor, precision: int = 1) -> Tensor: | |||
src = tensor(x) | |||
result = F.nn.nvof(src, precision=1) | |||
print(result.numpy()) | |||
""" | |||
assert src.ndim == 5 and src.shape[4] == 4 | |||
@@ -7,24 +7,24 @@ | |||
# software distributed under the License is distributed on an | |||
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
class FetcherError(Exception): | |||
"""Base class for fetch related error.""" | |||
r"""Base class for fetch related error.""" | |||
class InvalidRepo(FetcherError): | |||
"""The repo provided was somehow invalid.""" | |||
r"""The repo provided was somehow invalid.""" | |||
class InvalidGitHost(FetcherError): | |||
"""The git host provided was somehow invalid.""" | |||
r"""The git host provided was somehow invalid.""" | |||
class GitPullError(FetcherError): | |||
"""A git pull error occurred.""" | |||
r"""A git pull error occurred.""" | |||
class GitCheckoutError(FetcherError): | |||
"""A git checkout error occurred.""" | |||
r"""A git checkout error occurred.""" | |||
class InvalidProtocol(FetcherError): | |||
"""The protocol provided was somehow invalid.""" | |||
r"""The protocol provided was somehow invalid.""" |
@@ -102,24 +102,18 @@ class GitSSHFetcher(RepoFetcherBase): | |||
commit: str = None, | |||
silent: bool = True, | |||
) -> str: | |||
""" | |||
Fetches git repo by SSH protocol | |||
:param git_host: | |||
host address of git repo. | |||
Example: github.com | |||
:param repo_info: | |||
a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | |||
tag/branch. The default branch is ``master`` if not specified. | |||
Example: ``"brain_sdk/MegBrain[:hub]"`` | |||
:param use_cache: | |||
whether to use locally fetched code or completely re-fetch. | |||
:param commit: | |||
commit id on github or gitlab. | |||
:param silent: | |||
whether to accept the stdout and stderr of the subprocess with PIPE, instead of | |||
displaying on the screen. | |||
:return: | |||
"""Fetches git repo by SSH protocol | |||
Args: | |||
git_host: host address of git repo. Eg: github.com | |||
repo_info: a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | |||
tag/branch. The default branch is ``master`` if not specified. Eg: ``"brain_sdk/MegBrain[:hub]"`` | |||
use_cache: whether to use locally fetched code or completely re-fetch. | |||
commit: commit id on github or gitlab. | |||
silent: whether to accept the stdout and stderr of the subprocess with PIPE, instead of | |||
displaying on the screen. | |||
Returns: | |||
directory where the repo code is stored. | |||
""" | |||
if not cls._check_git_host(git_host): | |||
@@ -217,24 +211,19 @@ class GitHTTPSFetcher(RepoFetcherBase): | |||
commit: str = None, | |||
silent: bool = True, | |||
) -> str: | |||
""" | |||
Fetches git repo by HTTPS protocol. | |||
:param git_host: | |||
host address of git repo. | |||
Example: github.com | |||
:param repo_info: | |||
a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | |||
tag/branch. The default branch is ``master`` if not specified. | |||
Example: ``"brain_sdk/MegBrain[:hub]"`` | |||
:param use_cache: | |||
whether to use locally cached code or completely re-fetch. | |||
:param commit: | |||
commit id on github or gitlab. | |||
:param silent: | |||
whether to accept the stdout and stderr of the subprocess with PIPE, instead of | |||
displaying on the screen. | |||
:return: | |||
"""Fetches git repo by HTTPS protocol. | |||
Args: | |||
git_host: host address of git repo. Eg: github.com | |||
repo_info: a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | |||
tag/branch. The default branch is ``master`` if not specified. Eg: ``"brain_sdk/MegBrain[:hub]"`` | |||
use_cache: whether to use locally cached code or completely re-fetch. | |||
commit: commit id on github or gitlab. | |||
silent: whether to accept the stdout and stderr of the subprocess with PIPE, instead of | |||
displaying on the screen. | |||
Returns: | |||
directory where the repo code is stored. | |||
""" | |||
if not cls._check_git_host(git_host): | |||
@@ -43,9 +43,7 @@ PROTOCOLS = { | |||
def _get_megengine_home() -> str: | |||
""" | |||
MGE_HOME setting complies with the XDG Base Directory Specification | |||
""" | |||
r"""MGE_HOME setting complies with the XDG Base Directory Specification""" | |||
megengine_home = os.path.expanduser( | |||
os.getenv( | |||
ENV_MGE_HOME, | |||
@@ -95,24 +93,18 @@ def _init_hub( | |||
commit: str = None, | |||
protocol: str = DEFAULT_PROTOCOL, | |||
): | |||
""" | |||
Imports hubmodule like python import. | |||
:param repo_info: | |||
a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | |||
tag/branch. The default branch is ``master`` if not specified. | |||
Example: ``"brain_sdk/MegBrain[:hub]"`` | |||
:param git_host: | |||
host address of git repo. | |||
Example: github.com | |||
:param use_cache: | |||
whether to use locally cached code or completely re-fetch. | |||
:param commit: | |||
commit id on github or gitlab. | |||
:param protocol: | |||
which protocol to use to get the repo, and HTTPS protocol only supports public repo on github. | |||
The value should be one of HTTPS, SSH. | |||
:return: | |||
r"""Imports hubmodule like python import. | |||
Args: | |||
repo_info: a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | |||
tag/branch. The default branch is ``master`` if not specified. Eg: ``"brain_sdk/MegBrain[:hub]"`` | |||
git_host: host address of git repo. Eg: github.com | |||
use_cache: whether to use locally cached code or completely re-fetch. | |||
commit: commit id on github or gitlab. | |||
protocol: which protocol to use to get the repo, and HTTPS protocol only supports public repo on github. | |||
The value should be one of HTTPS, SSH. | |||
Returns: | |||
a python module. | |||
""" | |||
cache_dir = os.path.expanduser(os.path.join(_get_megengine_home(), "hub")) | |||
@@ -139,24 +131,18 @@ def list( | |||
commit: str = None, | |||
protocol: str = DEFAULT_PROTOCOL, | |||
) -> List[str]: | |||
""" | |||
Lists all entrypoints available in repo hubconf. | |||
:param repo_info: | |||
a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | |||
tag/branch. The default branch is ``master`` if not specified. | |||
Example: ``"brain_sdk/MegBrain[:hub]"`` | |||
:param git_host: | |||
host address of git repo. | |||
Example: github.com | |||
:param use_cache: | |||
whether to use locally cached code or completely re-fetch. | |||
:param commit: | |||
commit id on github or gitlab. | |||
:param protocol: | |||
which protocol to use to get the repo, and HTTPS protocol only supports public repo on github. | |||
The value should be one of HTTPS, SSH. | |||
:return: | |||
r"""Lists all entrypoints available in repo hubconf. | |||
Args: | |||
repo_info: a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | |||
tag/branch. The default branch is ``master`` if not specified. Eg: ``"brain_sdk/MegBrain[:hub]"`` | |||
git_host: host address of git repo. Eg: github.com | |||
use_cache: whether to use locally cached code or completely re-fetch. | |||
commit: commit id on github or gitlab. | |||
protocol: which protocol to use to get the repo, and HTTPS protocol only supports public repo on github. | |||
The value should be one of HTTPS, SSH. | |||
Returns: | |||
all entrypoint names of the model. | |||
""" | |||
hubmodule = _init_hub(repo_info, git_host, use_cache, commit, protocol) | |||
@@ -178,26 +164,19 @@ def load( | |||
protocol: str = DEFAULT_PROTOCOL, | |||
**kwargs | |||
) -> Any: | |||
""" | |||
Loads model from github or gitlab repo, with pretrained weights. | |||
:param repo_info: | |||
a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | |||
tag/branch. The default branch is ``master`` if not specified. | |||
Example: ``"brain_sdk/MegBrain[:hub]"`` | |||
:param entry: | |||
an entrypoint defined in hubconf. | |||
:param git_host: | |||
host address of git repo. | |||
Example: github.com | |||
:param use_cache: | |||
whether to use locally cached code or completely re-fetch. | |||
:param commit: | |||
commit id on github or gitlab. | |||
:param protocol: | |||
which protocol to use to get the repo, and HTTPS protocol only supports public repo on github. | |||
The value should be one of HTTPS, SSH. | |||
:return: | |||
r"""Loads model from github or gitlab repo, with pretrained weights. | |||
Args: | |||
repo_info: a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | |||
tag/branch. The default branch is ``master`` if not specified. Eg: ``"brain_sdk/MegBrain[:hub]"`` | |||
entry: an entrypoint defined in hubconf. | |||
git_host: host address of git repo. Eg: github.com | |||
use_cache: whether to use locally cached code or completely re-fetch. | |||
commit: commit id on github or gitlab. | |||
protocol: which protocol to use to get the repo, and HTTPS protocol only supports public repo on github. | |||
The value should be one of HTTPS, SSH. | |||
Returns: | |||
a single model with corresponding pretrained weights. | |||
""" | |||
hubmodule = _init_hub(repo_info, git_host, use_cache, commit, protocol) | |||
@@ -219,30 +198,23 @@ def help( | |||
commit: str = None, | |||
protocol: str = DEFAULT_PROTOCOL, | |||
) -> str: | |||
""" | |||
This function returns docstring of entrypoint ``entry`` by following steps: | |||
r"""This function returns docstring of entrypoint ``entry`` by following steps: | |||
1. Pull the repo code specified by git and repo_info. | |||
2. Load the entry defined in repo's hubconf.py | |||
3. Return docstring of function entry. | |||
:param repo_info: | |||
a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | |||
tag/branch. The default branch is ``master`` if not specified. | |||
Example: ``"brain_sdk/MegBrain[:hub]"`` | |||
:param entry: | |||
an entrypoint defined in hubconf.py | |||
:param git_host: | |||
host address of git repo. | |||
Example: github.com | |||
:param use_cache: | |||
whether to use locally cached code or completely re-fetch. | |||
:param commit: | |||
commit id on github or gitlab. | |||
:param protocol: | |||
which protocol to use to get the repo, and HTTPS protocol only supports public repo on github. | |||
The value should be one of HTTPS, SSH. | |||
:return: | |||
Args: | |||
repo_info: a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | |||
tag/branch. The default branch is ``master`` if not specified. Eg: ``"brain_sdk/MegBrain[:hub]"`` | |||
entry: an entrypoint defined in hubconf.py | |||
git_host: host address of git repo. Eg: github.com | |||
use_cache: whether to use locally cached code or completely re-fetch. | |||
commit: commit id on github or gitlab. | |||
protocol: which protocol to use to get the repo, and HTTPS protocol only supports public repo on github. | |||
The value should be one of HTTPS, SSH. | |||
Returns: | |||
docstring of entrypoint ``entry``. | |||
""" | |||
hubmodule = _init_hub(repo_info, git_host, use_cache, commit, protocol) | |||
@@ -255,16 +227,17 @@ def help( | |||
def load_serialized_obj_from_url(url: str, model_dir=None) -> Any: | |||
""" | |||
Loads MegEngine serialized object from the given URL. | |||
"""Loads MegEngine serialized object from the given URL. | |||
If the object is already present in ``model_dir``, it's deserialized and | |||
returned. If no ``model_dir`` is specified, it will be ``MGE_HOME/serialized``. | |||
:param url: url to serialized object. | |||
:param model_dir: dir to cache target serialized file. | |||
Args: | |||
url: url to serialized object. | |||
model_dir: dir to cache target serialized file. | |||
:return: loaded object. | |||
Returns: | |||
loaded object. | |||
""" | |||
if model_dir is None: | |||
model_dir = os.path.join(_get_megengine_home(), "serialized") | |||
@@ -297,8 +270,7 @@ def load_serialized_obj_from_url(url: str, model_dir=None) -> Any: | |||
class pretrained: | |||
r""" | |||
Decorator which helps to download pretrained weights from the given url. | |||
r"""Decorator which helps to download pretrained weights from the given url. | |||
For example, we can decorate a resnet18 function as follows | |||
@@ -306,10 +278,10 @@ class pretrained: | |||
@hub.pretrained("https://url/to/pretrained_resnet18.pkl") | |||
def resnet18(**kwargs): | |||
return ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) | |||
When decorated function is called with ``pretrained=True``, MegEngine will automatically | |||
download and fill the returned model with pretrained weights. | |||
Returns: | |||
When decorated function is called with ``pretrained=True``, MegEngine will automatically | |||
download and fill the returned model with pretrained weights. | |||
""" | |||
def __init__(self, url): | |||
@@ -14,11 +14,11 @@ from typing import Iterator | |||
def load_module(name: str, path: str) -> types.ModuleType: | |||
""" | |||
Loads module specified by name and path. | |||
r"""Loads module specified by name and path. | |||
:param name: module name. | |||
:param path: module path. | |||
Args: | |||
name: module name. | |||
path: module path. | |||
""" | |||
spec = importlib.util.spec_from_file_location(name, path) | |||
module = importlib.util.module_from_spec(spec) | |||
@@ -27,20 +27,20 @@ def load_module(name: str, path: str) -> types.ModuleType: | |||
def check_module_exists(module: str) -> bool: | |||
""" | |||
Checks whether python module exists or not. | |||
r"""Checks whether python module exists or not. | |||
:param module: name of module. | |||
Args: | |||
module: name of module. | |||
""" | |||
return importlib.util.find_spec(module) is not None | |||
@contextmanager | |||
def cd(target: str) -> Iterator[None]: | |||
""" | |||
Changes current directory to target. | |||
"""Changes current directory to target. | |||
:param target: target directory. | |||
Args: | |||
target: target directory. | |||
""" | |||
prev = os.getcwd() | |||
os.chdir(os.path.expanduser(target)) | |||
@@ -9,12 +9,12 @@ | |||
class GraphOptimizationConfig: | |||
r""" | |||
Configuration for graph optimization: False for OFF, True for ON. The default value | |||
r"""Configuration for graph optimization: False for OFF, True for ON. The default value | |||
None means that opt_level will decide whether this optimization will be applied or not.
:param jit_fuse_dimshuffle: whether to fuse dimshuffle in JIT optimization | |||
:param jit_fuse_reduce: whether to fuse reduce in JIT optimization | |||
Args: | |||
jit_fuse_dimshuffle: whether to fuse dimshuffle in JIT optimization | |||
jit_fuse_reduce: whether to fuse reduce in JIT optimization | |||
""" | |||
def __init__(self): | |||
@@ -10,26 +10,26 @@ from ..device import get_device_count | |||
class SublinearMemoryConfig: | |||
r""" | |||
Configuration for sublinear memory optimization. | |||
:param thresh_nr_try: number of samples both for searching in linear space | |||
and around current thresh in sublinear memory optimization. Default: 10. | |||
It can also be set through the environmental variable 'MGB_SUBLINEAR_MEMORY_THRESH_NR_TRY'. | |||
:param genetic_nr_iter: number of iterations to find the best checkpoints in genetic algorithm. | |||
Default: 0. | |||
It can also be set through the environmental variable 'MGB_SUBLINEAR_MEMORY_GENETIC_NR_ITER'. | |||
:param genetic_pool_size: number of samples for the crossover random selection | |||
during genetic optimization. Default: 20. | |||
It can also be set through the environmental variable 'MGB_SUBLINEAR_MEMORY_GENETIC_POOL_SIZE'. | |||
:param lb_memory_mb: memory lower bound of bottleneck size in MB for sublinear memory optimization. | |||
It can be used to perform manual tradeoff between memory and speed. Default: 0. | |||
It can also be set through the environmental variable 'MGB_SUBLINEAR_MEMORY_LOWER_BOUND_MB'. | |||
:param num_worker: number of thread workers to search the optimum checkpoints | |||
in sublinear memory optimization. Default: half of cpu number in the system. | |||
Note: the value must be greater or equal to one. | |||
It can also be set through the environmental variable 'MGB_SUBLINEAR_MEMORY_WORKERS'. | |||
r"""Configuration for sublinear memory optimization. | |||
Args: | |||
thresh_nr_try: number of samples both for searching in linear space | |||
and around current thresh in sublinear memory optimization. Default: 10. | |||
It can also be set through the environmental variable 'MGB_SUBLINEAR_MEMORY_THRESH_NR_TRY'. | |||
genetic_nr_iter: number of iterations to find the best checkpoints in genetic algorithm. | |||
Default: 0. | |||
It can also be set through the environmental variable 'MGB_SUBLINEAR_MEMORY_GENETIC_NR_ITER'. | |||
genetic_pool_size: number of samples for the crossover random selection | |||
during genetic optimization. Default: 20. | |||
It can also be set through the environmental variable 'MGB_SUBLINEAR_MEMORY_GENETIC_POOL_SIZE'. | |||
lb_memory_mb: memory lower bound of bottleneck size in MB for sublinear memory optimization. | |||
It can be used to perform manual tradeoff between memory and speed. Default: 0. | |||
It can also be set through the environmental variable 'MGB_SUBLINEAR_MEMORY_LOWER_BOUND_MB'. | |||
num_worker: number of thread workers to search the optimum checkpoints | |||
in sublinear memory optimization. Default: half of cpu number in the system. | |||
Note: the value must be greater or equal to one. | |||
It can also be set through the environmental variable 'MGB_SUBLINEAR_MEMORY_WORKERS'. | |||
Note that the environmental variable MGB_COMP_GRAPH_OPT must be set to 'enable_sublinear_memory_opt=1' | |||
in order for the above environmental variable to be effective. | |||
""" | |||
@@ -120,21 +120,21 @@ _io_op_types = {AssertEqual, CollectiveComm, RemoteSend, RemoteRecv} | |||
class trace: | |||
""" | |||
Wraps a callable and provide: | |||
"""Wraps a callable and provide: | |||
* tracing via :meth:`.trace` and :meth:`.dump` | |||
* accelerated evalutaion via :meth:`.__call__` | |||
:param function: the function will be traced. | |||
:param symbolic: whether to apply symbolic execution for tracing. Default: False | |||
:param capture_as_const: capture global vars or closures as const value. Default: False | |||
:param sublinear_memory_config: configuration for sublinear memory optimization. | |||
If not None, it enables sublinear memory optimization with given setting. | |||
:param profiling: whether to profile compiled trace. Default: False | |||
:param opt_level: optimization level for compiling trace. Default: 2 | |||
:param graph_opt_config: configuration for graph optimization. Default: None | |||
:param symbolic_shape: whether to use symbolic shape for tracing. Default: True | |||
Args: | |||
function: the function will be traced. | |||
symbolic: whether to apply symbolic execution for tracing. Default: False | |||
capture_as_const: capture global vars or closures as const value. Default: False | |||
sublinear_memory_config: configuration for sublinear memory optimization. | |||
If not None, it enables sublinear memory optimization with given setting. | |||
profiling: whether to profile compiled trace. Default: False | |||
opt_level: optimization level for compiling trace. Default: 2 | |||
graph_opt_config: configuration for graph optimization. Default: None | |||
symbolic_shape: whether to use symbolic shape for tracing. Default: True | |||
""" | |||
def __new__(cls, *args, **kwargs): | |||
@@ -696,75 +696,74 @@ class trace: | |||
enable_metadata: bool = True, | |||
**kwargs | |||
): | |||
r""" | |||
Serializes trace to file system. | |||
:param file: output file, could be file object or filename. | |||
:param arg_names: names of the input tensors in the traced function. | |||
:param output_names: names of the output tensors in the traced function, | |||
use the default name if not specified. | |||
:param append: whether output is appended to ``file``. | |||
Only works when ``file`` is str. | |||
:param keep_var_name: level for keeping variable names: | |||
* 0: none of the names are kept | |||
* 1: (default)keep names of output vars | |||
* 2: keep names of all (output and internal) vars | |||
:param keep_opr_name: whether to keep operator names. | |||
:param keep_param_name: whether to keep param names, so param values can be | |||
easily manipulated after loading model | |||
:param keep_opr_priority: whether to keep priority setting for operators | |||
:param strip_info_file: a string for path or a file handler. if is not None, | |||
then the dump information for code strip would be written to ``strip_info_file`` | |||
:param append_json: will be check when `strip_info_file` is not None. if set | |||
true, the information for code strip will be append to strip_info_file. | |||
if set false, will rewrite strip_info_file | |||
:param optimize_for_inference: enbale optmizations, | |||
will skip all optimize options if this is False. Default: True | |||
:param user_info: any type object, which will be pickled to bytes. | |||
:param enable_metadata: whether to save metadata into output file. | |||
:Keyword Arguments: | |||
* enable_io16xc32 -- | |||
whether to use float16 for I/O between oprs and use | |||
float32 as internal computation precision. Note the output var would be | |||
changed to float16. | |||
* enable_ioc16 -- | |||
whether to use float16 for both I/O and computation | |||
precision. | |||
* enable_hwcd4 -- | |||
whether to use NHWCD4 data layout. This is faster on some | |||
OpenCL backend. | |||
* enable_nchw88 -- | |||
whether to use NCHW88 data layout, currently | |||
used in X86 AVX backend. | |||
* enable_nchw44 -- | |||
whether to use NCHW44 data layout, currently | |||
used in arm backend. | |||
* enable_nchw44_dot -- | |||
whether to use NCHW44_dot data layout, currently | |||
used in armv8.2+dotprod backend. | |||
* enable_nchw4 -- | |||
whether to use NCHW4 data layout, currently | |||
used in nvidia backend(based on cudnn). | |||
* enable_nchw32 -- | |||
whether to use NCHW32 data layout, currently | |||
used in nvidia backend with tensorcore(based on cudnn). | |||
* enable_chwn4 -- | |||
whether to use CHWN4 data layout, currently | |||
used in nvidia backend with tensorcore. | |||
* enable_nchw64 -- | |||
whether to use NCHW64 data layout, used for fast int4 | |||
support on Nvidia GPU. | |||
* enable_fuse_conv_bias_nonlinearity: whether to fuse conv+bias+nonlinearty | |||
into one opr. | |||
* enable_fuse_conv_bias_with_z: whether to fuse conv_bias with z | |||
input for inference on nvidia backend(this optimization pass will | |||
result in mismatch of the precision of output of training and | |||
inference) | |||
r"""Serializes trace to file system. | |||
Args: | |||
file: output file, could be file object or filename. | |||
arg_names: names of the input tensors in the traced function. | |||
output_names: names of the output tensors in the traced function, | |||
use the default name if not specified. | |||
append: whether output is appended to ``file``. | |||
Only works when ``file`` is str. | |||
keep_var_name: level for keeping variable names: | |||
* 0: none of the names are kept | |||
* 1: (default) keep names of output vars
* 2: keep names of all (output and internal) vars | |||
keep_opr_name: whether to keep operator names. | |||
keep_param_name: whether to keep param names, so param values can be | |||
easily manipulated after loading model | |||
keep_opr_priority: whether to keep priority setting for operators | |||
strip_info_file: a string for path or a file handler. If it is not None,
the dump information for code strip will be written to ``strip_info_file``.
append_json: will be checked when ``strip_info_file`` is not None. If set to
true, the information for code strip will be appended to ``strip_info_file``;
if set to false, ``strip_info_file`` will be rewritten.
optimize_for_inference: enable optimizations for inference;
all optimize options will be skipped if this is False. Default: True
user_info: any type object, which will be pickled to bytes. | |||
enable_metadata: whether to save metadata into output file. | |||
Keyword Arguments: | |||
* enable_io16xc32 -- | |||
whether to use float16 for I/O between oprs and use | |||
float32 as internal computation precision. Note the output var would be | |||
changed to float16. | |||
* enable_ioc16 -- | |||
whether to use float16 for both I/O and computation | |||
precision. | |||
* enable_hwcd4 -- | |||
whether to use NHWCD4 data layout. This is faster on some | |||
OpenCL backend. | |||
* enable_nchw88 -- | |||
whether to use NCHW88 data layout, currently | |||
used in X86 AVX backend. | |||
* enable_nchw44 -- | |||
whether to use NCHW44 data layout, currently | |||
used in arm backend. | |||
* enable_nchw44_dot -- | |||
whether to use NCHW44_dot data layout, currently | |||
used in armv8.2+dotprod backend. | |||
* enable_nchw4 -- | |||
whether to use NCHW4 data layout, currently | |||
used in nvidia backend(based on cudnn). | |||
* enable_nchw32 -- | |||
whether to use NCHW32 data layout, currently | |||
used in nvidia backend with tensorcore(based on cudnn). | |||
* enable_chwn4 -- | |||
whether to use CHWN4 data layout, currently | |||
used in nvidia backend with tensorcore. | |||
* enable_nchw64 -- | |||
whether to use NCHW64 data layout, used for fast int4 | |||
support on Nvidia GPU. | |||
* enable_fuse_conv_bias_nonlinearity: whether to fuse conv+bias+nonlinearity
into one opr.
* enable_fuse_conv_bias_with_z: whether to fuse conv_bias with z
input for inference on nvidia backend (this optimization pass will
result in a mismatch between the output precision of training and
inference)
""" | |||
if not self._capture_as_const: | |||
raise ValueError( | |||
@@ -1033,10 +1032,10 @@ class trace: | |||
) | |||
def get_profile(self): | |||
""" | |||
Get profiling result for compiled trace. | |||
r"""Get profiling result for compiled trace. | |||
:return: a json compatible object. | |||
Returns:
a json compatible object. | |||
""" | |||
if not self._profiler: | |||
raise RuntimeError("trace is not set with profiling=True") | |||
@@ -1050,9 +1049,7 @@ class trace: | |||
class CompiledTensorProxy: | |||
""" | |||
Duck-typed RawTensor | |||
""" | |||
r"""Duck-typed RawTensor""" | |||
def __init__(self, handle): | |||
self.__handle = handle | |||
@@ -17,14 +17,11 @@ _default_level = logging.getLevelName(_default_level_name.upper()) | |||
def set_log_file(fout, mode="a"): | |||
r""" | |||
Sets log output file. | |||
:type fout: str or file-like | |||
:param fout: file-like object that supports write and flush, or string for | |||
the filename | |||
:type mode: str | |||
:param mode: specify the mode to open log file if *fout* is a string | |||
r"""Sets log output file. | |||
Args: | |||
fout: file-like object that supports write and flush, or string for the filename | |||
mode: specify the mode to open log file if *fout* is a string | |||
""" | |||
if isinstance(fout, str): | |||
fout = open(fout, mode) | |||
@@ -39,45 +36,31 @@ class MegEngineLogFormatter(logging.Formatter): | |||
max_lines = 256 | |||
def _color_exc(self, msg): | |||
r""" | |||
Sets the color of message as the execution type. | |||
""" | |||
r"""Sets the color of message as the execution type.""" | |||
return "\x1b[34m{}\x1b[0m".format(msg) | |||
def _color_dbg(self, msg): | |||
r""" | |||
Sets the color of message as the debugging type. | |||
""" | |||
r"""Sets the color of message as the debugging type.""" | |||
return "\x1b[36m{}\x1b[0m".format(msg) | |||
def _color_warn(self, msg): | |||
r""" | |||
Sets the color of message as the warning type. | |||
""" | |||
r"""Sets the color of message as the warning type.""" | |||
return "\x1b[1;31m{}\x1b[0m".format(msg) | |||
def _color_err(self, msg): | |||
r""" | |||
Sets the color of message as the error type. | |||
""" | |||
r"""Sets the color of message as the error type.""" | |||
return "\x1b[1;4;31m{}\x1b[0m".format(msg) | |||
def _color_omitted(self, msg): | |||
r""" | |||
Sets the color of message as the omitted type. | |||
""" | |||
r"""Sets the color of message as the omitted type.""" | |||
return "\x1b[35m{}\x1b[0m".format(msg) | |||
def _color_normal(self, msg): | |||
r""" | |||
Sets the color of message as the normal type. | |||
""" | |||
r"""Sets the color of message as the normal type.""" | |||
return msg | |||
def _color_date(self, msg): | |||
r""" | |||
Sets the color of message the same as date. | |||
""" | |||
r"""Sets the color of message the same as date.""" | |||
return "\x1b[32m{}\x1b[0m".format(msg) | |||
def format(self, record): | |||
@@ -150,9 +133,7 @@ class MegEngineLogFormatter(logging.Formatter): | |||
def get_logger(name=None, formatter=MegEngineLogFormatter): | |||
r""" | |||
Gets megengine logger with given name. | |||
""" | |||
r"""Gets megengine logger with given name.""" | |||
logger = logging.getLogger(name) | |||
if getattr(logger, "_init_done__", None): | |||
@@ -170,12 +151,11 @@ def get_logger(name=None, formatter=MegEngineLogFormatter): | |||
def set_log_level(level, update_existing=True): | |||
""" | |||
Sets default logging level. | |||
r"""Sets default logging level. | |||
:type level: int e.g. logging.INFO | |||
:param level: loggin level given by python :mod:`logging` module | |||
:param update_existing: whether to update existing loggers | |||
Args: | |||
level: logging level given by python :mod:`logging` module
update_existing: whether to update existing loggers | |||
""" | |||
global _default_level # pylint: disable=global-statement | |||
_default_level = level | |||
@@ -202,12 +182,13 @@ try: | |||
_imperative_rt_logger.set_log_handler(_megbrain_logger) | |||
def set_mgb_log_level(level): | |||
r""" | |||
Sets megbrain log level | |||
r"""Sets megbrain log level | |||
Args: | |||
level: new log level | |||
:type level: int e.g. logging.INFO | |||
:param level: new log level | |||
:return: original log level | |||
Returns: | |||
original log level | |||
""" | |||
_megbrain_logger.setLevel(level) | |||
if level == logging.getLevelName("ERROR"): | |||
@@ -235,11 +216,10 @@ except ImportError as exc: | |||
@contextlib.contextmanager | |||
def replace_mgb_log_level(level): | |||
r""" | |||
Replaces megbrain log level in a block and restore after exiting. | |||
r"""Replaces megbrain log level in a block and restore after exiting. | |||
:type level: int e.g. logging.INFO | |||
:param level: new log level | |||
Args: | |||
level: new log level | |||
""" | |||
old = set_mgb_log_level(level) | |||
try: | |||
@@ -249,8 +229,6 @@ def replace_mgb_log_level(level): | |||
def enable_debug_log(): | |||
r""" | |||
Sets logging level to debug for all components. | |||
""" | |||
r"""Sets logging level to debug for all components.""" | |||
set_log_level(logging.DEBUG) | |||
set_mgb_log_level(logging.DEBUG) |
@@ -14,8 +14,7 @@ from .module import Module | |||
class Softmax(Module): | |||
r""" | |||
Applies a softmax function. Softmax is defined as: | |||
r"""Applies a softmax function. Softmax is defined as: | |||
.. math:: | |||
\text{Softmax}(x_{i}) = \frac{exp(x_i)}{\sum_j exp(x_j)} | |||
@@ -23,29 +22,29 @@ class Softmax(Module): | |||
It is applied to all elements along axis, and rescales elements so that | |||
they stay in the range `[0, 1]` and sum to 1. | |||
:param axis: Along which axis softmax will be applied. By default, | |||
softmax will apply along the highest ranked axis. | |||
Args: | |||
axis: Along which axis softmax will be applied. By default, | |||
softmax will apply along the highest ranked axis. | |||
Examples: | |||
.. testcode:: | |||
.. testcode:: | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.module as M | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.module as M | |||
data = mge.tensor(np.array([-2,-1,0,1,2]).astype(np.float32)) | |||
softmax = M.Softmax() | |||
output = softmax(data) | |||
with np.printoptions(precision=6): | |||
print(output.numpy()) | |||
Outputs: | |||
data = mge.tensor(np.array([-2,-1,0,1,2]).astype(np.float32)) | |||
softmax = M.Softmax() | |||
output = softmax(data) | |||
with np.printoptions(precision=6): | |||
print(output.numpy()) | |||
.. testoutput:: | |||
Outputs: | |||
[0.011656 0.031685 0.086129 0.234122 0.636409] | |||
.. testoutput:: | |||
[0.011656 0.031685 0.086129 0.234122 0.636409] | |||
""" | |||
def __init__(self, axis=None, **kwargs): | |||
@@ -60,32 +59,31 @@ class Softmax(Module): | |||
class Sigmoid(Module): | |||
r""" | |||
Applies the element-wise function: | |||
r"""Applies the element-wise function: | |||
.. math:: | |||
\text{Sigmoid}(x) = \frac{1}{1 + \exp(-x)} | |||
Examples: | |||
.. testcode:: | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.module as M | |||
.. testcode:: | |||
data = mge.tensor(np.array([-2,-1,0,1,2,]).astype(np.float32)) | |||
sigmoid = M.Sigmoid() | |||
output = sigmoid(data) | |||
with np.printoptions(precision=6): | |||
print(output.numpy()) | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.module as M | |||
Outputs: | |||
data = mge.tensor(np.array([-2,-1,0,1,2,]).astype(np.float32)) | |||
sigmoid = M.Sigmoid() | |||
output = sigmoid(data) | |||
with np.printoptions(precision=6): | |||
print(output.numpy()) | |||
.. testoutput:: | |||
Outputs: | |||
[0.119203 0.268941 0.5 0.731059 0.880797] | |||
.. testoutput:: | |||
[0.119203 0.268941 0.5 0.731059 0.880797] | |||
""" | |||
def forward(self, inputs): | |||
@@ -93,32 +91,31 @@ class Sigmoid(Module): | |||
class SiLU(Module): | |||
r""" | |||
Applies the element-wise function: | |||
r"""Applies the element-wise function: | |||
.. math:: | |||
\text{SiLU}(x) = \frac{x}{1 + \exp(-x)} | |||
Examples: | |||
.. testcode:: | |||
.. testcode:: | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.module as M | |||
data = mge.tensor(np.array([-2,-1,0,1,2,]).astype(np.float32)) | |||
silu = M.SiLU() | |||
output = silu(data) | |||
with np.printoptions(precision=6): | |||
print(output.numpy()) | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.module as M | |||
Outputs: | |||
data = mge.tensor(np.array([-2,-1,0,1,2,]).astype(np.float32)) | |||
silu = M.SiLU() | |||
output = silu(data) | |||
with np.printoptions(precision=6): | |||
print(output.numpy()) | |||
.. testoutput:: | |||
Outputs: | |||
[-0.238406 -0.268941 0. 0.731059 1.761594] | |||
.. testoutput:: | |||
[-0.238406 -0.268941 0. 0.731059 1.761594] | |||
""" | |||
def forward(self, inputs): | |||
@@ -126,8 +123,7 @@ class SiLU(Module): | |||
class GELU(Module): | |||
r""" | |||
Applies the element-wise function: | |||
r"""Applies the element-wise function: | |||
.. math:: | |||
\text{GELU}(x) = x\Phi(x) | |||
@@ -136,24 +132,23 @@ class GELU(Module): | |||
Examples: | |||
.. testcode:: | |||
.. testcode:: | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.module as M | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.module as M | |||
data = mge.tensor(np.array([-2,-1,0,1,2,]).astype(np.float32)) | |||
gelu = M.GELU() | |||
output = gelu(data) | |||
with np.printoptions(precision=4): | |||
print(output.numpy()) | |||
Outputs: | |||
data = mge.tensor(np.array([-2,-1,0,1,2,]).astype(np.float32)) | |||
gelu = M.GELU() | |||
output = gelu(data) | |||
with np.printoptions(precision=4): | |||
print(output.numpy()) | |||
.. testoutput:: | |||
Outputs: | |||
[-0.0455 -0.1587 0. 0.8413 1.9545] | |||
.. testoutput:: | |||
[-0.0455 -0.1587 0. 0.8413 1.9545] | |||
""" | |||
def forward(self, inputs): | |||
@@ -161,31 +156,29 @@ class GELU(Module): | |||
class ReLU(Module): | |||
r""" | |||
Applies the element-wise function: | |||
r"""Applies the element-wise function: | |||
.. math:: | |||
\text{ReLU}(x) = \max(x, 0) | |||
Examples: | |||
.. testcode:: | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.module as M | |||
data = mge.tensor(np.array([-2,-1,0,1,2,]).astype(np.float32)) | |||
relu = M.ReLU() | |||
output = relu(data) | |||
with np.printoptions(precision=6): | |||
print(output.numpy()) | |||
.. testcode:: | |||
Outputs: | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.module as M | |||
data = mge.tensor(np.array([-2,-1,0,1,2,]).astype(np.float32)) | |||
relu = M.ReLU() | |||
output = relu(data) | |||
with np.printoptions(precision=6): | |||
print(output.numpy()) | |||
.. testoutput:: | |||
Outputs: | |||
[0. 0. 0. 1. 2.] | |||
.. testoutput:: | |||
[0. 0. 0. 1. 2.] | |||
""" | |||
def forward(self, x): | |||
@@ -193,8 +186,7 @@ class ReLU(Module): | |||
class PReLU(Module): | |||
r""" | |||
Applies the element-wise function: | |||
r"""Applies the element-wise function: | |||
.. math:: | |||
\text{PReLU}(x) = \max(0,x) + a * \min(0,x) | |||
@@ -211,28 +203,28 @@ class PReLU(Module): | |||
Here :math:`a` is a learnable parameter. When called without arguments, `PReLU()` uses | |||
a single parameter :math:`a` across all input channels. If called with `PReLU(num_of_channels)`, each input channel will have its own :math:`a`. | |||
:param num_parameters: number of :math:`a` to learn, there is only two | |||
values are legitimate: 1, or the number of channels at input. Default: 1 | |||
:param init: the initial value of :math:`a`. Default: 0.25 | |||
Args: | |||
num_parameters: number of :math:`a` to learn; only two values are | |||
legitimate: 1, or the number of channels of the input. Default: 1 | |||
init: the initial value of :math:`a`. Default: 0.25 | |||
Examples: | |||
.. testcode:: | |||
.. testcode:: | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.module as M | |||
data = mge.tensor(np.array([-1.2, -3.7, 2.7]).astype(np.float32)) | |||
prelu = M.PReLU() | |||
output = prelu(data) | |||
print(output.numpy()) | |||
Outputs: | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.module as M | |||
data = mge.tensor(np.array([-1.2, -3.7, 2.7]).astype(np.float32)) | |||
prelu = M.PReLU() | |||
output = prelu(data) | |||
print(output.numpy()) | |||
.. testoutput:: | |||
Outputs: | |||
[-0.3 -0.925 2.7 ] | |||
.. testoutput:: | |||
[-0.3 -0.925 2.7 ] | |||
""" | |||
def __init__(self, num_parameters: int = 1, init: float = 0.25, **kwargs): | |||
@@ -257,8 +249,7 @@ class PReLU(Module): | |||
class LeakyReLU(Module): | |||
r""" | |||
Applies the element-wise function: | |||
r"""Applies the element-wise function: | |||
.. math:: | |||
\text{LeakyReLU}(x) = \max(0,x) + negative\_slope \times \min(0,x) | |||
@@ -274,23 +265,22 @@ class LeakyReLU(Module): | |||
Examples: | |||
.. testcode:: | |||
.. testcode:: | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.module as M | |||
data = mge.tensor(np.array([-8, -12, 6, 10]).astype(np.float32)) | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.module as M | |||
data = mge.tensor(np.array([-8, -12, 6, 10]).astype(np.float32)) | |||
leakyrelu = M.LeakyReLU(0.01) | |||
output = leakyrelu(data) | |||
print(output.numpy()) | |||
Outputs: | |||
leakyrelu = M.LeakyReLU(0.01) | |||
output = leakyrelu(data) | |||
print(output.numpy()) | |||
.. testoutput:: | |||
Outputs: | |||
[-0.08 -0.12 6. 10. ] | |||
.. testoutput:: | |||
[-0.08 -0.12 6. 10. ] | |||
""" | |||
def __init__(self, negative_slope: float = 0.01, **kwargs): | |||
@@ -25,8 +25,7 @@ class _AdaptivePoolNd(Module): | |||
class AdaptiveMaxPool2d(_AdaptivePoolNd): | |||
r""" | |||
Applies a 2D max adaptive pooling over an input. | |||
r"""Applies a 2D max adaptive pooling over an input. | |||
For instance, given an input of the size :math:`(N, C, H, W)` and | |||
an output shape :math:`(OH, OW)`, this layer generates the output of | |||
@@ -40,29 +39,30 @@ class AdaptiveMaxPool2d(_AdaptivePoolNd): | |||
\end{aligned} | |||
``kernel_size`` and ``stride`` can be inferred from input shape and out shape: | |||
* padding: (0, 0) | |||
* stride: (floor(IH / OH), floor(IW / OW)) | |||
* kernel_size: (IH - (OH - 1) * stride_h, IW - (OW - 1) * stride_w) | |||
Examples: | |||
.. testcode:: | |||
.. testcode:: | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.module as M | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.module as M | |||
m = M.AdaptiveMaxPool2d((2, 2)) | |||
inp = mge.tensor(np.arange(0, 16).astype("float32").reshape(1, 1, 4, 4)) | |||
oup = m(inp) | |||
print(oup.numpy()) | |||
m = M.AdaptiveMaxPool2d((2, 2)) | |||
inp = mge.tensor(np.arange(0, 16).astype("float32").reshape(1, 1, 4, 4)) | |||
oup = m(inp) | |||
print(oup.numpy()) | |||
Outputs: | |||
Outputs: | |||
.. testoutput:: | |||
.. testoutput:: | |||
[[[[ 5. 7.] | |||
[13. 15.]]]] | |||
[[[[ 5. 7.] | |||
[13. 15.]]]] | |||
""" | |||
@@ -71,8 +71,7 @@ class AdaptiveMaxPool2d(_AdaptivePoolNd): | |||
class AdaptiveAvgPool2d(_AdaptivePoolNd): | |||
r""" | |||
Applies a 2D average pooling over an input. | |||
r"""Applies a 2D average pooling over an input. | |||
For instance, given an input of the size :math:`(N, C, H, W)` and | |||
an output shape :math:`(OH, OW)`, this layer generates the output of | |||
@@ -84,29 +83,30 @@ class AdaptiveAvgPool2d(_AdaptivePoolNd): | |||
input(N_i, C_j, stride[0] \times h + m, stride[1] \times w + n) | |||
``kernel_size`` and ``stride`` can be inferred from input shape and out shape: | |||
* padding: (0, 0) | |||
* stride: (floor(IH / OH), floor(IW / OW)) | |||
* kernel_size: (IH - (OH - 1) * stride_h, IW - (OW - 1) * stride_w) | |||
Examples: | |||
.. testcode:: | |||
.. testcode:: | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.module as M | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.module as M | |||
m = M.AdaptiveAvgPool2d((2, 2)) | |||
inp = mge.tensor(np.arange(0, 16).astype("float32").reshape(1, 1, 4, 4)) | |||
oup = m(inp) | |||
print(oup.numpy()) | |||
m = M.AdaptiveAvgPool2d((2, 2)) | |||
inp = mge.tensor(np.arange(0, 16).astype("float32").reshape(1, 1, 4, 4)) | |||
oup = m(inp) | |||
print(oup.numpy()) | |||
Outputs: | |||
Outputs: | |||
.. testoutput:: | |||
.. testoutput:: | |||
[[[[ 2.5 4.5] | |||
[10.5 12.5]]]] | |||
[[[[ 2.5 4.5] | |||
[10.5 12.5]]]] | |||
""" | |||
@@ -14,9 +14,7 @@ from .module import Module | |||
class BatchMatMulActivation(Module): | |||
r""" | |||
Batched :func:`~.matmul` with activation(only :func:`~.relu` supported), no transpose anywhere. | |||
""" | |||
r"""Batched :func:`~.matmul` with activation(only :func:`~.relu` supported), no transpose anywhere.""" | |||
def __init__( | |||
self, | |||
@@ -141,37 +141,29 @@ class _BatchNorm(Module): | |||
class SyncBatchNorm(_BatchNorm): | |||
r""" | |||
Applies Synchronized Batch Normalization for distributed training. | |||
:type num_features: int | |||
:param num_features: usually :math:`C` from an input of shape | |||
:math:`(N, C, H, W)` or the highest ranked dimension of an input | |||
less than 4D. | |||
:type eps: float | |||
:param eps: a value added to the denominator for numerical stability. | |||
Default: 1e-5 | |||
:type momentum: float | |||
:param momentum: the value used for the ``running_mean`` and ``running_var`` computation. | |||
Default: 0.9 | |||
:type affine: bool | |||
:param affine: a boolean value that when set to True, this module has | |||
learnable affine parameters. Default: True | |||
:type track_running_stats: bool | |||
:param track_running_stats: when set to True, this module tracks the | |||
running mean and variance. When set to False, this module does not | |||
track such statistics and always uses batch statistics in both training | |||
and eval modes. Default: True | |||
:type freeze: bool | |||
:param freeze: when set to True, this module does not update the | |||
running mean and variance, and uses the running mean and variance instead of | |||
the batch mean and batch variance to normalize the input. The parameter takes effect | |||
only when the module is initilized with track_running_stats as True. | |||
Default: False | |||
:type group: :class:`~megengine.distributed.Group` | |||
:param group: communication group, caculate mean and variance between this group. | |||
Default: :obj:`~megengine.distributed.WORLD` | |||
:return: output tensor. | |||
r"""Applies Synchronized Batch Normalization for distributed training. | |||
Args: | |||
num_features: usually :math:`C` from an input of shape | |||
:math:`(N, C, H, W)` or the highest ranked dimension of an input | |||
less than 4D. | |||
eps: a value added to the denominator for numerical stability. | |||
Default: 1e-5 | |||
momentum: the value used for the ``running_mean`` and ``running_var`` computation. | |||
Default: 0.9 | |||
affine: a boolean value that when set to True, this module has | |||
learnable affine parameters. Default: True | |||
track_running_stats: when set to True, this module tracks the | |||
running mean and variance. When set to False, this module does not | |||
track such statistics and always uses batch statistics in both training | |||
and eval modes. Default: True | |||
freeze: when set to True, this module does not update the | |||
running mean and variance, and uses the running mean and variance instead of | |||
the batch mean and batch variance to normalize the input. The parameter takes effect | |||
only when the module is initialized with track_running_stats as True. | |||
Default: False | |||
group: communication group; mean and variance are calculated within this group. | |||
Default: :obj:`~.distributed.WORLD` | |||
""" | |||
def __init__( | |||
@@ -249,8 +241,7 @@ class SyncBatchNorm(_BatchNorm): | |||
class BatchNorm1d(_BatchNorm): | |||
r""" | |||
Applies Batch Normalization over a 2D/3D tensor. | |||
r"""Applies Batch Normalization over a 2D/3D tensor. | |||
Refer to :class:`~.BatchNorm2d` for more information. | |||
""" | |||
@@ -263,8 +254,7 @@ class BatchNorm1d(_BatchNorm): | |||
class BatchNorm2d(_BatchNorm): | |||
r""" | |||
Applies Batch Normalization over a 4D tensor. | |||
r"""Applies Batch Normalization over a 4D tensor. | |||
.. math:: | |||
@@ -287,56 +277,50 @@ class BatchNorm2d(_BatchNorm): | |||
statistics on `(N, H, W)` slices, it's common terminology to call this | |||
Spatial Batch Normalization. | |||
:type num_features: int | |||
:param num_features: usually :math:`C` from an input of shape | |||
:math:`(N, C, H, W)` or the highest ranked dimension of an input | |||
less than 4D. | |||
:type eps: float | |||
:param eps: a value added to the denominator for numerical stability. | |||
Default: 1e-5 | |||
:type momentum: float | |||
:param momentum: the value used for the ``running_mean`` and ``running_var`` computation. | |||
Default: 0.9 | |||
:type affine: bool | |||
:param affine: a boolean value that when set to True, this module has | |||
learnable affine parameters. Default: True | |||
:type track_running_stats: bool | |||
:param track_running_stats: when set to True, this module tracks the | |||
running mean and variance. When set to False, this module does not | |||
track such statistics and always uses batch statistics in both training | |||
and eval modes. Default: True | |||
:type freeze: bool | |||
:param freeze: when set to True, this module does not update the | |||
running mean and variance, and uses the running mean and variance instead of | |||
the batch mean and batch variance to normalize the input. The parameter takes effect | |||
only when the module is initilized with track_running_stats as True. | |||
Default: False | |||
Args: | |||
num_features: usually :math:`C` from an input of shape | |||
:math:`(N, C, H, W)` or the highest ranked dimension of an input | |||
less than 4D. | |||
eps: a value added to the denominator for numerical stability. | |||
Default: 1e-5 | |||
momentum: the value used for the ``running_mean`` and ``running_var`` computation. | |||
Default: 0.9 | |||
affine: a boolean value that when set to True, this module has | |||
learnable affine parameters. Default: True | |||
track_running_stats: when set to True, this module tracks the | |||
running mean and variance. When set to False, this module does not | |||
track such statistics and always uses batch statistics in both training | |||
and eval modes. Default: True | |||
freeze: when set to True, this module does not update the | |||
running mean and variance, and uses the running mean and variance instead of | |||
the batch mean and batch variance to normalize the input. The parameter takes effect | |||
only when the module is initialized with track_running_stats as True. | |||
Default: False | |||
Examples: | |||
.. testcode:: | |||
.. testcode:: | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.module as M | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.module as M | |||
# With Learnable Parameters | |||
m = M.BatchNorm2d(4) | |||
inp = mge.tensor(np.random.rand(1, 4, 3, 3).astype("float32")) | |||
oup = m(inp) | |||
print(m.weight.numpy().flatten(), m.bias.numpy().flatten()) | |||
# Without L`e`arnable Parameters | |||
m = M.BatchNorm2d(4, affine=False) | |||
oup = m(inp) | |||
print(m.weight, m.bias) | |||
# With Learnable Parameters | |||
m = M.BatchNorm2d(4) | |||
inp = mge.tensor(np.random.rand(1, 4, 3, 3).astype("float32")) | |||
oup = m(inp) | |||
print(m.weight.numpy().flatten(), m.bias.numpy().flatten()) | |||
# Without Learnable Parameters | |||
m = M.BatchNorm2d(4, affine=False) | |||
oup = m(inp) | |||
print(m.weight, m.bias) | |||
Outputs: | |||
Outputs: | |||
.. testoutput:: | |||
.. testoutput:: | |||
[1. 1. 1. 1.] [0. 0. 0. 0.] | |||
None None | |||
[1. 1. 1. 1.] [0. 0. 0. 0.] | |||
None None | |||
""" | |||
def _check_input_ndim(self, inp): | |||
@@ -13,8 +13,7 @@ from .module import Module | |||
class Concat(Module): | |||
r""" | |||
A :class:`~.Module` to do functional :func:`~.concat`. Could be replaced with :class:`~.QATModule` | |||
r"""A :class:`~.Module` to do functional :func:`~.concat`. Could be replaced with :class:`~.QATModule` | |||
version :class:`~.qat.Concat` using :func:`~.quantize.quantize_qat`. | |||
""" | |||
@@ -97,8 +97,7 @@ class _ConvNd(Module): | |||
class Conv1d(_ConvNd): | |||
r""" | |||
Applies a 1D convolution over an input tensor. | |||
r"""Applies a 1D convolution over an input tensor. | |||
For instance, given an input of the size :math:`(N, C_{\text{in}}, H)`, | |||
this layer generates an output of the size | |||
@@ -121,52 +120,49 @@ class Conv1d(_ConvNd): | |||
a depthwise convolution with a depthwise multiplier `K`, can be constructed | |||
by arguments :math:`(in\_channels=C_{in}, out\_channels=C_{in} \times K, ..., groups=C_{in})`. | |||
:param in_channels: number of input channels. | |||
:param out_channels: number of output channels. | |||
:param kernel_size: size of weight on spatial dimensions. | |||
:param stride: stride of the 1D convolution operation. | |||
:param padding: size of the paddings added to the input on both sides of its | |||
spatial dimensions. Only zero-padding is supported. Default: 0 | |||
:param dilation: dilation of the 1D convolution operation. Default: 1 | |||
:param groups: number of groups into which the input and output channels are divided, | |||
so as to perform a "grouped convolution". When ``groups`` is not 1, | |||
``in_channels`` and ``out_channels`` must be divisible by ``groups``, | |||
and there would be an extra dimension at the beginning of the weight's | |||
shape. Default: 1 | |||
:param bias: whether to add a bias onto the result of convolution. Default: | |||
True | |||
:param conv_mode: Supports `cross_correlation`. Default: | |||
`cross_correlation` | |||
:param compute_mode: When set to "default", no special requirements will be | |||
placed on the precision of intermediate results. When set to "float32", | |||
"float32" would be used for accumulator and intermediate result, but only | |||
effective when input and output are of float16 dtype. | |||
.. note:: | |||
* ``weight`` usually has shape ``(out_channels, in_channels, kernel_size)`` , | |||
if groups is not 1, shape will be ``(groups, out_channels // groups, in_channels // groups, kernel_size)`` | |||
* ``bias`` usually has shape ``(1, out_channels, 1)`` | |||
Args: | |||
in_channels: number of input channels. | |||
out_channels: number of output channels. | |||
kernel_size: size of weight on spatial dimensions. | |||
stride: stride of the 1D convolution operation. | |||
padding: size of the paddings added to the input on both sides of its | |||
spatial dimensions. Only zero-padding is supported. Default: 0 | |||
dilation: dilation of the 1D convolution operation. Default: 1 | |||
groups: number of groups into which the input and output channels are divided, | |||
so as to perform a "grouped convolution". When ``groups`` is not 1, | |||
``in_channels`` and ``out_channels`` must be divisible by ``groups``, | |||
and there would be an extra dimension at the beginning of the weight's | |||
shape. Default: 1 | |||
bias: whether to add a bias onto the result of convolution. Default: True | |||
conv_mode: Supports `cross_correlation`. Default: `cross_correlation` | |||
compute_mode: When set to "default", no special requirements will be | |||
placed on the precision of intermediate results. When set to "float32", | |||
"float32" would be used for accumulator and intermediate result, but only | |||
effective when input and output are of float16 dtype. | |||
Note: | |||
* ``weight`` usually has shape ``(out_channels, in_channels, kernel_size)`` , | |||
if groups is not 1, shape will be ``(groups, out_channels // groups, in_channels // groups, kernel_size)`` | |||
* ``bias`` usually has shape ``(1, out_channels, 1)`` | |||
Examples: | |||
.. testcode:: | |||
.. testcode:: | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.module as M | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.module as M | |||
m = M.Conv1d(in_channels=3, out_channels=1, kernel_size=3) | |||
inp = mge.tensor(np.arange(0, 24).astype("float32").reshape(2, 3, 4)) | |||
oup = m(inp) | |||
print(oup.numpy().shape) | |||
m = M.Conv1d(in_channels=3, out_channels=1, kernel_size=3) | |||
inp = mge.tensor(np.arange(0, 24).astype("float32").reshape(2, 3, 4)) | |||
oup = m(inp) | |||
print(oup.numpy().shape) | |||
Outputs: | |||
Outputs: | |||
.. testoutput:: | |||
(2, 1, 2) | |||
.. testoutput:: | |||
(2, 1, 2) | |||
""" | |||
def __init__( | |||
@@ -245,8 +241,7 @@ class Conv1d(_ConvNd): | |||
class Conv2d(_ConvNd): | |||
r""" | |||
Applies a 2D convolution over an input tensor. | |||
r"""Applies a 2D convolution over an input tensor. | |||
For instance, given an input of the size :math:`(N, C_{\text{in}}, H, W)`, | |||
this layer generates an output of the size | |||
@@ -284,54 +279,51 @@ class Conv2d(_ConvNd): | |||
a depthwise convolution with a depthwise multiplier `K`, can be constructed | |||
by arguments :math:`(in\_channels=C_{in}, out\_channels=C_{in} \times K, ..., groups=C_{in})`. | |||
:param in_channels: number of input channels. | |||
:param out_channels: number of output channels. | |||
:param kernel_size: size of weight on spatial dimensions. If kernel_size is | |||
an :class:`int`, the actual kernel size would be | |||
``(kernel_size, kernel_size)``. | |||
:param stride: stride of the 2D convolution operation. Default: 1 | |||
:param padding: size of the paddings added to the input on both sides of its | |||
spatial dimensions. Only zero-padding is supported. Default: 0 | |||
:param dilation: dilation of the 2D convolution operation. Default: 1 | |||
:param groups: number of groups into which the input and output channels are divided, | |||
so as to perform a "grouped convolution". When ``groups`` is not 1, | |||
``in_channels`` and ``out_channels`` must be divisible by ``groups``, | |||
and there would be an extra dimension at the beginning of the weight's | |||
shape. Default: 1 | |||
:param bias: whether to add a bias onto the result of convolution. Default: | |||
True | |||
:param conv_mode: Supports `cross_correlation`. Default: | |||
`cross_correlation` | |||
:param compute_mode: When set to "default", no special requirements will be | |||
placed on the precision of intermediate results. When set to "float32", | |||
"float32" would be used for accumulator and intermediate result, but only | |||
effective when input and output are of float16 dtype. | |||
.. note:: | |||
* ``weight`` usually has shape ``(out_channels, in_channels, height, width)`` , | |||
if groups is not 1, shape will be ``(groups, out_channels // groups, in_channels // groups, height, width)`` | |||
* ``bias`` usually has shape ``(1, out_channels, *1)`` | |||
Args: | |||
in_channels: number of input channels. | |||
out_channels: number of output channels. | |||
kernel_size: size of weight on spatial dimensions. If kernel_size is | |||
an :class:`int`, the actual kernel size would be | |||
``(kernel_size, kernel_size)``. | |||
stride: stride of the 2D convolution operation. Default: 1 | |||
padding: size of the paddings added to the input on both sides of its | |||
spatial dimensions. Only zero-padding is supported. Default: 0 | |||
dilation: dilation of the 2D convolution operation. Default: 1 | |||
groups: number of groups into which the input and output channels are divided, | |||
so as to perform a "grouped convolution". When ``groups`` is not 1, | |||
``in_channels`` and ``out_channels`` must be divisible by ``groups``, | |||
and there would be an extra dimension at the beginning of the weight's | |||
shape. Default: 1 | |||
bias: whether to add a bias onto the result of convolution. Default: True | |||
conv_mode: Supports `cross_correlation`. Default: `cross_correlation` | |||
compute_mode: When set to "default", no special requirements will be | |||
placed on the precision of intermediate results. When set to "float32", | |||
"float32" would be used for accumulator and intermediate result, but only | |||
effective when input and output are of float16 dtype. | |||
Note: | |||
* ``weight`` usually has shape ``(out_channels, in_channels, height, width)`` , | |||
if groups is not 1, shape will be ``(groups, out_channels // groups, in_channels // groups, height, width)`` | |||
* ``bias`` usually has shape ``(1, out_channels, *1)`` | |||
Examples: | |||
.. testcode:: | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.module as M | |||
.. testcode:: | |||
m = M.Conv2d(in_channels=3, out_channels=1, kernel_size=3) | |||
inp = mge.tensor(np.arange(0, 96).astype("float32").reshape(2, 3, 4, 4)) | |||
oup = m(inp) | |||
print(oup.numpy().shape) | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.module as M | |||
Outputs: | |||
m = M.Conv2d(in_channels=3, out_channels=1, kernel_size=3) | |||
inp = mge.tensor(np.arange(0, 96).astype("float32").reshape(2, 3, 4, 4)) | |||
oup = m(inp) | |||
print(oup.numpy().shape) | |||
.. testoutput:: | |||
Outputs: | |||
(2, 1, 2, 2) | |||
.. testoutput:: | |||
(2, 1, 2, 2) | |||
""" | |||
def __init__( | |||
@@ -411,8 +403,7 @@ class Conv2d(_ConvNd): | |||
class Conv3d(_ConvNd): | |||
r""" | |||
Applies a 3D convolution over an input tensor. | |||
r"""Applies a 3D convolution over an input tensor. | |||
For instance, given an input of the size :math:`(N, C_{\text{in}}, T, H, W)`, | |||
this layer generates an output of the size | |||
@@ -434,50 +425,47 @@ class Conv3d(_ConvNd): | |||
a depthwise convolution with a depthwise multiplier `K`, can be constructed | |||
by arguments :math:`(in\_channels=C_{in}, out\_channels=C_{in} \times K, ..., groups=C_{in})`. | |||
:param in_channels: number of input channels. | |||
:param out_channels: number of output channels. | |||
:param kernel_size: size of weight on spatial dimensions. If kernel_size is | |||
an :class:`int`, the actual kernel size would be | |||
`(kernel_size, kernel_size, kernel_size)`. | |||
:param stride: stride of the 3D convolution operation. Default: 1 | |||
:param padding: size of the paddings added to the input on both sides of its | |||
spatial dimensions. Only zero-padding is supported. Default: 0 | |||
:param dilation: dilation of the 3D convolution operation. Default: 1 | |||
:param groups: number of groups into which the input and output channels are divided, | |||
so as to perform a "grouped convolution". When ``groups`` is not 1, | |||
``in_channels`` and ``out_channels`` must be divisible by ``groups``, | |||
and there would be an extra dimension at the beginning of the weight's | |||
shape. Default: 1 | |||
:param bias: whether to add a bias onto the result of convolution. Default: | |||
True | |||
:param conv_mode: Supports `cross_correlation`. Default: | |||
`cross_correlation` | |||
.. note:: | |||
* ``weight`` usually has shape ``(out_channels, in_channels, depth, height, width)`` , | |||
if groups is not 1, shape will be ``(groups, out_channels // groups, in_channels // groups, depth, height, width)`` | |||
* ``bias`` usually has shape ``(1, out_channels, *1)`` | |||
Args: | |||
in_channels: number of input channels. | |||
out_channels: number of output channels. | |||
kernel_size: size of weight on spatial dimensions. If kernel_size is | |||
an :class:`int`, the actual kernel size would be | |||
`(kernel_size, kernel_size, kernel_size)`. | |||
stride: stride of the 3D convolution operation. Default: 1 | |||
padding: size of the paddings added to the input on both sides of its | |||
spatial dimensions. Only zero-padding is supported. Default: 0 | |||
dilation: dilation of the 3D convolution operation. Default: 1 | |||
groups: number of groups into which the input and output channels are divided, | |||
so as to perform a "grouped convolution". When ``groups`` is not 1, | |||
``in_channels`` and ``out_channels`` must be divisible by ``groups``, | |||
and there would be an extra dimension at the beginning of the weight's | |||
shape. Default: 1 | |||
bias: whether to add a bias onto the result of convolution. Default: True | |||
conv_mode: Supports `cross_correlation`. Default: `cross_correlation` | |||
Note: | |||
* ``weight`` usually has shape ``(out_channels, in_channels, depth, height, width)`` , | |||
if groups is not 1, shape will be ``(groups, out_channels // groups, in_channels // groups, depth, height, width)`` | |||
* ``bias`` usually has shape ``(1, out_channels, *1)`` | |||
Examples: | |||
.. testcode:: | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.module as M | |||
.. testcode:: | |||
m = M.Conv3d(in_channels=3, out_channels=1, kernel_size=3) | |||
inp = mge.tensor(np.arange(0, 384).astype("float32").reshape(2, 3, 4, 4, 4)) | |||
oup = m(inp) | |||
print(oup.numpy().shape) | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.module as M | |||
Outputs: | |||
m = M.Conv3d(in_channels=3, out_channels=1, kernel_size=3) | |||
inp = mge.tensor(np.arange(0, 384).astype("float32").reshape(2, 3, 4, 4, 4)) | |||
oup = m(inp) | |||
print(oup.numpy().shape) | |||
.. testoutput:: | |||
Outputs: | |||
(2, 1, 2, 2, 2) | |||
.. testoutput:: | |||
(2, 1, 2, 2, 2) | |||
""" | |||
def __init__( | |||
@@ -551,8 +539,7 @@ class Conv3d(_ConvNd): | |||
class ConvTranspose2d(_ConvNd): | |||
r""" | |||
Applies a 2D transposed convolution over an input tensor. | |||
r"""Applies a 2D transposed convolution over an input tensor. | |||
This module is also known as a deconvolution or a fractionally-strided convolution. | |||
:class:`ConvTranspose2d` can be seen as the gradient of :class:`Conv2d` operation | |||
@@ -562,35 +549,32 @@ class ConvTranspose2d(_ConvNd): | |||
the opposite way, transforming a smaller input to a larger output while preserving the | |||
connectivity pattern. | |||
:param in_channels: number of input channels. | |||
:param out_channels: number of output channels. | |||
:param kernel_size: size of weight on spatial dimensions. If ``kernel_size`` is | |||
an :class:`int`, the actual kernel size would be | |||
``(kernel_size, kernel_size)``. | |||
:param stride: stride of the 2D convolution operation. Default: 1 | |||
:param padding: size of the paddings added to the input on both sides of its | |||
spatial dimensions. Only zero-padding is supported. Default: 0 | |||
:param dilation: dilation of the 2D convolution operation. Default: 1 | |||
:param groups: number of groups into which the input and output channels are divided, | |||
so as to perform a "grouped convolution". When ``groups`` is not 1, | |||
``in_channels`` and ``out_channels`` must be divisible by ``groups``, | |||
and there would be an extra dimension at the beginning of the weight's | |||
shape. Default: 1 | |||
:param bias: wether to add a bias onto the result of convolution. Default: | |||
True | |||
:param conv_mode: Supports `cross_correlation`. Default: | |||
`cross_correlation` | |||
:param compute_mode: When set to "default", no special requirements will be | |||
placed on the precision of intermediate results. When set to "float32", | |||
"float32" would be used for accumulator and intermediate result, but only | |||
effective when input and output are of float16 dtype. | |||
.. note:: | |||
* ``weight`` usually has shape ``(in_channels, out_channels, height, width)`` , | |||
if groups is not 1, shape will be ``(groups, in_channels // groups, out_channels // groups, height, width)`` | |||
* ``bias`` usually has shape ``(1, out_channels, *1)`` | |||
Args: | |||
in_channels: number of input channels. | |||
out_channels: number of output channels. | |||
kernel_size: size of weight on spatial dimensions. If ``kernel_size`` is | |||
an :class:`int`, the actual kernel size would be | |||
``(kernel_size, kernel_size)``. | |||
stride: stride of the 2D convolution operation. Default: 1 | |||
padding: size of the paddings added to the input on both sides of its | |||
spatial dimensions. Only zero-padding is supported. Default: 0 | |||
dilation: dilation of the 2D convolution operation. Default: 1 | |||
groups: number of groups into which the input and output channels are divided, | |||
so as to perform a "grouped convolution". When ``groups`` is not 1, | |||
``in_channels`` and ``out_channels`` must be divisible by ``groups``, | |||
and there would be an extra dimension at the beginning of the weight's | |||
shape. Default: 1 | |||
bias: whether to add a bias onto the result of convolution. Default: True | |||
conv_mode: Supports `cross_correlation`. Default: `cross_correlation` | |||
compute_mode: When set to "default", no special requirements will be | |||
placed on the precision of intermediate results. When set to "float32", | |||
"float32" would be used for accumulator and intermediate result, but only | |||
effective when input and output are of float16 dtype. | |||
Note: | |||
* ``weight`` usually has shape ``(in_channels, out_channels, height, width)`` , | |||
if groups is not 1, shape will be ``(groups, in_channels // groups, out_channels // groups, height, width)`` | |||
* ``bias`` usually has shape ``(1, out_channels, *1)`` | |||
""" | |||
def __init__( | |||
@@ -669,30 +653,28 @@ class ConvTranspose2d(_ConvNd): | |||
class LocalConv2d(Conv2d): | |||
r""" | |||
Applies a spatial convolution with untied kernels over an groupped channeled input 4D tensor. | |||
r"""Applies a spatial convolution with untied kernels over an groupped channeled input 4D tensor. | |||
It is also known as the locally connected layer. | |||
:param in_channels: number of input channels. | |||
:param out_channels: number of output channels. | |||
:param input_height: the height of the input images. | |||
:param input_width: the width of the input images. | |||
:param kernel_size: size of weight on spatial dimensions. If kernel_size is | |||
an :class:`int`, the actual kernel size would be | |||
``(kernel_size, kernel_size)``. | |||
:param stride: stride of the 2D convolution operation. Default: 1 | |||
:param padding: size of the paddings added to the input on both sides of its | |||
spatial dimensions. Only zero-padding is supported. Default: 0 | |||
:param groups: number of groups into which the input and output channels are divided, | |||
so as to perform a "grouped convolution". When ``groups`` is not 1, | |||
``in_channels`` and ``out_channels`` must be divisible by ``groups``. Default: 1 | |||
.. note:: | |||
* ``weight`` usually has shape ``(out_height, out_width, in_channels, height, width, in_channels)`` , | |||
if groups is not 1, shape will be ``(groups, out_height, out_width, in_channels // groups, height, width, out_channels // groups)`` | |||
* ``bias`` usually has shape ``(1, out_channels, *1)`` | |||
Args: | |||
in_channels: number of input channels. | |||
out_channels: number of output channels. | |||
input_height: the height of the input images. | |||
input_width: the width of the input images. | |||
kernel_size: size of weight on spatial dimensions. If kernel_size is | |||
an :class:`int`, the actual kernel size would be | |||
``(kernel_size, kernel_size)``. | |||
stride: stride of the 2D convolution operation. Default: 1 | |||
padding: size of the paddings added to the input on both sides of its | |||
spatial dimensions. Only zero-padding is supported. Default: 0 | |||
groups: number of groups into which the input and output channels are divided, | |||
so as to perform a "grouped convolution". When ``groups`` is not 1, | |||
``in_channels`` and ``out_channels`` must be divisible by ``groups``. Default: 1 | |||
Note: | |||
* ``weight`` usually has shape ``(out_height, out_width, in_channels, height, width, in_channels)`` , | |||
if groups is not 1, shape will be ``(groups, out_height, out_width, in_channels // groups, height, width, out_channels // groups)`` | |||
* ``bias`` usually has shape ``(1, out_channels, *1)`` | |||
""" | |||
def __init__( | |||
@@ -755,8 +737,7 @@ class LocalConv2d(Conv2d): | |||
class ConvRelu2d(Conv2d): | |||
r""" | |||
A fused :class:`~.Module` including :class:`~.module.Conv2d` and :func:`~.relu`. | |||
r"""A fused :class:`~.Module` including :class:`~.module.Conv2d` and :func:`~.relu`. | |||
Could be replaced with :class:`~.QATModule` version :class:`~.qat.ConvRelu2d` using :func:`~.quantize.quantize_qat`. | |||
""" | |||
@@ -765,38 +746,34 @@ class ConvRelu2d(Conv2d): | |||
class DeformableConv2d(_ConvNd): | |||
""" | |||
Deformable Convolution. | |||
:param in_channels: number of input channels. | |||
:param out_channels: number of output channels. | |||
:param kernel_size: size of weight on spatial dimensions. If kernel_size is | |||
an :class:`int`, the actual kernel size would be | |||
``(kernel_size, kernel_size)``. | |||
:param stride: stride of the 2D convolution operation. Default: 1 | |||
:param padding: size of the paddings added to the input on both sides of its | |||
spatial dimensions. Only zero-padding is supported. Default: 0 | |||
:param dilation: dilation of the 2D convolution operation. Default: 1 | |||
:param groups: number of groups into which the input and output channels are divided, | |||
so as to perform a "grouped convolution". When ``groups`` is not 1, | |||
``in_channels`` and ``out_channels`` must be divisible by ``groups``, | |||
and there would be an extra dimension at the beginning of the weight's | |||
shape. Default: 1 | |||
:param bias: whether to add a bias onto the result of convolution. Default: | |||
True | |||
:param conv_mode: Supports `cross_correlation`. Default: | |||
`cross_correlation` | |||
:param compute_mode: When set to "default", no special requirements will be | |||
placed on the precision of intermediate results. When set to "float32", | |||
"float32" would be used for accumulator and intermediate result, but only | |||
effective when input and output are of float16 dtype. | |||
.. note:: | |||
* ``weight`` usually has shape ``(out_channels, in_channels, height, width)`` , | |||
if groups is not 1, shape will be ``(groups, out_channels // groups, in_channels // groups, height, width)`` | |||
* ``bias`` usually has shape ``(1, out_channels, *1)`` | |||
r"""Deformable Convolution. | |||
Args: | |||
in_channels: number of input channels. | |||
out_channels: number of output channels. | |||
kernel_size: size of weight on spatial dimensions. If kernel_size is | |||
an :class:`int`, the actual kernel size would be | |||
``(kernel_size, kernel_size)``. | |||
stride: stride of the 2D convolution operation. Default: 1 | |||
padding: size of the paddings added to the input on both sides of its | |||
spatial dimensions. Only zero-padding is supported. Default: 0 | |||
dilation: dilation of the 2D convolution operation. Default: 1 | |||
groups: number of groups into which the input and output channels are divided, | |||
so as to perform a "grouped convolution". When ``groups`` is not 1, | |||
``in_channels`` and ``out_channels`` must be divisible by ``groups``, | |||
and there would be an extra dimension at the beginning of the weight's | |||
shape. Default: 1 | |||
bias: whether to add a bias onto the result of convolution. Default: True | |||
conv_mode: Supports `cross_correlation`. Default: `cross_correlation` | |||
compute_mode: When set to "default", no special requirements will be | |||
placed on the precision of intermediate results. When set to "float32", | |||
"float32" would be used for accumulator and intermediate result, but only | |||
effective when input and output are of float16 dtype. | |||
Note: | |||
* ``weight`` usually has shape ``(out_channels, in_channels, height, width)`` , | |||
if groups is not 1, shape will be ``(groups, out_channels // groups, in_channels // groups, height, width)`` | |||
* ``bias`` usually has shape ``(1, out_channels, *1)`` | |||
""" | |||
def __init__( | |||
@@ -877,8 +854,7 @@ class DeformableConv2d(_ConvNd): | |||
class ConvTranspose3d(_ConvNd): | |||
r""" | |||
Applies a 3D transposed convolution over an input tensor. | |||
r"""Applies a 3D transposed convolution over an input tensor. | |||
Only support the case that groups = 1 and conv_mode = "cross_correlation". | |||
@@ -889,23 +865,21 @@ class ConvTranspose3d(_ConvNd): | |||
works the opposite way, transforming a smaller input to a larger output while | |||
preserving the connectivity pattern. | |||
:param in_channels: number of input channels. | |||
:param out_channels: number of output channels. | |||
:param kernel_size: size of weight on spatial dimensions. If ``kernel_size`` is | |||
an :class:`int`, the actual kernel size would be | |||
``(kernel_size, kernel_size, kernel_size)``. | |||
:param stride: stride of the 3D convolution operation. Default: 1 | |||
:param padding: size of the paddings added to the input on all sides of its | |||
spatial dimensions. Only zero-padding is supported. Default: 0 | |||
:param dilation: dilation of the 3D convolution operation. Default: 1 | |||
:param bias: wether to add a bias onto the result of convolution. Default: | |||
True | |||
.. note:: | |||
* ``weight`` usually has shape ``(in_channels, out_channels, depth, height, width)`` . | |||
* ``bias`` usually has shape ``(1, out_channels, *1)`` | |||
Args: | |||
in_channels: number of input channels. | |||
out_channels: number of output channels. | |||
kernel_size: size of weight on spatial dimensions. If ``kernel_size`` is | |||
an :class:`int`, the actual kernel size would be | |||
``(kernel_size, kernel_size, kernel_size)``. | |||
stride: stride of the 3D convolution operation. Default: 1 | |||
padding: size of the paddings added to the input on all sides of its | |||
spatial dimensions. Only zero-padding is supported. Default: 0 | |||
dilation: dilation of the 3D convolution operation. Default: 1 | |||
bias: whether to add a bias onto the result of convolution. Default: True | |||
Note: | |||
* ``weight`` usually has shape ``(in_channels, out_channels, depth, height, width)`` . | |||
* ``bias`` usually has shape ``(1, out_channels, *1)`` | |||
""" | |||
def __init__( | |||
@@ -50,8 +50,7 @@ class _ConvBnActivation2d(Module): | |||
class ConvBn2d(_ConvBnActivation2d): | |||
r""" | |||
A fused :class:`~.Module` including :class:`~.module.Conv2d` and :class:`~.module.BatchNorm2d`. | |||
r"""A fused :class:`~.Module` including :class:`~.module.Conv2d` and :class:`~.module.BatchNorm2d`. | |||
Could be replaced with :class:`~.QATModule` version :class:`~.qat.ConvBn2d` using | |||
:func:`~.quantize.quantize_qat`. | |||
""" | |||
@@ -61,8 +60,7 @@ class ConvBn2d(_ConvBnActivation2d): | |||
class ConvBnRelu2d(_ConvBnActivation2d): | |||
r""" | |||
A fused :class:`~.Module` including :class:`~.module.Conv2d`, :class:`~.module.BatchNorm2d` and :func:`~.relu`. | |||
r"""A fused :class:`~.Module` including :class:`~.module.Conv2d`, :class:`~.module.BatchNorm2d` and :func:`~.relu`. | |||
Could be replaced with :class:`~.QATModule` version :class:`~.qat.ConvBnRelu2d` using :func:`~.quantize.quantize_qat`. | |||
""" | |||
@@ -11,13 +11,13 @@ from .module import Module | |||
class Dropout(Module): | |||
r""" | |||
Randomly sets input elements to zeros with the probability :math:`drop\_prob` during training. | |||
r"""Randomly sets input elements to zeros with the probability :math:`drop\_prob` during training. | |||
Commonly used in large networks to prevent overfitting. | |||
Note that we perform dropout only during training; we also rescale (multiply) the output tensor | |||
by :math:`\frac{1}{1 - drop\_prob}`. During inference :class:`~.Dropout` is equal to :class:`~.Identity`. | |||
:param drop_prob: The probability to drop (set to zero) each single element | |||
Args: | |||
drop_prob: The probability to drop (set to zero) each single element | |||
""" | |||
def __init__(self, drop_prob=0.0, **kwargs): | |||
@@ -11,67 +11,12 @@ from .module import Module | |||
class Elemwise(Module): | |||
r""" | |||
A :class:`~.Module` to do :mod:`~.functional.elemwise` operator. Could be replaced with :class:`~.QATModule` | |||
r"""A :class:`~.Module` to do :mod:`~.functional.elemwise` operator. Could be replaced with :class:`~.QATModule` | |||
version :class:`~.qat.Elemwise` using :func:`~.quantize.quantize_qat`. | |||
:param method: the elemwise method, support the following string. | |||
It will do the normal elemwise operator for float. | |||
* "add": a + b | |||
* "fuse_add_relu": max(x+y, 0) | |||
* "mul": x * y | |||
* "min": min(x, y) | |||
* "max": max(x, y) | |||
* "sub": x - y | |||
* "true_div": x / y | |||
* "fuse_add_sigmoid": sigmoid(x + y) | |||
* "fuse_add_tanh": tanh(x + y) | |||
* "relu": x > 0 ? x : 0 | |||
* "silu": silu(x) | |||
* "gelu": gelu(x) | |||
* "abs": x > 0 ? x : -x | |||
* "sigmoid": sigmoid(x) | |||
* "exp": exp(x) | |||
* "tanh": tanh(x) | |||
* "fuse_mul_add3": x * y + z | |||
* "fast_tanh": x * (27. + x * x) / (27. + 9. * x * x) | |||
* "negate": -x | |||
* "acos": acos(x) | |||
* "asin": asin(x) | |||
* "ceil": ceil(x) | |||
* "cos": cos(x) | |||
* "expm1": expm1(x) | |||
* "floor": floor(x) | |||
* "log": log(x) | |||
* "log1p": log1p(x) | |||
* "sin": sin(x) | |||
* "round": round(x) | |||
* "erf": erf(x) | |||
* "erfinv": erfinv(x) | |||
* "erfc": erfc(x) | |||
* "erfcinv": erfcinv(x) | |||
* "abs_grad": abs_grad | |||
* "floor_div": floor_div | |||
* "mod": mod | |||
* "sigmoid_grad": sigmoid_grad | |||
* "switch_gt0": switch_gt0 | |||
* "tanh_grad": tanh_grad | |||
* "lt": less | |||
* "leq": leq | |||
* "eq": equal | |||
* "pow": pow | |||
* "log_sum_exp": log_sum_exp | |||
* "fast_tanh_grad": fast_tanh_grad | |||
* "atan2": atan2 | |||
* "cond_leq_mov": cond_leq_mov | |||
* "h_swish": h_swish | |||
* "fuse_add_h_swish": h_swish(x+y) | |||
* "h_swish_grad": h_swish_grad | |||
* "and": bool binary: x && y | |||
* "or": bool binary: x || y | |||
* "xor": bool binary: x ^ y | |||
* "not": bool unary: ~x | |||
Args: | |||
method: the elemwise method, specified as one of the supported strings. | |||
It performs the normal elemwise operation for float inputs. | |||
""" | |||
def __init__(self, method, **kwargs): | |||
@@ -17,42 +17,41 @@ from .module import Module | |||
class Embedding(Module): | |||
r""" | |||
A simple lookup table that stores embeddings of a fixed dictionary and size. | |||
r"""A simple lookup table that stores embeddings of a fixed dictionary and size. | |||
This module is often used to store word embeddings and retrieve them using indices. | |||
The input to the module is a list of indices, and the output is the corresponding word embeddings. | |||
The indices should be less than ``num_embeddings``. | |||
:param num_embeddings: size of embedding dictionary. | |||
:param embedding_dim: size of each embedding vector. | |||
:param padding_idx: should be set to None, not supportted now. | |||
:param max_norm: should be set to None, not supportted now. | |||
:param norm_type: should be set to None, not supportted now. | |||
:param initial_weight: the learnable weights of the module of shape (num_embeddings, embedding_dim). | |||
Args: | |||
num_embeddings: size of embedding dictionary. | |||
embedding_dim: size of each embedding vector. | |||
padding_idx: should be set to None, not supported now. | |||
max_norm: should be set to None, not supported now. | |||
norm_type: should be set to None, not supported now. | |||
initial_weight: the learnable weights of the module of shape (num_embeddings, embedding_dim). | |||
Examples: | |||
.. testcode:: | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.module as M | |||
weight = mge.tensor(np.array([(1.2,2.3,3.4,4.5,5.6)], dtype=np.float32)) | |||
data = mge.tensor(np.array([(0,0)], dtype=np.int32)) | |||
.. testcode:: | |||
embedding = M.Embedding(1, 5, initial_weight=weight) | |||
output = embedding(data) | |||
with np.printoptions(precision=6): | |||
print(output.numpy()) | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.module as M | |||
weight = mge.tensor(np.array([(1.2,2.3,3.4,4.5,5.6)], dtype=np.float32)) | |||
data = mge.tensor(np.array([(0,0)], dtype=np.int32)) | |||
Outputs: | |||
embedding = M.Embedding(1, 5, initial_weight=weight) | |||
output = embedding(data) | |||
with np.printoptions(precision=6): | |||
print(output.numpy()) | |||
.. testoutput:: | |||
Outputs: | |||
[[[1.2 2.3 3.4 4.5 5.6] | |||
[1.2 2.3 3.4 4.5 5.6]]] | |||
.. testoutput:: | |||
[[[1.2 2.3 3.4 4.5 5.6] | |||
[1.2 2.3 3.4 4.5 5.6]]] | |||
""" | |||
def __init__( | |||
@@ -110,36 +109,35 @@ class Embedding(Module): | |||
max_norm: Optional[float] = None, | |||
norm_type: Optional[float] = None, | |||
): | |||
r""" | |||
Creates Embedding instance from given 2-dimensional FloatTensor. | |||
r"""Creates Embedding instance from given 2-dimensional FloatTensor. | |||
:param embeddings: tensor contained weight for the embedding. | |||
:param freeze: if ``True``, the weight does not get updated during the learning process. Default: True. | |||
:param padding_idx: should be set to None, not support Now. | |||
:param max_norm: should be set to None, not support Now. | |||
:param norm_type: should be set to None, not support Now. | |||
Args: | |||
embeddings: tensor contained weight for the embedding. | |||
freeze: if ``True``, the weight does not get updated during the learning process. Default: True. | |||
padding_idx: should be set to None, not supported now. | |||
max_norm: should be set to None, not supported now. | |||
norm_type: should be set to None, not supported now. | |||
Examples: | |||
.. testcode:: | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.module as M | |||
weight = mge.tensor(np.array([(1.2,2.3,3.4,4.5,5.6)], dtype=np.float32)) | |||
data = mge.tensor(np.array([(0,0)], dtype=np.int32)) | |||
.. testcode:: | |||
embedding = M.Embedding.from_pretrained(weight, freeze=False) | |||
output = embedding(data) | |||
print(output.numpy()) | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.module as M | |||
weight = mge.tensor(np.array([(1.2,2.3,3.4,4.5,5.6)], dtype=np.float32)) | |||
data = mge.tensor(np.array([(0,0)], dtype=np.int32)) | |||
Outputs: | |||
embedding = M.Embedding.from_pretrained(weight, freeze=False) | |||
output = embedding(data) | |||
print(output.numpy()) | |||
.. testoutput:: | |||
Outputs: | |||
[[[1.2 2.3 3.4 4.5 5.6] | |||
[1.2 2.3 3.4 4.5 5.6]]] | |||
.. testoutput:: | |||
[[[1.2 2.3 3.4 4.5 5.6] | |||
[1.2 2.3 3.4 4.5 5.6]]] | |||
""" | |||
embeddings_shape = embeddings.shape | |||
embeddings_dim = len(embeddings_shape) | |||
@@ -19,7 +19,7 @@ from .module import Module | |||
class TensorrtRuntimeSubgraph(Module): | |||
r"""Load a serialized TensorrtRuntime subgraph. | |||
See :func:`~.tensorrt_runtime_opr` for more details. | |||
""" | |||
@@ -41,7 +41,7 @@ class TensorrtRuntimeSubgraph(Module): | |||
class CambriconRuntimeSubgraph(Module): | |||
r"""Load a serialized CambriconRuntime subgraph. | |||
See :func:`~.cambricon_runtime_opr` for more details. | |||
""" | |||
@@ -68,7 +68,7 @@ class CambriconRuntimeSubgraph(Module): | |||
class AtlasRuntimeSubgraph(Module): | |||
r"""Load a serialized AtlasRuntime subgraph. | |||
See :func:`~.atlas_runtime_opr` for more details. | |||
""" | |||
@@ -18,53 +18,53 @@ from ..tensor import Tensor | |||
def fill_(tensor: Tensor, val: Union[float, int]) -> None: | |||
""" | |||
Fills the given ``tensor`` with value ``val``. | |||
"""Fills the given ``tensor`` with value ``val``. | |||
:param tensor: tensor to be initialized. | |||
:param val: value to be filled throughout the tensor. | |||
Args: | |||
tensor: tensor to be initialized. | |||
val: value to be filled throughout the tensor. | |||
""" | |||
tensor._reset(full(shape=tensor.shape, value=val, dtype=tensor.dtype)) | |||
def zeros_(tensor: Tensor) -> None: | |||
""" | |||
Fills the given ``tensor`` with scalar value `0`. | |||
"""Fills the given ``tensor`` with scalar value `0`. | |||
:param tensor: tensor to be initialized. | |||
Args: | |||
tensor: tensor to be initialized. | |||
""" | |||
fill_(tensor, 0) | |||
def ones_(tensor: Tensor) -> None: | |||
""" | |||
Fills the given ``tensor`` with the scalar value `1`. | |||
"""Fills the given ``tensor`` with the scalar value `1`. | |||
:param tensor: tensor to be initialized. | |||
Args: | |||
tensor: tensor to be initialized. | |||
""" | |||
fill_(tensor, 1) | |||
def uniform_(tensor: Tensor, a: float = 0.0, b: float = 1.0) -> None: | |||
r""" | |||
Fills the given ``tensor`` with random value sampled from uniform distribution | |||
r"""Fills the given ``tensor`` with random value sampled from uniform distribution | |||
:math:`\mathcal{U}(\text{a}, \text{b})`. | |||
:param tensor: tensor to be initialized. | |||
:param a: lower bound of the sampling interval. | |||
:param b: upper bound of the sampling interval. | |||
Args: | |||
tensor: tensor to be initialized. | |||
a: lower bound of the sampling interval. | |||
b: upper bound of the sampling interval. | |||
""" | |||
tensor._reset(uniform(size=tensor.shape, low=a, high=b).astype(tensor.dtype)) | |||
def normal_(tensor: Tensor, mean: float = 0.0, std: float = 1.0) -> None: | |||
r""" | |||
Fills the given ``tensor`` with random value sampled from normal distribution | |||
r"""Fills the given ``tensor`` with random value sampled from normal distribution | |||
:math:`\mathcal{N}(\text{mean}, \text{std}^2)`. | |||
:param tensor: tensor to be initialized. | |||
:param mean: mean of the normal distribution. | |||
:param std: standard deviation of the normal distribution. | |||
Args: | |||
tensor: tensor to be initialized. | |||
mean: mean of the normal distribution. | |||
std: standard deviation of the normal distribution. | |||
""" | |||
tensor._reset(normal(size=tensor.shape, mean=mean, std=std).astype(tensor.dtype)) | |||
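A usage sketch of the in-place initializers above, assuming they are importable as ``megengine.module.init`` (the module this hunk patches):

.. code-block:: python

    import megengine.module as M
    import megengine.module.init as init

    conv = M.Conv2d(3, 8, 3)
    init.zeros_(conv.bias)                          # constant fill with 0
    init.normal_(conv.weight, mean=0.0, std=0.02)   # Gaussian re-init
    init.uniform_(conv.weight, a=-0.1, b=0.1)       # or a uniform re-init
    print(conv.weight.numpy().std())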
@@ -72,10 +72,9 @@ def normal_(tensor: Tensor, mean: float = 0.0, std: float = 1.0) -> None: | |||
def calculate_gain( | |||
nonlinearity: str, param: Optional[Union[int, float]] = None | |||
) -> float: | |||
r""" | |||
Returns a recommended gain value (see the table below) for the given nonlinearity | |||
r"""Returns a recommended gain value (see the table below) for the given nonlinearity | |||
function. | |||
================= ==================================================== | |||
nonlinearity gain | |||
================= ==================================================== | |||
@@ -87,10 +86,10 @@ def calculate_gain( | |||
Leaky Relu :math:`\sqrt{\frac{2}{1 + {\text{negative}_\text{slope}}^2}}` | |||
================= ==================================================== | |||
:param nonlinearity: name of the non-linear function. | |||
:param param: optional parameter for leaky_relu. Only effective when | |||
``nonlinearity`` is "leaky_relu". | |||
Args: | |||
nonlinearity: name of the non-linear function. | |||
param: optional parameter for leaky_relu. Only effective when | |||
``nonlinearity`` is "leaky_relu". | |||
""" | |||
linear_fns = [ | |||
"linear", | |||
@@ -124,11 +123,11 @@ def calculate_gain( | |||
def calculate_fan_in_and_fan_out(tensor: Tensor) -> Tuple[float, float]: | |||
""" | |||
Calculates fan_in / fan_out value for given weight tensor. This function assumes | |||
r"""Calculates fan_in / fan_out value for given weight tensor. This function assumes | |||
input tensor is stored in ``NCHW`` format. | |||
:param tensor: weight tensor in ``NCHW`` format. | |||
Args: | |||
tensor: weight tensor in ``NCHW`` format. | |||
""" | |||
shape = tensor.shape | |||
ndim = len(shape) | |||
@@ -153,14 +152,14 @@ def calculate_fan_in_and_fan_out(tensor: Tensor) -> Tuple[float, float]: | |||
def calculate_correct_fan(tensor: Tensor, mode: str) -> float: | |||
""" | |||
Calculates fan_in / fan_out value for given weight tensor, depending on given | |||
r"""Calculates fan_in / fan_out value for given weight tensor, depending on given | |||
``mode``. | |||
See :func:`calculate_fan_in_and_fan_out` for details. | |||
:param tensor: weight tensor in ``NCHW`` format. | |||
:param mode: "fan_in" or "fan_out". | |||
Args: | |||
tensor: weight tensor in ``NCHW`` format. | |||
mode: fan_in" or "fan_out". | |||
""" | |||
mode = mode.lower() | |||
valid_modes = ["fan_in", "fan_out"] | |||
@@ -174,19 +173,20 @@ def calculate_correct_fan(tensor: Tensor, mode: str) -> float: | |||
def xavier_uniform_(tensor: Tensor, gain: float = 1.0) -> None: | |||
r""" | |||
Fills tensor with random values sampled from :math:`\mathcal{U}(-a, a)` | |||
r"""Fills tensor with random values sampled from :math:`\mathcal{U}(-a, a)` | |||
where | |||
.. math:: | |||
a = \text{gain} \times \sqrt{\frac{6}{\text{fan_in} + \text{fan_out}}} | |||
a = \text{gain} \times \sqrt{\frac{6}{\text{fan_in} + \text{fan_out}}} | |||
Also known as Glorot initialization. Detailed information can be retrieved from | |||
`Understanding the difficulty of training deep feedforward neural networks` - | |||
Glorot, X. & Bengio, Y. (2010). | |||
:param tensor: tensor to be initialized. | |||
:param gain: scaling factor for :math:`a`. | |||
Args: | |||
tensor: tensor to be initialized. | |||
gain: scaling factor for :math:`a`. | |||
""" | |||
fan_in, fan_out = calculate_fan_in_and_fan_out(tensor) | |||
std = gain * math.sqrt(2.0 / float(fan_in + fan_out)) | |||
@@ -195,19 +195,20 @@ def xavier_uniform_(tensor: Tensor, gain: float = 1.0) -> None: | |||
def xavier_normal_(tensor: Tensor, gain: float = 1.0) -> None: | |||
r""" | |||
Fills tensor with random values sampled from | |||
r"""Fills tensor with random values sampled from | |||
:math:`\mathcal{N}(0, \text{std}^2)` where | |||
.. math:: | |||
\text{std} = \text{gain} \times \sqrt{\frac{2}{\text{fan_in} + \text{fan_out}}} | |||
\text{std} = \text{gain} \times \sqrt{\frac{2}{\text{fan_in} + \text{fan_out}}} | |||
Also known as Glorot initialization. Detailed information can be retrieved from | |||
`Understanding the difficulty of training deep feedforward neural networks` - | |||
Glorot, X. & Bengio, Y. (2010). | |||
:param tensor: tensor to be initialized. | |||
:param gain: scaling factor for :math:`std`. | |||
Args: | |||
tensor: tensor to be initialized. | |||
gain: scaling factor for :math:`std`. | |||
""" | |||
fan_in, fan_out = calculate_fan_in_and_fan_out(tensor) | |||
std = gain * math.sqrt(2.0 / float(fan_in + fan_out)) | |||
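As a hedged illustration of the two Glorot initializers above: both derive their scale from ``fan_in + fan_out`` and differ only in the sampling distribution (the ``M.Conv2d`` layer is used purely as an example):

.. code-block::

    import math
    import megengine.module as M
    import megengine.module.init as init

    conv = M.Conv2d(in_channels=3, out_channels=8, kernel_size=3)
    init.xavier_uniform_(conv.weight, gain=init.calculate_gain("relu"))

    # the bound used internally, reproduced for clarity
    fan_in, fan_out = init.calculate_fan_in_and_fan_out(conv.weight)
    bound = init.calculate_gain("relu") * math.sqrt(6.0 / (fan_in + fan_out))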
@@ -217,25 +218,26 @@ def xavier_normal_(tensor: Tensor, gain: float = 1.0) -> None: | |||
def msra_uniform_( | |||
tensor: Tensor, a: float = 0, mode: str = "fan_in", nonlinearity: str = "leaky_relu" | |||
) -> None: | |||
r""" | |||
Fills tensor wilth random values sampled from | |||
r"""Fills tensor wilth random values sampled from | |||
:math:`\mathcal{U}(-\text{bound}, \text{bound})` where | |||
.. math:: | |||
\text{bound} = \sqrt{\frac{6}{(1 + a^2) \times \text{fan_in}}} | |||
\text{bound} = \sqrt{\frac{6}{(1 + a^2) \times \text{fan_in}}} | |||
Detailed information can be retrieved from | |||
`Delving deep into rectifiers: Surpassing human-level performance on ImageNet | |||
classification` | |||
:param tensor: tensor to be initialized. | |||
:param a: optional parameter for calculating gain for leaky_relu. See | |||
:func:`calculate_gain` for details. | |||
:param mode: "fan_in" or "fan_out", used to calculate :math:`gain`, the | |||
scaling factor for :math:`bound`. See :func:`calculate_fan_in_and_fan_out` for | |||
details. | |||
:param nonlinearity: name of the non-linear function used to calculate :math:`gain`. | |||
See :func:`calculate_gain` for details. | |||
Args: | |||
tensor: tensor to be initialized. | |||
a: optional parameter for calculating gain for leaky_relu. See | |||
:func:`calculate_gain` for details. | |||
mode: fan_in" or "fan_out", used to calculate :math:`gain`, the | |||
scaling factor for :math:`bound`. See :func:`calculate_fan_in_and_fan_out` for | |||
details. | |||
nonlinearity: name of the non-linear function used to calculate :math:`gain`. | |||
See :func:`calculate_gain` for details. | |||
""" | |||
fan = calculate_correct_fan(tensor, mode) | |||
gain = calculate_gain(nonlinearity, a) | |||
@@ -247,25 +249,26 @@ def msra_uniform_( | |||
def msra_normal_( | |||
tensor: Tensor, a: float = 0, mode: str = "fan_in", nonlinearity: str = "leaky_relu" | |||
) -> None: | |||
r""" | |||
Fills tensor wilth random values sampled from | |||
r"""Fills tensor wilth random values sampled from | |||
:math:`\mathcal{N}(0, \text{std}^2)` where | |||
.. math:: | |||
\text{std} = \sqrt{\frac{2}{(1 + a^2) \times \text{fan_in}}} | |||
\text{std} = \sqrt{\frac{2}{(1 + a^2) \times \text{fan_in}}} | |||
Detailed information can be retrieved from | |||
`Delving deep into rectifiers: Surpassing human-level performance on ImageNet | |||
classification` | |||
:param tensor: tensor to be initialized | |||
:param a: optional parameter for calculating gain for leaky_relu. See | |||
:func:`calculate_gain` for details. | |||
:param mode: "fan_in" or "fan_out", used to calculate :math:`gain`, the | |||
scaling factor for :math:`gain`. See :func:`calculate_fan_in_and_fan_out` for | |||
details. | |||
:param nonlinearity: name of the non-linear function used to calculate :math:`gain`. | |||
See :func:`calculate_gain` for details. | |||
Args: | |||
tensor: tensor to be initialized | |||
a: optional parameter for calculating gain for leaky_relu. See | |||
:func:`calculate_gain` for details. | |||
mode: fan_in" or "fan_out", used to calculate :math:`gain`, the | |||
scaling factor for :math:`gain`. See :func:`calculate_fan_in_and_fan_out` for | |||
details. | |||
nonlinearity: name of the non-linear function used to calculate :math:`gain`. | |||
See :func:`calculate_gain` for details. | |||
""" | |||
fan = calculate_correct_fan(tensor, mode) | |||
gain = calculate_gain(nonlinearity, a) | |||
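A short sketch tying ``calculate_correct_fan`` and ``calculate_gain`` to the ``msra_*`` initializers above (layer shapes are illustrative):

.. code-block::

    import megengine.module as M
    import megengine.module.init as init

    conv = M.Conv2d(in_channels=16, out_channels=32, kernel_size=3)
    # "fan_out" preserves the magnitude of gradients in the backward pass
    init.msra_normal_(conv.weight, mode="fan_out", nonlinearity="relu")

    fan = init.calculate_correct_fan(conv.weight, mode="fan_out")
    gain = init.calculate_gain("relu")   # sqrt(2)
    std = gain / fan ** 0.5              # the std used by msra_normal_ above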
@@ -14,8 +14,7 @@ from .module import Module | |||
class Linear(Module): | |||
r""" | |||
Applies a linear transformation to the input. For instance, if input | |||
r"""Applies a linear transformation to the input. For instance, if input | |||
is x, then output y is: | |||
.. math:: | |||
@@ -24,30 +23,31 @@ class Linear(Module): | |||
where :math:`y_i= \sum_j W_{ij} x_j + b_i` | |||
:param in_features: size of each input sample. | |||
:param out_features: size of each output sample. | |||
:param bias: if it's ``False``, the layer will not learn an additional ``bias``. | |||
Default: ``True`` | |||
Args: | |||
in_features: size of each input sample. | |||
out_features: size of each output sample. | |||
bias: if it's ``False``, the layer will not learn an additional ``bias``. | |||
Default: ``True`` | |||
Examples: | |||
.. testcode:: | |||
Examples: | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.module as M | |||
.. testcode:: | |||
m = M.Linear(in_features=3, out_features=1) | |||
inp = mge.tensor(np.arange(0, 6).astype("float32").reshape(2, 3)) | |||
oup = m(inp) | |||
print(oup.numpy().shape) | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.module as M | |||
Outputs: | |||
m = M.Linear(in_features=3, out_features=1) | |||
inp = mge.tensor(np.arange(0, 6).astype("float32").reshape(2, 3)) | |||
oup = m(inp) | |||
print(oup.numpy().shape) | |||
.. testoutput:: | |||
Outputs: | |||
(2, 1) | |||
.. testoutput:: | |||
(2, 1) | |||
""" | |||
def __init__( | |||
@@ -84,15 +84,14 @@ def _get_XNorm_typeclass(): | |||
class Module(metaclass=ABCMeta): | |||
""" | |||
Base Module class. | |||
r"""Base Module class. | |||
Args: | |||
name: module's name, can be initialized by the ``kwargs`` parameter | |||
of child class. | |||
""" | |||
def __init__(self, name=None): | |||
""" | |||
:param name: module's name, can be initialized by the ``kwargs`` parameter | |||
of child class. | |||
""" | |||
self._modules = [] | |||
if name is not None: | |||
@@ -118,18 +117,19 @@ class Module(metaclass=ABCMeta): | |||
pass | |||
def register_forward_pre_hook(self, hook: Callable) -> HookHandler: | |||
""" | |||
Registers a hook to handle forward inputs. `hook` should be a function. | |||
"""Registers a hook to handle forward inputs. `hook` should be a function. | |||
:param hook: a function that receive `module` and `inputs`, then return | |||
a modified `inputs` or `None`. | |||
:return: a handler with :meth:`~.HookHandler.remove` interface to delete the hook. | |||
Args: | |||
hook: a function that receives `module` and `inputs`, then returns | |||
a modified `inputs` or `None`. | |||
Returns: | |||
a handler with :meth:`~.HookHandler.remove` interface to delete the hook. | |||
""" | |||
return HookHandler(self._forward_pre_hooks, hook) | |||
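A hedged example of registering and later removing a forward pre-hook, following the signature described above (the hook itself is illustrative):

.. code-block::

    import numpy as np
    import megengine as mge
    import megengine.module as M

    net = M.Linear(4, 2)

    def scale_inputs(module, inputs):
        # `inputs` is the tuple of forward arguments; return a modified tuple or None
        return tuple(x * 2 for x in inputs)

    handle = net.register_forward_pre_hook(scale_inputs)
    out = net(mge.tensor(np.ones((1, 4), dtype="float32")))
    handle.remove()  # detach the hook through the returned HookHandler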
def register_forward_hook(self, hook: Callable) -> HookHandler: | |||
""" | |||
Registers a hook to handle forward results. `hook` should be a function that | |||
"""Registers a hook to handle forward results. `hook` should be a function that | |||
receives `module`, `inputs` and `outputs`, then returns a modified `outputs` or `None`. | |||
This method return a handler with :meth:`~.HookHandler.remove` interface to delete the hook. | |||
@@ -164,19 +164,19 @@ class Module(metaclass=ABCMeta): | |||
predicate: Callable[[Any], bool] = lambda _: True, | |||
seen: Optional[Set[int]] = None | |||
) -> Union[Iterable[Any], Iterable[Tuple[str, Any]]]: | |||
""" | |||
Scans the module object and returns an iterable for the :class:`~.Tensor` | |||
"""Scans the module object and returns an iterable for the :class:`~.Tensor` | |||
and :class:`~.Module` attributes that agree with the ``predicate``. For multiple | |||
calls of this function with same arguments, the order of objects within the | |||
returned iterable is guaranteed to be identical, as long as all the involved | |||
module objects' ``__dict__`` does not change throughout those calls. | |||
:param recursive: whether to recursively scan all the submodules. | |||
:param with_key: whether to yield keys along with yielded objects. | |||
:param with_parent: whether to yield ``self`` along with yielded objects. | |||
:param prefix: prefix appended to the yielded keys. | |||
:param predicate: the predication function applied to scanned objects. | |||
:param seen: a dict that records whether a module has been traversed yet. | |||
Args: | |||
recursive: whether to recursively scan all the submodules. | |||
with_key: whether to yield keys along with yielded objects. | |||
with_parent: whether to yield ``self`` along with yielded objects. | |||
prefix: prefix appended to the yielded keys. | |||
predicate: the predicate function applied to scanned objects. | |||
seen: a dict that records whether a module has been traversed yet. | |||
""" | |||
if seen is None: | |||
seen = set([id(self)]) | |||
@@ -212,12 +212,12 @@ class Module(metaclass=ABCMeta): | |||
) | |||
def parameters(self, recursive: bool = True, **kwargs) -> Iterable[Parameter]: | |||
r""" | |||
Returns an iterable for the :class:`~.Parameter` of the module. | |||
r"""Returns an iterable for the :class:`~.Parameter` of the module. | |||
:param recursive: If ``True``, returns all :class:`~.Parameter` within this | |||
module, else only returns :class:`~.Parameter` that are direct attributes | |||
of this module. | |||
Args: | |||
recursive: If ``True``, returns all :class:`~.Parameter` within this | |||
module, else only returns :class:`~.Parameter` that are direct attributes | |||
of this module. | |||
""" | |||
if "requires_grad" in kwargs: | |||
@@ -237,14 +237,14 @@ class Module(metaclass=ABCMeta): | |||
def named_parameters( | |||
self, prefix: Optional[str] = None, recursive: bool = True, **kwargs | |||
) -> Iterable[Tuple[str, Parameter]]: | |||
""" | |||
Returns an iterable for key :class:`~.Parameter` pairs of the module, where | |||
r"""Returns an iterable for key :class:`~.Parameter` pairs of the module, where | |||
``key`` is the dotted path from this module to the :class:`~.Parameter`. | |||
:param prefix: prefix prepended to the keys. | |||
:param recursive: if ``True``, returns all :class:`~.Parameter` within this | |||
module, else only returns :class:`~.Parameter` that are direct attributes | |||
of this module. | |||
Args: | |||
prefix: prefix prepended to the keys. | |||
recursive: if ``True``, returns all :class:`~.Parameter` within this | |||
module, else only returns :class:`~.Parameter` that are direct attributes | |||
of this module. | |||
""" | |||
if "requires_grad" in kwargs: | |||
@@ -266,14 +266,13 @@ class Module(metaclass=ABCMeta): | |||
) | |||
def buffers(self, recursive: bool = True, **kwargs) -> Iterable[Tensor]: | |||
""" | |||
Returns an iterable for the buffers of the module. | |||
r"""Returns an iterable for the buffers of the module. | |||
Buffer is defined to be :class:`~.Tensor` excluding :class:`~.Parameter`. | |||
:param recursive: if ``True``, returns all buffers within this | |||
module, else only returns buffers that are direct attributes | |||
of this module. | |||
Args: | |||
recursive: if ``True``, returns all buffers within this | |||
module, else only returns buffers that are direct attributes of this module. | |||
""" | |||
yield from self._flatten( | |||
with_key=False, predicate=_is_buffer, recursive=recursive, **kwargs | |||
@@ -282,16 +281,17 @@ class Module(metaclass=ABCMeta): | |||
def named_buffers( | |||
self, prefix: Optional[str] = None, recursive: bool = True, **kwargs | |||
) -> Iterable[Tuple[str, Tensor]]: | |||
""" | |||
Returns an iterable for key buffer pairs of the module, where | |||
r"""Returns an iterable for key buffer pairs of the module, where | |||
``key`` is the dotted path from this module to the buffer. | |||
Buffer is defined to be :class:`~.Tensor` excluding :class:`~.Parameter`. | |||
:param prefix: prefix prepended to the keys. | |||
:param recursive: if ``True``, returns all buffers within this | |||
module, else only returns buffers that are direct attributes | |||
of this module. | |||
Args: | |||
prefix: prefix prepended to the keys. | |||
recursive: if ``True``, returns all buffers within this | |||
module, else only returns buffers that are direct attributes | |||
of this module. | |||
""" | |||
yield from self._flatten( | |||
with_key=True, | |||
@@ -302,8 +302,7 @@ class Module(metaclass=ABCMeta): | |||
) | |||
def children(self, **kwargs) -> "Iterable[Module]": | |||
""" | |||
Returns an iterable for all the submodules that are direct attributes of this | |||
r"""Returns an iterable for all the submodules that are direct attributes of this | |||
module. | |||
""" | |||
yield from self._flatten( | |||
@@ -311,8 +310,7 @@ class Module(metaclass=ABCMeta): | |||
) | |||
def named_children(self, **kwargs) -> "Iterable[Tuple[str, Module]]": | |||
""" | |||
Returns an iterable of key-submodule pairs for all the submodules that are | |||
r"""Returns an iterable of key-submodule pairs for all the submodules that are | |||
direct attributes of this module, where 'key' is the attribute name of | |||
submodules. | |||
""" | |||
@@ -321,9 +319,7 @@ class Module(metaclass=ABCMeta): | |||
) | |||
def modules(self, **kwargs) -> "Iterable[Module]": | |||
""" | |||
Returns an iterable for all the modules within this module, including itself. | |||
""" | |||
r"""Returns an iterable for all the modules within this module, including itself.""" | |||
if "with_parent" in kwargs and kwargs["with_parent"]: | |||
yield self, None | |||
else: | |||
@@ -333,12 +329,12 @@ class Module(metaclass=ABCMeta): | |||
def named_modules( | |||
self, prefix: Optional[str] = None, **kwargs | |||
) -> "Iterable[Tuple[str, Module]]": | |||
""" | |||
Returns an iterable of key-module pairs for all the modules within this | |||
r"""Returns an iterable of key-module pairs for all the modules within this | |||
module, including itself, where 'key' is the dotted path from this module to the | |||
submodules. | |||
:param prefix: prefix prepended to the path. | |||
Args: | |||
prefix: prefix prepended to the path. | |||
""" | |||
if "with_parent" in kwargs and kwargs["with_parent"]: | |||
yield ("" if prefix is None else prefix), self, None | |||
@@ -349,33 +345,31 @@ class Module(metaclass=ABCMeta): | |||
) | |||
def apply(self, fn: "Callable[[Module], Any]") -> None: | |||
""" | |||
Applies function ``fn`` to all the modules within this module, including | |||
r"""Applies function ``fn`` to all the modules within this module, including | |||
itself. | |||
:param fn: the function to be applied on modules. | |||
Args: | |||
fn: the function to be applied on modules. | |||
""" | |||
for it in self.modules(): | |||
fn(it) | |||
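To make the traversal helpers above concrete, a small hedged sketch that walks a model's parameters and uses ``apply`` for per-module initialization (the helper function is illustrative):

.. code-block::

    import megengine.module as M
    import megengine.module.init as init

    net = M.Sequential(M.Linear(8, 4), M.Linear(4, 2))

    for name, param in net.named_parameters():
        print(name, param.shape)  # dotted path and shape of each Parameter

    def init_linear(m):
        if isinstance(m, M.Linear):
            init.xavier_uniform_(m.weight)

    net.apply(init_linear)  # visits every module, including `net` itself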
@deprecated(version="1.0") | |||
def zero_grad(self) -> None: | |||
""" | |||
Sets all parameters' grads to zero | |||
""" | |||
r"""Sets all parameters' grads to zero""" | |||
for param in self.parameters(): | |||
if param.grad is not None: | |||
param.grad.reset_zero() | |||
def train(self, mode: bool = True, recursive: bool = True) -> None: | |||
""" | |||
Sets training mode of all the modules within this module (including itself) to | |||
r"""Sets training mode of all the modules within this module (including itself) to | |||
``mode``. This effectively sets the ``training`` attributes of those modules | |||
to ``mode``, but only has effect on certain modules (e.g. | |||
:class:`~.BatchNorm2d`, :class:`~.Dropout`, :class:`~.Observer`) | |||
:param mode: the training mode to be set on modules. | |||
:param recursive: whether to recursively call submodules' ``train()``. | |||
Args: | |||
mode: the training mode to be set on modules. | |||
recursive: whether to recursively call submodules' ``train()``. | |||
""" | |||
if not recursive: | |||
self.training = mode | |||
@@ -387,15 +381,13 @@ class Module(metaclass=ABCMeta): | |||
self.apply(fn) | |||
def eval(self) -> None: | |||
""" | |||
Sets training mode of all the modules within this module (including itself) to | |||
r"""Sets training mode of all the modules within this module (including itself) to | |||
``False``. See :meth:`~.Module.train` for details. | |||
""" | |||
self.train(False) | |||
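For instance (a minimal sketch), switching between training and inference mode only changes the behaviour of mode-dependent layers such as :class:`~.BatchNorm2d` and :class:`~.Dropout`; parameters are untouched either way:

.. code-block::

    import megengine.module as M

    net = M.Sequential(M.Conv2d(3, 8, 3), M.BatchNorm2d(8), M.Dropout(0.5))
    net.eval()   # BatchNorm uses running statistics, Dropout becomes a no-op
    net.train()  # back to training behaviour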
def disable_quantize(self, value=True): | |||
r""" | |||
Sets ``module``'s ``quantize_disabled`` attribute and return ``module``. | |||
r"""Sets ``module``'s ``quantize_disabled`` attribute and return ``module``. | |||
Could be used as a decorator. | |||
""" | |||
@@ -408,8 +400,7 @@ class Module(metaclass=ABCMeta): | |||
def replace_param( | |||
self, params: dict, start_pos: int, seen: Optional[Set[int]] = None | |||
): | |||
""" | |||
Replaces module's parameters with ``params``, used by :class:`~.ParamPack` to | |||
r"""Replaces module's parameters with ``params``, used by :class:`~.ParamPack` to | |||
speed up multi-machine training. | |||
""" | |||
offset = 0 | |||
@@ -447,9 +438,7 @@ class Module(metaclass=ABCMeta): | |||
return rst | |||
def _state_dict(self, rst=None, prefix="", keep_var=False): | |||
r""" | |||
Returns a dictionary containing whole states of the module. | |||
""" | |||
r"""Returns a dictionary containing whole states of the module.""" | |||
def is_state(obj): | |||
return _is_parameter(obj) or _is_buffer(obj) | |||
@@ -479,8 +468,7 @@ class Module(metaclass=ABCMeta): | |||
state_dict: Union[dict, Callable[[str, Tensor], Optional[np.ndarray]]], | |||
strict=True, | |||
): | |||
r""" | |||
Loads a given dictionary created by :func:`state_dict` into this module. | |||
r"""Loads a given dictionary created by :func:`state_dict` into this module. | |||
If ``strict`` is ``True``, the keys of :func:`state_dict` must exactly match the keys | |||
returned by :func:`state_dict`. | |||
@@ -515,8 +503,7 @@ class Module(metaclass=ABCMeta): | |||
if 'bias' in k: | |||
M.init.zeros_(v) | |||
if 'conv' in k: | |||
return v.numpy() * (np.abs(v.numpy()) > 1e-3).astype("float32") | |||
model.load_state_dict(reinit_and_pruning, strict=False) | |||
""" | |||
unused = [] | |||
if isinstance(state_dict, dict): | |||
@@ -558,8 +545,7 @@ class Module(metaclass=ABCMeta): | |||
) | |||
def _load_state_dict_with_closure(self, closure): | |||
""" | |||
Advance state_dict load through callable ``closure`` whose signature is | |||
r"""Advance state_dict load through callable ``closure`` whose signature is | |||
``closure(key: str, var: Tensor) -> Union[np.ndarry, None]`` | |||
""" | |||
XNorm_typeclass = _get_XNorm_typeclass() | |||
@@ -642,9 +628,7 @@ class Module(metaclass=ABCMeta): | |||
super().__delattr__(name) | |||
def _module_info_string(self) -> str: | |||
r""" | |||
Set the extra representation of the module. | |||
""" | |||
r"""Set the extra representation of the module.""" | |||
return "" | |||
def __repr__(self): | |||
@@ -15,8 +15,7 @@ from .module import Module | |||
class GroupNorm(Module): | |||
""" | |||
Simple implementation of GroupNorm. Only support 4d tensor now. | |||
"""Simple implementation of GroupNorm. Only support 4d tensor now. | |||
Reference: https://arxiv.org/pdf/1803.08494.pdf. | |||
""" | |||
@@ -64,8 +63,7 @@ class GroupNorm(Module): | |||
class InstanceNorm(Module): | |||
""" | |||
Simple implementation of InstanceNorm. Only support 4d tensor now. | |||
"""Simple implementation of InstanceNorm. Only support 4d tensor now. | |||
Reference: https://arxiv.org/abs/1607.08022. | |||
Note that InstanceNorm equals using GroupNome with num_groups=num_channels. | |||
""" | |||
@@ -108,8 +106,7 @@ class InstanceNorm(Module): | |||
class LayerNorm(Module): | |||
""" | |||
Simple implementation of LayerNorm. Support tensor of any shape as input. | |||
"""Simple implementation of LayerNorm. Support tensor of any shape as input. | |||
Reference: https://arxiv.org/pdf/1803.08494.pdf. | |||
""" | |||
@@ -37,14 +37,14 @@ class _PoolNd(Module): | |||
class MaxPool2d(_PoolNd): | |||
r""" | |||
Applies a 2D max pooling over an input. | |||
r"""Applies a 2D max pooling over an input. | |||
For instance, given an input of the size :math:`(N, C, H, W)` and | |||
:attr:`kernel_size` :math:`(kH, kW)`, this layer generates the output of | |||
the size :math:`(N, C, H_{out}, W_{out})` through a process described as: | |||
.. math:: | |||
\begin{aligned} | |||
out(N_i, C_j, h, w) ={} & \max_{m=0, \ldots, kH-1} \max_{n=0, \ldots, kW-1} | |||
\text{input}(N_i, C_j, \text{stride[0]} \times h + m, | |||
@@ -54,30 +54,30 @@ class MaxPool2d(_PoolNd): | |||
If :attr:`padding` is non-zero, then the input is implicitly zero-padded on | |||
both sides for :attr:`padding` number of points. | |||
:param kernel_size: the size of the window to take a max over. | |||
:param stride: the stride of the window. Default value is kernel_size. | |||
:param padding: implicit zero padding to be added on both sides. | |||
Args: | |||
kernel_size: the size of the window to take a max over. | |||
stride: the stride of the window. Default value is kernel_size. | |||
padding: implicit zero padding to be added on both sides. | |||
Examples: | |||
.. testcode:: | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.module as M | |||
.. testcode:: | |||
m = M.MaxPool2d(kernel_size=3, stride=1, padding=0) | |||
inp = mge.tensor(np.arange(0, 16).astype("float32").reshape(1, 1, 4, 4)) | |||
oup = m(inp) | |||
print(oup.numpy()) | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.module as M | |||
Outputs: | |||
m = M.MaxPool2d(kernel_size=3, stride=1, padding=0) | |||
inp = mge.tensor(np.arange(0, 16).astype("float32").reshape(1, 1, 4, 4)) | |||
oup = m(inp) | |||
print(oup.numpy()) | |||
.. testoutput:: | |||
Outputs: | |||
[[[[10. 11.] | |||
[14. 15.]]]] | |||
.. testoutput:: | |||
[[[[10. 11.] | |||
[14. 15.]]]] | |||
""" | |||
def forward(self, inp): | |||
@@ -85,8 +85,7 @@ class MaxPool2d(_PoolNd): | |||
class AvgPool2d(_PoolNd): | |||
r""" | |||
Applies a 2D average pooling over an input. | |||
r"""Applies a 2D average pooling over an input. | |||
For instance, given an input of the size :math:`(N, C, H, W)` and | |||
:attr:`kernel_size` :math:`(kH, kW)`, this layer generates the output of | |||
@@ -100,33 +99,13 @@ class AvgPool2d(_PoolNd): | |||
If :attr:`padding` is non-zero, then the input is implicitly zero-padded on | |||
both sides for :attr:`padding` number of points. | |||
:param kernel_size: the size of the window. | |||
:param stride: the stride of the window. Default value is kernel_size。 | |||
:param padding: implicit zero padding to be added on both sides. | |||
:param mode: whether to count padding values. "average" mode will do counting and | |||
"average_count_exclude_padding" mode won't do counting. | |||
Default: "average_count_exclude_padding" | |||
Examples: | |||
.. testcode:: | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.module as M | |||
m = M.AvgPool2d(kernel_size=3, stride=1, padding=0) | |||
inp = mge.tensor(np.arange(0, 16).astype("float32").reshape(1, 1, 4, 4)) | |||
oup = m(inp) | |||
print(oup.numpy()) | |||
Outputs: | |||
.. testoutput:: | |||
[[[[ 5. 6.] | |||
[ 9. 10.]]]] | |||
Args: | |||
kernel_size: the size of the window. | |||
stride: the stride of the window. Default value is kernel_size. | |||
padding: implicit zero padding to be added on both sides. | |||
mode: whether to count padding values. "average" mode will do counting and | |||
"average_count_exclude_padding" mode won't do counting. | |||
Default: "average_count_exclude_padding" | |||
""" | |||
def __init__( | |||
@@ -10,9 +10,7 @@ from .module import QATModule | |||
class BatchMatMulActivation(Float.BatchMatMulActivation, QATModule): | |||
r""" | |||
A :class:`~.QATModule` :class:`~.module.BatchMatMulActivation` with QAT support. | |||
""" | |||
r"""A :class:`~.QATModule` :class:`~.module.BatchMatMulActivation` with QAT support.""" | |||
def forward(self, inp): | |||
w_qat = self.apply_quant_weight(self.weight) | |||
@@ -13,8 +13,7 @@ from .module import QATModule | |||
class Concat(Float.Concat, QATModule): | |||
r""" | |||
A :class:`~.QATModule` to do functional :func:`~.concat` with QAT support. | |||
r"""A :class:`~.QATModule` to do functional :func:`~.concat` with QAT support. | |||
Could be applied with :class:`~.Observer` and :class:`~.FakeQuantize`. | |||
""" | |||
@@ -23,8 +22,4 @@ class Concat(Float.Concat, QATModule): | |||
@classmethod | |||
def from_float_module(cls, float_module): | |||
r""" | |||
Return a :class:`~.QATModule` instance converted from | |||
a float :class:`~.Module` instance. | |||
""" | |||
return cls(name=float_module.name) |
@@ -11,8 +11,7 @@ from .module import QATModule | |||
class Conv2d(Float.Conv2d, QATModule): | |||
r""" | |||
A :class:`~.QATModule` :class:`~.module.Conv2d` with QAT support. | |||
r"""A :class:`~.QATModule` :class:`~.module.Conv2d` with QAT support. | |||
Could be applied with :class:`~.Observer` and :class:`~.FakeQuantize`. | |||
""" | |||
@@ -50,8 +49,7 @@ class Conv2d(Float.Conv2d, QATModule): | |||
class ConvRelu2d(Conv2d): | |||
r""" | |||
A :class:`~.QATModule` include :class:`~.module.Conv2d` and :func:`~.relu` with QAT support. | |||
r"""A :class:`~.QATModule` include :class:`~.module.Conv2d` and :func:`~.relu` with QAT support. | |||
Could be applied with :class:`~.Observer` and :class:`~.FakeQuantize`. | |||
""" | |||
@@ -60,8 +58,7 @@ class ConvRelu2d(Conv2d): | |||
class ConvTranspose2d(Float.ConvTranspose2d, QATModule): | |||
r""" | |||
A :class:`~.QATModule` :class:`~.module.ConvTranspose2d` with QAT support. | |||
r"""A :class:`~.QATModule` :class:`~.module.ConvTranspose2d` with QAT support. | |||
Could be applied with :class:`~.Observer` and :class:`~.FakeQuantize`. | |||
""" | |||
@@ -136,10 +136,6 @@ class _ConvBnActivation2d(Float._ConvBnActivation2d, QATModule): | |||
@classmethod | |||
def from_float_module(cls, float_module: Float._ConvBnActivation2d): | |||
r""" | |||
Return a :class:`~.QATModule` instance converted from | |||
a float :class:`~.Module` instance. | |||
""" | |||
qat_module = cls( | |||
float_module.conv.in_channels, | |||
float_module.conv.out_channels, | |||
@@ -160,8 +156,7 @@ class _ConvBnActivation2d(Float._ConvBnActivation2d, QATModule): | |||
class ConvBn2d(_ConvBnActivation2d): | |||
r""" | |||
A fused :class:`~.QATModule` including :class:`~.module.Conv2d` and :class:`~.module.BatchNorm2d` with QAT support. | |||
r"""A fused :class:`~.QATModule` including :class:`~.module.Conv2d` and :class:`~.module.BatchNorm2d` with QAT support. | |||
Could be applied with :class:`~.Observer` and :class:`~.FakeQuantize`. | |||
""" | |||
@@ -170,8 +165,7 @@ class ConvBn2d(_ConvBnActivation2d): | |||
class ConvBnRelu2d(_ConvBnActivation2d): | |||
r""" | |||
A fused :class:`~.QATModule` including :class:`~.module.Conv2d`, :class:`~.module.BatchNorm2d` and :func:`~.relu` with QAT support. | |||
r"""A fused :class:`~.QATModule` including :class:`~.module.Conv2d`, :class:`~.module.BatchNorm2d` and :func:`~.relu` with QAT support. | |||
Could be applied with :class:`~.Observer` and :class:`~.FakeQuantize`. | |||
""" | |||
@@ -10,11 +10,8 @@ from .module import QATModule | |||
class Elemwise(Float.Elemwise, QATModule): | |||
r""" | |||
A :class:`~.QATModule` to do :mod:`~.functional.elemwise` operator with QAT support. | |||
r"""A :class:`~.QATModule` to do :mod:`~.functional.elemwise` operator with QAT support. | |||
Could be applied with :class:`~.Observer` and :class:`~.FakeQuantize`. | |||
:param method: the elemwise method, see :class:`~.module.Elemwise` for detail. | |||
""" | |||
with_weight = False | |||
@@ -10,15 +10,14 @@ from .module import QATModule | |||
class Linear(Float.Linear, QATModule): | |||
r""" | |||
A :class:`~.QATModule` version of :class:`~.module.Linear`. | |||
r"""A :class:`~.QATModule` version of :class:`~.module.Linear`. | |||
Could be applied with :class:`~.Observer` and :class:`~.FakeQuantize`. | |||
:param in_features: size of each input sample. | |||
:param out_features: size of each output sample. | |||
:param bias: If set to ``False``, the layer will not learn an additive bias. | |||
Default: True | |||
Args: | |||
in_features: size of each input sample. | |||
out_features: size of each output sample. | |||
bias: If set to ``False``, the layer will not learn an additive bias. | |||
Default: True | |||
""" | |||
def forward(self, inp): | |||
@@ -17,12 +17,11 @@ from ..module import Module | |||
class QATModule(Module): | |||
r""" | |||
Base class of quantized-float related :class:`~.Module`, basically for QAT and Calibration. | |||
r"""Base class of quantized-float related :class:`~.Module`, basically for QAT and Calibration. | |||
Use :meth:`from_float_module` to generate an instance from a float :class:`~.Module`. | |||
Or use :func:`~.quantize.quantize_qat` to do it recursively and automatically. | |||
Can also be converted to :class:`~.QuantizedModule` for deployment using | |||
:func:`~.quantize.quantize` further. | |||
""" | |||
@@ -43,8 +42,7 @@ class QATModule(Module): | |||
return "QAT." + super().__repr__() | |||
def set_qconfig(self, qconfig: QConfig): | |||
r""" | |||
Set quantization related configs with ``qconfig``, including | |||
r"""Set quantization related configs with ``qconfig``, including | |||
observer and fake_quant for weight and activation. | |||
""" | |||
@@ -96,24 +94,19 @@ class QATModule(Module): | |||
return oup | |||
def apply_quant_weight(self, target: Tensor): | |||
r""" | |||
Apply weight's observer and fake_quant from ``qconfig`` on ``target``. | |||
""" | |||
r"""Apply weight's observer and fake_quant from ``qconfig`` on ``target``.""" | |||
return self._apply_fakequant_with_observer( | |||
target, self.weight_fake_quant, self.weight_observer | |||
) | |||
def apply_quant_activation(self, target: Tensor): | |||
r""" | |||
Apply weight's observer and fake_quant from ``qconfig`` on ``target``. | |||
""" | |||
r"""Apply weight's observer and fake_quant from ``qconfig`` on ``target``.""" | |||
return self._apply_fakequant_with_observer( | |||
target, self.act_fake_quant, self.act_observer | |||
) | |||
def apply_quant_bias(self, target: Tensor, inp: Tensor, w_qat: Tensor): | |||
r""" | |||
Use :func:`~.fake_quant_bias` to process ``target``. Only valid when | |||
r"""Use :func:`~.fake_quant_bias` to process ``target``. Only valid when | |||
``act_fake_quant`` and ``weight_fake_quant`` are both enabled. | |||
""" | |||
# bias should have the same dtype as activation, so act_fake_quant can also | |||
@@ -139,33 +132,25 @@ class QATModule(Module): | |||
return None | |||
def get_weight_dtype(self): | |||
r""" | |||
Get weight's quantization dtype as the method from ``qconfig``. | |||
""" | |||
r"""Get weight's quantization dtype as the method from ``qconfig``.""" | |||
return self._get_method_result( | |||
"get_quantized_dtype", self.weight_fake_quant, self.weight_observer | |||
) | |||
def get_activation_dtype(self): | |||
r""" | |||
Get activation's quantization dtype as the method from ``qconfig``. | |||
""" | |||
r"""Get activation's quantization dtype as the method from ``qconfig``.""" | |||
return self._get_method_result( | |||
"get_quantized_dtype", self.act_fake_quant, self.act_observer | |||
) | |||
def get_weight_qparams(self): | |||
r""" | |||
Get weight's quantization parameters. | |||
""" | |||
r"""Get weight's quantization parameters.""" | |||
return self._get_method_result( | |||
"get_qparams", self.weight_fake_quant, self.weight_observer | |||
) | |||
def get_activation_qparams(self): | |||
r""" | |||
Get activation's quantization parameters. | |||
""" | |||
r"""Get activation's quantization parameters.""" | |||
return self._get_method_result( | |||
"get_qparams", self.act_fake_quant, self.act_observer | |||
) | |||
@@ -173,7 +158,6 @@ class QATModule(Module): | |||
@classmethod | |||
@abstractmethod | |||
def from_float_module(cls, float_module: Module): | |||
r""" | |||
Return a :class:`~.QATModule` instance converted from | |||
r"""Return a :class:`~.QATModule` instance converted from | |||
a float :class:`~.Module` instance. | |||
""" |
@@ -10,8 +10,7 @@ from .module import QATModule | |||
class QuantStub(Float.QuantStub, QATModule): | |||
r""" | |||
A helper :class:`~.QATModule` simply return input, but will quantize | |||
r"""A helper :class:`~.QATModule` simply return input, but will quantize | |||
input after converted to :class:`~.QuantizedModule`. | |||
""" | |||
@@ -30,8 +29,7 @@ class QuantStub(Float.QuantStub, QATModule): | |||
class DequantStub(Float.DequantStub, QATModule): | |||
r""" | |||
A helper :class:`~.QATModule` simply return input, but will de-quantize | |||
r"""A helper :class:`~.QATModule` simply return input, but will de-quantize | |||
input after converted to :class:`~.QuantizedModule`. | |||
""" | |||
@@ -9,8 +9,7 @@ from .module import Module | |||
class QuantStub(Module): | |||
r""" | |||
A helper :class:`~.Module` simply returning input. Could be replaced with :class:`~.QATModule` | |||
r"""A helper :class:`~.Module` simply returning input. Could be replaced with :class:`~.QATModule` | |||
version :class:`~.qat.QuantStub` using :func:`~.quantize.quantize_qat`. | |||
""" | |||
@@ -19,8 +18,7 @@ class QuantStub(Module): | |||
class DequantStub(Module): | |||
r""" | |||
A helper :class:`~.Module` simply returning input. Could be replaced with :class:`~.QATModule` | |||
r"""A helper :class:`~.Module` simply returning input. Could be replaced with :class:`~.QATModule` | |||
version :class:`~.qat.DequantStub` using :func:`~.quantize.quantize_qat`. | |||
""" | |||
@@ -14,9 +14,7 @@ from .module import QuantizedModule | |||
class Concat(QuantizedModule): | |||
r""" | |||
A :class:`~.QuantizedModule` to do quantized :func:`~.concat`, used for inference only. | |||
""" | |||
r"""A :class:`~.QuantizedModule` to do quantized :func:`~.concat`, used for inference only.""" | |||
def __init__(self, dtype=None, **kwargs): | |||
super().__init__(**kwargs) | |||
@@ -75,7 +75,7 @@ class Conv2d(Float.Conv2d, QuantizedModule): | |||
@classmethod | |||
def from_qat_module(cls, qat_module: QAT.Conv2d): | |||
r""" | |||
return a :class:`~.QuantizedModule` instance converted from a | |||
Return a :class:`~.QuantizedModule` instance converted from a | |||
:class:`~.QATModule` instance. | |||
""" | |||
output_dtype = qat_module.get_activation_dtype() | |||
@@ -119,7 +119,8 @@ class ConvTranspose2d(Float.ConvTranspose2d, QuantizedModule): | |||
The parameters are the same as :class:`~.module.ConvTranspose2d` except ``dtype``. | |||
:param dtype: data type of the output, should be qint8. | |||
Args: | |||
dtype: data type of the output, should be qint8. | |||
""" | |||
def __init__( | |||
@@ -11,10 +11,7 @@ from .conv import Conv2d | |||
class _ConvBnActivation2d(Conv2d): | |||
r""" | |||
Applies a 2D convolution over a quantized input tensor, used for inference only. | |||
The parameter is same with :class: `~.module.Conv2d`. | |||
r"""Applies a 2D convolution over a quantized input tensor, used for inference only. | |||
""" | |||
@classmethod | |||
@@ -12,8 +12,7 @@ from ..qat import QATModule | |||
class QuantizedModule(Module): | |||
r""" | |||
Base class of quantized :class:`~.Module`, | |||
r"""Base class of quantized :class:`~.Module`, | |||
which should be converted from :class:`~.QATModule` and does not support training. | |||
""" | |||
@@ -29,6 +28,6 @@ class QuantizedModule(Module): | |||
@abstractmethod | |||
def from_qat_module(cls, qat_module: QATModule): | |||
r""" | |||
Return a :class:`~.QuantizedModule` instance converted from a | |||
:class:`~.QATModule` instance. | |||
Return a :class:`~.QuantizedModule` instance converted from a | |||
:class:`~.QATModule` instance. | |||
""" |
@@ -10,8 +10,7 @@ from .module import QuantizedModule | |||
class QuantStub(QuantizedModule): | |||
r""" | |||
Quantized version of :class:`~.qat.QuantStub`, | |||
r"""Quantized version of :class:`~.qat.QuantStub`, | |||
will convert input to quantized dtype. | |||
""" | |||
@@ -24,16 +23,11 @@ class QuantStub(QuantizedModule): | |||
@classmethod | |||
def from_qat_module(cls, qat_module: QAT.QuantStub): | |||
r""" | |||
Return a :class:`~.QuantizedModule` instance converted from a | |||
:class:`~.QATModule` instance. | |||
""" | |||
return cls(qat_module.get_activation_dtype(), name=qat_module.name) | |||
class DequantStub(QuantizedModule): | |||
r""" | |||
Quantized version of :class:`~.qat.DequantStub`, | |||
r"""Quantized version of :class:`~.qat.DequantStub`, | |||
will restore quantized input to float32 dtype. | |||
""" | |||
@@ -42,8 +36,4 @@ class DequantStub(QuantizedModule): | |||
@classmethod | |||
def from_qat_module(cls, qat_module: QAT.DequantStub): | |||
r""" | |||
Return a :class:`~.QuantizedModule` instance converted from a | |||
:class:`~.QATModule` instance. | |||
""" | |||
return cls(name=qat_module.name) |
@@ -12,38 +12,35 @@ from .module import Module | |||
class Sequential(Module): | |||
r""" | |||
A sequential container. | |||
r"""A sequential container. | |||
Modules will be added to it in the order they are passed in the constructor. | |||
Alternatively, an ordered dict of modules can also be passed in. | |||
To make it easier to understand, here is a small example: | |||
Examples: | |||
.. testcode:: | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.module as M | |||
import megengine.functional as F | |||
from collections import OrderedDict | |||
batch_size = 64 | |||
data = mge.tensor(np.zeros((batch_size, 28 * 28)), dtype=np.float32) | |||
label = mge.tensor(np.zeros(batch_size,), dtype=np.int32) | |||
net0 = M.Sequential( | |||
M.Linear(28 * 28, 320), | |||
M.Linear(320, 10) | |||
) | |||
pred0 = net0(data) | |||
modules = OrderedDict() | |||
modules["fc0"] = M.Linear(28 * 28, 320) | |||
modules["fc1"] = M.Linear(320, 10) | |||
net1 = M.Sequential(modules) | |||
pred1 = net1(data) | |||
.. testcode:: | |||
import numpy as np | |||
import megengine as mge | |||
import megengine.module as M | |||
import megengine.functional as F | |||
from collections import OrderedDict | |||
batch_size = 64 | |||
data = mge.tensor(np.zeros((batch_size, 28 * 28)), dtype=np.float32) | |||
label = mge.tensor(np.zeros(batch_size,), dtype=np.int32) | |||
net0 = M.Sequential( | |||
M.Linear(28 * 28, 320), | |||
M.Linear(320, 10) | |||
) | |||
pred0 = net0(data) | |||
modules = OrderedDict() | |||
modules["fc0"] = M.Linear(28 * 28, 320) | |||
modules["fc1"] = M.Linear(320, 10) | |||
net1 = M.Sequential(modules) | |||
pred1 = net1(data) | |||
""" | |||
def __init__(self, *args, **kwargs): | |||
@@ -13,8 +13,7 @@ from .module import Module | |||
class SlidingWindow(Module): | |||
r""" | |||
Apply a sliding window to input tensor and copy content in the window to | |||
r"""Apply a sliding window to input tensor and copy content in the window to | |||
corresponding output location. Assume input shape is :math:`(N, C, IH, IW)`, | |||
then output shape would be :math:`(N, C, OH, OW, window_h, window_w)` where | |||
:math:`(OH, OW)` would be computed from padding, stride, window and | |||
@@ -26,46 +25,45 @@ class SlidingWindow(Module): | |||
\text{where } & ih=-pad_h+oh \times stride_h + (wh-1) \times (dilation_h-1) \\ | |||
& iw=-pad_w+ow \times stride_w + (ww-1) \times (dilation_w-1) | |||
:param kernel_size: the size of the window to take a max over. | |||
:param padding: implicit zero padding to be added on both sides. Default: 0 | |||
:param stride: the stride of the window. Default: 1 | |||
:param dilation: the dilation of the window. Default: 1 | |||
Args: | |||
kernel_size: the size of the window to take a max over. | |||
padding: implicit zero padding to be added on both sides. Default: 0 | |||
stride: the stride of the window. Default: 1 | |||
dilation: the dilation of the window. Default: 1 | |||
Example: | |||
.. testcode:: | |||
from megengine import tensor | |||
import megengine.module as M | |||
import numpy as np | |||
.. testcode:: | |||
inp = tensor(np.arange(30).reshape(1,1,5,6)) | |||
op = M.SlidingWindow(kernel_size=3, padding=1, stride=2, dilation=2) | |||
out = op(inp) | |||
print(out.numpy()) | |||
from megengine import tensor | |||
import megengine.module as M | |||
import numpy as np | |||
Outputs: | |||
inp = tensor(np.arange(30).reshape(1,1,5,6)) | |||
op = M.SlidingWindow(kernel_size=3, padding=1, stride=2, dilation=2) | |||
out = op(inp) | |||
print(out.numpy()) | |||
.. testoutput:: | |||
Outputs: | |||
[[[[[[ 0 0 0] | |||
[ 0 7 9] | |||
[ 0 19 21]] | |||
.. testoutput:: | |||
[[ 0 0 0] | |||
[ 7 9 11] | |||
[19 21 23]]] | |||
[[[[[[ 0 0 0] | |||
[ 0 7 9] | |||
[ 0 19 21]] | |||
[[ 0 0 0] | |||
[ 7 9 11] | |||
[19 21 23]]] | |||
[[[ 0 7 9] | |||
[ 0 19 21] | |||
[ 0 0 0]] | |||
[[ 7 9 11] | |||
[19 21 23] | |||
[ 0 0 0]]]]]] | |||
[[[ 0 7 9] | |||
[ 0 19 21] | |||
[ 0 0 0]] | |||
[[ 7 9 11] | |||
[19 21 23] | |||
[ 0 0 0]]]]]] | |||
""" | |||
def __init__( | |||
@@ -89,21 +87,20 @@ class SlidingWindow(Module): | |||
class SlidingWindowTranspose(Module): | |||
r""" | |||
Opposite opration of SlidingWindow, sum over the sliding windows on the | |||
corresponding input location. Given an input of the size | |||
:math:`(N, C, IH, IW, window_h, window_w)` and :attr:`output_size`, the | |||
r"""Opposite opration of SlidingWindow, sum over the sliding windows on the | |||
corresponding input location. Given an input of the size | |||
:math:`(N, C, IH, IW, window_h, window_w)` and :attr:`output_size`, the | |||
output shape would be :math:`(N, C, output\_size_{h}, output\_size_{w})` and the | |||
arguments must satisfy | |||
.. math:: | |||
\text{IH} = \lfloor \frac{\text{output_size}_{h} + 2 * \text{padding}_{h} - | |||
\text{IH} = \lfloor \frac{\text{output_size}_{h} + 2 * \text{padding}_{h} - | |||
\text{dilation}_{h} * (\text{kernel_size}_{h} - 1) - 1}{\text{stride}_{h}} + 1 \rfloor | |||
.. math:: | |||
\text{IW} = \lfloor \frac{\text{output_size}_{w} + 2 * \text{padding}_{w} - | |||
\text{IW} = \lfloor \frac{\text{output_size}_{w} + 2 * \text{padding}_{w} - | |||
\text{dilation}_{w} * (\text{kernel_size}_{w} - 1) - 1}{\text{stride}_{w}} + 1 \rfloor | |||
For each output location, we have: | |||
.. math:: | |||
@@ -113,36 +110,13 @@ class SlidingWindowTranspose(Module): | |||
\text{location}(n, c, ih, iw, wh, ww) &= (n, c, oh+wh, ow+ww) \\ | |||
\text{where } & oh=-pad_h+ih \times stride_h + (wh-1) \times (dilation_h-1) \\ | |||
& ow=-pad_w+iw \times stride_w + (ww-1) \times (dilation_w-1) | |||
:param output_size: the size of the output tensor. | |||
:param kernel_size: the size of the window to take a max over. | |||
:param padding: implicit zero padding to be added on both sides. Default: 0 | |||
:param stride: the stride of the window. Default: 1 | |||
:param dilation: the dilation of the window. Default: 1 | |||
Example: | |||
.. testcode:: | |||
from megengine import tensor | |||
import megengine.module as M | |||
import numpy as np | |||
inp = tensor(np.arange(20).reshape(1,1,4,5)) | |||
unfold = M.SlidingWindow(kernel_size=3, padding=0, stride=1, dilation=1) | |||
fold = M.SlidingWindowTranspose((4,5), kernel_size=3, padding=0, stride=1, dilation=1) | |||
out = fold(unfold(inp)) | |||
print(out.numpy()) | |||
Outputs: | |||
.. testoutput:: | |||
[[[[ 0 2 6 6 4] | |||
[10 24 42 32 18] | |||
[20 44 72 52 28] | |||
[15 32 51 36 19]]]] | |||
Args: | |||
output_size: the size of the output tensor. | |||
kernel_size: the size of the window to take a max over. | |||
padding: implicit zero padding to be added on both sides. Default: 0 | |||
stride: the stride of the window. Default: 1 | |||
dilation: the dilation of the window. Default: 1 | |||
""" | |||
def __init__( | |||
@@ -15,20 +15,20 @@ from .optimizer import Optimizer | |||
class Adadelta(Optimizer): | |||
r""" | |||
Implements Adadelta algorithm. | |||
r"""Implements Adadelta algorithm. | |||
It has been proposed in `"ADADELTA: An Adaptive Learning Rate Method" <https://arxiv.org/abs/1212.5701>`_. | |||
:param params: iterable of parameters to optimize or dicts defining | |||
parameter groups. | |||
:param lr: coefficient that scales delta before it is applied | |||
to the parameters. Default: 1.0 | |||
:param rho: coefficient used for computing a running average | |||
of squared gradients. Default: 0.9 | |||
:param eps: term added to the denominator to improve | |||
numerical stability. Default: 1e-6 | |||
:param weight_decay: weight decay (L2 penalty). Default: 0 | |||
Args: | |||
params: iterable of parameters to optimize or dicts defining | |||
parameter groups. | |||
lr: coefficient that scales delta before it is applied | |||
to the parameters. Default: 1.0 | |||
rho: coefficient used for computing a running average | |||
of squared gradients. Default: 0.9 | |||
eps: term added to the denominator to improve | |||
numerical stability. Default: 1e-6 | |||
weight_decay: weight decay (L2 penalty). Default: 0 | |||
""" | |||
def __init__( | |||
@@ -15,20 +15,20 @@ from .optimizer import Optimizer | |||
class Adagrad(Optimizer): | |||
r""" | |||
Implements Adagrad algorithm. | |||
r"""Implements Adagrad algorithm. | |||
It has been proposed in `"Adaptive Subgradient Methods for Online Learning | |||
and Stochastic Optimization" <http://jmlr.org/papers/v12/duchi11a.html>`_. | |||
:param params: iterable of parameters to optimize or dicts defining | |||
parameter groups. | |||
:param lr: coefficient that scales delta before it is applied | |||
to the parameters. Default: 1e-2 | |||
:param lr_decay: learning rate decay. Default: 0 | |||
:param eps: term added to the denominator to improve | |||
numerical stability. Default: 1e-10 | |||
:param weight_decay: weight decay (L2 penalty). Default: 0 | |||
Args: | |||
params: iterable of parameters to optimize or dicts defining | |||
parameter groups. | |||
lr: coefficient that scales delta before it is applied | |||
to the parameters. Default: 1e-2 | |||
lr_decay: learning rate decay. Default: 0 | |||
eps: term added to the denominator to improve | |||
numerical stability. Default: 1e-10 | |||
weight_decay: weight decay (L2 penalty). Default: 0 | |||
""" | |||
def __init__( | |||
@@ -15,17 +15,16 @@ from .optimizer import Optimizer | |||
class Adam(Optimizer): | |||
r""" | |||
Implements Adam algorithm proposed in `"Adam: A Method for Stochastic Optimization" <https://arxiv.org/abs/1412.6980>`_. | |||
r"""Implements Adam algorithm proposed in `"Adam: A Method for Stochastic Optimization" <https://arxiv.org/abs/1412.6980>`_. | |||
:param params: iterable of parameters to optimize or dicts defining | |||
Args: | |||
params: iterable of parameters to optimize or dicts defining | |||
parameter groups. | |||
:param lr: learning rate. | |||
:param betas: coefficients used for computing running averages of gradient | |||
and its square. Default: (0.9, 0.999) | |||
:param eps: term added to the denominator to improve numerical stability | |||
Default: 1e-8 | |||
:param weight_decay: weight decay (L2 penalty). Default: 0 | |||
lr: learning rate. | |||
betas: coefficients used for computing running averages of gradient | |||
and its square. Default: (0.9, 0.999) | |||
eps: term added to the denominator to improve numerical stability. Default: 1e-8 | |||
weight_decay: weight decay (L2 penalty). Default: 0 | |||
""" | |||
def __init__( | |||
@@ -15,17 +15,16 @@ from .optimizer import Optimizer | |||
class AdamW(Optimizer): | |||
r""" | |||
Implements AdamW algorithm proposed in `"Decoupled Weight Decay Regularization" <https://arxiv.org/abs/1711.05101>`_. | |||
r"""Implements AdamW algorithm proposed in `"Decoupled Weight Decay Regularization" <https://arxiv.org/abs/1711.05101>`_. | |||
:param params: iterable of parameters to optimize or dicts defining | |||
Args: | |||
params: iterable of parameters to optimize or dicts defining | |||
parameter groups. | |||
:param lr: learning rate. | |||
:param betas: coefficients used for computing running averages of gradient | |||
and its square. Default: (0.9, 0.999) | |||
:param eps: term added to the denominator to improve numerical stability | |||
Default: 1e-8 | |||
:param weight_decay: weight decay (L2 penalty). Default: 1e-2 | |||
lr: learning rate. | |||
betas: coefficients used for computing running averages of gradient | |||
and its square. Default: (0.9, 0.999) | |||
eps: term added to the denominator to improve numerical stability. Default: 1e-8 | |||
weight_decay: weight decay (L2 penalty). Default: 1e-2 | |||
""" | |||
def __init__( | |||
@@ -23,10 +23,13 @@ def clip_grad_norm( | |||
The norm is computed over all gradients together, as if they were | |||
concatenated into a single vector. Gradients are modified in-place. | |||
:param tensors: an iterable of Tensors or a single Tensor. | |||
:param max_norm: max norm of the gradients. | |||
:param ord: type of the used p-norm. Can be ``'inf'`` for infinity norm. | |||
:return: total norm of the parameters (viewed as a single vector). | |||
Args: | |||
tensors: an iterable of Tensors or a single Tensor. | |||
max_norm: max norm of the gradients. | |||
ord: type of the used p-norm. Can be ``'inf'`` for infinity norm. | |||
Returns: | |||
total norm of the parameters (viewed as a single vector). | |||
""" | |||
push_scope("clip_grad_norm") | |||
if isinstance(tensors, Tensor): | |||
@@ -53,14 +56,15 @@ def clip_grad_value( | |||
): | |||
r"""Clips gradient of an iterable of parameters to a specified lower and | |||
upper bound. Gradients are modified in-place. | |||
The gradients are clipped in the range: | |||
.. math:: \left[\text{lower}, \text{upper}\right] | |||
:param tensors: an iterable of Tensors or a single Tensor. | |||
:param lower: minimum allowed value of the gradients. | |||
:param upper: maximum allowed value of the gradients. | |||
Args: | |||
tensors: an iterable of Tensors or a single Tensor. | |||
lower: minimum allowed value of the gradients. | |||
upper: maximum allowed value of the gradients. | |||
""" | |||
push_scope("clip_grad_value") | |||
if isinstance(tensors, Tensor): | |||
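A minimal hedged sketch of where these clipping helpers sit in a training step, assuming the ``megengine.autodiff.GradManager`` and ``megengine.optimizer`` exports used elsewhere in this document:

.. code-block::

    import numpy as np
    import megengine as mge
    import megengine.module as M
    from megengine.autodiff import GradManager
    from megengine.optimizer import SGD, clip_grad_norm, clip_grad_value

    net = M.Linear(4, 2)
    gm = GradManager()
    gm.attach(net.parameters())
    opt = SGD(net.parameters(), lr=0.1)

    with gm:
        loss = net(mge.tensor(np.ones((8, 4), dtype="float32"))).mean()
        gm.backward(loss)

    total_norm = clip_grad_norm(net.parameters(), max_norm=1.0)  # rescale by global norm
    clip_grad_value(net.parameters(), lower=-5.0, upper=5.0)     # then clamp element-wise
    opt.step().clear_grad()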
@@ -12,11 +12,11 @@ from .optimizer import Optimizer | |||
class LRScheduler(metaclass=ABCMeta): | |||
r""" | |||
Base class for all learning rate based schedulers. | |||
r"""Base class for all learning rate based schedulers. | |||
:param optimizer: wrapped optimizer. | |||
:param current_epoch: the index of current epoch. Default: -1 | |||
Args: | |||
optimizer: wrapped optimizer. | |||
current_epoch: the index of current epoch. Default: -1 | |||
""" | |||
def __init__( # pylint: disable=too-many-branches | |||
@@ -45,25 +45,22 @@ class LRScheduler(metaclass=ABCMeta): | |||
self.step() | |||
def state_dict(self): | |||
r""" | |||
Returns the state of the scheduler as a :class:`dict`. | |||
r"""Returns the state of the scheduler as a :class:`dict`. | |||
It contains an entry for every variable in self.__dict__ which | |||
is not the optimizer. | |||
""" | |||
raise NotImplementedError | |||
def load_state_dict(self, state_dict): | |||
r""" | |||
Loads the schedulers state. | |||
r"""Loads the schedulers state. | |||
:type state_dict: dict | |||
:param state_dict: scheduler state. | |||
Args: | |||
state_dict: scheduler state. | |||
""" | |||
raise NotImplementedError | |||
def get_lr(self): | |||
r""" Compute current learning rate for the scheduler. | |||
""" | |||
r"""Compute current learning rate for the scheduler.""" | |||
raise NotImplementedError | |||
def step(self, epoch=None): | |||
@@ -14,16 +14,14 @@ from .optimizer import Optimizer | |||
class MultiStepLR(LRScheduler): | |||
r""" | |||
Decays the learning rate of each parameter group by gamma once the | |||
r"""Decays the learning rate of each parameter group by gamma once the | |||
number of epochs reaches one of the milestones. | |||
:param optimizer: wrapped optimizer. | |||
:type milestones: list | |||
:param milestones: list of epoch indices which should be increasing. | |||
:type gamma: float | |||
:param gamma: multiplicative factor of learning rate decay. Default: 0.1 | |||
:param current_epoch: the index of current epoch. Default: -1 | |||
Args: | |||
optimizer: wrapped optimizer. | |||
milestones: list of epoch indices which should be increasing. | |||
gamma: multiplicative factor of learning rate decay. Default: 0.1 | |||
current_epoch: the index of current epoch. Default: -1 | |||
""" | |||
def __init__( | |||
@@ -45,8 +43,7 @@ class MultiStepLR(LRScheduler): | |||
super().__init__(optimizer, current_epoch) | |||
def state_dict(self): | |||
r""" | |||
Returns the state of the scheduler as a :class:`dict`. | |||
r"""Returns the state of the scheduler as a :class:`dict`. | |||
It contains an entry for every variable in self.__dict__ which | |||
is not the optimizer. | |||
""" | |||
@@ -57,11 +54,10 @@ class MultiStepLR(LRScheduler): | |||
} | |||
def load_state_dict(self, state_dict): | |||
r""" | |||
Loads the schedulers state. | |||
r"""Loads the schedulers state. | |||
:type state_dict: dict | |||
:param state_dict: scheduler state. | |||
Args: | |||
state_dict: scheduler state. | |||
""" | |||
tmp_dict = {} | |||
for key in ["milestones", "gamma", "current_epoch"]: | |||
@@ -30,11 +30,11 @@ required = _RequiredParameter() | |||
class Optimizer(metaclass=ABCMeta): | |||
r""" | |||
Base class for all optimizers. | |||
r"""Base class for all optimizers. | |||
:param params: specifies what Tensors should be optimized. | |||
:param defaults: a dict of default parameters of Optimizer, like learning rate or momentum. | |||
Args: | |||
params: specifies what Tensors should be optimized. | |||
defaults: a dict of default parameters of Optimizer, like learning rate or momentum. | |||
""" | |||
def __init__( # pylint: disable=too-many-branches | |||
@@ -76,14 +76,13 @@ class Optimizer(metaclass=ABCMeta): | |||
self._create_state(group) | |||
def add_param_group(self, param_group: dict): | |||
r""" | |||
Add a param group to ``param_groups`` of the :class:`~megengine.optim.optimizer.Optimizer`. | |||
r"""Add a param group to ``param_groups`` of the :class:`~megengine.optim.optimizer.Optimizer`. | |||
This can be useful when fine tuning a pre-trained network as frozen layers can be made | |||
trainable and added to the :class:`~megengine.optim.optimizer.Optimizer` as training progresses. | |||
:param param_group: specifies what tensors should be optimized along with group. | |||
Args: | |||
param_group: specifies what tensors should be optimized along with group. | |||
""" | |||
assert isinstance(param_group, dict), "param group must be a dict" | |||
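A hedged illustration of growing the optimizer during fine-tuning; ``backbone`` and ``head`` are placeholder modules, and the per-group ``lr`` override is assumed to be accepted alongside ``params`` in the group dict:

.. code-block::

    # start by optimizing only the backbone parameters
    opt = SGD(backbone.parameters(), lr=0.01)

    # later, unfreeze the head and give it its own learning rate
    opt.add_param_group({"params": list(head.parameters()), "lr": 0.001})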
@@ -143,10 +142,7 @@ class Optimizer(metaclass=ABCMeta): | |||
return params | |||
def step(self): | |||
r""" | |||
Performs a single optimization step. | |||
""" | |||
r"""Performs a single optimization step.""" | |||
# set the global state `_enable_convert_inputs` to `False` to disable | |||
# the `convert_inputs` for param updates | |||
set_option("record_computing_path", 0) | |||
@@ -176,9 +172,7 @@ class Optimizer(metaclass=ABCMeta): | |||
param.grad.reset_zero() | |||
def clear_grad(self): | |||
r""" | |||
Set the grad attribute to None for all parameters. | |||
""" | |||
r"""Set the grad attribute to None for all parameters.""" | |||
for param_group in self.param_groups: | |||
push_scope("clear_grad") | |||
for param in param_group["params"]: | |||
@@ -186,10 +180,10 @@ class Optimizer(metaclass=ABCMeta): | |||
pop_scope("clear_grad") | |||
def state_dict(self, keep_var=False) -> Dict: | |||
r""" | |||
Export the optimizer state. | |||
r"""Export the optimizer state. | |||
:return: optimizer state. Can be loaded by :meth:`load_state_dict`. | |||
Returns: | |||
optimizer state. Can be loaded by :meth:`load_state_dict`. | |||
""" | |||
param_groups = [] | |||
state = dict() | |||
@@ -217,10 +211,10 @@ class Optimizer(metaclass=ABCMeta): | |||
return {"param_groups": param_groups, "state": state} | |||
def load_state_dict(self, state: dict): | |||
r""" | |||
Loads the optimizer state. | |||
r"""Loads the optimizer state. | |||
:param state: optimizer state. Should be an object returned | |||
Args: | |||
state: optimizer state. Should be an object returned | |||
from a call to :meth:`state_dict`. | |||
""" | |||
if len(self.param_groups) != len(state["param_groups"]): | |||
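As a sketch of checkpointing, the exported state can be stored next to the model's; ``megengine.save``/``megengine.load`` are used here on the assumption that a pickle-style serializer is available, and the file name is arbitrary:

.. code-block::

    import megengine as mge

    # save
    mge.save({"model": model.state_dict(), "opt": opt.state_dict()}, "ckpt.pkl")

    # restore
    ckpt = mge.load("ckpt.pkl")
    model.load_state_dict(ckpt["model"])
    opt.load_state_dict(ckpt["opt"])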
@@ -15,17 +15,17 @@ from .optimizer import Optimizer | |||
class SGD(Optimizer): | |||
r""" | |||
Implements stochastic gradient descent. | |||
r"""Implements stochastic gradient descent. | |||
Nesterov momentum is based on the formula from | |||
`"On the importance of initialization and momentum in deep learning" <http://www.cs.toronto.edu/%7Ehinton/absps/momentum.pdf>`_ . | |||
:param params: iterable of parameters to optimize or dicts defining | |||
Args: | |||
params: iterable of parameters to optimize or dicts defining | |||
parameter groups. | |||
:param lr: learning rate. | |||
:param momentum: momentum factor. Default: 0.0 | |||
:param weight_decay: weight decay (L2 penalty). Default: 0.0 | |||
lr: learning rate. | |||
momentum: momentum factor. Default: 0.0 | |||
weight_decay: weight decay (L2 penalty). Default: 0.0 | |||
""" | |||
def __init__( | |||
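A short sketch of constructing the optimizer and, in comments, the textbook update its hyper-parameters imply; the exact in-place formulation inside MegEngine may differ:

.. code-block::

    opt = SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)

    # conceptually, for each parameter p with gradient g and velocity v:
    #   g = g + weight_decay * p
    #   v = momentum * v + g
    #   p = p - lr * v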
@@ -72,13 +72,13 @@ class _FakeQuantize(Module): | |||
class TQT(_FakeQuantize, QParamsModuleMixin): | |||
r""" | |||
TQT: https://arxiv.org/abs/1903.08066 Trained Quantization Thresholds | |||
r"""TQT: https://arxiv.org/abs/1903.08066 Trained Quantization Thresholds | |||
for Accurate and Efficient Fixed-Point Inference of Deep Neural Networks. | |||
:param dtype: a string or :class:`~.QuantDtypeMeta` indicating the target | |||
quantization dtype of input. | |||
:param enable: whether do ``normal_forward`` or ``fake_quant_forward``. | |||
Args: | |||
dtype: a string or :class:`~.QuantDtypeMeta` indicating the target | |||
quantization dtype of input. | |||
enable: whether to do ``normal_forward`` or ``fake_quant_forward``. | |||
""" | |||
def __init__( | |||
@@ -104,12 +104,12 @@ class TQT(_FakeQuantize, QParamsModuleMixin): | |||
class FakeQuantize(_FakeQuantize): | |||
r""" | |||
A module to do quant and dequant according to observer's scale and zero_point. | |||
r"""A module to do quant and dequant according to observer's scale and zero_point. | |||
:param dtype: a string or :class:`~.QuantDtypeMeta` indicating the target | |||
quantization dtype of input. | |||
:param enable: whether do ``normal_forward`` or ``fake_quant_forward``. | |||
Args: | |||
dtype: a string or :class:`~.QuantDtypeMeta` indicating the target | |||
quantization dtype of input. | |||
enable: whether to do ``normal_forward`` or ``fake_quant_forward``. | |||
""" | |||
def fake_quant_forward(self, inp, qparams: QParams = None): | |||
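For intuition, the quant-dequant round trip such a fake-quant module performs can be written as a small NumPy sketch; ``scale``, ``zero_point``, ``qmin`` and ``qmax`` mirror the qparams described above, but this is an illustration, not the module's actual implementation:

.. code-block::

    import numpy as np

    def fake_quant(x, scale, zero_point=0, qmin=-128, qmax=127):
        # quantize: scale, shift and clamp onto the integer grid
        q = np.clip(np.round(x / scale) + zero_point, qmin, qmax)
        # dequantize: map back to float so training sees the quantization error
        return (q - zero_point) * scale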
@@ -122,14 +122,14 @@ class FakeQuantize(_FakeQuantize): | |||
class LSQ(_FakeQuantize, QParamsModuleMixin): | |||
r""" | |||
LSQ: https://arxiv.org/pdf/1902.08153.pdf Estimating and scaling the | |||
r"""LSQ: https://arxiv.org/pdf/1902.08153.pdf Estimating and scaling the | |||
task loss gradient at each weight and activation layer's quantizer step size | |||
:param dtype: a string or :class:`~.QuantDtypeMeta` indicating the target | |||
quantization dtype of input. | |||
:param enable: whether do ``normal_forward`` or ``fake_quant_forward``. | |||
:param eps:a small value to avoid division by zero. Default: 1e-5 | |||
Args: | |||
dtype: a string or :class:`~.QuantDtypeMeta` indicating the target | |||
quantization dtype of input. | |||
enable: whether to do ``normal_forward`` or ``fake_quant_forward``. | |||
eps: a small value to avoid division by zero. Default: 1e-5 | |||
""" | |||
def __init__( | |||
@@ -25,11 +25,11 @@ logger = get_logger(__name__) | |||
class Observer(Module, QParamsModuleMixin): | |||
r""" | |||
A base class for Observer Module. Used to record input tensor's statistics for | |||
r"""A base class for Observer Module. Used to record input tensor's statistics for | |||
quantization. | |||
:param dtype: a string indicating which dtype to collect scale and zero_point of. | |||
Args: | |||
dtype: a string indicating which dtype to collect scale and zero_point of. | |||
""" | |||
def __init__(self, dtype: Union[str, QuantDtypeMeta], **kwargs): | |||
@@ -73,12 +73,12 @@ class Observer(Module, QParamsModuleMixin): | |||
class MinMaxObserver(Observer): | |||
r""" | |||
A Observer Module records input tensor's running min and max values to calc scale. | |||
r"""A Observer Module records input tensor's running min and max values to calc scale. | |||
:param mode: set quantization mode. | |||
:param eps: a initial maximum value to avoid division by zero problem. | |||
:param dtype: a string indicating which dtype to collect scale and zero_point of. | |||
Args: | |||
mode: set quantization mode. | |||
eps: an initial maximum value to avoid the division-by-zero problem. | |||
dtype: a string indicating which dtype to collect scale and zero_point of. | |||
""" | |||
def __init__( | |||
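One common symmetric way such an observer turns its running min/max into a scale is sketched below; the variable names are illustrative and the exact rule used by this class may differ:

.. code-block::

    # the scale must cover the larger of |running_min| and |running_max|
    bound = max(abs(running_min), abs(running_max), eps)
    scale = bound / ((qmax - qmin) / 2)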
@@ -128,12 +128,12 @@ class MinMaxObserver(Observer): | |||
class SyncMinMaxObserver(MinMaxObserver): | |||
r""" | |||
A distributed version of :class:`~.MinMaxObserver`. | |||
r"""A distributed version of :class:`~.MinMaxObserver`. | |||
:param mode: set quantization mode. | |||
:param eps: a initial maximum value to avoid division by zero problem. | |||
:param dtype: a string indicating which dtype to collect scale and zero_point of. | |||
Args: | |||
mode: set quantization mode. | |||
eps: an initial maximum value to avoid the division-by-zero problem. | |||
dtype: a string indicating which dtype to collect scale and zero_point of. | |||
""" | |||
def forward(self, x_orig): | |||
@@ -151,13 +151,13 @@ class SyncMinMaxObserver(MinMaxObserver): | |||
class ExponentialMovingAverageObserver(MinMaxObserver): | |||
r""" | |||
A :class:`~.MinMaxObserver` with momentum support for min/max updating. | |||
r"""A :class:`~.MinMaxObserver` with momentum support for min/max updating. | |||
:param momentum: momentum ratio for min/max updating. | |||
:param mode: set quantization mode. | |||
:param eps: a initial maximum value to avoid division by zero problem. | |||
:param dtype: a string indicating which dtype to collect scale and zero_point of. | |||
Args: | |||
momentum: momentum ratio for min/max updating. | |||
mode: set quantization mode. | |||
eps: an initial maximum value to avoid the division-by-zero problem. | |||
dtype: a string indicating which dtype to collect scale and zero_point of. | |||
""" | |||
def __init__( | |||
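The momentum update is the usual exponential moving average; a sketch with illustrative names, assuming this is the intended semantics of ``momentum``:

.. code-block::

    running_min = momentum * running_min + (1 - momentum) * batch_min
    running_max = momentum * running_max + (1 - momentum) * batch_max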
@@ -196,13 +196,13 @@ class ExponentialMovingAverageObserver(MinMaxObserver): | |||
class SyncExponentialMovingAverageObserver(ExponentialMovingAverageObserver): | |||
r""" | |||
A distributed version of :class:`~.ExponentialMovingAverageObserver`. | |||
r"""A distributed version of :class:`~.ExponentialMovingAverageObserver`. | |||
:param momentum: momentum ratio for min/max updating. | |||
:param mode: set quantization mode. | |||
:param eps: a initial maximum value to avoid division by zero problem. | |||
:param dtype: a string indicating which dtype to collect scale and zero_point of. | |||
Args: | |||
momentum: momentum ratio for min/max updating. | |||
mode: set quantization mode. | |||
eps: an initial maximum value to avoid the division-by-zero problem. | |||
dtype: a string indicating which dtype to collect scale and zero_point of. | |||
""" | |||
def forward(self, x_orig): | |||
@@ -227,15 +227,15 @@ class SyncExponentialMovingAverageObserver(ExponentialMovingAverageObserver): | |||
class HistogramObserver(MinMaxObserver): | |||
r""" | |||
A :class:`~.MinMaxObserver` using running histogram of tensor values | |||
r"""A :class:`~.MinMaxObserver` using running histogram of tensor values | |||
for min/max updating. Usually used for calibration quantization. | |||
:param bins: number of bins to use for the histogram. | |||
:param upsample_rate: which ratio to interpolate histograms in. | |||
:param mode: set quantization mode. | |||
:param eps: a initial maximum value to avoid division by zero problem. | |||
:param dtype: a string indicating which dtype to collect scale and zero_point of. | |||
Args: | |||
bins: number of bins to use for the histogram. | |||
upsample_rate: the ratio used when interpolating the histogram. | |||
mode: set quantization mode. | |||
eps: an initial maximum value to avoid the division-by-zero problem. | |||
dtype: a string indicating which dtype to collect scale and zero_point of. | |||
""" | |||
def __init__( | |||
@@ -256,8 +256,7 @@ class HistogramObserver(MinMaxObserver): | |||
self.histogram = Tensor([-1] + [0.0] * (bins - 1), dtype="float32") | |||
def _non_linear_param_search(self): | |||
r""" | |||
Non-linear parameter search. | |||
r"""Non-linear parameter search. | |||
An approximation for L2 error minimization for selecting min/max. | |||
By selecting new min/max, we filter out outliers in the input distribution. | |||
""" | |||
@@ -269,8 +268,7 @@ class HistogramObserver(MinMaxObserver): | |||
bin_width = (np_max_val - np_min_val) / self.bins | |||
def _get_norm(delta_begin, delta_end, density, norm_type): | |||
r""" | |||
Compute the norm of the values uniformaly distributed between | |||
r"""Compute the norm of the values uniformaly distributed between | |||
delta_begin and delta_end. | |||
norm = density * (integral_{begin, end} x^2) | |||
= density * (end^3 - begin^3) / 3 | |||
@@ -285,8 +283,7 @@ class HistogramObserver(MinMaxObserver): | |||
return density * norm | |||
def _compute_quantization_error(next_start_bin, next_end_bin, norm_type): | |||
r""" | |||
Compute the quantization error if we use start_bin to end_bin as the | |||
r"""Compute the quantization error if we use start_bin to end_bin as the | |||
min and max to do the quantization. | |||
""" | |||
@@ -488,9 +485,7 @@ class HistogramObserver(MinMaxObserver): | |||
class PassiveObserver(Observer): | |||
r""" | |||
An Observer that supports setting :attr:`scale` directly. | |||
""" | |||
r"""An Observer that supports setting :attr:`scale` directly.""" | |||
def __init__(self, dtype: Union[str, QuantDtypeMeta], **kwargs): | |||
super().__init__(dtype, **kwargs) | |||
@@ -510,8 +505,10 @@ class PassiveObserver(Observer): | |||
return self.qparams | |||
def set_qparams(self, qparams: QParams): | |||
""" | |||
:param qparams: used to set initial scale. | |||
r"""set the ``qparams``. | |||
Args: | |||
qparams: used to set initial scale. | |||
""" | |||
self.qparams = deepcopy(qparams) | |||
if qparams.scale is None: | |||
@@ -527,7 +524,5 @@ class PassiveObserver(Observer): | |||
self.orig_scale = qparams.scale.numpy() | |||
def forward(self, x): | |||
r""" | |||
Just return input because :attr:`qparams` is set by :func:`~.apply_easy_quant`. | |||
""" | |||
r"""Just return input because :attr:`qparams` is set by :func:`~.apply_easy_quant`.""" | |||
return x |
@@ -27,33 +27,33 @@ class QConfig( | |||
["weight_observer", "act_observer", "weight_fake_quant", "act_fake_quant"], | |||
) | |||
): | |||
r""" | |||
A config class indicating how to do quantize toward :class:`~.QATModule` 's | |||
r"""A config class indicating how to do quantize toward :class:`~.QATModule` 's | |||
``activation`` and ``weight``. See :meth:`~.QATModule.set_qconfig` for detail usage. | |||
:param weight_observer: interface to instantiate an :class:`~.Observer` indicating | |||
how to collect scales and zero_point of wegiht. | |||
:param act_observer: similar to ``weight_observer`` but toward activation. | |||
:param weight_fake_quant: interface to instantiate a :class:`~.FakeQuantize` indicating | |||
how to do fake_quant calculation. | |||
:param act_observer: similar to ``weight_fake_quant`` but toward activation. | |||
Args: | |||
weight_observer: interface to instantiate an :class:`~.Observer` indicating | |||
how to collect the scales and zero_point of the weight. | |||
act_observer: similar to ``weight_observer`` but toward activation. | |||
weight_fake_quant: interface to instantiate a :class:`~.FakeQuantize` indicating | |||
how to do fake_quant calculation. | |||
act_fake_quant: similar to ``weight_fake_quant`` but toward activation. | |||
Examples: | |||
.. code-block:: | |||
# Default EMA QConfig for QAT. | |||
ema_fakequant_qconfig = QConfig( | |||
weight_observer=partial(MinMaxObserver, dtype="qint8_narrow"), | |||
act_observer=partial(ExponentialMovingAverageObserver, dtype="qint8"), | |||
weight_fake_quant=partial(FakeQuantize, dtype="qint8_narrow"), | |||
act_fake_quant=partial(FakeQuantize, dtype="qint8"), | |||
) | |||
.. code-block:: | |||
# Default EMA QConfig for QAT. | |||
ema_fakequant_qconfig = QConfig( | |||
weight_observer=partial(MinMaxObserver, dtype="qint8_narrow"), | |||
act_observer=partial(ExponentialMovingAverageObserver, dtype="qint8"), | |||
weight_fake_quant=partial(FakeQuantize, dtype="qint8_narrow"), | |||
act_fake_quant=partial(FakeQuantize, dtype="qint8"), | |||
) | |||
Each parameter is a ``class`` rather than an instance, and we recommend using ``functools.partial`` | |||
to add initialization parameters of the ``class``, so that you don't need to provide parameters in | |||
:meth:`~.QATModule.set_qconfig`. | |||
Usually we choose a narrow-version dtype (like ``qint8_narrow``) for weight-related | |||
parameters and the normal version for activation-related ones. For the result of | |||
multiplication and addition such as ``a * b + c * d``, if all four variables are -128 of | |||
@@ -57,14 +57,14 @@ qat_modules = tuple(_qat2quantized_dict.keys()) | |||
def quantize(module: Module, inplace: bool = True, mapping: dict = None): | |||
r""" | |||
Recursively convert :class:`~.QATModule` to :class:`~.QuantizedModule` | |||
r"""Recursively convert :class:`~.QATModule` to :class:`~.QuantizedModule` | |||
through :meth:`~.Module.apply`. | |||
:param module: root module to do convert recursively. | |||
:param inplace: whether to convert submodules in-place. | |||
:param mapping: a dict indicating how to convert custom modules from QATModule to | |||
QuantizedModule. Will be combined with internal default convert mapping dict. | |||
Args: | |||
module: root module to do convert recursively. | |||
inplace: whether to convert submodules in-place. | |||
mapping: a dict indicating how to convert custom modules from QATModule to | |||
QuantizedModule. Will be combined with internal default convert mapping dict. | |||
""" | |||
if not inplace: | |||
@@ -94,16 +94,16 @@ def quantize_qat( | |||
qconfig: QConfig = ema_fakequant_qconfig, | |||
mapping: dict = None, | |||
): | |||
r""" | |||
Recursively convert float :class:`~.Module` to :class:`~.QATModule` | |||
r"""Recursively convert float :class:`~.Module` to :class:`~.QATModule` | |||
through :meth:`~.Module.apply` and set the qconfig accordingly. | |||
:param module: root module to do convert recursively. | |||
:param inplace: whether to convert submodules in-place. | |||
:param qconfig: an instance of :class:`~.QConfig` to be set as submodules' qconfig. | |||
default is ``ema_fakequant_qconfig``. | |||
:param mapping: a dict indicating how to convert custom modules from Module to QATModule. | |||
Will be combined with internal default convert mapping dict. | |||
Args: | |||
module: root module to do convert recursively. | |||
inplace: whether to convert submodules in-place. | |||
qconfig: an instance of :class:`~.QConfig` to be set as submodules' qconfig. | |||
Default: ``ema_fakequant_qconfig``. | |||
mapping: a dict indicating how to convert custom modules from Module to QATModule. | |||
Will be combined with internal default convert mapping dict. | |||
""" | |||
if not inplace: | |||
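Putting ``quantize_qat`` and ``quantize`` together, a typical flow looks like the sketch below; it assumes the usual ``megengine.quantization`` entry points and elides the fine-tuning loop:

.. code-block::

    from megengine.quantization import quantize, quantize_qat, ema_fakequant_qconfig

    model = quantize_qat(model, qconfig=ema_fakequant_qconfig)  # float Module -> QATModule
    # ... fine-tune the QAT model as usual ...
    model = quantize(model)                                     # QATModule -> QuantizedModule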
@@ -133,12 +133,12 @@ def quantize_qat( | |||
def reset_qconfig(module: Module, qconfig: QConfig, inplace: bool = True): | |||
r""" | |||
Reset :class:`~._FakeQuantize` and :class:`~.Observer` according to ``qconfig`` | |||
r"""Reset :class:`~._FakeQuantize` and :class:`~.Observer` according to ``qconfig`` | |||
:param module: root module to reset recursively. | |||
:param qconfig: an instance of :class:`~.QConfig` to be set as submodules' qconfig. | |||
:param inplace: whether to reset submodules in-place. | |||
Args: | |||
module: root module to reset recursively. | |||
qconfig: an instance of :class:`~.QConfig` to be set as submodules' qconfig. | |||
inplace: whether to reset submodules in-place. | |||
""" | |||
if not inplace: | |||
@@ -175,19 +175,17 @@ def _propagate(module: Module, func_str: str, *args, **kargs): | |||
def propagate_qconfig(module: QATModule, qconfig: QConfig): | |||
r""" | |||
Recursively set ``module``'s qconfig through :meth:`~.Module.apply`. | |||
r"""Recursively set ``module``'s qconfig through :meth:`~.Module.apply`. | |||
:param module: root module to traverse recursively. | |||
:param qconfig: a instance of :class:`~.QConfig` to be set as submodules' qconfig. | |||
Args: | |||
module: root module to traverse recursively. | |||
qconfig: an instance of :class:`~.QConfig` to be set as submodules' qconfig. | |||
""" | |||
_propagate(module, "set_qconfig", qconfig) | |||
def hook_qat_module(module: Module, func: Callable): | |||
r""" | |||
Add hooks for all :class:`~.QATModule` submodule | |||
""" | |||
r"""Add hooks for all :class:`~.QATModule` submodule""" | |||
def is_qat(mod: Module): | |||
return isinstance(mod, QATModule) | |||
@@ -202,15 +200,16 @@ def hook_qat_module(module: Module, func: Callable): | |||
def apply_easy_quant( | |||
module: Module, data: Tensor, start: float = 0.8, stop: float = 1.2, num: int = 40 | |||
): | |||
r""" | |||
Implementation of ``EasyQuant``: https://arxiv.org/pdf/2006.16669. | |||
r"""Implementation of ``EasyQuant``: https://arxiv.org/pdf/2006.16669. | |||
Search for optimal scales. | |||
:param module: root module. | |||
:param data: input tensor used to search optimal scale. | |||
:param start: lower bound of the search interval. | |||
:param stop: upper bound of the search interval. | |||
:param num: number of samples to search. | |||
Args: | |||
module: root module. | |||
data: input tensor used to search optimal scale. | |||
start: lower bound of the search interval. | |||
stop: upper bound of the search interval. | |||
num: number of samples to search. | |||
""" | |||
batch_size = data.shape[0] | |||
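Based on the signature above, usage reduces to passing a QAT model and one batch of representative data; ``qat_model`` and ``calib_dataloader`` are placeholders:

.. code-block::

    calib_batch = next(iter(calib_dataloader))   # one representative input batch
    apply_easy_quant(qat_model, calib_batch, start=0.8, stop=1.2, num=40)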
@@ -267,40 +266,40 @@ def apply_easy_quant( | |||
def disable_fake_quant(module: Module): | |||
r""" | |||
Recursively disable ``module`` fake quantization in QATModule through :meth:`~.Module.apply` | |||
r"""Recursively disable ``module`` fake quantization in QATModule through :meth:`~.Module.apply` | |||
:param module: root module to do disable fake quantization recursively. | |||
Args: | |||
module: root module on which to disable fake quantization recursively. | |||
""" | |||
_propagate(module, "set_fake_quant", False) | |||
def disable_observer(module: Module): | |||
r""" | |||
Recursively disable ``module`` observer in QATModule through :meth:`~.Module.apply` | |||
r"""Recursively disable ``module`` observer in QATModule through :meth:`~.Module.apply` | |||
:param module: root module to do disable observer recursively. | |||
Args: | |||
module: root module on which to disable the observer recursively. | |||
""" | |||
_propagate(module, "set_observer", False) | |||
def enable_fake_quant(module: Module): | |||
r""" | |||
Recursively enable ``module`` fake quantization in QATModule through :meth:`~.Module.apply` | |||
r"""Recursively enable ``module`` fake quantization in QATModule through :meth:`~.Module.apply` | |||
:param module: root module to do enable fake quantization recursively. | |||
Args: | |||
module: root module on which to enable fake quantization recursively. | |||
""" | |||
_propagate(module, "set_fake_quant", True) | |||
def enable_observer(module: Module): | |||
r""" | |||
Recursively enable ``module`` observer in QATModule through :meth:`~.Module.apply` | |||
r"""Recursively enable ``module`` observer in QATModule through :meth:`~.Module.apply` | |||
:param module: root module to do enable observer recursively. | |||
Args: | |||
module: root module on which to enable the observer recursively. | |||
""" | |||
_propagate(module, "set_observer", True) |
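These four helpers are commonly combined to switch a model between calibration (observers on, fake quant off) and fake-quant training or evaluation (observers off, fake quant on); a sketch, with ``run_calibration`` as a hypothetical helper:

.. code-block::

    # calibration: collect statistics without perturbing activations
    enable_observer(model)
    disable_fake_quant(model)
    run_calibration(model, calib_data)

    # QAT / evaluation with simulated quantization
    disable_observer(model)
    enable_fake_quant(model)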
@@ -25,8 +25,7 @@ from ..tensor import Tensor | |||
class Round(Function): | |||
""" | |||
The functional round have no grad and can not use for quantization-aware-training. | |||
r"""The functional round have no grad and can not use for quantization-aware-training. | |||
We use Function and STE(Straight-Through Estimator) to implement backward propagation. | |||
""" | |||
@@ -68,17 +67,14 @@ def register_method_to_class(cls): | |||
class QuantMode(Enum): | |||
""" | |||
Quantization mode enumerate class. | |||
""" | |||
r"""Quantization mode enumerate class.""" | |||
SYMMERTIC = 1 | |||
ASYMMERTIC = 2 | |||
class QParams: | |||
""" | |||
To standardize FakeQuant, Observer and Tensor's qparams format. If custom | |||
r"""To standardize FakeQuant, Observer and Tensor's qparams format. If custom | |||
qparams is needed, inherit this class and add custom ``__slots__``. | |||
""" | |||
@@ -116,8 +112,7 @@ class QParams: | |||
class LSQParams: | |||
""" | |||
To standardize LSQ's qparams format. If custom | |||
r"""To standardize LSQ's qparams format. If custom | |||
qparams is needed, inherit this class and add custom ``__slots__``. | |||
""" | |||
@@ -183,8 +178,14 @@ def create_qparams( | |||
scale: Tensor = None, | |||
zero_point: Tensor = None, | |||
): | |||
""" | |||
Return :class:`~.QParams` according to the mode. | |||
r""" | |||
Args: | |||
mode: QuantMode: | |||
dtype_meta: Union[str: | |||
QuantDtypeMeta]: | |||
scale: Tensor: | |||
zero_point: Tensor: | |||
""" | |||
if isinstance(dtype_meta, str): | |||
dtype_meta = _builtin_quant_dtypes[dtype_meta] | |||
@@ -197,12 +198,11 @@ def create_qparams( | |||
def fake_quant_tensor(inp: Tensor, qparams: QParams) -> Tensor: | |||
""" | |||
Apply fake quantization to the inp tensor. | |||
:param inp: the input tensor which need to be faked. | |||
:param qparams: to get mode, qmin, qmax, scale and zero_point from. | |||
"""Apply fake quantization to the inp tensor. | |||
Args: | |||
inp: the input tensor which needs to be fake quantized. | |||
qparams: to get mode, qmin, qmax, scale and zero_point from. | |||
""" | |||
scale = qparams.scale | |||
if qparams.mode == QuantMode.ASYMMERTIC: | |||
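A minimal usage sketch combining ``create_qparams`` and ``fake_quant_tensor`` under symmetric mode; the scale value is arbitrary and ``x`` is any float Tensor defined elsewhere:

.. code-block::

    import megengine as mge

    qparams = create_qparams(QuantMode.SYMMERTIC, "qint8", scale=mge.tensor(0.05))
    y = fake_quant_tensor(x, qparams)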
@@ -217,17 +217,16 @@ def fake_quant_tensor(inp: Tensor, qparams: QParams) -> Tensor: | |||
def fake_quant_bias(bias: Tensor, inp: Tensor, w_qat: Tensor) -> Tensor: | |||
""" | |||
Apply fake quantization to bias, with the special scale from input tensor | |||
"""Apply fake quantization to bias, with the special scale from input tensor | |||
and weight tensor, the quantized type set to qint32 also. | |||
:param bias: the bias tensor which need to be faked. | |||
:param inp: the input tensor which contain the quantization parameters. | |||
:param w_qat: the weight tensor which contain the quantization parameters. | |||
Args: | |||
bias: the bias tensor which needs to be fake quantized. | |||
inp: the input tensor which contains the quantization parameters. | |||
w_qat: the weight tensor which contains the quantization parameters. | |||
.. warning:: | |||
Warning: | |||
Only work for symmetric quantization method now. | |||
""" | |||
b_qat = bias | |||
if ( | |||
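The scale mentioned above is, in the standard symmetric scheme, the product of the input scale and the weight scale, with the bias kept on a 32-bit integer grid; a hedged NumPy-style sketch of that relationship, not the function's literal code:

.. code-block::

    import numpy as np

    bias_scale = inp_scale * w_scale                          # scales taken from inp and w_qat qparams
    b_q = np.clip(np.round(bias / bias_scale), -2**31, 2**31 - 1)
    b_fake = b_q * bias_scale                                 # qint32 fake-quantized bias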
@@ -220,29 +220,29 @@ def _permutation(n: int, seed: int, device: str, handle: int, dtype: str) -> Ten | |||
class RNG: | |||
r""" | |||
:class:`RNG` exposes a number of methods for generating random numbers. | |||
r""":class:`RNG` exposes a number of methods for generating random numbers. | |||
Args: | |||
seed: random seed used to initialize the pseudo-random number generator. Default: None | |||
device: the device of generated tensor. Default: None | |||
:param seed: random seed used to initialize the pseudo-random number generator. | |||
Default: None | |||
:param device: the device of generated tensor. Default: None | |||
Examples: | |||
.. testcode:: | |||
.. testcode:: | |||
import megengine.random as rand | |||
rng = rand.RNG(seed=100) | |||
x = rng.uniform(size=(2, 2)) | |||
print(x.numpy()) | |||
import megengine.random as rand | |||
rng = rand.RNG(seed=100) | |||
x = rng.uniform(size=(2, 2)) | |||
print(x.numpy()) | |||
Outputs: | |||
Outputs: | |||
.. testoutput:: | |||
:options: +SKIP | |||
.. testoutput:: | |||
:options: +SKIP | |||
[[0.84811664 0.6147553 ] | |||
[0.59429836 0.64727545]] | |||
[[0.84811664 0.6147553 ] | |||
[0.59429836 0.64727545]] | |||
""" | |||
@@ -259,32 +259,33 @@ class RNG: | |||
def uniform( | |||
self, low: float = 0, high: float = 1, size: Optional[Iterable[int]] = None | |||
): | |||
r""" | |||
Random variable with uniform distribution $U(0, 1)$. | |||
r"""Random variable with uniform distribution $U(0, 1)$. | |||
Args: | |||
low: lower range. Default: 0 | |||
high: upper range. Default: 1 | |||
size: the size of output tensor. Default: None | |||
:param low: lower range. Default: 0 | |||
:param high: upper range. Default: 1 | |||
:param size: the size of output tensor. Default: None | |||
:return: the output tensor. | |||
Returns: | |||
the output tensor. | |||
Examples: | |||
.. testcode:: | |||
.. testcode:: | |||
import megengine as mge | |||
import megengine.random as rand | |||
import megengine as mge | |||
import megengine.random as rand | |||
x = rand.uniform(size=(2, 2)) | |||
print(x.numpy()) | |||
Outputs: | |||
.. testoutput:: | |||
:options: +SKIP | |||
x = rand.uniform(size=(2, 2)) | |||
print(x.numpy()) | |||
Outputs: | |||
[[0.91600335 0.6680226 ] | |||
[0.2046729 0.2769141 ]] | |||
.. testoutput:: | |||
:options: +SKIP | |||
[[0.91600335 0.6680226 ] | |||
[0.2046729 0.2769141 ]] | |||
""" | |||
_seed = self._seed() if callable(self._seed) else self._seed | |||
return _uniform( | |||
@@ -299,33 +300,34 @@ class RNG: | |||
def normal( | |||
self, mean: float = 0, std: float = 1, size: Optional[Iterable[int]] = None | |||
): | |||
r""" | |||
Random variable with Gaussian distribution :math:`N(\mu, \sigma)`. | |||
r"""Random variable with Gaussian distribution :math:`N(\mu, \sigma)`. | |||
:param mean: the mean or expectation of the distribution. Default: 0 | |||
:param std: the standard deviation of the distribution (variance = :math:`\sigma ^ 2`). | |||
Default: 1 | |||
:param size: the size of output tensor. Default: None | |||
:return: the output tensor. | |||
Args: | |||
mean: the mean or expectation of the distribution. Default: 0 | |||
std: the standard deviation of the distribution (variance = :math:`\sigma ^ 2`). | |||
Default: 1 | |||
size: the size of output tensor. Default: None | |||
Returns: | |||
the output tensor. | |||
Examples: | |||
.. testcode:: | |||
.. testcode:: | |||
import megengine as mge | |||
import megengine.random as rand | |||
import megengine as mge | |||
import megengine.random as rand | |||
x = rand.normal(mean=0, std=1, size=(2, 2)) | |||
print(x.numpy()) | |||
Outputs: | |||
.. testoutput:: | |||
:options: +SKIP | |||
x = rand.normal(mean=0, std=1, size=(2, 2)) | |||
print(x.numpy()) | |||
[[-1.4010863 -0.9874344 ] | |||
[ 0.56373274 0.79656655]] | |||
Outputs: | |||
.. testoutput:: | |||
:options: +SKIP | |||
[[-1.4010863 -0.9874344 ] | |||
[ 0.56373274 0.79656655]] | |||
""" | |||
_seed = self._seed() if callable(self._seed) else self._seed | |||
return _normal( | |||
@@ -343,12 +345,12 @@ class RNG: | |||
scale: Union[Tensor, float] = 1, | |||
size: Optional[Iterable[int]] = None, | |||
): | |||
r""" | |||
Random variable with Gamma distribution :math:`\Gamma(k, \theta)`. | |||
r"""Random variable with Gamma distribution :math:`\Gamma(k, \theta)`. | |||
The corresponding probability density function is | |||
.. math:: | |||
p(x)=x^{k-1} \frac{e^{-x / \theta}}{\theta^{k} \Gamma(k)} | |||
\quad \text { for } x>0 \quad k, \theta>0, | |||
@@ -357,52 +359,54 @@ class RNG: | |||
.. math:: | |||
\Gamma(k)=(k-1) ! \quad \text { for } \quad k>0. | |||
:param shape: the shape parameter (sometimes designated "k") of the distribution. | |||
Must be non-negative. | |||
:param scale: the scale parameter (sometimes designated "theta") of the distribution. | |||
Must be non-negative. Default: 1 | |||
:param size: the size of output tensor. If shape and scale are scalars and given size is, e.g., | |||
`(m, n)`, then the output shape is `(m, n)`. If shape or scale is a Tensor and given size | |||
is, e.g., `(m, n)`, then the output shape is `(m, n) + broadcast(shape, scale).shape`. | |||
The broadcast rules are consistent with `numpy.broadcast`. Default: None | |||
:return: the output tensor. | |||
Args: | |||
shape: the shape parameter (sometimes designated "k") of the distribution. | |||
Must be non-negative. | |||
scale: the scale parameter (sometimes designated "theta") of the distribution. | |||
Must be non-negative. Default: 1 | |||
size: the size of output tensor. If shape and scale are scalars and given size is, e.g., | |||
`(m, n)`, then the output shape is `(m, n)`. If shape or scale is a Tensor and given size | |||
is, e.g., `(m, n)`, then the output shape is `(m, n) + broadcast(shape, scale).shape`. | |||
The broadcast rules are consistent with `numpy.broadcast`. Default: None | |||
Returns: | |||
the output tensor. | |||
Examples: | |||
.. testcode:: | |||
.. testcode:: | |||
import megengine as mge | |||
import megengine.random as rand | |||
import megengine as mge | |||
import megengine.random as rand | |||
x = rand.gamma(shape=2, scale=1, size=(2, 2)) | |||
print(x.numpy()) | |||
x = rand.gamma(shape=2, scale=1, size=(2, 2)) | |||
print(x.numpy()) | |||
shape = mge.Tensor([[ 1], | |||
[10]], dtype="float32") | |||
scale = mge.Tensor([1,5], dtype="float32") | |||
shape = mge.Tensor([[ 1], | |||
[10]], dtype="float32") | |||
scale = mge.Tensor([1,5], dtype="float32") | |||
x = rand.gamma(shape=shape, scale=scale) | |||
print(x.numpy()) | |||
x = rand.gamma(shape=shape, scale=scale) | |||
print(x.numpy()) | |||
x = rand.gamma(shape=shape, scale=scale, size=2) | |||
print(x.numpy()) | |||
Outputs: | |||
.. testoutput:: | |||
:options: +SKIP | |||
[[1.5064533 4.0689363 ] | |||
[0.71639484 1.4551026 ]] | |||
x = rand.gamma(shape=shape, scale=scale, size=2) | |||
print(x.numpy()) | |||
Outputs: | |||
[[ 0.4352188 11.399335 ] | |||
[ 9.1888 52.009277 ]] | |||
.. testoutput:: | |||
:options: +SKIP | |||
[[[ 1.1726005 3.9654975 ] | |||
[13.656933 36.559006 ]] | |||
[[ 0.25848487 2.5540342 ] | |||
[11.960409 21.031536 ]]] | |||
[[1.5064533 4.0689363 ] | |||
[0.71639484 1.4551026 ]] | |||
[[ 0.4352188 11.399335 ] | |||
[ 9.1888 52.009277 ]] | |||
[[[ 1.1726005 3.9654975 ] | |||
[13.656933 36.559006 ]] | |||
[[ 0.25848487 2.5540342 ] | |||
[11.960409 21.031536 ]]] | |||
""" | |||
_seed = self._seed() if callable(self._seed) else self._seed | |||
return _gamma( | |||
@@ -415,155 +419,161 @@ class RNG: | |||
beta: Union[Tensor, float], | |||
size: Optional[Iterable[int]] = None, | |||
): | |||
r""" | |||
Random variable with Beta distribution :math:`\operatorname{Beta}(\alpha, \beta)`. | |||
r"""Random variable with Beta distribution :math:`\operatorname{Beta}(\alpha, \beta)`. | |||
The corresponding probability density function is | |||
.. math:: | |||
p(x)=\frac{1}{\mathrm{~B}(\alpha, \beta)} x^{\alpha-1}(1-x)^{\beta-1} | |||
p(x)=\frac{1}{\mathrm{~B}(\alpha, \beta)} x^{\alpha-1}(1-x)^{\beta-1} | |||
\quad \text { for } \alpha, \beta>0, | |||
where :math:`\mathrm{~B}(\alpha, \beta)` is the beta function, | |||
.. math:: | |||
\mathrm{~B}(\alpha, \beta)=\int_{0}^{1} t^{\alpha-1}(1-t)^{\beta-1} d t. | |||
:param alpha: the alpha parameter of the distribution. Must be non-negative. | |||
:param beta: the beta parameter of the distribution. Must be non-negative. | |||
:param size: the size of output tensor. If alpha and beta are scalars and given size is, e.g., | |||
`(m, n)`, then the output shape is `(m, n)`. If alpha or beta is a Tensor and given size | |||
is, e.g., `(m, n)`, then the output shape is `(m, n) + broadcast(alpha, beta).shape`. | |||
The broadcast rules are consistent with `numpy.broadcast`. Default: None | |||
:return: the output tensor. | |||
Args: | |||
alpha: the alpha parameter of the distribution. Must be non-negative. | |||
beta: the beta parameter of the distribution. Must be non-negative. | |||
size: the size of output tensor. If alpha and beta are scalars and given size is, e.g., | |||
`(m, n)`, then the output shape is `(m, n)`. If alpha or beta is a Tensor and given size | |||
is, e.g., `(m, n)`, then the output shape is `(m, n) + broadcast(alpha, beta).shape`. | |||
The broadcast rules are consistent with `numpy.broadcast`. Default: None | |||
Returns: | |||
the output tensor. | |||
Examples: | |||
.. testcode:: | |||
.. testcode:: | |||
import megengine as mge | |||
import megengine.random as rand | |||
import megengine as mge | |||
import megengine.random as rand | |||
x = rand.beta(alpha=2, beta=1, size=(2, 2)) | |||
print(x.numpy()) | |||
x = rand.beta(alpha=2, beta=1, size=(2, 2)) | |||
print(x.numpy()) | |||
alpha = mge.Tensor([[0.5], | |||
[ 3]], dtype="float32") | |||
beta = mge.Tensor([0.5,5], dtype="float32") | |||
alpha = mge.Tensor([[0.5], | |||
[ 3]], dtype="float32") | |||
beta = mge.Tensor([0.5,5], dtype="float32") | |||
x = rand.beta(alpha=alpha, beta=beta) | |||
print(x.numpy()) | |||
x = rand.beta(alpha=alpha, beta=beta) | |||
print(x.numpy()) | |||
x = rand.beta(alpha=alpha, beta=beta, size=2) | |||
print(x.numpy()) | |||
Outputs: | |||
.. testoutput:: | |||
:options: +SKIP | |||
[[0.582565 0.91763186] | |||
[0.86963767 0.6088103 ]] | |||
[[0.41503012 0.16438372] | |||
[0.90159506 0.47588003]] | |||
[[[0.55195075 0.01111084] | |||
[0.95298755 0.25048104]] | |||
[[0.11680304 0.13859665] | |||
[0.997879 0.43259275]]] | |||
x = rand.beta(alpha=alpha, beta=beta, size=2) | |||
print(x.numpy()) | |||
Outputs: | |||
.. testoutput:: | |||
:options: +SKIP | |||
[[0.582565 0.91763186] | |||
[0.86963767 0.6088103 ]] | |||
[[0.41503012 0.16438372] | |||
[0.90159506 0.47588003]] | |||
[[[0.55195075 0.01111084] | |||
[0.95298755 0.25048104]] | |||
[[0.11680304 0.13859665] | |||
[0.997879 0.43259275]]] | |||
""" | |||
_seed = self._seed() if callable(self._seed) else self._seed | |||
return _beta(alpha=alpha, beta=beta, size=size, seed=_seed, handle=self._handle) | |||
def poisson(self, lam: Union[float, Tensor], size: Optional[Iterable[int]] = None): | |||
r""" | |||
Random variable with poisson distribution :math:`\operatorname{Poisson}(\lambda)`. | |||
r"""Random variable with poisson distribution :math:`\operatorname{Poisson}(\lambda)`. | |||
The corresponding probability density function is | |||
.. math:: | |||
f(k ; \lambda)=\frac{\lambda^{k} e^{-\lambda}}{k !}, | |||
where k is the number of occurrences :math:`({\displaystyle k=0,1,2...})`. | |||
:param lam: the lambda parameter of the distribution. Must be non-negative. | |||
:param size: the size of output tensor. If lam is a scalar and given size is, e.g., `(m, n)`, | |||
then the output shape is `(m, n)`. If lam is a Tensor with shape `(k, v)` and given | |||
size is, e.g., `(m, n)`, then the output shape is `(m, n, k, v)`. Default: None. | |||
:return: the output tensor. | |||
Args: | |||
lam: the lambda parameter of the distribution. Must be non-negative. | |||
size: the size of output tensor. If lam is a scalar and given size is, e.g., `(m, n)`, | |||
then the output shape is `(m, n)`. If lam is a Tensor with shape `(k, v)` and given | |||
size is, e.g., `(m, n)`, then the output shape is `(m, n, k, v)`. Default: None. | |||
Returns: | |||
the output tensor. | |||
Examples: | |||
.. testcode:: | |||
.. testcode:: | |||
import megengine as mge | |||
import megengine.random as rand | |||
import megengine as mge | |||
import megengine.random as rand | |||
x = rand.poisson(lam=2., size=(1, 3)) | |||
print(x.numpy()) | |||
x = rand.poisson(lam=2., size=(1, 3)) | |||
print(x.numpy()) | |||
lam = mge.Tensor([[1.,1.], | |||
[10,10]], dtype="float32") | |||
lam = mge.Tensor([[1.,1.], | |||
[10,10]], dtype="float32") | |||
x = rand.poisson(lam=lam) | |||
print(x.numpy()) | |||
x = rand.poisson(lam=lam) | |||
print(x.numpy()) | |||
x = rand.poisson(lam=lam, size=(1,3)) | |||
print(x.numpy()) | |||
x = rand.poisson(lam=lam, size=(1,3)) | |||
print(x.numpy()) | |||
Outputs: | |||
.. testoutput:: | |||
:options: +SKIP | |||
Outputs: | |||
[[3. 1. 3.]] | |||
.. testoutput:: | |||
:options: +SKIP | |||
[[ 2. 2.] | |||
[12. 11.]] | |||
[[3. 1. 3.]] | |||
[[[[ 1. 1.] | |||
[11. 4.]] | |||
[[ 0. 0.] | |||
[ 9. 13.]] | |||
[[ 0. 1.] | |||
[ 7. 12.]]]] | |||
[[ 2. 2.] | |||
[12. 11.]] | |||
[[[[ 1. 1.] | |||
[11. 4.]] | |||
[[ 0. 0.] | |||
[ 9. 13.]] | |||
[[ 0. 1.] | |||
[ 7. 12.]]]] | |||
""" | |||
_seed = self._seed() if callable(self._seed) else self._seed | |||
return _poisson(lam=lam, size=size, seed=_seed, handle=self._handle) | |||
def permutation(self, n: int, *, dtype: str = "int32"): | |||
r""" | |||
Generates a random permutation of integers from :math:`0` to :math:`n - 1`. | |||
r"""Generates a random permutation of integers from :math:`0` to :math:`n - 1`. | |||
:param n: the upper bound. Must be larger than 0. | |||
:param dtype: the output data type. int32, int16 and float32 are | |||
supported. Default: int32 | |||
:return: the output tensor. | |||
Args: | |||
n: the upper bound. Must be larger than 0. | |||
dtype: the output data type. int32, int16 and float32 are supported. Default: int32 | |||
Returns: | |||
the output tensor. | |||
Examples: | |||
.. testcode:: | |||
.. testcode:: | |||
import megengine as mge | |||
import megengine.random as rand | |||
import megengine as mge | |||
import megengine.random as rand | |||
x = rand.permutation(n=10, dtype="int32") | |||
print(x.numpy()) | |||
x = rand.permutation(n=10, dtype="int32") | |||
print(x.numpy()) | |||
x = rand.permutation(n=10, dtype="float32") | |||
print(x.numpy()) | |||
Outputs: | |||
.. testoutput:: | |||
:options: +SKIP | |||
x = rand.permutation(n=10, dtype="float32") | |||
print(x.numpy()) | |||
Outputs: | |||
[4 5 0 7 3 8 6 1 9 2] | |||
[3. 4. 9. 0. 6. 8. 7. 1. 5. 2.] | |||
.. testoutput:: | |||
:options: +SKIP | |||
[4 5 0 7 3 8 6 1 9 2] | |||
[3. 4. 9. 0. 6. 8. 7. 1. 5. 2.] | |||
""" | |||
_seed = self._seed() if callable(self._seed) else self._seed | |||
return _permutation( | |||