@@ -17,7 +17,8 @@ def get_backwarding_grad_manager(): | |||
class GradManager: | |||
r"""GradManager manages auto differentiation and all resources required to perform it. | |||
r""" | |||
GradManager manages auto differentiation and all resources required to perform it. | |||
Our auto differentiation framework requires that the user explicitly indicates when | |||
the forward operations start and when all resources should be released. A typical usage of | |||
@@ -71,7 +72,8 @@ class GradManager: | |||
self._gradients = dict() | |||
def attach(self, params: list, callbacks=None): | |||
r"""Registers parameters that gradients should be calculated with respect to. | |||
r""" | |||
Registers parameters that gradients should be calculated with respect to. | |||
Callback Functions should have a signature like this: | |||
.. code-block:: | |||
@@ -99,7 +101,8 @@ class GradManager: | |||
return self | |||
def detach(self, params: list): | |||
r"""Remove specific registered parameters and callback functions. | |||
r""" | |||
Remove specific registered parameters and callback functions. | |||
:param params: registered parameters | |||
""" | |||
@@ -125,7 +128,8 @@ class GradManager: | |||
return self | |||
def backward(self, ys, dys=None): | |||
r"""Performs back-propagation and computes gradients. | |||
r""" | |||
Performs back-propagation and computes gradients. | |||
:param ys: outputs of forward operators, e.g., the loss tensor | |||
:param dys: derivatives of ys | |||
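For context, the attach/record/backward cycle described in these docstrings typically looks like the following minimal sketch. It assumes a module ``net``, an ``optimizer``, data tensors, and a ``loss_fn`` already exist (placeholder names, not part of this diff), and uses ``GradManager`` as a context manager; ``gm.record()``/``gm.release()`` are the explicit equivalents.

.. code-block:: python

    from megengine.autodiff import GradManager

    gm = GradManager().attach(net.parameters())  # register parameters to differentiate w.r.t.
    with gm:                                     # starts recording forward operations
        loss = loss_fn(net(data), label)
        gm.backward(loss)                        # back-propagates, then releases resources
    optimizer.step()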
@@ -165,7 +169,8 @@ class GradManager: | |||
backwarding_grad_manager = cache | |||
def record(self): | |||
r"""Starts recording forward operations. | |||
r""" | |||
Starts recording forward operations. | |||
""" | |||
if self._recording: | |||
raise RuntimeError("already recording") | |||
@@ -190,7 +195,8 @@ class GradManager: | |||
self._grad.wrt(param_wrapper, callback=callback) | |||
def release(self): | |||
r"""Stops recording and releases resources for gradients calculation. | |||
r""" | |||
Stops recording and releases resources for gradients calculation. | |||
""" | |||
if self._grad is not None: | |||
self._grad.__exit__(None, None, None) | |||
@@ -15,7 +15,8 @@ if os.environ.get("MEGENGINE_USE_SYMBOLIC_SHAPE"): | |||
def use_symbolic_shape() -> bool: | |||
"""Returns whether tensor.shape returns a tensor instead of a tuple | |||
""" | |||
Returns whether tensor.shape returns a tensor instead of a tuple | |||
""" | |||
return _use_symbolic_shape | |||
@@ -78,7 +78,8 @@ class auto: | |||
class _EnumDict(dict): | |||
"""Track enum member order and ensure member names are not reused. | |||
""" | |||
Track enum member order and ensure member names are not reused. | |||
EnumMeta will use the names found in self._member_names as the | |||
enumeration member names. | |||
@@ -91,7 +92,8 @@ class _EnumDict(dict): | |||
self._last_values = [] | |||
def __setitem__(self, key, value): | |||
"""Changes anything not dundered or not a descriptor. | |||
""" | |||
Changes anything not dundered or not a descriptor. | |||
If an enum member name is used twice, an error is raised; duplicate | |||
values are not checked for. | |||
@@ -303,7 +305,8 @@ class EnumMeta(type): | |||
def __call__( | |||
cls, value, names=None, *, module=None, qualname=None, type=None, start=1 | |||
): | |||
"""Either returns an existing member, or creates a new enum class. | |||
""" | |||
Either returns an existing member, or creates a new enum class. | |||
This method is used both when an enum class is given a value to match | |||
to an enumeration member (i.e. Color(3)) and for the functional API | |||
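The two call forms mentioned here behave as in the standard-library ``enum`` module (which this file vendors); a small illustration:

.. code-block:: python

    from enum import Enum

    class Color(Enum):
        RED = 1
        GREEN = 2
        BLUE = 3

    assert Color(3) is Color.BLUE          # value lookup on an existing enum
    Animal = Enum("Animal", "CAT DOG")     # functional API: creates a new enum class
    assert Animal.CAT.value == 1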
@@ -353,7 +356,8 @@ class EnumMeta(type): | |||
] + self._member_names_ | |||
def __getattr__(cls, name): | |||
"""Return the enum member matching `name` | |||
""" | |||
Return the enum member matching `name` | |||
We use __getattr__ instead of descriptors or inserting into the enum | |||
class' __dict__ in order to support `name` and `value` being both | |||
@@ -379,7 +383,8 @@ class EnumMeta(type): | |||
@property | |||
def __members__(cls): | |||
"""Returns a mapping of member name->value. | |||
""" | |||
Returns a mapping of member name->value. | |||
This mapping lists all enum members, including aliases. Note that this | |||
is a read-only view of the internal mapping. | |||
@@ -394,7 +399,8 @@ class EnumMeta(type): | |||
return (cls._member_map_[name] for name in reversed(cls._member_names_)) | |||
def __setattr__(cls, name, value): | |||
"""Block attempts to reassign Enum members. | |||
""" | |||
Block attempts to reassign Enum members. | |||
A simple assignment to the class namespace only changes one of the | |||
several possible ways to get an Enum member from the Enum class, | |||
@@ -409,7 +415,8 @@ class EnumMeta(type): | |||
def _create_( | |||
cls, class_name, names=None, *, module=None, qualname=None, type=None, start=1 | |||
): | |||
"""Convenience method to create a new Enum class. | |||
""" | |||
Convenience method to create a new Enum class. | |||
`names` can be: | |||
@@ -465,7 +472,8 @@ class EnumMeta(type): | |||
@staticmethod | |||
def _get_mixins_(bases): | |||
"""Returns the type for creating enum members, and the first inherited | |||
""" | |||
Returns the type for creating enum members, and the first inherited | |||
enum class. | |||
bases: the tuple of bases that was given to __new__ | |||
@@ -510,7 +518,8 @@ class EnumMeta(type): | |||
@staticmethod | |||
def _find_new_(classdict, member_type, first_enum): | |||
"""Returns the __new__ to be used for creating the enum members. | |||
""" | |||
Returns the __new__ to be used for creating the enum members. | |||
classdict: the class dictionary given to __new__ | |||
member_type: the data type whose __new__ will be used by default | |||
@@ -556,7 +565,8 @@ class EnumMeta(type): | |||
class Enum(metaclass=EnumMeta): | |||
"""Generic enumeration. | |||
""" | |||
Generic enumeration. | |||
Derive from this class to define new enumerations. | |||
@@ -188,7 +188,8 @@ class OpNode: | |||
def optimize_for_inference(dest_vars, **kwargs): | |||
r"""Applies optimize_for_inference pass for computing graph. | |||
r""" | |||
Applies optimize_for_inference pass for computing graph. | |||
:param dest_vars: list of output vars in the computing graph | |||
@@ -287,7 +288,8 @@ def dump_graph( | |||
strip_info_file=None, | |||
append_json=False | |||
): | |||
"""serialize the computing graph of `output_vars` and get byte result. | |||
""" | |||
serialize the computing graph of `output_vars` and get byte result. | |||
:param output_vars: output variables which are the graph's end point. | |||
@@ -385,7 +387,8 @@ CompGraphLoadResult = collections.namedtuple( | |||
def load_graph(fpath): | |||
"""Load a serialized computing graph from file. | |||
""" | |||
Load a serialized computing graph from file. | |||
:param fpath: Path or Handle of the input file | |||
:return: An instance of namedtuple :class:`CompGraphLoadResult`, | |||
@@ -69,7 +69,8 @@ def ambiguity_warn(dispatcher, ambiguities): | |||
def variadic_signature_matches_iter(types, full_signature): | |||
"""Check if a set of input types matches a variadic signature. | |||
""" | |||
Check if a set of input types matches a variadic signature. | |||
Notes | |||
----- | |||
@@ -288,7 +289,8 @@ class Dispatcher(CDispatcher): | |||
__repr__ = __str__ | |||
def dispatch(self, *types): | |||
"""Deterimine appropriate implementation for this type signature | |||
""" | |||
Deterimine appropriate implementation for this type signature | |||
This method is internal. Users should call this object as a function. | |||
Implementation resolution occurs within the ``__call__`` method. | |||
@@ -110,7 +110,8 @@ def _toposort(edges): | |||
def reverse_dict(d): | |||
"""Reverses direction of dependence dict | |||
""" | |||
Reverses direction of dependence dict | |||
>>> d = {'a': (1, 2), 'b': (2, 3), 'c':()} | |||
>>> reverse_dict(d) # doctest: +SKIP | |||
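For reference, a minimal implementation consistent with the doctest above (the helper in this module may differ in details such as ordering):

.. code-block:: python

    def reverse_dict(d):
        """Reverse a dependence dict: map each value back to the keys that list it."""
        result = {}
        for key, vals in d.items():
            for val in vals:
                result[val] = result.get(val, ()) + (key,)
        return result

    # reverse_dict({'a': (1, 2), 'b': (2, 3), 'c': ()})
    # -> {1: ('a',), 2: ('a', 'b'), 3: ('b',)}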
@@ -156,7 +157,8 @@ def groupby(func, seq): | |||
def typename(type): | |||
"""Get the name of `type`. | |||
""" | |||
Get the name of `type`. | |||
Parameters | |||
---------- | |||
@@ -72,7 +72,8 @@ class VariadicSignatureType(type): | |||
def isvariadic(obj): | |||
"""Check whether the type `obj` is variadic. | |||
""" | |||
Check whether the type `obj` is variadic. | |||
Parameters | |||
---------- | |||
@@ -95,7 +96,8 @@ def isvariadic(obj): | |||
class VariadicSignatureMeta(type): | |||
"""A metaclass that overrides ``__getitem__`` on the class. This is used to | |||
""" | |||
A metaclass that overrides ``__getitem__`` on the class. This is used to | |||
generate a new type for Variadic signatures. See the Variadic class for | |||
examples of how this behaves. | |||
""" | |||
@@ -117,7 +119,8 @@ class VariadicSignatureMeta(type): | |||
class Variadic(metaclass=VariadicSignatureMeta): | |||
"""A class whose getitem method can be used to generate a new type | |||
""" | |||
A class whose getitem method can be used to generate a new type | |||
representing a specific variadic signature. | |||
Examples | |||
@@ -389,7 +389,8 @@ class ArrayMethodMixin(abc.ABC): | |||
return self.reshape(-1) | |||
def sum(self, axis=None, keepdims: bool = False): | |||
r"""Returns the sum of each row of the input tensor in the given dimension ``axis``. | |||
r""" | |||
Returns the sum of each row of the input tensor in the given dimension ``axis``. | |||
If ``axis`` is a list of axes, reduce over all of them.
If ``keepdims`` is ``True``, the shape of the output tensor is the same as the input tensor, except in the dimension(s) ``axis`` where it is of size 1. Otherwise, ``axis`` is squeezed (see :meth:`~.functional.tensor.squeeze`).
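A small sketch of the ``axis``/``keepdims`` behaviour described above:

.. code-block:: python

    import numpy as np
    from megengine import tensor

    x = tensor(np.arange(6, dtype="float32").reshape(2, 3))
    print(x.sum(axis=1).numpy())                  # [ 3. 12.], shape (2,)
    print(x.sum(axis=1, keepdims=True).numpy())   # [[ 3.] [12.]], shape (2, 1)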
@@ -59,7 +59,8 @@ class _PlasmaStoreManager: | |||
class PlasmaShmQueue: | |||
def __init__(self, maxsize: int = 0): | |||
r"""Use pyarrow in-memory plasma store to implement shared memory queue. | |||
r""" | |||
Use pyarrow in-memory plasma store to implement shared memory queue. | |||
Compared to the native `multiprocessing.Queue`, `PlasmaShmQueue` avoids pickle/unpickle
and communication overhead, leading to better performance in multi-process | |||
@@ -42,7 +42,8 @@ class DataLoader: | |||
timeout: int = 0, | |||
divide: bool = False, | |||
): | |||
r"""Provides a convenient way to iterate on a given dataset. | |||
r""" | |||
Provides a convenient way to iterate on a given dataset. | |||
`DataLoader` combines a dataset with `sampler`, `transform` and `collator`, | |||
making it flexible to get minibatches continually from a dataset.
@@ -23,7 +23,8 @@ from .meta_vision import VisionDataset | |||
class Cityscapes(VisionDataset): | |||
r"""`Cityscapes <http://www.cityscapes-dataset.com/>`_ Dataset. | |||
r""" | |||
`Cityscapes <http://www.cityscapes-dataset.com/>`_ Dataset. | |||
""" | |||
supported_order = ( | |||
@@ -46,7 +46,8 @@ def has_valid_annotation(anno, order): | |||
class COCO(VisionDataset): | |||
r"""`MS COCO <http://cocodataset.org/#home>`_ Dataset. | |||
r""" | |||
`MS COCO <http://cocodataset.org/#home>`_ Dataset. | |||
""" | |||
supported_order = ( | |||
@@ -23,7 +23,8 @@ from .meta_vision import VisionDataset | |||
class Objects365(VisionDataset): | |||
r"""`Objects365 <https://www.objects365.org/overview.html>`_ Dataset. | |||
r""" | |||
`Objects365 <https://www.objects365.org/overview.html>`_ Dataset. | |||
""" | |||
supported_order = ( | |||
@@ -24,7 +24,8 @@ from .meta_vision import VisionDataset | |||
class PascalVOC(VisionDataset): | |||
r"""`Pascal VOC <http://host.robots.ox.ac.uk/pascal/VOC/>`_ Dataset. | |||
r""" | |||
`Pascal VOC <http://host.robots.ox.ac.uk/pascal/VOC/>`_ Dataset. | |||
""" | |||
supported_order = ( | |||
@@ -154,7 +154,8 @@ class VisionTransform(Transform): | |||
class ToMode(VisionTransform): | |||
r"""Change input data to a target mode. | |||
r""" | |||
Change input data to a target mode. | |||
For example, most transforms use HWC mode image, | |||
while the neural network might use CHW mode input tensor. | |||
@@ -301,7 +302,8 @@ class TorchTransformCompose(VisionTransform): | |||
class Pad(VisionTransform): | |||
r"""Pad the input data. | |||
r""" | |||
Pad the input data. | |||
:param size: padding size of input image, it could be integer or sequence. | |||
If it is an integer, the input image will be padded in four directions. | |||
@@ -348,7 +350,8 @@ class Pad(VisionTransform): | |||
class Resize(VisionTransform): | |||
r"""Resize the input data. | |||
r""" | |||
Resize the input data. | |||
:param output_size: target size of image, with (height, width) shape. | |||
:param interpolation: interpolation method. All methods are listed below: | |||
@@ -474,7 +477,8 @@ class ShortestEdgeResize(VisionTransform): | |||
class RandomResize(VisionTransform): | |||
r"""Resize the input data randomly. | |||
r""" | |||
Resize the input data randomly. | |||
:param scale_range: range of scaling. | |||
:param order: the same with :class:`VisionTransform`. | |||
@@ -518,7 +522,8 @@ class RandomResize(VisionTransform): | |||
class RandomCrop(VisionTransform): | |||
r"""Crop the input data randomly. Before applying the crop transform, | |||
r""" | |||
Crop the input data randomly. Before applying the crop transform, | |||
pad the image first. If the target size is still bigger than the size of the
padded image, pad the image to the target size.
@@ -575,7 +580,8 @@ class RandomCrop(VisionTransform): | |||
class RandomResizedCrop(VisionTransform): | |||
r"""Crop the input data to random size and aspect ratio. | |||
r""" | |||
Crop the input data to random size and aspect ratio. | |||
A crop of random size (default: of 0.08 to 1.0) of the original size and a random | |||
aspect ratio (default: of 3/4 to 1.33) of the original aspect ratio is made. | |||
After applying the crop transform, the input data will be resized to the given size.
@@ -664,7 +670,8 @@ class RandomResizedCrop(VisionTransform): | |||
class CenterCrop(VisionTransform): | |||
r"""Crops the given the input data at the center. | |||
r""" | |||
Crops the given the input data at the center. | |||
:param output_size: target size of output image, with (height, width) shape. | |||
:param order: the same with :class:`VisionTransform`. | |||
@@ -707,7 +714,8 @@ class CenterCrop(VisionTransform): | |||
class RandomHorizontalFlip(VisionTransform): | |||
r"""Horizontally flip the input data randomly with a given probability. | |||
r""" | |||
Horizontally flip the input data randomly with a given probability. | |||
:param p: probability of the input data being flipped. Default: 0.5 | |||
:param order: the same with :class:`VisionTransform`. | |||
@@ -739,7 +747,8 @@ class RandomHorizontalFlip(VisionTransform): | |||
class RandomVerticalFlip(VisionTransform): | |||
r"""Vertically flip the input data randomly with a given probability. | |||
r""" | |||
Vertically flip the input data randomly with a given probability. | |||
:param p: probability of the input data being flipped. Default: 0.5 | |||
:param order: the same with :class:`VisionTransform`. | |||
@@ -771,7 +780,8 @@ class RandomVerticalFlip(VisionTransform): | |||
class Normalize(VisionTransform): | |||
r"""Normalize the input data with mean and standard deviation. | |||
r""" | |||
Normalize the input data with mean and standard deviation. | |||
Given mean: ``(M1,...,Mn)`` and std: ``(S1,..,Sn)`` for ``n`` channels, | |||
this transform will normalize each channel of the input data. | |||
``output[channel] = (input[channel] - mean[channel]) / std[channel]`` | |||
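The per-channel formula above amounts to the following NumPy arithmetic (a sketch only; the actual transform also handles the ``order`` bookkeeping of ``VisionTransform``):

.. code-block:: python

    import numpy as np

    def normalize_image(img, mean, std):
        # img is an HWC float array; mean/std hold one value per channel.
        return (img - np.asarray(mean)) / np.asarray(std)

    img = np.random.rand(4, 4, 3).astype("float32")
    out = normalize_image(img, mean=[0.5, 0.5, 0.5], std=[0.25, 0.25, 0.25])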
@@ -797,7 +807,8 @@ class Normalize(VisionTransform): | |||
class GaussianNoise(VisionTransform): | |||
r"""Add random gaussian noise to the input data. | |||
r""" | |||
Add random gaussian noise to the input data. | |||
Gaussian noise is generated with given mean and std. | |||
:param mean: Gaussian mean used to generate noise. | |||
@@ -824,7 +835,8 @@ class GaussianNoise(VisionTransform): | |||
class BrightnessTransform(VisionTransform): | |||
r"""Adjust brightness of the input data. | |||
r""" | |||
Adjust brightness of the input data. | |||
:param value: how much to adjust the brightness. Can be any | |||
non-negative number. 0 gives the original image.
@@ -855,7 +867,8 @@ class BrightnessTransform(VisionTransform): | |||
class ContrastTransform(VisionTransform): | |||
r"""Adjust contrast of the input data. | |||
r""" | |||
Adjust contrast of the input data. | |||
:param value: how much to adjust the contrast. Can be any | |||
non-negative number. 0 gives the original image.
@@ -886,7 +899,8 @@ class ContrastTransform(VisionTransform): | |||
class SaturationTransform(VisionTransform): | |||
r"""Adjust saturation of the input data. | |||
r""" | |||
Adjust saturation of the input data. | |||
:param value: how much to adjust the saturation. Can be any | |||
non-negative number. 0 gives the original image.
@@ -917,7 +931,8 @@ class SaturationTransform(VisionTransform): | |||
class HueTransform(VisionTransform): | |||
r"""Adjust hue of the input data. | |||
r""" | |||
Adjust hue of the input data. | |||
:param value: how much to adjust the hue. Can be any number | |||
between 0 and 0.5, 0 gives the original image. | |||
@@ -955,7 +970,8 @@ class HueTransform(VisionTransform): | |||
class ColorJitter(VisionTransform): | |||
r"""Randomly change the brightness, contrast, saturation and hue of an image. | |||
r""" | |||
Randomly change the brightness, contrast, saturation and hue of an image. | |||
:param brightness: how much to jitter brightness. | |||
Chosen uniformly from [max(0, 1 - brightness), 1 + brightness] | |||
@@ -40,7 +40,8 @@ def _str2device_type(type_str: str, allow_unspec: bool = True): | |||
def get_device_count(device_type: str) -> int: | |||
"""Gets number of devices installed on this system. | |||
""" | |||
Gets number of devices installed on this system. | |||
:param device_type: device type, one of 'gpu' or 'cpu' | |||
""" | |||
@@ -54,7 +55,8 @@ def get_device_count(device_type: str) -> int: | |||
def is_cuda_available() -> bool: | |||
"""Returns whether cuda device is available on this system. | |||
""" | |||
Returns whether cuda device is available on this system. | |||
""" | |||
t = _str2device_type("gpu") | |||
@@ -62,7 +64,8 @@ def is_cuda_available() -> bool: | |||
def set_default_device(device: str = "xpux"): | |||
r"""Sets default computing node. | |||
r""" | |||
Sets default computing node. | |||
:param device: default device type. The type can be 'cpu0', 'cpu1', etc., | |||
or 'gpu0', 'gpu1', etc., to specify the particular cpu or gpu to use. | |||
@@ -81,7 +84,8 @@ def set_default_device(device: str = "xpux"): | |||
def get_default_device() -> str: | |||
r"""Gets default computing node. | |||
r""" | |||
Gets default computing node. | |||
It returns the value set by :func:`~.set_default_device`. | |||
""" | |||
@@ -98,7 +102,8 @@ def set_prealloc_config( | |||
growth_factor=2.0, | |||
device_type=DeviceType.CUDA, | |||
): | |||
"""Specifies how to pre-allocate from raw device allocator. | |||
""" | |||
Specifies how to pre-allocate from raw device allocator. | |||
:param alignment: specifies the alignment in bytes. | |||
:param min_req: min request size in bytes. | |||
@@ -123,7 +123,8 @@ def collective_comm(inp, mode, group, device): | |||
def reduce_sum( | |||
inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" | |||
) -> Tensor: | |||
"""Create reduce_sum operator for collective communication. | |||
""" | |||
Create reduce_sum operator for collective communication. | |||
:param inp: input tensor. | |||
:param group: communication group. | |||
@@ -136,7 +137,8 @@ def reduce_sum( | |||
def broadcast( | |||
inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" | |||
) -> Tensor: | |||
"""Create broadcast operator for collective communication. | |||
""" | |||
Create broadcast operator for collective communication. | |||
:param inp: input tensor. | |||
:param group: communication group. | |||
@@ -149,7 +151,8 @@ def broadcast( | |||
def all_gather( | |||
inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" | |||
) -> Tensor: | |||
"""Create all_gather operator for collective communication. | |||
""" | |||
Create all_gather operator for collective communication. | |||
:param inp: input tensor. | |||
:param group: communication group. | |||
@@ -162,7 +165,8 @@ def all_gather( | |||
def reduce_scatter_sum( | |||
inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" | |||
) -> Tensor: | |||
"""Create reduce_scatter_sum operator for collective communication. | |||
""" | |||
Create reduce_scatter_sum operator for collective communication. | |||
:param inp: input tensor. | |||
:param group: communication group. | |||
@@ -175,7 +179,8 @@ def reduce_scatter_sum( | |||
def all_reduce_sum( | |||
inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" | |||
) -> Tensor: | |||
"""Create all_reduce_sum operator for collective communication. | |||
""" | |||
Create all_reduce_sum operator for collective communication. | |||
:param inp: input tensor. | |||
:param group: communication group. | |||
@@ -188,7 +193,8 @@ def all_reduce_sum( | |||
def all_reduce_max( | |||
inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" | |||
) -> Tensor: | |||
"""Create all_reduce_max operator for collective communication. | |||
""" | |||
Create all_reduce_max operator for collective communication. | |||
:param inp: input tensor. | |||
:param group: communication group. | |||
@@ -201,7 +207,8 @@ def all_reduce_max( | |||
def all_reduce_min( | |||
inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" | |||
) -> Tensor: | |||
"""Create all_reduce_min operator for collective communication. | |||
""" | |||
Create all_reduce_min operator for collective communication. | |||
:param inp: input tensor. | |||
:param group: communication group. | |||
@@ -214,7 +221,8 @@ def all_reduce_min( | |||
def gather( | |||
inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" | |||
) -> Tensor: | |||
"""Create gather operator for collective communication. | |||
""" | |||
Create gather operator for collective communication. | |||
:param inp: input tensor. | |||
:param group: communication group. | |||
@@ -227,7 +235,8 @@ def gather( | |||
def scatter( | |||
inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" | |||
) -> Tensor: | |||
"""Create scatter operator for collective communication. | |||
""" | |||
Create scatter operator for collective communication. | |||
:param inp: input tensor. | |||
:param group: communication group. | |||
@@ -240,7 +249,8 @@ def scatter( | |||
def all_to_all( | |||
inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" | |||
) -> Tensor: | |||
"""Create all_to_all operator for collective communication. | |||
""" | |||
Create all_to_all operator for collective communication. | |||
:param inp: input tensor. | |||
:param group: communication group. | |||
@@ -251,7 +261,8 @@ def all_to_all( | |||
def remote_send(inp: Tensor, dest_rank: int) -> Tensor: | |||
"""Send a Tensor to a remote process. | |||
""" | |||
Send a Tensor to a remote process. | |||
:param inp: tensor to send. | |||
:param dest_rank: destination process rank. | |||
@@ -266,7 +277,8 @@ def remote_send(inp: Tensor, dest_rank: int) -> Tensor: | |||
def remote_recv( | |||
src_rank: int, shape: Tuple[int], dtype: type, device: Optional[str] = None | |||
) -> Tensor: | |||
"""Receive a Tensor from a remote process. | |||
""" | |||
Receive a Tensor from a remote process. | |||
:param src_rank: source process rank. | |||
:param shape: the shape of the tensor to receive. | |||
@@ -81,7 +81,8 @@ def init_process_group( | |||
device: int, | |||
backend: Optional[str] = "nccl", | |||
) -> None: | |||
"""Initialize the distributed process group and specify the device used in the current process | |||
""" | |||
Initialize the distributed process group and specify the device used in the current process | |||
:param master_ip: ip address of the master node. | |||
:param port: port available for all processes to communicate. | |||
@@ -140,7 +140,8 @@ class TensorFuture(Future): | |||
def synchronized(func: Callable): | |||
"""Decorator. Decorated function will synchronize when finished. | |||
""" | |||
Decorator. Decorated function will synchronize when finished. | |||
Specifically, we use this to prevent data races during hub.load"""
@functools.wraps(func) | |||
@@ -161,7 +162,8 @@ def _get_device_count_worker(queue, device_type): | |||
def get_device_count_by_fork(device_type: str): | |||
"""Get device count in fork thread. | |||
""" | |||
Get device count in fork thread. | |||
See https://stackoverflow.com/questions/22950047/cuda-initialization-error-after-fork | |||
for more information. | |||
""" | |||
@@ -173,7 +175,8 @@ def get_device_count_by_fork(device_type: str): | |||
def bcast_list_(inps: list, group: Group = WORLD): | |||
"""Broadcast tensors between given group. | |||
""" | |||
Broadcast tensors between given group. | |||
:param inps: input tensors. | |||
:param group: communication group. | |||
@@ -183,7 +186,8 @@ def bcast_list_(inps: list, group: Group = WORLD): | |||
class AllreduceCallback: | |||
"""Allreduce Callback with tensor fusion optimization. | |||
""" | |||
Allreduce Callback with tensor fusion optimization. | |||
:param reduce_method: the method to reduce gradients.
:param group: communication group. | |||
@@ -21,7 +21,8 @@ from .util import get_free_ports | |||
class Methods: | |||
"""Distributed Server Method. | |||
""" | |||
Distributed Server Method. | |||
Used for exchanging information between distributed nodes.
:param mm_server_port: multiple machine rpc server port. | |||
@@ -45,7 +46,8 @@ class Methods: | |||
return self.mm_server_port | |||
def set_is_grad(self, key, is_grad): | |||
"""Mark send/recv need gradiants by key. | |||
""" | |||
Mark send/recv need gradiants by key. | |||
:param key: key to match send/recv op. | |||
:param is_grad: whether this op need grad. | |||
@@ -56,7 +58,8 @@ class Methods: | |||
return True | |||
def check_is_grad(self, key): | |||
"""Check whether send/recv need gradiants. | |||
""" | |||
Check whether send/recv need gradiants. | |||
:param key: key to match send/recv op. | |||
""" | |||
@@ -68,7 +71,8 @@ class Methods: | |||
return ret | |||
def set_remote_tracer(self, key, tracer_set): | |||
"""Set tracer dict for tracing send/recv op. | |||
""" | |||
Set tracer dict for tracing send/recv op. | |||
:param key: key to match send/recv op. | |||
:param tracer_set: valid tracer set. | |||
@@ -79,7 +83,8 @@ class Methods: | |||
return True | |||
def check_remote_tracer(self, key): | |||
"""Get tracer dict for send/recv op. | |||
""" | |||
Get tracer dict for send/recv op. | |||
:param key: key to match send/recv op. | |||
""" | |||
@@ -91,7 +96,8 @@ class Methods: | |||
return ret | |||
def group_barrier(self, key, size): | |||
"""A barrier wait for all group member. | |||
""" | |||
A barrier wait for all group member. | |||
:param key: group key to match each other. | |||
:param size: group size. | |||
@@ -114,7 +120,8 @@ class ThreadXMLRPCServer(ThreadingMixIn, SimpleXMLRPCServer): | |||
def start_server(py_server_port, mm_server_port): | |||
"""Start python distributed server and multiple machine server. | |||
""" | |||
Start python distributed server and multiple machine server. | |||
:param py_server_port: python server port. | |||
:param mm_server_port: multiple machine server port. | |||
@@ -125,7 +132,8 @@ def start_server(py_server_port, mm_server_port): | |||
class Server: | |||
"""Distributed Server for distributed training. | |||
""" | |||
Distributed Server for distributed training. | |||
Should be running at master node. | |||
:param port: python server port. | |||
@@ -143,7 +151,8 @@ class Server: | |||
class Client: | |||
"""Distributed Client for distributed training. | |||
""" | |||
Distributed Client for distributed training. | |||
:param master_ip: ip address of master node. | |||
:param port: port of server at master node. | |||
@@ -171,7 +180,8 @@ class Client: | |||
return self.proxy.get_mm_server_port() | |||
def set_is_grad(self, key, is_grad): | |||
"""Mark send/recv need gradiants by key. | |||
""" | |||
Mark send/recv need gradiants by key. | |||
:param key: key to match send/recv op. | |||
:param is_grad: whether this op need grad. | |||
@@ -179,14 +189,16 @@ class Client: | |||
self.proxy.set_is_grad(key, is_grad) | |||
def check_is_grad(self, key): | |||
"""Check whether send/recv need gradiants. | |||
""" | |||
Check whether send/recv need gradiants. | |||
:param key: key to match send/recv op. | |||
""" | |||
return self.proxy.check_is_grad(key) | |||
def set_remote_tracer(self, key, tracer_set): | |||
"""Set tracer dict for tracing send/recv op. | |||
""" | |||
Set tracer dict for tracing send/recv op. | |||
:param key: key to match send/recv op. | |||
:param tracer_set: valid tracer set. | |||
@@ -194,14 +206,16 @@ class Client: | |||
self.proxy.set_remote_tracer(key, tracer_set) | |||
def check_remote_tracer(self, key): | |||
"""Get tracer dict for send/recv op. | |||
""" | |||
Get tracer dict for send/recv op. | |||
:param key: key to match send/recv op. | |||
""" | |||
return self.proxy.check_remote_tracer(key) | |||
def group_barrier(self, key, size): | |||
"""A barrier wait for all group member. | |||
""" | |||
A barrier wait for all group member. | |||
:param key: group key to match each other. | |||
:param size: group size. | |||
@@ -12,7 +12,8 @@ from typing import List | |||
def get_free_ports(num: int) -> List[int]: | |||
"""Get one or more free ports. | |||
""" | |||
Get one or more free ports. | |||
""" | |||
socks, ports = [], [] | |||
for i in range(num): | |||
@@ -12,7 +12,8 @@ _conv_execution_strategy = os.getenv("MEGENGINE_CONV_EXECUTION_STRATEGY", "HEURI | |||
def get_conv_execution_strategy() -> str: | |||
"""Returns the execuation strategy of :class:`~.Conv2d`. | |||
""" | |||
Returns the execuation strategy of :class:`~.Conv2d`. | |||
See :func:`~.set_conv_execution_strategy` for possible return values | |||
""" | |||
@@ -20,7 +21,8 @@ def get_conv_execution_strategy() -> str: | |||
def set_conv_execution_strategy(option: str): | |||
"""Sets the execuation strategy of :class:`~.Conv2d`. | |||
""" | |||
Sets the execuation strategy of :class:`~.Conv2d`. | |||
:param option: Decides how :class:`~.Conv2d` algorithm is chosen. | |||
Available values: | |||
@@ -100,7 +100,8 @@ def _elemwise_multi_type(*args, mode, **kwargs): | |||
def add(x, y): | |||
"""Element-wise `addition`. | |||
""" | |||
Element-wise `addition`. | |||
At least one operand should be a tensor.
Same for sub/mul/div/floor_div/pow/mod/atan2/equal/not_equal/less/less_equal/greater/greater_equal/maximum/minimum.
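A minimal usage sketch of the element-wise functions listed above:

.. code-block:: python

    import numpy as np
    import megengine.functional as F
    from megengine import tensor

    x = tensor(np.array([1.0, 2.0, 3.0], dtype="float32"))
    print(F.add(x, 1).numpy())    # tensor + scalar -> [2. 3. 4.]
    print(F.add(x, x).numpy())    # tensor + tensor -> [2. 4. 6.]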
@@ -193,7 +194,8 @@ def log1p(x): | |||
def sqrt(x: Tensor) -> Tensor: | |||
"""Element-wise `sqrt`. | |||
""" | |||
Element-wise `sqrt`. | |||
Returns ``NaN`` for negative input value. | |||
:param x: input tensor. | |||
@@ -209,7 +211,7 @@ def sqrt(x: Tensor) -> Tensor: | |||
x = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) | |||
out = F.sqrt(x) | |||
print(out.numpy()) | |||
print(out.numpy().round(decimals=4)) | |||
Outputs: | |||
@@ -239,7 +241,7 @@ def square(x: Tensor) -> Tensor: | |||
data = mge.tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) | |||
out = F.square(data) | |||
print(out.numpy()) | |||
print(out.numpy().round(decimals=4)) | |||
Outputs: | |||
@@ -281,7 +283,8 @@ def minimum(x, y): | |||
def cos(x): | |||
"""Element-wise `cosine`. | |||
""" | |||
Element-wise `cosine`. | |||
:param x: input tensor. | |||
:return: computed tensor. | |||
@@ -296,7 +299,7 @@ def cos(x): | |||
x = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) | |||
out = F.cos(x) | |||
print(out.numpy()) | |||
print(out.numpy().round(decimals=4)) | |||
Outputs: | |||
@@ -374,7 +377,8 @@ def atanh(x): | |||
def left_shift(x, y): | |||
"""Element-wise `bitwise binary: x << y`. | |||
""" | |||
Element-wise `bitwise binary: x << y`. | |||
:param x: input tensor, should be int. | |||
:param y: how many bits to be left-shifted. | |||
@@ -435,7 +439,8 @@ def logical_xor(x, y): | |||
def equal(x, y): | |||
"""Element-wise `(x == y)`. | |||
""" | |||
Element-wise `(x == y)`. | |||
:param x: input tensor 1. | |||
:param y: input tensor 2. | |||
@@ -494,7 +499,8 @@ def greater_equal(x, y): | |||
def hswish(x): | |||
"""Element-wise `x * relu6(x + 3) / 6`. | |||
""" | |||
Element-wise `x * relu6(x + 3) / 6`. | |||
:param x: input tensor. | |||
:return: computed tensor. | |||
@@ -509,7 +515,7 @@ def hswish(x): | |||
x = tensor(np.arange(5).astype(np.float32)) | |||
out = F.hswish(x) | |||
print(out.numpy()) | |||
print(out.numpy().round(decimals=4)) | |||
.. testoutput:: | |||
@@ -540,7 +546,8 @@ def sigmoid(x): | |||
def clip(x: Tensor, lower=None, upper=None) -> Tensor: | |||
r"""Clamps all elements in input tensor into the range `[` :attr:`lower`, :attr:`upper` `]` and returns | |||
r""" | |||
Clamps all elements in input tensor into the range `[` :attr:`lower`, :attr:`upper` `]` and returns | |||
a resulting tensor: | |||
.. math:: | |||
@@ -24,7 +24,8 @@ __all__ = [ | |||
def l1_loss(pred: Tensor, label: Tensor) -> Tensor: | |||
r"""Calculates the mean absolute error (MAE) between | |||
r""" | |||
Calculates the mean absolute error (MAE) between | |||
each element in the pred :math:`x` and label :math:`y`. | |||
The mean absolute error can be described as: | |||
@@ -70,7 +71,8 @@ def l1_loss(pred: Tensor, label: Tensor) -> Tensor: | |||
def square_loss(pred: Tensor, label: Tensor) -> Tensor: | |||
r"""Calculates the mean squared error (squared L2 norm) between | |||
r""" | |||
Calculates the mean squared error (squared L2 norm) between | |||
each element in the pred :math:`x` and label :math:`y`. | |||
The mean squared error can be described as: | |||
@@ -127,7 +129,8 @@ def cross_entropy( | |||
with_logits: bool = True, | |||
label_smooth: float = 0, | |||
) -> Tensor: | |||
r"""Computes the multi-class cross entropy loss (using logits by default). | |||
r""" | |||
Computes the multi-class cross entropy loss (using logits by default). | |||
By default (``with_logits`` is True), ``pred`` is assumed to be logits,
class probabilities are given by softmax. | |||
@@ -161,7 +164,7 @@ def cross_entropy( | |||
pred = tensor(np.array([0, 0], dtype=np.float32).reshape(data_shape)) | |||
label = tensor(np.ones(label_shape, dtype=np.int32)) | |||
loss = F.nn.cross_entropy(pred, label) | |||
print(loss.numpy()) | |||
print(loss.numpy().round(decimals=4)) | |||
Outputs: | |||
@@ -195,7 +198,8 @@ def cross_entropy( | |||
def binary_cross_entropy( | |||
pred: Tensor, label: Tensor, with_logits: bool = True | |||
) -> Tensor: | |||
r"""Computes the binary cross entropy loss (using logits by default). | |||
r""" | |||
Computes the binary cross entropy loss (using logits by default). | |||
By default (``with_logits`` is True), ``pred`` is assumed to be logits,
class probabilities are given by sigmoid. | |||
@@ -216,7 +220,7 @@ def binary_cross_entropy( | |||
pred = tensor(np.array([0, 0], dtype=np.float32).reshape(1, 2)) | |||
label = tensor(np.ones((1, 2), dtype=np.float32)) | |||
loss = F.nn.binary_cross_entropy(pred, label) | |||
print(loss.numpy()) | |||
print(loss.numpy().round(decimals=4)) | |||
Outputs: | |||
@@ -233,7 +237,8 @@ def binary_cross_entropy( | |||
def hinge_loss(pred: Tensor, label: Tensor, norm: str = "L1") -> Tensor: | |||
r"""Caculates the hinge loss which is often used in SVM. | |||
r""" | |||
Caculates the hinge loss which is often used in SVM. | |||
The hinge loss can be described as: | |||
@@ -43,7 +43,8 @@ __all__ = [ | |||
def isnan(inp: Tensor) -> Tensor: | |||
r"""Returns a new tensor representing if each element is ``NaN`` or not. | |||
r""" | |||
Returns a new tensor representing if each element is ``NaN`` or not. | |||
:param inp: input tensor. | |||
:return: result tensor. | |||
@@ -69,7 +70,8 @@ def isnan(inp: Tensor) -> Tensor: | |||
def isinf(inp: Tensor) -> Tensor: | |||
r"""Returns a new tensor representing if each element is ``Inf`` or not. | |||
r""" | |||
Returns a new tensor representing if each element is ``Inf`` or not. | |||
:param inp: input tensor. | |||
:return: result tensor. | |||
@@ -95,7 +97,8 @@ def isinf(inp: Tensor) -> Tensor: | |||
def sign(inp: Tensor): | |||
r"""Returns a new tensor representing the sign of each element in input tensor. | |||
r""" | |||
Returns a new tensor representing the sign of each element in input tensor. | |||
:param inp: input tensor.
:return: the sign of input tensor. | |||
@@ -125,7 +128,8 @@ def sum( | |||
axis: Optional[Union[int, Sequence[int]]] = None, | |||
keepdims: bool = False, | |||
) -> Tensor: | |||
r"""Returns the sum of input tensor along given axis. If axis is a list of dimensions, | |||
r""" | |||
Returns the sum of input tensor along given axis. If axis is a list of dimensions, | |||
reduce over all of them. | |||
:param inp: input tensor. | |||
@@ -160,7 +164,8 @@ def sum( | |||
def prod( | |||
inp: Tensor, axis: Optional[Union[int, Sequence[int]]] = None, keepdims=False | |||
) -> Tensor: | |||
r"""Returns the product of input tensor along given axis. If axis is a list of dimensions, | |||
r""" | |||
Returns the product of input tensor along given axis. If axis is a list of dimensions, | |||
reduce over all of them. | |||
:param inp: input tensor. | |||
@@ -195,7 +200,8 @@ def mean( | |||
axis: Optional[Union[int, Sequence[int]]] = None, | |||
keepdims: bool = False, | |||
) -> Tensor: | |||
"""Returns the mean value of input tensor along | |||
""" | |||
Returns the mean value of input tensor along | |||
given axis. If axis is a list of dimensions, | |||
reduce over all of them. | |||
@@ -231,7 +237,8 @@ def var( | |||
axis: Optional[Union[int, Sequence[int]]] = None, | |||
keepdims: bool = False, | |||
) -> Tensor: | |||
"""Returns the variance value of input tensor along | |||
""" | |||
Returns the variance value of input tensor along | |||
given axis. If axis is a list of dimensions, | |||
reduce over all of them. | |||
@@ -250,7 +257,7 @@ def var( | |||
data = tensor(np.arange(1, 7, dtype=np.float32).reshape(2, 3)) | |||
out = F.var(data) | |||
print(out.numpy()) | |||
print(out.numpy().round(decimals=4)) | |||
Outputs: | |||
@@ -271,7 +278,8 @@ def std( | |||
axis: Optional[Union[int, Sequence[int]]] = None, | |||
keepdims: bool = False, | |||
) -> Tensor: | |||
"""Returns the standard deviation of input tensor along | |||
""" | |||
Returns the standard deviation of input tensor along | |||
given axis. If axis is a list of dimensions, | |||
reduce over all of them. | |||
@@ -290,7 +298,7 @@ def std( | |||
data = tensor(np.arange(1, 7, dtype=np.float32).reshape(2, 3)) | |||
out = F.std(data, axis=1) | |||
print(out.numpy()) | |||
print(out.numpy().round(decimals=4)) | |||
Outputs: | |||
@@ -306,7 +314,8 @@ def min( | |||
axis: Optional[Union[int, Sequence[int]]] = None, | |||
keepdims: bool = False, | |||
) -> Tensor: | |||
r"""Returns the min value of input tensor along | |||
r""" | |||
Returns the min value of input tensor along | |||
given axis. If axis is a list of dimensions, | |||
reduce over all of them. | |||
@@ -342,7 +351,8 @@ def max( | |||
axis: Optional[Union[int, Sequence[int]]] = None, | |||
keepdims: bool = False, | |||
) -> Tensor: | |||
r"""Returns the max value of the input tensor along | |||
r""" | |||
Returns the max value of the input tensor along | |||
given axis. If axis is a list of dimensions, | |||
reduce over all of them. | |||
@@ -376,7 +386,8 @@ def max( | |||
def norm( | |||
inp: Tensor, ord: float = None, axis: int = None, keepdims=False, | |||
): | |||
"""Calculates ``p``-norm of input tensor along | |||
""" | |||
Calculates ``p``-norm of input tensor along | |||
given axis. | |||
:param inp: input tensor. | |||
@@ -395,7 +406,7 @@ def norm( | |||
x = tensor(np.arange(-3, 3, dtype=np.float32)) | |||
out = F.norm(x) | |||
print(out.numpy()) | |||
print(out.numpy().round(decimals=4)) | |||
Outputs: | |||
@@ -423,7 +434,8 @@ def argmin( | |||
axis: Optional[Union[int, Sequence[int]]] = None, | |||
keepdims: bool = False, | |||
) -> Tensor: | |||
r"""Returns the indices of the minimum values along | |||
r""" | |||
Returns the indices of the minimum values along | |||
given axis. If axis is a list of dimensions, | |||
reduce over all of them. | |||
@@ -481,7 +493,8 @@ def argmax( | |||
axis: Optional[Union[int, Sequence[int]]] = None, | |||
keepdims: bool = False, | |||
) -> Tensor: | |||
r"""Returns the indices of the maximum values along | |||
r""" | |||
Returns the indices of the maximum values along | |||
given axis. If axis is a list of dimensions, | |||
reduce over all of them. | |||
@@ -537,7 +550,8 @@ def argmax( | |||
def normalize( | |||
inp: Tensor, ord: float = None, axis: int = None, eps: float = 1e-12, | |||
) -> Tensor: | |||
r"""Performs :math:`L_p` normalization of input tensor along | |||
r""" | |||
Performs :math:`L_p` normalization of input tensor along | |||
given axis. | |||
For a tensor of shape :math:`(n_0, ..., n_{dim}, ..., n_k)`, each | |||
@@ -559,7 +573,8 @@ def normalize( | |||
def argsort(inp: Tensor, descending: bool = False) -> Tensor: | |||
r"""Returns the indices that would sort the input tensor. | |||
r""" | |||
Returns the indices that would sort the input tensor. | |||
:param inp: input tensor. If it is 2d, the result is an array of indices showing how to sort each row of the input tensor.
:param descending: sort in descending order, where the largest comes first. Default: False | |||
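For illustration (a small sketch):

.. code-block:: python

    import numpy as np
    import megengine.functional as F
    from megengine import tensor

    x = tensor(np.array([3.0, 1.0, 2.0], dtype="float32"))
    print(F.argsort(x).numpy())                     # [1 2 0]
    print(F.argsort(x, descending=True).numpy())    # [0 2 1]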
@@ -600,7 +615,8 @@ def argsort(inp: Tensor, descending: bool = False) -> Tensor: | |||
def sort(inp: Tensor, descending: bool = False) -> Tuple[Tensor, Tensor]: | |||
r"""Returns sorted tensor and the indices would sort the input tensor. | |||
r""" | |||
Returns sorted tensor and the indices would sort the input tensor. | |||
:param inp: input tensor. If it's 2d, the result would be sorted by row. | |||
:param descending: sort in descending order, where the largest comes first. Default: False | |||
@@ -647,7 +663,8 @@ def topk( | |||
kth_only: bool = False, | |||
no_sort: bool = False, | |||
) -> Tuple[Tensor, Tensor]: | |||
r"""Selects the ``Top-K``(by default) smallest elements of 2d matrix by row. | |||
r""" | |||
Selects the ``Top-K``(by default) smallest elements of 2d matrix by row. | |||
:param inp: input tensor. If input tensor is 2d, each row will be sorted. | |||
:param k: number of elements needed. | |||
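A usage sketch of the row-wise ``Top-K`` selection described above:

.. code-block:: python

    import numpy as np
    import megengine.functional as F
    from megengine import tensor

    x = tensor(np.array([[5., 1., 3., 2.],
                         [4., 6., 0., 7.]], dtype="float32"))
    values, indices = F.topk(x, k=2)   # the 2 smallest per row by default
    print(values.numpy())              # [[1. 2.] [0. 4.]]
    print(indices.numpy())             # [[1 3] [2 0]]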
@@ -75,7 +75,8 @@ def expand_hw(x): | |||
def linear(inp: Tensor, weight: Tensor, bias: Optional[Tensor] = None) -> Tensor: | |||
"""Applies a linear transformation to the input tensor. | |||
""" | |||
Applies a linear transformation to the input tensor. | |||
Refer to :class:`~.module.linear.Linear` for more information. | |||
@@ -101,7 +102,8 @@ def conv2d( | |||
conv_mode="CROSS_CORRELATION", | |||
compute_mode="DEFAULT", | |||
) -> Tensor: | |||
"""2D convolution operation. | |||
""" | |||
2D convolution operation. | |||
Refer to :class:`~.Conv2d` for more information. | |||
@@ -166,7 +168,8 @@ def conv_transpose2d( | |||
conv_mode="CROSS_CORRELATION", | |||
compute_mode="DEFAULT", | |||
) -> Tensor: | |||
"""2D transposed convolution operation. | |||
""" | |||
2D transposed convolution operation. | |||
Refer to :class:`~.ConvTranspose2d` for more information. | |||
@@ -227,7 +230,8 @@ def local_conv2d( | |||
dilation: Union[int, Tuple[int, int]] = 1, | |||
conv_mode="CROSS_CORRELATION", | |||
): | |||
"""Applies spatial 2D convolution over an groupped channeled image with untied kernels. | |||
""" | |||
Applies spatial 2D convolution over an groupped channeled image with untied kernels. | |||
""" | |||
assert conv_mode == "CROSS_CORRELATION" or conv_mode.name == "CROSS_CORRELATION" | |||
@@ -261,7 +265,8 @@ def max_pool2d( | |||
stride: Optional[Union[int, Tuple[int, int]]] = None, | |||
padding: Union[int, Tuple[int, int]] = 0, | |||
) -> Tensor: | |||
"""Applies a 2D max pooling over an input tensor. | |||
""" | |||
Applies a 2D max pooling over an input tensor. | |||
Refer to :class:`~.MaxPool2d` for more information. | |||
@@ -298,7 +303,8 @@ def avg_pool2d( | |||
padding: Union[int, Tuple[int, int]] = 0, | |||
mode: str = "AVERAGE_COUNT_EXCLUDE_PADDING", | |||
) -> Tensor: | |||
"""Applies 2D average pooling over an input tensor. | |||
""" | |||
Applies 2D average pooling over an input tensor. | |||
Refer to :class:`~.AvgPool2d` for more information. | |||
@@ -332,7 +338,8 @@ def avg_pool2d( | |||
def adaptive_max_pool2d( | |||
inp: Tensor, oshp: Union[Tuple[int, int], int, Tensor], | |||
) -> Tensor: | |||
"""Applies a 2D max adaptive pooling over an input. | |||
""" | |||
Applies a 2D max adaptive pooling over an input. | |||
Refer to :class:`~.MaxAdaptivePool2d` for more information. | |||
@@ -353,7 +360,8 @@ def adaptive_max_pool2d( | |||
def adaptive_avg_pool2d( | |||
inp: Tensor, oshp: Union[Tuple[int, int], int, Tensor], | |||
) -> Tensor: | |||
"""Applies a 2D average adaptive pooling over an input. | |||
""" | |||
Applies a 2D average adaptive pooling over an input. | |||
Refer to :class:`~.AvgAdaptivePool2d` for more information. | |||
@@ -390,7 +398,8 @@ def leaky_relu(inp: Tensor, negative_slope: float = 0.01) -> Tensor: | |||
def softplus(inp: Tensor) -> Tensor: | |||
r"""Applies the element-wise function: | |||
r""" | |||
Applies the element-wise function: | |||
.. math:: | |||
\text{softplus}(x) = \log(1 + \exp(x)) | |||
@@ -416,7 +425,7 @@ def softplus(inp: Tensor) -> Tensor: | |||
x = tensor(np.arange(-3, 3, dtype=np.float32)) | |||
y = F.softplus(x) | |||
print(y.numpy()) | |||
print(y.numpy().round(decimals=4)) | |||
Outputs: | |||
@@ -429,7 +438,8 @@ def softplus(inp: Tensor) -> Tensor: | |||
def logsoftmax(inp: Tensor, axis: Union[int, Sequence[int]]) -> Tensor: | |||
r"""Applies the :math:`\log(\text{Softmax}(x))` function to an n-dimensional | |||
r""" | |||
Applies the :math:`\log(\text{Softmax}(x))` function to an n-dimensional | |||
input Tensor. The LogSoftmax formulation can be simplified as: | |||
.. math:: | |||
@@ -456,7 +466,7 @@ def logsoftmax(inp: Tensor, axis: Union[int, Sequence[int]]) -> Tensor: | |||
x = tensor(np.arange(-5, 5, dtype=np.float32)).reshape(2,5) | |||
y = F.logsoftmax(x, axis=1) | |||
print(y.numpy()) | |||
print(y.numpy().round(decimals=4)) | |||
Outputs: | |||
@@ -470,7 +480,8 @@ def logsoftmax(inp: Tensor, axis: Union[int, Sequence[int]]) -> Tensor: | |||
def logsigmoid(inp: Tensor) -> Tensor: | |||
r"""Applies the element-wise function: | |||
r""" | |||
Applies the element-wise function: | |||
.. math:: | |||
\text{logsigmoid}(x) = \log(\frac{ 1 }{ 1 + \exp(-x)}) | |||
@@ -490,13 +501,13 @@ def logsigmoid(inp: Tensor) -> Tensor: | |||
x = tensor(np.arange(-5, 5, dtype=np.float32)) | |||
y = F.logsigmoid(x) | |||
print(y.numpy()) | |||
print(y.numpy().round(decimals=4)) | |||
Outputs: | |||
.. testoutput:: | |||
[-5.0067 -4.0181 -3.0486 -2.1269 -1.3133 -0.6931 -0.3133 -0.1269 -0.0486 | |||
[-5.0067 -4.0182 -3.0486 -2.1269 -1.3133 -0.6931 -0.3133 -0.1269 -0.0486 | |||
-0.0181] | |||
""" | |||
@@ -539,7 +550,7 @@ def logsumexp( | |||
x = tensor(np.arange(-5, 5, dtype=np.float32)).reshape(2,5) | |||
y = F.logsumexp(x, axis=1, keepdims=False) | |||
print(y.numpy()) | |||
print(y.numpy().round(decimals=4)) | |||
Outputs: | |||
@@ -589,7 +600,7 @@ def softmax(inp: Tensor, axis: Optional[int] = None) -> Tensor: | |||
x = tensor(np.arange(-5, 5, dtype=np.float32)).reshape(2,5) | |||
out = F.softmax(x) | |||
print(out.numpy()) | |||
print(out.numpy().round(decimals=4)) | |||
Outputs: | |||
@@ -619,7 +630,8 @@ def batch_norm( | |||
eps: float = 1e-5, | |||
inplace: bool = True | |||
): | |||
r"""Applies batch normalization to the input. | |||
r""" | |||
Applies batch normalization to the input. | |||
Refer to :class:`~.BatchNorm2d` and :class:`~.BatchNorm1d` for more information. | |||
@@ -734,7 +746,8 @@ def sync_batch_norm( | |||
eps_mode="ADDITIVE", | |||
group=WORLD, | |||
) -> Tensor: | |||
r"""Applies synchronized batch normalization to the input. | |||
r""" | |||
Applies synchronized batch normalization to the input. | |||
Refer to :class:`~.BatchNorm2d` and :class:`~.BatchNorm1d` for more information. | |||
@@ -835,7 +848,8 @@ def sync_batch_norm( | |||
def one_hot(inp: Tensor, num_classes: int) -> Tensor: | |||
r"""Performs one-hot encoding for the input tensor. | |||
r""" | |||
Performs one-hot encoding for the input tensor. | |||
:param inp: input tensor. | |||
:param num_classes: number of classes denotes the last dimension of the output tensor. | |||
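A minimal sketch of the encoding:

.. code-block:: python

    import numpy as np
    import megengine.functional as F
    from megengine import tensor

    inp = tensor(np.array([1, 0, 2], dtype="int32"))
    print(F.one_hot(inp, num_classes=3).numpy())
    # [[0 1 0]
    #  [1 0 0]
    #  [0 0 1]]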
@@ -878,7 +892,8 @@ def warp_perspective( | |||
border_val: float = 0.0, | |||
interp_mode: str = "LINEAR", | |||
): | |||
r"""Applies perspective transformation to batched 2D images. | |||
r""" | |||
Applies perspective transformation to batched 2D images. | |||
The input images are transformed to the output images by the transformation matrix: | |||
@@ -1094,13 +1109,13 @@ def svd(inp: Tensor, full_matrices=False, compute_uv=True) -> Tensor: | |||
x = tensor(np.arange(0, 6, dtype=np.float32).reshape(2,3)) | |||
_, y, _ = F.svd(x) | |||
print(y.numpy()) | |||
print(y.numpy().round(decimals=3)) | |||
Outputs: | |||
.. testoutput:: | |||
[7.3485 1. ] | |||
[7.348 1. ] | |||
""" | |||
op = builtin.SVD(full_matrices=full_matrices, compute_uv=compute_uv) | |||
@@ -1115,7 +1130,8 @@ def interpolate( | |||
mode: str = "BILINEAR", | |||
align_corners: bool = None, | |||
) -> Tensor: | |||
r"""Down/up samples the input tensor to either the given size or with the given scale_factor. ``size`` can not coexist with ``scale_factor``. | |||
r""" | |||
Down/up samples the input tensor to either the given size or with the given scale_factor. ``size`` can not coexist with ``scale_factor``. | |||
:param inp: input tensor. | |||
:param size: size of the output tensor. Default: None | |||
@@ -1257,7 +1273,8 @@ def interpolate( | |||
def dropout(inp: Tensor, drop_prob: float, training: bool = True) -> Tensor: | |||
"""Returns a new tensor where each of the elements are randomly set to zero | |||
""" | |||
Returns a new tensor where each of the elements are randomly set to zero | |||
with probability P = ``drop_prob``. Optionally rescale the output tensor if ``training`` is True. | |||
:param inp: input tensor. | |||
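A usage sketch (output values are random by construction):

.. code-block:: python

    import numpy as np
    import megengine.functional as F
    from megengine import tensor

    x = tensor(np.ones(10, dtype="float32"))
    y = F.dropout(x, drop_prob=0.5, training=True)
    # Roughly half of the elements become 0; per the docstring above,
    # the surviving elements are rescaled because training=True.
    print(y.numpy())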
@@ -1302,7 +1319,8 @@ def embedding( | |||
max_norm: Optional[float] = None, | |||
norm_type: Optional[float] = None, | |||
): | |||
"""Applies lookup table for embedding. | |||
""" | |||
Applies lookup table for embedding. | |||
:param inp: tensor with indices. | |||
:param weight: learnable weights which embeds from. | |||
@@ -1329,7 +1347,8 @@ def roi_pooling( | |||
mode: str = "max", | |||
scale: float = 1.0, | |||
) -> Tensor: | |||
"""Applies roi pooling on input feature. | |||
""" | |||
Applies roi pooling on input feature. | |||
:param inp: tensor that represents the input feature, `(N, C, H, W)` images. | |||
:param rois: `(K, 5)` boxes. First column is the index into N. The other 4 columns are xyxy. | |||
@@ -1350,7 +1369,7 @@ def roi_pooling( | |||
inp = tensor(np.random.randn(1, 1, 128, 128)) | |||
rois = tensor(np.random.random((4, 5))) | |||
y = F.nn.roi_pooling(inp, rois, (2, 2)) | |||
print(y.numpy()[0]) | |||
print(y.numpy()[0].round(decimals=4)) | |||
Outputs: | |||
@@ -1382,7 +1401,8 @@ def roi_align( | |||
sample_points: Union[int, tuple, list] = 2, | |||
aligned: bool = True, | |||
) -> Tensor: | |||
"""Applies roi align on input feature. | |||
""" | |||
Applies roi align on input feature. | |||
:param inp: tensor that represents the input feature, shape is `(N, C, H, W)`. | |||
:param rois: `(N, 5)` boxes. First column is the box index. The other 4 columns are ``xyxy``. | |||
@@ -1407,7 +1427,7 @@ def roi_align( | |||
inp = tensor(np.random.randn(1, 1, 128, 128)) | |||
rois = tensor(np.random.random((4, 5))) | |||
y = F.nn.roi_align(inp, rois, (2, 2)) | |||
print(y.numpy()[0]) | |||
print(y.numpy()[0].round(decimals=4)) | |||
Outputs: | |||
@@ -1444,7 +1464,8 @@ def roi_align( | |||
def indexing_one_hot( | |||
src: Tensor, index: Tensor, axis: int = 1, keepdims=False | |||
) -> Tensor: | |||
r"""One-hot indexing for some axes. | |||
r""" | |||
One-hot indexing for some axes. | |||
:param src: input tensor. | |||
:param index: index tensor. | |||
@@ -28,7 +28,8 @@ def conv_bias_activation( | |||
conv_mode="CROSS_CORRELATION", | |||
compute_mode="DEFAULT", | |||
) -> Tensor: | |||
"""Convolution bias with activation operation, only for inference. | |||
""" | |||
Convolution bias with activation operation, only for inference. | |||
:param inp: feature map of the convolution operation. | |||
:param weight: convolution kernel. | |||
@@ -58,7 +58,8 @@ __all__ = [ | |||
def eye(N, M=None, *, dtype="float32", device: Optional[CompNode] = None) -> Tensor: | |||
"""Returns a 2D tensor with ones on the diagonal and zeros elsewhere. | |||
""" | |||
Returns a 2D tensor with ones on the diagonal and zeros elsewhere. | |||
:param shape: expected shape of output tensor. | |||
:param dtype: data type. Default: None | |||
@@ -100,7 +101,8 @@ def eye(N, M=None, *, dtype="float32", device: Optional[CompNode] = None) -> Ten | |||
def full(shape, value, dtype="float32", device=None): | |||
"""Returns a tensor with given shape and value. | |||
""" | |||
Returns a tensor with given shape and value. | |||
""" | |||
if isinstance(shape, int): | |||
shape = (shape,) | |||
@@ -113,7 +115,8 @@ def full(shape, value, dtype="float32", device=None): | |||
def ones(shape, dtype="float32", device=None): | |||
"""Returns a ones tensor with given shape. | |||
""" | |||
Returns a ones tensor with given shape. | |||
:param shape: output tensor shape.
:return: output ones tensor.
@@ -139,13 +142,15 @@ def ones(shape, dtype="float32", device=None): | |||
def zeros(shape, dtype="float32", device=None): | |||
"""Returns a zero tensor with given shape. | |||
""" | |||
Returns a zero tensor with given shape. | |||
""" | |||
return full(shape, 0.0, dtype=dtype, device=device) | |||
def zeros_like(inp: Tensor) -> Tensor: | |||
"""Returns a zero tensor with the same shape as input tensor. | |||
""" | |||
Returns a zero tensor with the same shape as input tensor. | |||
:param inp: input tensor. | |||
:return: output zero tensor. | |||
@@ -174,13 +179,15 @@ def zeros_like(inp: Tensor) -> Tensor: | |||
def ones_like(inp: Tensor) -> Tensor: | |||
"""Returns a ones tensor with the same shape as input tensor. | |||
""" | |||
Returns a ones tensor with the same shape as input tensor. | |||
""" | |||
return ones(inp.shape, dtype=inp.dtype, device=inp.device) | |||
def full_like(inp: Tensor, value: Union[int, float]) -> Tensor: | |||
"""Returns a tensor filled with given value with the same shape as input tensor. | |||
""" | |||
Returns a tensor filled with given value with the same shape as input tensor. | |||
""" | |||
return full(inp.shape, value, dtype=inp.dtype, device=inp.device) | |||
@@ -274,7 +281,8 @@ def concat(inps: Iterable[Tensor], axis: int = 0, device=None) -> Tensor: | |||
def stack(inps, axis=0, device=None): | |||
"""Concats a sequence of tensors along a new axis. | |||
""" | |||
Concats a sequence of tensors along a new axis. | |||
The input tensors must have the same shape. | |||
:param inps: input tensors. | |||
@@ -316,7 +324,8 @@ def stack(inps, axis=0, device=None): | |||
def split(inp, nsplits_or_sections, axis=0): | |||
"""Splits the input tensor into several smaller tensors. | |||
""" | |||
Splits the input tensor into several smaller tensors. | |||
When nsplits_or_sections is int, the last tensor may be smaller than others. | |||
:param inp: input tensor. | |||
@@ -334,7 +343,7 @@ def split(inp, nsplits_or_sections, axis=0): | |||
x = tensor(np.random.random((2,3,4,5)), dtype=np.float32) | |||
out = F.split(x, 2, axis=3) | |||
print(out[0].shape, out[1].shape) | |||
print(out[0].numpy().shape, out[1].numpy().shape) | |||
Outputs: | |||
@@ -400,7 +409,8 @@ def _get_idx(index, axis): | |||
def gather(inp: Tensor, axis: int, index: Tensor) -> Tensor: | |||
# TODO: rewrite doc | |||
r"""Gathers data from input tensor on axis using index. | |||
r""" | |||
Gathers data from input tensor on axis using index. | |||
For a 3-D tensor, the output is specified by:: | |||
@@ -472,7 +482,8 @@ def gather(inp: Tensor, axis: int, index: Tensor) -> Tensor: | |||
def scatter(inp: Tensor, axis: int, index: Tensor, source: Tensor) -> Tensor: | |||
# TODO: rewrite doc | |||
r"""Writes all values from the tensor source into input tensor | |||
r""" | |||
Writes all values from the tensor source into input tensor | |||
at the indices specified in the index tensor. | |||
For each value in source, its output index is specified by its index | |||
@@ -577,7 +588,8 @@ def scatter(inp: Tensor, axis: int, index: Tensor, source: Tensor) -> Tensor: | |||
def where(mask: Tensor, x: Tensor, y: Tensor) -> Tensor: | |||
r"""Selects elements either from Tensor x or Tensor y, according to mask. | |||
r""" | |||
Selects elements either from Tensor x or Tensor y, according to mask. | |||
.. math:: | |||
@@ -764,7 +776,8 @@ AxisDesc = AxisAddRemove.AxisDesc | |||
def flatten(inp: Tensor, start_axis: int = 0, end_axis: int = -1) -> Tensor: | |||
r"""Reshapes the tensor by flattening the sub-tensor from dimension ``start_axis`` to dimension ``end_axis``. | |||
r""" | |||
Reshapes the tensor by flattening the sub-tensor from dimension ``start_axis`` to dimension ``end_axis``. | |||
:param inp: input tensor. | |||
:param start_axis: start dimension that the sub-tensor to be flattened. Default: 0 | |||
@@ -819,7 +832,7 @@ def expand_dims(inp: Tensor, axis: Union[int, Sequence[int]]) -> Tensor: | |||
x = tensor([1, 2]) | |||
out = F.expand_dims(x, 0) | |||
print(out.shape) | |||
print(out.numpy().shape) | |||
Outputs: | |||
@@ -865,7 +878,7 @@ def squeeze(inp: Tensor, axis: Optional[Union[int, Sequence[int]]] = None) -> Te | |||
x = tensor(np.array([1, 2], dtype=np.int32).reshape(1, 1, 2, 1)) | |||
out = F.squeeze(x, 3) | |||
print(out.shape) | |||
print(out.numpy().shape) | |||
Outputs: | |||
@@ -884,7 +897,8 @@ def linspace( | |||
dtype="float32", | |||
device: Optional[CompNode] = None, | |||
) -> Tensor: | |||
r"""Returns equally spaced numbers over a specified interval. | |||
r""" | |||
Returns equally spaced numbers over a specified interval. | |||
:param start: starting value of the sequence, should be scalar. | |||
:param stop: last value of the sequence, should be scalar. | |||
@@ -928,7 +942,8 @@ def arange( | |||
dtype="float32", | |||
device: Optional[CompNode] = None, | |||
) -> Tensor: | |||
r"""Returns a tensor with values from start to stop with adjacent interval step. | |||
r""" | |||
Returns a tensor with values from start to stop with adjacent interval step. | |||
:param start: starting value of the sequence, should be scalar. | |||
:param stop: ending value of the sequence, should be scalar. | |||
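A hedged example of the two range constructors, assuming ``num`` and ``step`` are the third positional arguments as in the signatures above:

.. code-block:: python

    import megengine.functional as F

    xs = F.linspace(0, 1, 5)    # 5 evenly spaced values from 0 to 1
    ys = F.arange(0, 10, 2)     # 0, 2, 4, 6, 8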
@@ -11,7 +11,8 @@ import functools | |||
def get_ndtuple(value, *, n, allow_zero: bool = True): | |||
r"""Converts possibly 1D tuple to n-dim tuple. | |||
r""" | |||
Converts possibly 1D tuple to n-dim tuple. | |||
:param value: value will be filled in generated tuple. | |||
:param n: how many elements will the tuple have. | |||
@@ -43,7 +43,8 @@ PROTOCOLS = { | |||
def _get_megengine_home() -> str: | |||
"""MGE_HOME setting complies with the XDG Base Directory Specification | |||
""" | |||
MGE_HOME setting complies with the XDG Base Directory Specification | |||
""" | |||
megengine_home = os.path.expanduser( | |||
os.getenv( | |||
@@ -94,7 +95,8 @@ def _init_hub( | |||
commit: str = None, | |||
protocol: str = DEFAULT_PROTOCOL, | |||
): | |||
"""Imports hubmodule like python import. | |||
""" | |||
Imports hubmodule like python import. | |||
:param repo_info: | |||
a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | |||
@@ -137,7 +139,8 @@ def list( | |||
commit: str = None, | |||
protocol: str = DEFAULT_PROTOCOL, | |||
) -> List[str]: | |||
"""Lists all entrypoints available in repo hubconf. | |||
""" | |||
Lists all entrypoints available in repo hubconf. | |||
:param repo_info: | |||
a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | |||
@@ -175,7 +178,8 @@ def load( | |||
protocol: str = DEFAULT_PROTOCOL, | |||
**kwargs | |||
) -> Any: | |||
"""Loads model from github or gitlab repo, with pretrained weights. | |||
""" | |||
Loads model from github or gitlab repo, with pretrained weights. | |||
:param repo_info: | |||
a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | |||
@@ -215,7 +219,8 @@ def help( | |||
commit: str = None, | |||
protocol: str = DEFAULT_PROTOCOL, | |||
) -> str: | |||
"""This function returns docstring of entrypoint ``entry`` by following steps: | |||
""" | |||
This function returns the docstring of entrypoint ``entry`` by the following steps: | |||
1. Pull the repo code specified by git and repo_info. | |||
2. Load the entry defined in repo's hubconf.py | |||
@@ -250,7 +255,8 @@ def help( | |||
def load_serialized_obj_from_url(url: str, model_dir=None) -> Any: | |||
"""Loads MegEngine serialized object from the given URL. | |||
""" | |||
Loads MegEngine serialized object from the given URL. | |||
If the object is already present in ``model_dir``, it's deserialized and | |||
returned. If no ``model_dir`` is specified, it will be ``MGE_HOME/serialized``. | |||
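For illustration only, a sketch of the typical hub workflow described above; the repo string and entrypoint name are placeholders rather than guaranteed entries:

.. code-block:: python

    import megengine.hub as hub

    entries = hub.list("repo_owner/repo_name")                # placeholder repo_info
    model = hub.load("repo_owner/repo_name", "some_entry",    # placeholder entrypoint
                     pretrained=True)                         # extra kwargs go to the entrypoint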
@@ -27,7 +27,8 @@ def load_module(name: str, path: str) -> types.ModuleType: | |||
def check_module_exists(module: str) -> bool: | |||
"""Checks whether python module exists or not. | |||
""" | |||
Checks whether python module exists or not. | |||
:param module: name of module. | |||
""" | |||
@@ -36,7 +37,8 @@ def check_module_exists(module: str) -> bool: | |||
@contextmanager | |||
def cd(target: str) -> Iterator[None]: | |||
"""Changes current directory to target. | |||
""" | |||
Changes current directory to target. | |||
:param target: target directory. | |||
""" | |||
@@ -519,7 +519,8 @@ class trace: | |||
optimize_for_inference=True, | |||
**kwargs | |||
): | |||
r"""Serializes trace to file system. | |||
r""" | |||
Serializes trace to file system. | |||
:param file: output file, could be file object or filename. | |||
:param arg_names: names of the input tensors in the traced function. | |||
@@ -17,7 +17,8 @@ _default_level = logging.getLevelName(_default_level_name.upper()) | |||
def set_log_file(fout, mode="a"): | |||
r"""Sets log output file. | |||
r""" | |||
Sets log output file. | |||
:type fout: str or file-like | |||
:param fout: file-like object that supports write and flush, or string for | |||
@@ -38,37 +39,44 @@ class MegEngineLogFormatter(logging.Formatter): | |||
max_lines = 256 | |||
def _color_exc(self, msg): | |||
r"""Sets the color of message as the execution type. | |||
r""" | |||
Sets the color of message as the exception type. | |||
""" | |||
return "\x1b[34m{}\x1b[0m".format(msg) | |||
def _color_dbg(self, msg): | |||
r"""Sets the color of message as the debugging type. | |||
r""" | |||
Sets the color of message as the debugging type. | |||
""" | |||
return "\x1b[36m{}\x1b[0m".format(msg) | |||
def _color_warn(self, msg): | |||
r"""Sets the color of message as the warning type. | |||
r""" | |||
Sets the color of message as the warning type. | |||
""" | |||
return "\x1b[1;31m{}\x1b[0m".format(msg) | |||
def _color_err(self, msg): | |||
r"""Sets the color of message as the error type. | |||
r""" | |||
Sets the color of message as the error type. | |||
""" | |||
return "\x1b[1;4;31m{}\x1b[0m".format(msg) | |||
def _color_omitted(self, msg): | |||
r"""Sets the color of message as the omitted type. | |||
r""" | |||
Sets the color of message as the omitted type. | |||
""" | |||
return "\x1b[35m{}\x1b[0m".format(msg) | |||
def _color_normal(self, msg): | |||
r"""Sets the color of message as the normal type. | |||
r""" | |||
Sets the color of message as the normal type. | |||
""" | |||
return msg | |||
def _color_date(self, msg): | |||
r"""Sets the color of message the same as date. | |||
r""" | |||
Sets the color of message the same as date. | |||
""" | |||
return "\x1b[32m{}\x1b[0m".format(msg) | |||
@@ -142,7 +150,8 @@ class MegEngineLogFormatter(logging.Formatter): | |||
def get_logger(name=None, formatter=MegEngineLogFormatter): | |||
r"""Gets megengine logger with given name. | |||
r""" | |||
Gets megengine logger with given name. | |||
""" | |||
logger = logging.getLogger(name) | |||
@@ -161,7 +170,8 @@ def get_logger(name=None, formatter=MegEngineLogFormatter): | |||
def set_log_level(level, update_existing=True): | |||
"""Sets default logging level. | |||
""" | |||
Sets default logging level. | |||
:type level: int e.g. logging.INFO | |||
:param level: logging level given by python :mod:`logging` module | |||
@@ -198,7 +208,8 @@ try: | |||
_imperative_rt_logger.set_log_level(_imperative_rt_logger.LogLevel.Debug) | |||
def set_mgb_log_level(level): | |||
r"""Sets megbrain log level | |||
r""" | |||
Sets megbrain log level | |||
:type level: int e.g. logging.INFO | |||
:param level: new log level | |||
@@ -218,7 +229,8 @@ except ImportError as exc: | |||
@contextlib.contextmanager | |||
def replace_mgb_log_level(level): | |||
r"""Replaces megbrain log level in a block and restore after exiting. | |||
r""" | |||
Replaces megbrain log level in a block and restores it after exiting. | |||
:type level: int e.g. logging.INFO | |||
:param level: new log level | |||
@@ -231,7 +243,8 @@ def replace_mgb_log_level(level): | |||
def enable_debug_log(): | |||
r"""Sets logging level to debug for all components. | |||
r""" | |||
Sets logging level to debug for all components. | |||
""" | |||
set_log_level(logging.DEBUG) | |||
set_mgb_log_level(logging.DEBUG) |
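A small sketch tying these logging helpers together (the log file name is illustrative):

.. code-block:: python

    import logging
    from megengine.logger import get_logger, set_log_file, set_log_level

    set_log_file("train.log")        # or any file-like object with write/flush
    set_log_level(logging.INFO)
    logger = get_logger(__name__)
    logger.info("training started")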
@@ -27,7 +27,8 @@ class _AdaptivePoolNd(Module): | |||
class AdaptiveMaxPool2d(_AdaptivePoolNd): | |||
r"""Applies a 2D max adaptive pooling over an input. | |||
r""" | |||
Applies a 2D max adaptive pooling over an input. | |||
For instance, given an input of the size :math:`(N, C, H, W)` and | |||
an output shape :math:`(OH, OW)`, this layer generates the output of | |||
@@ -62,7 +63,7 @@ class AdaptiveMaxPool2d(_AdaptivePoolNd): | |||
.. testoutput:: | |||
[[[[5. 7.] | |||
[[[[ 5. 7.] | |||
[13. 15.]]]] | |||
""" | |||
@@ -72,7 +73,8 @@ class AdaptiveMaxPool2d(_AdaptivePoolNd): | |||
class AdaptiveAvgPool2d(_AdaptivePoolNd): | |||
r"""Applies a 2D average pooling over an input. | |||
r""" | |||
Applies a 2D average adaptive pooling over an input. | |||
For instance, given an input of the size :math:`(N, C, H, W)` and | |||
an output shape :math:`(OH, OW)`, this layer generates the output of | |||
@@ -105,7 +107,7 @@ class AdaptiveAvgPool2d(_AdaptivePoolNd): | |||
.. testoutput:: | |||
[[[[2.5 4.5] | |||
[[[[ 2.5 4.5] | |||
[10.5 12.5]]]] | |||
""" | |||
@@ -87,7 +87,8 @@ class _ConvNd(Module): | |||
class Conv2d(_ConvNd): | |||
r"""Applies a 2D convolution over an input tensor. | |||
r""" | |||
Applies a 2D convolution over an input tensor. | |||
For instance, given an input of the size :math:`(N, C_{\text{in}}, H, W)`, | |||
this layer generates an output of the size | |||
@@ -145,7 +146,7 @@ class Conv2d(_ConvNd): | |||
m = M.Conv2d(in_channels=3, out_channels=1, kernel_size=3) | |||
inp = mge.tensor(np.arange(0, 96).astype("float32").reshape(2, 3, 4, 4)) | |||
oup = m(inp) | |||
print(oup.shape) | |||
print(oup.numpy().shape) | |||
Outputs: | |||
@@ -232,7 +233,8 @@ class Conv2d(_ConvNd): | |||
class ConvTranspose2d(_ConvNd): | |||
r"""Applies a 2D transposed convolution over an input tensor. | |||
r""" | |||
Applies a 2D transposed convolution over an input tensor. | |||
This module is also known as a deconvolution or a fractionally-strided convolution. | |||
:class:`ConvTranspose2d` can be seen as the gradient of :class:`Conv2d` operation | |||
@@ -340,7 +342,8 @@ class ConvTranspose2d(_ConvNd): | |||
class LocalConv2d(Conv2d): | |||
r"""Applies a spatial convolution with untied kernels over an groupped channeled input 4D tensor. | |||
r""" | |||
Applies a spatial convolution with untied kernels over a grouped, channeled input 4D tensor. | |||
It is also known as the locally connected layer. | |||
:param in_channels: number of input channels. | |||
@@ -11,7 +11,8 @@ from .module import Module | |||
class Dropout(Module): | |||
r"""Randomly sets input elements to zeros with the probability :math:`drop\_prob` during training. | |||
r""" | |||
Randomly sets input elements to zero with the probability :math:`drop\_prob` during training. | |||
Commonly used in large networks to prevent overfitting. | |||
Note that we perform dropout only during training; we also rescale (multiply) the output tensor | |||
by :math:`\frac{1}{1 - drop\_prob}`. During inference :class:`~.Dropout` is equal to :class:`~.Identity`. | |||
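A minimal sketch of the behaviour described above; the shape and probability are arbitrary:

.. code-block:: python

    import numpy as np
    import megengine as mge
    import megengine.module as M

    dropout = M.Dropout(drop_prob=0.2)
    x = mge.tensor(np.ones((4, 8), dtype="float32"))

    dropout.train()
    y_train = dropout(x)    # elements zeroed, survivors rescaled by 1 / (1 - 0.2)

    dropout.eval()
    y_eval = dropout(x)     # identity during inference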
@@ -93,7 +93,7 @@ class Embedding(Module): | |||
) | |||
self.reset_parameters() | |||
else: | |||
if initial_weight.shape != (num_embeddings, embedding_dim): | |||
if initial_weight.numpy().shape != (num_embeddings, embedding_dim): | |||
raise ValueError( | |||
"The weight shape should match num_embeddings and embedding_dim" | |||
) | |||
@@ -18,7 +18,8 @@ from ..tensor import Tensor | |||
def fill_(tensor: Tensor, val: Union[float, int]) -> None: | |||
"""Fills the given ``tensor`` with value ``val``. | |||
""" | |||
Fills the given ``tensor`` with value ``val``. | |||
:param tensor: tensor to be initialized. | |||
:param val: value to be filled throughout the tensor. | |||
@@ -27,7 +28,8 @@ def fill_(tensor: Tensor, val: Union[float, int]) -> None: | |||
def zeros_(tensor: Tensor) -> None: | |||
"""Fills the given ``tensor`` with scalar value `0`. | |||
""" | |||
Fills the given ``tensor`` with scalar value `0`. | |||
:param tensor: tensor to be initialized. | |||
""" | |||
@@ -35,7 +37,8 @@ def zeros_(tensor: Tensor) -> None: | |||
def ones_(tensor: Tensor) -> None: | |||
"""Fills the given ``tensor`` with the scalar value `1`. | |||
""" | |||
Fills the given ``tensor`` with the scalar value `1`. | |||
:param tensor: tensor to be initialized. | |||
""" | |||
@@ -43,7 +46,8 @@ def ones_(tensor: Tensor) -> None: | |||
def uniform_(tensor: Tensor, a: float = 0.0, b: float = 1.0) -> None: | |||
r"""Fills the given ``tensor`` with random value sampled from uniform distribution | |||
r""" | |||
Fills the given ``tensor`` with random values sampled from the uniform distribution | |||
:math:`\mathcal{U}(\text{a}, \text{b})`. | |||
:param tensor: tensor to be initialized. | |||
@@ -54,7 +58,8 @@ def uniform_(tensor: Tensor, a: float = 0.0, b: float = 1.0) -> None: | |||
def normal_(tensor: Tensor, mean: float = 0.0, std: float = 1.0) -> None: | |||
r"""Fills the given ``tensor`` with random value sampled from normal distribution | |||
r""" | |||
Fills the given ``tensor`` with random values sampled from the normal distribution | |||
:math:`\mathcal{N}(\text{mean}, \text{std}^2)`. | |||
:param tensor: tensor to be initialized. | |||
@@ -67,7 +72,8 @@ def normal_(tensor: Tensor, mean: float = 0.0, std: float = 1.0) -> None: | |||
def calculate_gain( | |||
nonlinearity: str, param: Optional[Union[int, float]] = None | |||
) -> float: | |||
r"""Returns a recommended gain value (see the table below) for the given nonlinearity | |||
r""" | |||
Returns a recommended gain value (see the table below) for the given nonlinearity | |||
function. | |||
================= ==================================================== | |||
@@ -168,7 +174,8 @@ def calculate_correct_fan(tensor: Tensor, mode: str) -> float: | |||
def xavier_uniform_(tensor: Tensor, gain: float = 1.0) -> None: | |||
r"""Fills tensor with random values sampled from :math:`\mathcal{U}(-a, a)` | |||
r""" | |||
Fills tensor with random values sampled from :math:`\mathcal{U}(-a, a)` | |||
where | |||
.. math:: | |||
@@ -188,7 +195,8 @@ def xavier_uniform_(tensor: Tensor, gain: float = 1.0) -> None: | |||
def xavier_normal_(tensor: Tensor, gain: float = 1.0) -> None: | |||
r"""Fills tensor with random values sampled from | |||
r""" | |||
Fills tensor with random values sampled from | |||
:math:`\mathcal{N}(0, \text{std}^2)` where | |||
.. math:: | |||
@@ -209,7 +217,8 @@ def xavier_normal_(tensor: Tensor, gain: float = 1.0) -> None: | |||
def msra_uniform_( | |||
tensor: Tensor, a: float = 0, mode: str = "fan_in", nonlinearity: str = "leaky_relu" | |||
) -> None: | |||
r"""Fills tensor wilth random values sampled from | |||
r""" | |||
Fills tensor with random values sampled from | |||
:math:`\mathcal{U}(-\text{bound}, \text{bound})` where | |||
.. math:: | |||
@@ -238,7 +247,8 @@ def msra_uniform_( | |||
def msra_normal_( | |||
tensor: Tensor, a: float = 0, mode: str = "fan_in", nonlinearity: str = "leaky_relu" | |||
) -> None: | |||
r"""Fills tensor wilth random values sampled from | |||
r""" | |||
Fills tensor with random values sampled from | |||
:math:`\mathcal{N}(0, \text{std}^2)` where | |||
.. math:: | |||
@@ -14,7 +14,8 @@ from .module import Module | |||
class Linear(Module): | |||
r"""Applies a linear transformation to the input. For instance, if input | |||
r""" | |||
Applies a linear transformation to the input. For instance, if input | |||
is x, then output y is: | |||
.. math:: | |||
@@ -39,7 +40,7 @@ class Linear(Module): | |||
m = M.Linear(in_features=3, out_features=1) | |||
inp = mge.tensor(np.arange(0, 6).astype("float32").reshape(2, 3)) | |||
oup = m(inp) | |||
print(oup.shape) | |||
print(oup.numpy().shape) | |||
Outputs: | |||
@@ -57,7 +57,8 @@ def _is_module(obj): | |||
class Module(metaclass=ABCMeta): | |||
"""Base Module class. | |||
""" | |||
Base Module class. | |||
""" | |||
def __init__(self): | |||
@@ -76,7 +77,8 @@ class Module(metaclass=ABCMeta): | |||
pass | |||
def register_forward_pre_hook(self, hook: Callable) -> HookHandler: | |||
"""Registers a hook to handle forward inputs. `hook` should be a function. | |||
""" | |||
Registers a hook to handle forward inputs. `hook` should be a function. | |||
:param hook: a function that receives `module` and `inputs`, then returns | |||
a modified `inputs` or `None`. | |||
@@ -85,7 +87,8 @@ class Module(metaclass=ABCMeta): | |||
return HookHandler(self._forward_pre_hooks, hook) | |||
def register_forward_hook(self, hook: Callable) -> HookHandler: | |||
"""Registers a hook to handle forward results. `hook` should be a function that | |||
""" | |||
Registers a hook to handle forward results. `hook` should be a function that | |||
receives `module`, `inputs` and `outputs`, then returns a modified `outputs` or `None`. | |||
This method returns a handler with a :meth:`~.HookHandler.remove` interface to delete the hook. | |||
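A sketch of registering and later removing a forward hook with the signature described above; the hook body is a toy example:

.. code-block:: python

    import numpy as np
    import megengine as mge
    import megengine.module as M

    net = M.Linear(4, 2)

    def log_output(module, inputs, outputs):
        # returning None leaves the outputs unchanged
        print(type(module).__name__, outputs.numpy().shape)

    handler = net.register_forward_hook(log_output)
    net(mge.tensor(np.zeros((3, 4), dtype="float32")))
    handler.remove()        # detach the hook once it is no longer needed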
@@ -118,7 +121,8 @@ class Module(metaclass=ABCMeta): | |||
predicate: Callable[[Any], bool] = lambda _: True, | |||
seen: Optional[Set[int]] = None | |||
) -> Union[Iterable[Any], Iterable[Tuple[str, Any]]]: | |||
"""Scans the module object and returns an iterable for the :class:`~.Tensor` | |||
""" | |||
Scans the module object and returns an iterable for the :class:`~.Tensor` | |||
and :class:`~.Module` attributes that agree with the ``predicate``. For multiple | |||
calls of this function with same arguments, the order of objects within the | |||
returned iterable is guaranteed to be identical, as long as all the involved | |||
@@ -165,7 +169,8 @@ class Module(metaclass=ABCMeta): | |||
) | |||
def parameters(self, recursive: bool = True, **kwargs) -> Iterable[Parameter]: | |||
r"""Returns an iterable for the :class:`~.Parameter` of the module. | |||
r""" | |||
Returns an iterable for the :class:`~.Parameter` of the module. | |||
:param recursive: If ``True``, returns all :class:`~.Parameter` within this | |||
module, else only returns :class:`~.Parameter` that are direct attributes | |||
@@ -190,7 +195,8 @@ class Module(metaclass=ABCMeta): | |||
def named_parameters( | |||
self, prefix: Optional[str] = None, recursive: bool = True, **kwargs | |||
) -> Iterable[Tuple[str, Parameter]]: | |||
"""Returns an iterable for key :class:`~.Parameter` pairs of the module, where | |||
""" | |||
Returns an iterable for key :class:`~.Parameter` pairs of the module, where | |||
``key`` is the dotted path from this module to the :class:`~.Parameter`. | |||
:param prefix: prefix prepended to the keys. | |||
@@ -219,7 +225,8 @@ class Module(metaclass=ABCMeta): | |||
) | |||
def buffers(self, recursive: bool = True, **kwargs) -> Iterable[Tensor]: | |||
"""Returns an iterable for the buffers of the module. | |||
""" | |||
Returns an iterable for the buffers of the module. | |||
Buffer is defined to be :class:`~.Tensor` excluding :class:`~.Parameter`. | |||
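For instance, iterating over parameters and buffers might look like this sketch (the module is chosen arbitrarily):

.. code-block:: python

    import megengine.module as M

    net = M.BatchNorm2d(16)

    for name, param in net.named_parameters():
        print("parameter:", name, param.numpy().shape)

    for buf in net.buffers():
        print("buffer:", buf.numpy().shape)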
@@ -234,7 +241,8 @@ class Module(metaclass=ABCMeta): | |||
def named_buffers( | |||
self, prefix: Optional[str] = None, recursive: bool = True, **kwargs | |||
) -> Iterable[Tuple[str, Tensor]]: | |||
"""Returns an iterable for key buffer pairs of the module, where | |||
""" | |||
Returns an iterable for key buffer pairs of the module, where | |||
``key`` is the dotted path from this module to the buffer. | |||
Buffer is defined to be :class:`~.Tensor` excluding :class:`~.Parameter`. | |||
@@ -253,7 +261,8 @@ class Module(metaclass=ABCMeta): | |||
) | |||
def children(self, **kwargs) -> "Iterable[Module]": | |||
"""Returns an iterable for all the submodules that are direct attributes of this | |||
""" | |||
Returns an iterable for all the submodules that are direct attributes of this | |||
module. | |||
""" | |||
yield from self._flatten( | |||
@@ -261,7 +270,8 @@ class Module(metaclass=ABCMeta): | |||
) | |||
def named_children(self, **kwargs) -> "Iterable[Tuple[str, Module]]": | |||
"""Returns an iterable of key-submodule pairs for all the submodules that are | |||
""" | |||
Returns an iterable of key-submodule pairs for all the submodules that are | |||
direct attributes of this module, where 'key' is the attribute name of | |||
submodules. | |||
""" | |||
@@ -270,7 +280,8 @@ class Module(metaclass=ABCMeta): | |||
) | |||
def modules(self, **kwargs) -> "Iterable[Module]": | |||
"""Returns an iterable for all the modules within this module, including itself. | |||
""" | |||
Returns an iterable for all the modules within this module, including itself. | |||
""" | |||
if "with_parent" in kwargs and kwargs["with_parent"]: | |||
yield self, None | |||
@@ -281,7 +292,8 @@ class Module(metaclass=ABCMeta): | |||
def named_modules( | |||
self, prefix: Optional[str] = None, **kwargs | |||
) -> "Iterable[Tuple[str, Module]]": | |||
"""Returns an iterable of key-module pairs for all the modules within this | |||
""" | |||
Returns an iterable of key-module pairs for all the modules within this | |||
module, including itself, where 'key' is the dotted path from this module to the | |||
submodules. | |||
@@ -296,7 +308,8 @@ class Module(metaclass=ABCMeta): | |||
) | |||
def apply(self, fn: "Callable[[Module], Any]") -> None: | |||
"""Applies function ``fn`` to all the modules within this module, including | |||
""" | |||
Applies function ``fn`` to all the modules within this module, including | |||
itself. | |||
:param fn: the function to be applied on modules. | |||
@@ -306,14 +319,16 @@ class Module(metaclass=ABCMeta): | |||
@deprecated(version="1.0") | |||
def zero_grad(self) -> None: | |||
"""Sets all parameters' grads to zero | |||
""" | |||
Sets all parameters' grads to zero | |||
""" | |||
for param in self.parameters(): | |||
if param.grad is not None: | |||
param.grad.reset_zero() | |||
def train(self, mode: bool = True, recursive: bool = True) -> None: | |||
"""Sets training mode of all the modules within this module (including itself) to | |||
""" | |||
Sets training mode of all the modules within this module (including itself) to | |||
``mode``. This effectively sets the ``training`` attributes of those modules | |||
to ``mode``, but only has effect on certain modules (e.g. | |||
:class:`~.BatchNorm2d`, :class:`~.Dropout`, :class:`~.Observer`) | |||
@@ -331,7 +346,8 @@ class Module(metaclass=ABCMeta): | |||
self.apply(fn) | |||
def eval(self) -> None: | |||
"""Sets training mode of all the modules within this module (including itself) to | |||
""" | |||
Sets training mode of all the modules within this module (including itself) to | |||
``False``. See :meth:`~.Module.train` for details. | |||
""" | |||
self.train(False) | |||
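A short sketch of toggling the mode flag described by ``train`` and ``eval``; the submodules are for illustration:

.. code-block:: python

    import megengine.module as M

    net = M.Sequential(M.Linear(8, 8), M.Dropout(drop_prob=0.5))

    net.train()      # sets training = True recursively; dropout is active
    net.eval()       # same as net.train(False); dropout becomes identity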
@@ -351,7 +367,8 @@ class Module(metaclass=ABCMeta): | |||
def replace_param( | |||
self, params: dict, start_pos: int, seen: Optional[Set[int]] = None | |||
): | |||
"""Replaces module's parameters with ``params``, used by :class:`~.ParamPack` to | |||
""" | |||
Replaces module's parameters with ``params``, used by :class:`~.ParamPack` to | |||
speed up multi-machine training. | |||
""" | |||
offset = 0 | |||
@@ -377,7 +394,8 @@ class Module(metaclass=ABCMeta): | |||
return offset | |||
def state_dict(self, rst=None, prefix="", keep_var=False): | |||
r"""Returns a dictionary containing whole states of the module. | |||
r""" | |||
Returns a dictionary containing whole states of the module. | |||
""" | |||
def is_state(obj): | |||
@@ -407,7 +425,8 @@ class Module(metaclass=ABCMeta): | |||
state_dict: Union[dict, Callable[[str, Tensor], Optional[np.ndarray]]], | |||
strict=True, | |||
): | |||
r"""Loads a given dictionary created by :func:`state_dict` into this module. | |||
r""" | |||
Loads a given dictionary created by :func:`state_dict` into this module. | |||
If ``strict`` is ``True``, the keys of the given ``state_dict`` must exactly match the keys | |||
returned by this module's :meth:`state_dict`. | |||
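A hedged round-trip sketch combining ``state_dict`` with MegEngine's serialization helpers (the file name is illustrative):

.. code-block:: python

    import megengine as mge
    import megengine.module as M

    net = M.Linear(4, 2)
    mge.save(net.state_dict(), "linear.pkl")          # persist parameters only

    restored = M.Linear(4, 2)
    restored.load_state_dict(mge.load("linear.pkl"))  # strict key matching by default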
@@ -485,7 +504,8 @@ class Module(metaclass=ABCMeta): | |||
) | |||
def _load_state_dict_with_closure(self, closure): | |||
"""Advance state_dict load through callable ``closure`` whose signature is | |||
""" | |||
Advances state_dict loading through callable ``closure`` whose signature is | |||
``closure(key: str, var: Tensor) -> Union[np.ndarray, None]`` | |||
""" | |||
assert callable(closure), "closure must be a function" | |||
@@ -536,7 +556,8 @@ class Module(metaclass=ABCMeta): | |||
super().__delattr__(name) | |||
def _module_info_string(self) -> str: | |||
r"""Set the extra representation of the module. | |||
r""" | |||
Sets the extra representation of the module. | |||
""" | |||
return "" | |||
@@ -36,7 +36,8 @@ class _PoolNd(Module): | |||
class MaxPool2d(_PoolNd): | |||
r"""Applies a 2D max pooling over an input. | |||
r""" | |||
Applies a 2D max pooling over an input. | |||
For instance, given an input of the size :math:`(N, C, H, W)` and | |||
:attr:`kernel_size` :math:`(kH, kW)`, this layer generates the output of | |||
@@ -83,7 +84,8 @@ class MaxPool2d(_PoolNd): | |||
class AvgPool2d(_PoolNd): | |||
r"""Applies a 2D average pooling over an input. | |||
r""" | |||
Applies a 2D average pooling over an input. | |||
For instance, given an input of the size :math:`(N, C, H, W)` and | |||
:attr:`kernel_size` :math:`(kH, kW)`, this layer generates the output of | |||
@@ -19,7 +19,8 @@ from .module import QuantizedModule | |||
class Conv2d(Float.Conv2d, QuantizedModule): | |||
r"""Quantized version of :class:`~.qat.conv.Conv2d`.""" | |||
r"""Applies a 2D convolution over a quantized input tensor, used for inference only. | |||
r""" | |||
Applies a 2D convolution over a quantized input tensor, used for inference only. | |||
The parameters are the same as :class:`~.Conv2d`. | |||
""" | |||
@@ -11,7 +11,8 @@ from .conv import Conv2d | |||
class _ConvBnActivation2d(Conv2d): | |||
r"""Applies a 2D convolution over a quantized input tensor, used for inference only. | |||
r""" | |||
Applies a 2D convolution over a quantized input tensor, used for inference only. | |||
The parameters are the same as :class:`~.Conv2d`. | |||
""" | |||
@@ -12,7 +12,8 @@ from .module import Module | |||
class Sequential(Module): | |||
r"""A sequential container. | |||
r""" | |||
A sequential container. | |||
Modules will be added to it in the order they are passed in the constructor. | |||
Alternatively, an ordered dict of modules can also be passed in. | |||
@@ -29,10 +30,9 @@ class Sequential(Module): | |||
from collections import OrderedDict | |||
batch_size = 64 | |||
data = mge.tensor(np.zeros((batch_size, 1, 28, 28)), dtype=np.float32) | |||
data = mge.tensor(np.zeros((batch_size, 28 * 28)), dtype=np.float32) | |||
label = mge.tensor(np.zeros(batch_size,), dtype=np.int32) | |||
data = data.reshape(batch_size, -1) | |||
net0 = M.Sequential( | |||
M.Linear(28 * 28, 320), | |||
M.Linear(320, 10) | |||
@@ -40,10 +40,9 @@ class Sequential(Module): | |||
pred0 = net0(data) | |||
modules = OrderedDict() | |||
modules["fc0"] = nn.Linear(28 * 28, 320) | |||
modules["fc1"] = nn.Linear(320, 10) | |||
net1 = nn.Sequential(modules) | |||
modules["fc0"] = M.Linear(28 * 28, 320) | |||
modules["fc1"] = M.Linear(320, 10) | |||
net1 = M.Sequential(modules) | |||
pred1 = net1(data) | |||
""" | |||
@@ -16,7 +16,8 @@ from .optimizer import Optimizer | |||
class Adadelta(Optimizer): | |||
r"""Implements Adadelta algorithm. | |||
r""" | |||
Implements Adadelta algorithm. | |||
It has been proposed in `"ADADELTA: An Adaptive Learning Rate Method" <https://arxiv.org/abs/1212.5701>`_. | |||
@@ -16,7 +16,8 @@ from .optimizer import Optimizer | |||
class Adagrad(Optimizer): | |||
r"""Implements Adagrad algorithm. | |||
r""" | |||
Implements Adagrad algorithm. | |||
It has been proposed in `"Adaptive Subgradient Methods for Online Learning | |||
and Stochastic Optimization" <http://jmlr.org/papers/v12/duchi11a.html>`_. | |||
@@ -13,7 +13,8 @@ from .optimizer import Optimizer | |||
class Adam(Optimizer): | |||
r"""Implements Adam algorithm proposed in `"Adam: A Method for Stochastic Optimization" <https://arxiv.org/abs/1412.6980>`_. | |||
r""" | |||
Implements Adam algorithm proposed in `"Adam: A Method for Stochastic Optimization" <https://arxiv.org/abs/1412.6980>`_. | |||
:param params: iterable of parameters to optimize or dicts defining | |||
parameter groups. | |||
@@ -12,7 +12,8 @@ from .optimizer import Optimizer | |||
class LRScheduler(metaclass=ABCMeta): | |||
r"""Base class for all learning rate based schedulers. | |||
r""" | |||
Base class for all learning rate based schedulers. | |||
:param optimizer: wrapped optimizer. | |||
:param current_epoch: the index of current epoch. Default: -1 | |||
@@ -44,14 +45,16 @@ class LRScheduler(metaclass=ABCMeta): | |||
self.step() | |||
def state_dict(self): | |||
r"""Returns the state of the scheduler as a :class:`dict`. | |||
r""" | |||
Returns the state of the scheduler as a :class:`dict`. | |||
It contains an entry for every variable in self.__dict__ which | |||
is not the optimizer. | |||
""" | |||
raise NotImplementedError | |||
def load_state_dict(self, state_dict): | |||
r"""Loads the schedulers state. | |||
r""" | |||
Loads the scheduler's state. | |||
:type state_dict: dict | |||
:param state_dict: scheduler state. | |||
@@ -14,7 +14,8 @@ from .optimizer import Optimizer | |||
class MultiStepLR(LRScheduler): | |||
r"""Decays the learning rate of each parameter group by gamma once the | |||
r""" | |||
Decays the learning rate of each parameter group by gamma once the | |||
number of epochs reaches one of the milestones. | |||
:param optimizer: wrapped optimizer. | |||
@@ -44,7 +45,8 @@ class MultiStepLR(LRScheduler): | |||
super().__init__(optimizer, current_epoch) | |||
def state_dict(self): | |||
r"""Returns the state of the scheduler as a :class:`dict`. | |||
r""" | |||
Returns the state of the scheduler as a :class:`dict`. | |||
It contains an entry for every variable in self.__dict__ which | |||
is not the optimizer. | |||
""" | |||
@@ -55,7 +57,8 @@ class MultiStepLR(LRScheduler): | |||
} | |||
def load_state_dict(self, state_dict): | |||
r"""Loads the schedulers state. | |||
r""" | |||
Loads the scheduler's state. | |||
:type state_dict: dict | |||
:param state_dict: scheduler state. | |||
@@ -28,7 +28,8 @@ required = _RequiredParameter() | |||
class Optimizer(metaclass=ABCMeta): | |||
r"""Base class for all optimizers. | |||
r""" | |||
Base class for all optimizers. | |||
:param params: specifies what Tensors should be optimized. | |||
:param defaults: a dict of default parameters of Optimizer, like learning rate or momentum. | |||
@@ -72,7 +73,8 @@ class Optimizer(metaclass=ABCMeta): | |||
self._create_state(group) | |||
def add_param_group(self, param_group: dict): | |||
r"""Add a param group to ``param_groups`` of the :class:`~megengine.optim.optimizer.Optimizer`. | |||
r""" | |||
Add a param group to ``param_groups`` of the :class:`~megengine.optim.optimizer.Optimizer`. | |||
This can be useful when fine tuning a pre-trained network as frozen layers can be made | |||
trainable and added to the :class:`~megengine.optim.optimizer.Optimizer` as training progresses. | |||
@@ -137,7 +139,8 @@ class Optimizer(metaclass=ABCMeta): | |||
return params | |||
def step(self): | |||
r"""Performs a single optimization step. | |||
r""" | |||
Performs a single optimization step. | |||
""" | |||
for group in self.param_groups: | |||
@@ -158,14 +161,16 @@ class Optimizer(metaclass=ABCMeta): | |||
param.grad.reset_zero() | |||
def clear_grad(self): | |||
r"""Set the grad attribute to None for all parameters. | |||
r""" | |||
Set the grad attribute to None for all parameters. | |||
""" | |||
for param_group in self.param_groups: | |||
for param in param_group["params"]: | |||
param.grad = None | |||
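A sketch of one optimization step built from these methods; the forward and backward passes are elided since gradient computation is covered elsewhere:

.. code-block:: python

    import megengine.module as M
    import megengine.optimizer as optim

    net = M.Linear(4, 2)
    opt = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)

    opt.clear_grad()    # drop gradients left over from the previous iteration
    # ... forward pass, loss, and backward pass filling param.grad ...
    opt.step()          # apply one parameter update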
def state_dict(self) -> Dict: | |||
r"""Export the optimizer state. | |||
r""" | |||
Export the optimizer state. | |||
:return: optimizer state. Can be loaded by :meth:`load_state_dict`. | |||
""" | |||
@@ -191,7 +196,8 @@ class Optimizer(metaclass=ABCMeta): | |||
return {"param_groups": param_groups, "state": state} | |||
def load_state_dict(self, state: dict): | |||
r"""Loads the optimizer state. | |||
r""" | |||
Loads the optimizer state. | |||
:param state: optimizer state. Should be an object returned | |||
from a call to :meth:`state_dict`. | |||
@@ -13,7 +13,8 @@ from .optimizer import Optimizer | |||
class SGD(Optimizer): | |||
r"""Implements stochastic gradient descent. | |||
r""" | |||
Implements stochastic gradient descent. | |||
Nesterov momentum is based on the formula from | |||
`"On the importance of initialization and momentum in deep learning" <http://www.cs.toronto.edu/%7Ehinton/absps/momentum.pdf>`_ . | |||
@@ -174,7 +174,8 @@ class HistogramObserver(MinMaxObserver): | |||
self.histogram = Tensor([-1] + [0.0] * (bins - 1), dtype="float32") | |||
def _non_linear_param_search(self): | |||
r"""Non-linear parameter search. | |||
r""" | |||
Non-linear parameter search. | |||
An approximation for L2 error minimization for selecting min/max. | |||
By selecting new min/max, we filter out outliers in input distribution. | |||
""" | |||
@@ -43,7 +43,8 @@ def register_method_to_class(cls): | |||
class QuantMode(Enum): | |||
"""Quantization mode enumerate class. | |||
""" | |||
Quantization mode enumeration class. | |||
""" | |||
SYMMERTIC = 1 | |||
@@ -63,13 +64,15 @@ qparam_dict = { | |||
def get_qparam_dict(mode: QuantMode): | |||
"""Return the quantization parameters dictionary according to the mode. | |||
""" | |||
Return the quantization parameters dictionary according to the mode. | |||
""" | |||
return qparam_dict.get(mode, None) | |||
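For illustration, fetching the parameter template for a mode; the import path is assumed from the module shown above, and the enum member name follows the spelling in the source:

.. code-block:: python

    from megengine.quantization.utils import QuantMode, get_qparam_dict

    q_dict = get_qparam_dict(QuantMode.SYMMERTIC)   # spelled as in the enum above
    print(q_dict)                                   # mode-specific keys, or None if unknown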
def fake_quant_tensor(inp: Tensor, qmin: int, qmax: int, q_dict: Dict) -> Tensor: | |||
"""Apply fake quantization to the inp tensor. | |||
""" | |||
Applies fake quantization to the input tensor ``inp``. | |||
:param inp: the input tensor to be fake-quantized. | |||
:param qmin: the lower bound of the quantized integer range. | |||
@@ -91,7 +94,8 @@ def fake_quant_tensor(inp: Tensor, qmin: int, qmax: int, q_dict: Dict) -> Tensor | |||
def fake_quant_bias(bias: Tensor, inp: Tensor, w_qat: Tensor) -> Tensor: | |||
"""Apply fake quantization to bias, with the special scale from input tensor | |||
""" | |||
Applies fake quantization to the bias, using the scale derived from the input tensor | |||
and the weight tensor; the quantized dtype is set to qint32. | |||
:param bias: the bias tensor which need to be faked. | |||
@@ -21,7 +21,8 @@ __all__ = ["normal", "uniform"] | |||
def normal( | |||
mean: float = 0, std: float = 1, size: Optional[Iterable[int]] = None | |||
) -> Tensor: | |||
r"""Random variable with Gaussian distribution :math:`N(\mu, \sigma)`. | |||
r""" | |||
Random variable with Gaussian distribution :math:`N(\mu, \sigma)`. | |||
:param size: output tensor size. | |||
:param mean: the mean or expectation of the distribution. | |||
@@ -59,7 +60,8 @@ def normal( | |||
def uniform( | |||
low: float = 0, high: float = 1, size: Optional[Iterable[int]] = None | |||
) -> Tensor: | |||
r"""Random variable with uniform distribution $U(0, 1)$. | |||
r""" | |||
Random variable with uniform distribution :math:`U(\text{low}, \text{high})`. | |||
:param size: output tensor size. | |||
:param low: lower range. | |||
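A minimal sketch of drawing samples from the two distributions described above:

.. code-block:: python

    import megengine.random as rand

    gauss = rand.normal(mean=0.0, std=1.0, size=(2, 3))    # N(0, 1) samples
    unif = rand.uniform(low=-1.0, high=1.0, size=(2, 3))   # U(-1, 1) samples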
@@ -14,7 +14,8 @@ from .utils.max_recursion_limit import max_recursion_limit | |||
def save(obj, f, pickle_module=pickle, pickle_protocol=pickle.HIGHEST_PROTOCOL): | |||
r"""Save an object to disk file. | |||
r""" | |||
Save an object to disk file. | |||
:type obj: object | |||
:param obj: object to save. Only ``module`` or ``state_dict`` are allowed. | |||
@@ -81,7 +82,8 @@ def _get_callable_map_location(map_location): | |||
def load(f, map_location=None, pickle_module=pickle): | |||
r"""Load an object saved with save() from a file. | |||
r""" | |||
Load an object saved with save() from a file. | |||
:type f: text file object | |||
:param f: a string of file name or a text file object from which to load. | |||
@@ -97,5 +97,6 @@ tensor = Tensor | |||
class Parameter(Tensor): | |||
r"""A kind of Tensor that is to be considered a module parameter. | |||
r""" | |||
A kind of Tensor that is to be considered a module parameter. | |||
""" |
@@ -17,7 +17,8 @@ from ..core.tensor.raw_tensor import as_raw_tensor | |||
def get_dep_vars(var: VarNode, var_type: str = None) -> List[VarNode]: | |||
"""Returns :class:`.tensor.core.megbrain_graph.VarNode` of type ``var_type`` that input ``var`` | |||
""" | |||
Returns :class:`.tensor.core.megbrain_graph.VarNode` of type ``var_type`` that input ``var`` | |||
depends on. If ``var_type`` is None, returns all types. | |||
""" | |||
outputs = [] | |||
@@ -46,14 +47,16 @@ def get_dep_vars(var: VarNode, var_type: str = None) -> List[VarNode]: | |||
def get_owner_opr_inputs(var: VarNode) -> List[VarNode]: | |||
"""Gets the inputs of owner opr of a variable. | |||
""" | |||
Gets the inputs of owner opr of a variable. | |||
""" | |||
assert isinstance(var, VarNode) | |||
return var.owner.inputs | |||
def get_owner_opr_type(var: VarNode) -> str: | |||
"""Gets the type of owner opr of a variable. | |||
""" | |||
Gets the type of owner opr of a variable. | |||
""" | |||
assert isinstance(var, VarNode) | |||
@@ -61,14 +64,16 @@ def get_owner_opr_type(var: VarNode) -> str: | |||
def get_opr_type(opr: OperatorNode) -> str: | |||
"""Gets the type of an opr. | |||
""" | |||
Gets the type of an opr. | |||
""" | |||
assert isinstance(opr, OperatorNode) | |||
return opr.type | |||
def graph_traversal(outputs: VarNode): | |||
"""Helper function to traverse the computing graph and return enough useful information. | |||
""" | |||
Helper function to traverse the computing graph and return useful information about it. | |||
:param outputs: model outputs. | |||
:return: tuple (map_oprs, map_vars, var2oprs, opr2receivers, indegree2opr, opr2indegree) | |||
@@ -124,7 +129,8 @@ def graph_traversal(outputs: VarNode): | |||
def get_oprs_seq(outputs: List[VarNode], prune_reshape=False) -> List[OperatorNode]: | |||
"""Gets oprs in some topological order for a dumped model. | |||
""" | |||
Gets oprs in some topological order for a dumped model. | |||
:param outputs: model outputs. | |||
:param prune_reshape: whether to prune the useless operators during inference. | |||
@@ -194,7 +200,8 @@ def get_oprs_seq(outputs: List[VarNode], prune_reshape=False) -> List[OperatorNo | |||
def replace_vars(dst: VarNode, varmap: Dict[VarNode, VarNode]) -> List[VarNode]: | |||
"""Replaces vars in the graph. | |||
""" | |||
Replaces vars in the graph. | |||
:param dst: target vars representing the graph. | |||
:param varmap: the map that specifies how to replace the vars. | |||
@@ -221,7 +228,8 @@ def replace_vars(dst: VarNode, varmap: Dict[VarNode, VarNode]) -> List[VarNode]: | |||
def replace_oprs( | |||
dst: List[VarNode], oprmap: Dict[OperatorNode, OperatorNode] | |||
) -> List[VarNode]: | |||
"""Replaces operators in the graph. | |||
""" | |||
Replaces operators in the graph. | |||
:param dst: target vars representing the graph. | |||
:param oprmap: the map that specifies how to replace the operators. | |||
@@ -246,7 +254,8 @@ def replace_oprs( | |||
def set_priority_to_id(dest_vars): | |||
"""For all oprs in the subgraph constructed by dest_vars, | |||
""" | |||
For all oprs in the subgraph constructed by ``dest_vars``, | |||
sets each opr's priority to its id if its original priority is zero. | |||
:param dest_vars: target vars representing the graph. | |||
""" | |||
@@ -258,7 +267,8 @@ def set_priority_to_id(dest_vars): | |||
def load_and_inference(file, inp_data_list: List[numpy.ndarray]) -> List[numpy.ndarray]: | |||
"""Loads a serialized computing graph and run inference with input data. | |||
""" | |||
Loads a serialized computing graph and runs inference with input data. | |||
:param file: path or handle of the input file. | |||
:param inp_data_list: list of input data. | |||
@@ -16,7 +16,8 @@ if platform.system() != "Windows": | |||
class AlternativeRecursionLimit: | |||
r"""A reentrant context manager for setting global recursion limits. | |||
r""" | |||
A reentrant context manager for setting global recursion limits. | |||
""" | |||
def __init__(self, new_py_limit): | |||
@@ -73,6 +74,7 @@ _max_recursion_limit_context_manager = AlternativeRecursionLimit(2 ** 31 - 1) | |||
def max_recursion_limit(): | |||
r"""Sets recursion limit to the max possible value. | |||
r""" | |||
Sets recursion limit to the max possible value. | |||
""" | |||
return _max_recursion_limit_context_manager |
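A sketch of using the context manager; the import path is assumed from the module layout shown:

.. code-block:: python

    from megengine.utils.max_recursion_limit import max_recursion_limit

    with max_recursion_limit():
        # run work that may recurse deeply, e.g. pickling a large nested object
        pass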
@@ -12,7 +12,8 @@ import numpy as np | |||
def load_tensor_binary(fobj): | |||
"""Load a tensor dumped by the :class:`BinaryOprIODump` plugin; the actual | |||
""" | |||
Load a tensor dumped by the :class:`BinaryOprIODump` plugin; the actual | |||
tensor value dump is implemented by ``mgb::debug::dump_tensor``. | |||
Multiple values can be compared by ``tools/compare_binary_iodump.py``. | |||
@@ -57,7 +57,8 @@ def _tabulate_confluence(tab, **kwargs): | |||
def main(passed_args=None): # pylint: disable=too-many-statements | |||
"""Analyses profile info from :mod:`~.utils.profile_analyzer` . | |||
""" | |||
Analyses profile info from :mod:`~.utils.profile_analyzer` . | |||
Run this file with ``--help`` to get more usage. | |||
""" | |||
@@ -15,7 +15,8 @@ import numpy as np | |||
class NonExistNum: | |||
"""An object that behaves like a number but means a field does not exist; It is | |||
""" | |||
An object that behaves like a number but means a field does not exist; it is | |||
always greater than any real number. | |||
""" | |||
@@ -64,15 +65,18 @@ class OprProfRst: | |||
"""A dict containing operator info: name, id and type.""" | |||
time_dict = None | |||
"""A mapping from ``"host"`` or ``"device"`` to list of profiling | |||
""" | |||
A mapping from ``"host"`` or ``"device"`` to list of profiling | |||
results.""" | |||
footprint = None | |||
"""A mapping from ``"memory"`` or ``"computation"`` to the actual number | |||
""" | |||
A mapping from ``"memory"`` or ``"computation"`` to the actual number | |||
of corresponding operations.""" | |||
def __init__(self, entry: dict): | |||
"""Opr profiling initialization, which sets up name, type and id of opr_info. | |||
""" | |||
Opr profiling initialization, which sets up name, type and id of opr_info. | |||
:param entry: profiling json exec_graph items. | |||
""" | |||
@@ -84,7 +88,8 @@ class OprProfRst: | |||
self.footprint = collections.defaultdict(NonExistNum) | |||
def update_device_prof_info(self, dev_time: dict): | |||
"""Updates device profiling info. | |||
""" | |||
Updates device profiling info. | |||
:param dev_time: device time for single opr, | |||
is an attribute of profiling result. | |||
@@ -93,7 +98,8 @@ class OprProfRst: | |||
self.time_dict["device"].append(copy.deepcopy(dev_time)) | |||
def update_host_prof_info(self, host_time: dict): | |||
"""Updates host profiling info. | |||
""" | |||
Updates host profiling info. | |||
:param host_time: host time for single opr, | |||
is an attribute of profiling result. | |||
@@ -102,7 +108,8 @@ class OprProfRst: | |||
self.time_dict["host"].append(copy.deepcopy(host_time)) | |||
def update_footprint(self, footprint: dict): | |||
"""Updates opr footprint. | |||
""" | |||
Updates opr footprint. | |||
:param footprint: footprint for single opr, | |||
is an attribute of profiling result. | |||
@@ -128,7 +135,8 @@ class Record: | |||
] | |||
def __init__(self, time: float, info: dict, footprint: dict): | |||
"""Initializes single record. | |||
""" | |||
Initializes single record. | |||
:param time: opr running time, evaluated by applying users providing | |||
function to OprProfRst. | |||
@@ -153,7 +161,8 @@ class Record: | |||
self.opr_id = int(self.opr_id) | |||
def get_column_by_name(self, name: str = None): | |||
"""Extracts column value by its column name. | |||
""" | |||
Extracts column value by its column name. | |||
:param name: column name, None for time. | |||
""" | |||
@@ -165,7 +174,8 @@ class Record: | |||
class ProfileAnalyzer: | |||
def __init__(self, obj: dict, opr_filter: Callable = lambda opr, inp, out: True): | |||
"""Initializes ProfileAnalyzer. | |||
""" | |||
Initializes ProfileAnalyzer. | |||
:param obj: dict dumped from json str. | |||
:param opr_filter: function that filter oprs. | |||
@@ -202,7 +212,8 @@ class ProfileAnalyzer: | |||
def _aggregate( | |||
self, records: List[Record], aop: Union[str, Callable], atype: Optional[str] | |||
) -> List[Record]: | |||
"""Aggregate operation. | |||
""" | |||
Aggregate operation. | |||
:param records: selected records. | |||
:param aop: aggregate operation, if aop is str, we would replace it | |||
@@ -247,7 +258,8 @@ class ProfileAnalyzer: | |||
return rst | |||
def _sort(self, records: List[Record], sort_by: str) -> List[Record]: | |||
"""Sort operation. | |||
""" | |||
Sort operation. | |||
:param records: the records after aggregate operation. | |||
:param sort_by: keyword for sorting the list. | |||
@@ -271,7 +283,8 @@ class ProfileAnalyzer: | |||
sort_by: str = None, | |||
top_k: int = 0, | |||
) -> List[Record]: | |||
"""Select operation. | |||
""" | |||
Select operation. | |||
:param time_func: time_func provided by user, would apply to every | |||
OprProfRst. | |||
@@ -304,7 +317,8 @@ class TimeFuncHelper: | |||
@staticmethod | |||
def _eval_time(prof_type, end_key, func, opr_prof): | |||
"""Eval time. | |||
""" | |||
Eval time. | |||
:type prof_type: str | |||
:param prof_type: 'host' or 'device'. | |||
@@ -325,7 +339,8 @@ class TimeFuncHelper: | |||
@staticmethod | |||
def eval_time_func(prof_type: str, end_key: str, func: Callable) -> float: | |||
"""Eval oprerator profile time. | |||
""" | |||
Eval operator profile time. | |||
:param prof_type: 'host' or 'device'. | |||
:param end_key: 'kern' or 'end'. | |||
@@ -338,7 +353,8 @@ class TimeFuncHelper: | |||
def _min_start( | |||
prof_type, end_key, func, opr_prof | |||
): # pylint: disable=unused-argument | |||
"""Eval minimum start time. | |||
""" | |||
Eval minimum start time. | |||
:type prof_type: str | |||
:param prof_type: 'host' or 'device'. | |||
@@ -360,7 +376,8 @@ class TimeFuncHelper: | |||
def min_start_func( | |||
prof_type: str, end_key: str, func: Callable | |||
) -> float: # pylint: disable=unused-argument | |||
"""Eval oprerator profile min start time. | |||
""" | |||
Eval operator profile min start time. | |||
:param prof_type: 'host' or 'device'. | |||
:param end_key: 'kern' or 'end'. | |||
@@ -371,7 +388,8 @@ class TimeFuncHelper: | |||
@staticmethod | |||
def _max_end(prof_type, end_key, func, opr_prof): # pylint: disable=unused-argument | |||
"""Eval maximum end time | |||
""" | |||
Eval maximum end time. | |||
:type prof_type: str | |||
:param prof_type: 'host' or 'device'. | |||
@@ -391,7 +409,8 @@ class TimeFuncHelper: | |||
@staticmethod | |||
def max_end_func(prof_type: str, end_key: str, func: Callable) -> float: | |||
"""Eval oprerator profile max end time. | |||
""" | |||
Eval operator profile max end time. | |||
:param prof_type: 'host' or 'device'. | |||
:param end_key: 'kern' or 'end'. | |||
@@ -169,7 +169,7 @@ class Profiler: | |||
Examples: | |||
.. testcode:: | |||
.. code-block:: | |||
import megengine as mge | |||
import megengine.module as M | |||
@@ -3,7 +3,8 @@ from ..core._imperative_rt.imperative import sync | |||
class TensorSanityCheck: | |||
r"""An object that checks whether the input tensors of each operator have changed before and after the operation. | |||
r""" | |||
An object that checks whether the input tensors of each operator have changed before and after the operation. | |||
Examples: | |||
@@ -11,7 +11,8 @@ import functools | |||
def get_ndtuple(value, *, n, allow_zero=True): | |||
r"""Converts possibly 1D tuple to nd tuple. | |||
r""" | |||
Converts possibly 1D tuple to nd tuple. | |||
:type allow_zero: bool | |||
:param allow_zero: whether to allow zero tuple value.""" | |||
@@ -1,12 +0,0 @@ | |||
# -*- coding: utf-8 -*- | |||
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
# | |||
# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. | |||
# | |||
# Unless required by applicable law or agreed to in writing, | |||
# software distributed under the License is distributed on an | |||
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
[pytest] | |||
markers = | |||
isolated_distributed: marks distributed tests that should run without cuda use | |||
in main thread (deselect with '-m "not isolated_distributed"') |
@@ -1,6 +1,6 @@ | |||
#!/bin/bash -e | |||
test_dirs="test megengine" | |||
test_dirs="megengine test" | |||
TEST_PLAT=$1 | |||