@@ -17,7 +17,8 @@ def get_backwarding_grad_manager():
 class GradManager:
-r"""GradManager manages auto differentiation and all resources required to perform it.
+r"""
+GradManager manages auto differentiation and all resources required to perform it.
 Our auto differentiation framework requires that the user explicitly indicates when
 the forward operations start and when all resources should be released. A typical usage of
@@ -71,7 +72,8 @@ class GradManager:
 self._gradients = dict()
 def attach(self, params: list, callbacks=None):
-r"""Registers parameters that gradients should be calculated with respect to.
+r"""
+Registers parameters that gradients should be calculated with respect to.
 Callback Functions should have a signature like this:
 .. code-block::
@@ -99,7 +101,8 @@ class GradManager:
 return self
 def detach(self, params: list):
-r"""Remove specific registered parameters and callback functions.
+r"""
+Remove specific registered parameters and callback functions.
 :param params: registered parameters
 """
@@ -125,7 +128,8 @@ class GradManager:
 return self
 def backward(self, ys, dys=None):
-r"""Performs back-propagation and computes gradients.
+r"""
+Performs back-propagation and computes gradients.
 :param ys: outputs of forward operators, e.g., the loss tensor
 :param dys: derivatives of ys
@@ -165,7 +169,8 @@ class GradManager:
 backwarding_grad_manager = cache
 def record(self):
-r"""Starts recording forward operations.
+r"""
+Starts recording forward operations.
 """
 if self._recording:
 raise RuntimeError("already recording")
@@ -190,7 +195,8 @@ class GradManager:
 self._grad.wrt(param_wrapper, callback=callback)
 def release(self):
-r"""Stops recording and releases resources for gradients calculation.
+r"""
+Stops recording and releases resources for gradients calculation.
 """
 if self._grad is not None:
 self._grad.__exit__(None, None, None)
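
For reference while reviewing, a minimal usage sketch of the API documented above (attach, record, backward, release); the import paths (megengine.autodiff.GradManager, megengine.Parameter) are the usual MegEngine ones and are assumed rather than taken from this diff:

    import numpy as np
    import megengine as mge
    import megengine.functional as F
    from megengine.autodiff import GradManager

    w = mge.Parameter(np.zeros((3, 1), dtype="float32"))
    x = mge.tensor(np.random.randn(8, 3).astype("float32"))
    y = mge.tensor(np.random.randn(8, 1).astype("float32"))

    gm = GradManager().attach([w])   # register the parameter to differentiate w.r.t.
    gm.record()                      # start recording forward operations
    loss = F.mean((F.matmul(x, w) - y) ** 2)
    gm.backward(loss)                # back-propagate; gradients accumulate into w.grad
    gm.release()                     # drop resources held for gradient computation
    print(w.grad)
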
@@ -15,7 +15,8 @@ if os.environ.get("MEGENGINE_USE_SYMBOLIC_SHAPE"):
 def use_symbolic_shape() -> bool:
-"""Returns whether tensor.shape returns a tensor instead of a tuple
+"""
+Returns whether tensor.shape returns a tensor instead of a tuple
 """
 return _use_symbolic_shape
@@ -78,7 +78,8 @@ class auto:
 class _EnumDict(dict):
-"""Track enum member order and ensure member names are not reused.
+"""
+Track enum member order and ensure member names are not reused.
 EnumMeta will use the names found in self._member_names as the
 enumeration member names.
@@ -91,7 +92,8 @@ class _EnumDict(dict):
 self._last_values = []
 def __setitem__(self, key, value):
-"""Changes anything not dundered or not a descriptor.
+"""
+Changes anything not dundered or not a descriptor.
 If an enum member name is used twice, an error is raised; duplicate
 values are not checked for.
@@ -303,7 +305,8 @@ class EnumMeta(type):
 def __call__(
 cls, value, names=None, *, module=None, qualname=None, type=None, start=1
 ):
-"""Either returns an existing member, or creates a new enum class.
+"""
+Either returns an existing member, or creates a new enum class.
 This method is used both when an enum class is given a value to match
 to an enumeration member (i.e. Color(3)) and for the functional API
@@ -353,7 +356,8 @@ class EnumMeta(type):
 ] + self._member_names_
 def __getattr__(cls, name):
-"""Return the enum member matching `name`
+"""
+Return the enum member matching `name`
 We use __getattr__ instead of descriptors or inserting into the enum
 class' __dict__ in order to support `name` and `value` being both
@@ -379,7 +383,8 @@ class EnumMeta(type):
 @property
 def __members__(cls):
-"""Returns a mapping of member name->value.
+"""
+Returns a mapping of member name->value.
 This mapping lists all enum members, including aliases. Note that this
 is a read-only view of the internal mapping.
@@ -394,7 +399,8 @@ class EnumMeta(type):
 return (cls._member_map_[name] for name in reversed(cls._member_names_))
 def __setattr__(cls, name, value):
-"""Block attempts to reassign Enum members.
+"""
+Block attempts to reassign Enum members.
 A simple assignment to the class namespace only changes one of the
 several possible ways to get an Enum member from the Enum class,
@@ -409,7 +415,8 @@ class EnumMeta(type):
 def _create_(
 cls, class_name, names=None, *, module=None, qualname=None, type=None, start=1
 ):
-"""Convenience method to create a new Enum class.
+"""
+Convenience method to create a new Enum class.
 `names` can be:
@@ -465,7 +472,8 @@ class EnumMeta(type):
 @staticmethod
 def _get_mixins_(bases):
-"""Returns the type for creating enum members, and the first inherited
+"""
+Returns the type for creating enum members, and the first inherited
 enum class.
 bases: the tuple of bases that was given to __new__
@@ -510,7 +518,8 @@ class EnumMeta(type):
 @staticmethod
 def _find_new_(classdict, member_type, first_enum):
-"""Returns the __new__ to be used for creating the enum members.
+"""
+Returns the __new__ to be used for creating the enum members.
 classdict: the class dictionary given to __new__
 member_type: the data type whose __new__ will be used by default
@@ -556,7 +565,8 @@ class EnumMeta(type):
 class Enum(metaclass=EnumMeta):
-"""Generic enumeration.
+"""
+Generic enumeration.
 Derive from this class to define new enumerations.
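
The enum docstrings above are easiest to see in action with a tiny example; this sketch uses the standard-library enum module, which this vendored copy mirrors:

    from enum import Enum

    class Color(Enum):
        RED = 1
        GREEN = 2
        BLUE = 3

    print(Color(3))                  # value lookup via EnumMeta.__call__ -> Color.BLUE
    print(Color["RED"])              # name lookup -> Color.RED
    print(list(Color.__members__))   # all member names, aliases included
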
@@ -188,7 +188,8 @@ class OpNode:
 def optimize_for_inference(dest_vars, **kwargs):
-r"""Applies optimize_for_inference pass for computing graph.
+r"""
+Applies the optimize_for_inference pass to the computing graph.
 :param dest_vars: list of output vars in the computing graph
@@ -287,7 +288,8 @@ def dump_graph(
 strip_info_file=None,
 append_json=False
 ):
-"""serialize the computing graph of `output_vars` and get byte result.
+"""
+Serialize the computing graph of `output_vars` and get byte result.
 :param output_vars: output variables which are the graph's end point.
@@ -385,7 +387,8 @@ CompGraphLoadResult = collections.namedtuple(
 def load_graph(fpath):
-"""Load a serialized computing graph from file.
+"""
+Load a serialized computing graph from file.
 :param fpath: Path or Handle of the input file
 :return: An instance of namedtuple :class:`CompGraphLoadResult`,
@@ -69,7 +69,8 @@ def ambiguity_warn(dispatcher, ambiguities):
 def variadic_signature_matches_iter(types, full_signature):
-"""Check if a set of input types matches a variadic signature.
+"""
+Check if a set of input types matches a variadic signature.
 Notes
 -----
@@ -288,7 +289,8 @@ class Dispatcher(CDispatcher):
 __repr__ = __str__
 def dispatch(self, *types):
-"""Deterimine appropriate implementation for this type signature
+"""
+Determine the appropriate implementation for this type signature
 This method is internal. Users should call this object as a function.
 Implementation resolution occurs within the ``__call__`` method.
@@ -110,7 +110,8 @@ def _toposort(edges):
 def reverse_dict(d):
-"""Reverses direction of dependence dict
+"""
+Reverses direction of dependence dict
 >>> d = {'a': (1, 2), 'b': (2, 3), 'c':()}
 >>> reverse_dict(d) # doctest: +SKIP
@@ -156,7 +157,8 @@ def groupby(func, seq):
 def typename(type):
-"""Get the name of `type`.
+"""
+Get the name of `type`.
 Parameters
 ----------
@@ -72,7 +72,8 @@ class VariadicSignatureType(type):
 def isvariadic(obj):
-"""Check whether the type `obj` is variadic.
+"""
+Check whether the type `obj` is variadic.
 Parameters
 ----------
@@ -95,7 +96,8 @@ def isvariadic(obj):
 class VariadicSignatureMeta(type):
-"""A metaclass that overrides ``__getitem__`` on the class. This is used to
+"""
+A metaclass that overrides ``__getitem__`` on the class. This is used to
 generate a new type for Variadic signatures. See the Variadic class for
 examples of how this behaves.
 """
@@ -117,7 +119,8 @@ class VariadicSignatureMeta(type):
 class Variadic(metaclass=VariadicSignatureMeta):
-"""A class whose getitem method can be used to generate a new type
+"""
+A class whose getitem method can be used to generate a new type
 representing a specific variadic signature.
 Examples
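
A small illustration of the variadic-signature helpers above; it assumes this vendored module behaves like the upstream multipledispatch package it is taken from (the import path is an assumption, not part of this diff):

    from multipledispatch.variadic import Variadic, isvariadic  # assumed upstream equivalent

    VarInt = Variadic[int]        # a type standing for "any number of ints"
    print(isvariadic(VarInt))     # True
    print(isvariadic(int))        # False
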
@@ -389,7 +389,8 @@ class ArrayMethodMixin(abc.ABC):
 return self.reshape(-1)
 def sum(self, axis=None, keepdims: bool = False):
-r"""Returns the sum of each row of the input tensor in the given dimension ``axis``.
+r"""
+Returns the sum of each row of the input tensor in the given dimension ``axis``.
 If ``axis`` is a list of axises, reduce over all of them.
 If ``keepdims`` is ``True``, the shape of output tensor is the same as the input tensor, except in the dimension(s) ``axis`` where it is of size 1. Otherwise, ``axis`` is squeezed(see :meth:`~.functional.tensor.squeeze`).
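
A quick sketch of the reduction described above (standard MegEngine tensor construction is assumed):

    import numpy as np
    import megengine as mge

    x = mge.tensor(np.arange(6, dtype="float32").reshape(2, 3))
    print(x.sum(axis=1))                  # reduce each row -> shape (2,)
    print(x.sum(axis=1, keepdims=True))   # keep the reduced axis -> shape (2, 1)
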
@@ -59,7 +59,8 @@ class _PlasmaStoreManager:
 class PlasmaShmQueue:
 def __init__(self, maxsize: int = 0):
-r"""Use pyarrow in-memory plasma store to implement shared memory queue.
+r"""
+Use pyarrow in-memory plasma store to implement shared memory queue.
 Compared to native `multiprocess.Queue`, `PlasmaShmQueue` avoid pickle/unpickle
 and communication overhead, leading to better performance in multi-process
@@ -42,7 +42,8 @@ class DataLoader:
 timeout: int = 0,
 divide: bool = False,
 ):
-r"""Provides a convenient way to iterate on a given dataset.
+r"""
+Provides a convenient way to iterate on a given dataset.
 `DataLoader` combines a dataset with `sampler`, `transform` and `collator`,
 make it flexible to get minibatch continually from a dataset.
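
A hedged construction sketch for DataLoader; ArrayDataset, RandomSampler and ToMode are assumed to be importable from megengine.data as in typical MegEngine releases, and are not taken from this diff:

    import numpy as np
    from megengine.data import DataLoader, RandomSampler
    from megengine.data.dataset import ArrayDataset
    from megengine.data.transform import ToMode

    images = np.random.rand(100, 32, 32, 3).astype("float32")
    labels = np.random.randint(0, 10, size=(100,)).astype("int32")
    dataset = ArrayDataset(images, labels)

    dataloader = DataLoader(
        dataset,
        sampler=RandomSampler(dataset, batch_size=16),  # sampler decides the minibatches
        transform=ToMode("CHW"),                        # per-sample transform
    )
    for batch_images, batch_labels in dataloader:
        pass  # each iteration yields one minibatch
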
@@ -23,7 +23,8 @@ from .meta_vision import VisionDataset
 class Cityscapes(VisionDataset):
-r"""`Cityscapes <http://www.cityscapes-dataset.com/>`_ Dataset.
+r"""
+`Cityscapes <http://www.cityscapes-dataset.com/>`_ Dataset.
 """
 supported_order = (
@@ -46,7 +46,8 @@ def has_valid_annotation(anno, order):
 class COCO(VisionDataset):
-r"""`MS COCO <http://cocodataset.org/#home>`_ Dataset.
+r"""
+`MS COCO <http://cocodataset.org/#home>`_ Dataset.
 """
 supported_order = (
@@ -23,7 +23,8 @@ from .meta_vision import VisionDataset
 class Objects365(VisionDataset):
-r"""`Objects365 <https://www.objects365.org/overview.html>`_ Dataset.
+r"""
+`Objects365 <https://www.objects365.org/overview.html>`_ Dataset.
 """
 supported_order = (
@@ -24,7 +24,8 @@ from .meta_vision import VisionDataset
 class PascalVOC(VisionDataset):
-r"""`Pascal VOC <http://host.robots.ox.ac.uk/pascal/VOC/>`_ Dataset.
+r"""
+`Pascal VOC <http://host.robots.ox.ac.uk/pascal/VOC/>`_ Dataset.
 """
 supported_order = (
@@ -154,7 +154,8 @@ class VisionTransform(Transform):
 class ToMode(VisionTransform):
-r"""Change input data to a target mode.
+r"""
+Change input data to a target mode.
 For example, most transforms use HWC mode image,
 while the neural network might use CHW mode input tensor.
@@ -301,7 +302,8 @@ class TorchTransformCompose(VisionTransform):
 class Pad(VisionTransform):
-r"""Pad the input data.
+r"""
+Pad the input data.
 :param size: padding size of input image, it could be integer or sequence.
 If it is an integer, the input image will be padded in four directions.
@@ -348,7 +350,8 @@ class Pad(VisionTransform):
 class Resize(VisionTransform):
-r"""Resize the input data.
+r"""
+Resize the input data.
 :param output_size: target size of image, with (height, width) shape.
 :param interpolation: interpolation method. All methods are listed below:
@@ -474,7 +477,8 @@ class ShortestEdgeResize(VisionTransform):
 class RandomResize(VisionTransform):
-r"""Resize the input data randomly.
+r"""
+Resize the input data randomly.
 :param scale_range: range of scaling.
 :param order: the same with :class:`VisionTransform`.
@@ -518,7 +522,8 @@ class RandomResize(VisionTransform):
 class RandomCrop(VisionTransform):
-r"""Crop the input data randomly. Before applying the crop transform,
+r"""
+Crop the input data randomly. Before applying the crop transform,
 pad the image first. If target size is still bigger than the size of
 padded image, pad the image size to target size.
@@ -575,7 +580,8 @@ class RandomCrop(VisionTransform):
 class RandomResizedCrop(VisionTransform):
-r"""Crop the input data to random size and aspect ratio.
+r"""
+Crop the input data to random size and aspect ratio.
 A crop of random size (default: of 0.08 to 1.0) of the original size and a random
 aspect ratio (default: of 3/4 to 1.33) of the original aspect ratio is made.
 After applying crop transfrom, the input data will be resized to given size.
@@ -664,7 +670,8 @@ class RandomResizedCrop(VisionTransform):
 class CenterCrop(VisionTransform):
-r"""Crops the given the input data at the center.
+r"""
+Crops the given input data at the center.
 :param output_size: target size of output image, with (height, width) shape.
 :param order: the same with :class:`VisionTransform`.
@@ -707,7 +714,8 @@ class CenterCrop(VisionTransform):
 class RandomHorizontalFlip(VisionTransform):
-r"""Horizontally flip the input data randomly with a given probability.
+r"""
+Horizontally flip the input data randomly with a given probability.
 :param p: probability of the input data being flipped. Default: 0.5
 :param order: the same with :class:`VisionTransform`.
@@ -739,7 +747,8 @@ class RandomHorizontalFlip(VisionTransform):
 class RandomVerticalFlip(VisionTransform):
-r"""Vertically flip the input data randomly with a given probability.
+r"""
+Vertically flip the input data randomly with a given probability.
 :param p: probability of the input data being flipped. Default: 0.5
 :param order: the same with :class:`VisionTransform`.
@@ -771,7 +780,8 @@ class RandomVerticalFlip(VisionTransform):
 class Normalize(VisionTransform):
-r"""Normalize the input data with mean and standard deviation.
+r"""
+Normalize the input data with mean and standard deviation.
 Given mean: ``(M1,...,Mn)`` and std: ``(S1,..,Sn)`` for ``n`` channels,
 this transform will normalize each channel of the input data.
 ``output[channel] = (input[channel] - mean[channel]) / std[channel]``
@@ -797,7 +807,8 @@ class Normalize(VisionTransform):
 class GaussianNoise(VisionTransform):
-r"""Add random gaussian noise to the input data.
+r"""
+Add random gaussian noise to the input data.
 Gaussian noise is generated with given mean and std.
 :param mean: Gaussian mean used to generate noise.
@@ -824,7 +835,8 @@ class GaussianNoise(VisionTransform):
 class BrightnessTransform(VisionTransform):
-r"""Adjust brightness of the input data.
+r"""
+Adjust brightness of the input data.
 :param value: how much to adjust the brightness. Can be any
 non negative number. 0 gives the original image.
@@ -855,7 +867,8 @@ class BrightnessTransform(VisionTransform):
 class ContrastTransform(VisionTransform):
-r"""Adjust contrast of the input data.
+r"""
+Adjust contrast of the input data.
 :param value: how much to adjust the contrast. Can be any
 non negative number. 0 gives the original image.
@@ -886,7 +899,8 @@ class ContrastTransform(VisionTransform):
 class SaturationTransform(VisionTransform):
-r"""Adjust saturation of the input data.
+r"""
+Adjust saturation of the input data.
 :param value: how much to adjust the saturation. Can be any
 non negative number. 0 gives the original image.
@@ -917,7 +931,8 @@ class SaturationTransform(VisionTransform):
 class HueTransform(VisionTransform):
-r"""Adjust hue of the input data.
+r"""
+Adjust hue of the input data.
 :param value: how much to adjust the hue. Can be any number
 between 0 and 0.5, 0 gives the original image.
@@ -955,7 +970,8 @@ class HueTransform(VisionTransform):
 class ColorJitter(VisionTransform):
-r"""Randomly change the brightness, contrast, saturation and hue of an image.
+r"""
+Randomly change the brightness, contrast, saturation and hue of an image.
 :param brightness: how much to jitter brightness.
 Chosen uniformly from [max(0, 1 - brightness), 1 + brightness]
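
The individual transforms above are usually chained; a hedged sketch, assuming a Compose container is exposed in megengine.data.transform as in typical MegEngine releases (the concrete mean/std numbers are placeholders):

    from megengine.data.transform import (
        Compose, Normalize, RandomHorizontalFlip, RandomResizedCrop, ToMode,
    )

    train_transform = Compose([
        RandomResizedCrop(224),                 # random crop, then resize to 224x224
        RandomHorizontalFlip(),                 # flip with probability 0.5
        Normalize(mean=[103.5, 116.3, 123.7],   # per-channel (input - mean) / std
                  std=[57.4, 57.1, 58.4]),
        ToMode("CHW"),                          # HWC image -> CHW tensor layout
    ])
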
@@ -40,7 +40,8 @@ def _str2device_type(type_str: str, allow_unspec: bool = True):
 def get_device_count(device_type: str) -> int:
-"""Gets number of devices installed on this system.
+"""
+Gets number of devices installed on this system.
 :param device_type: device type, one of 'gpu' or 'cpu'
 """
@@ -54,7 +55,8 @@ def get_device_count(device_type: str) -> int:
 def is_cuda_available() -> bool:
-"""Returns whether cuda device is available on this system.
+"""
+Returns whether cuda device is available on this system.
 """
 t = _str2device_type("gpu")
@@ -62,7 +64,8 @@ def is_cuda_available() -> bool:
 def set_default_device(device: str = "xpux"):
-r"""Sets default computing node.
+r"""
+Sets default computing node.
 :param device: default device type. The type can be 'cpu0', 'cpu1', etc.,
 or 'gpu0', 'gpu1', etc., to specify the particular cpu or gpu to use.
@@ -81,7 +84,8 @@ def set_default_device(device: str = "xpux"):
 def get_default_device() -> str:
-r"""Gets default computing node.
+r"""
+Gets default computing node.
 It returns the value set by :func:`~.set_default_device`.
 """
@@ -98,7 +102,8 @@ def set_prealloc_config(
 growth_factor=2.0,
 device_type=DeviceType.CUDA,
 ):
-"""Specifies how to pre-allocate from raw device allocator.
+"""
+Specifies how to pre-allocate from raw device allocator.
 :param alignment: specifies the alignment in bytes.
 :param min_req: min request size in bytes.
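
A short sketch of the device helpers documented above; the top-level megengine re-exports used here are assumed, not shown in this diff:

    import megengine as mge

    if mge.is_cuda_available():
        print("gpu count:", mge.get_device_count("gpu"))
        mge.set_default_device("gpu0")   # pin computation to the first GPU
    else:
        mge.set_default_device("cpu0")
    print(mge.get_default_device())      # value set by set_default_device
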
@@ -123,7 +123,8 @@ def collective_comm(inp, mode, group, device):
 def reduce_sum(
 inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = ""
 ) -> Tensor:
-"""Create reduce_sum operator for collective communication.
+"""
+Create reduce_sum operator for collective communication.
 :param inp: input tensor.
 :param group: communication group.
@@ -136,7 +137,8 @@ def reduce_sum(
 def broadcast(
 inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = ""
 ) -> Tensor:
-"""Create broadcast operator for collective communication.
+"""
+Create broadcast operator for collective communication.
 :param inp: input tensor.
 :param group: communication group.
@@ -149,7 +151,8 @@ def broadcast(
 def all_gather(
 inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = ""
 ) -> Tensor:
-"""Create all_gather operator for collective communication.
+"""
+Create all_gather operator for collective communication.
 :param inp: input tensor.
 :param group: communication group.
@@ -162,7 +165,8 @@ def all_gather(
 def reduce_scatter_sum(
 inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = ""
 ) -> Tensor:
-"""Create reduce_scatter_sum operator for collective communication.
+"""
+Create reduce_scatter_sum operator for collective communication.
 :param inp: input tensor.
 :param group: communication group.
@@ -175,7 +179,8 @@ def reduce_scatter_sum(
 def all_reduce_sum(
 inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = ""
 ) -> Tensor:
-"""Create all_reduce_sum operator for collective communication.
+"""
+Create all_reduce_sum operator for collective communication.
 :param inp: input tensor.
 :param group: communication group.
@@ -188,7 +193,8 @@ def all_reduce_sum(
 def all_reduce_max(
 inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = ""
 ) -> Tensor:
-"""Create all_reduce_max operator for collective communication.
+"""
+Create all_reduce_max operator for collective communication.
 :param inp: input tensor.
 :param group: communication group.
@@ -201,7 +207,8 @@ def all_reduce_max(
 def all_reduce_min(
 inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = ""
 ) -> Tensor:
-"""Create all_reduce_min operator for collective communication.
+"""
+Create all_reduce_min operator for collective communication.
 :param inp: input tensor.
 :param group: communication group.
@@ -214,7 +221,8 @@ def all_reduce_min(
 def gather(
 inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = ""
 ) -> Tensor:
-"""Create gather operator for collective communication.
+"""
+Create gather operator for collective communication.
 :param inp: input tensor.
 :param group: communication group.
@@ -227,7 +235,8 @@ def gather(
 def scatter(
 inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = ""
 ) -> Tensor:
-"""Create scatter operator for collective communication.
+"""
+Create scatter operator for collective communication.
 :param inp: input tensor.
 :param group: communication group.
@@ -240,7 +249,8 @@ def scatter(
 def all_to_all(
 inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = ""
 ) -> Tensor:
-"""Create all_to_all operator for collective communication.
+"""
+Create all_to_all operator for collective communication.
 :param inp: input tensor.
 :param group: communication group.
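
These collective helpers all share the (inp, group, device) shape shown above; a hedged sketch of one of them inside an already-initialized process group, assuming they are exposed via megengine.distributed.functional:

    import megengine as mge
    import megengine.distributed as dist

    # Inside a worker whose process group has already been initialized:
    local_value = mge.tensor([float(dist.get_rank())])
    summed = dist.functional.all_reduce_sum(local_value)   # same result on every rank
    averaged = summed / dist.get_world_size()
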
@@ -251,7 +261,8 @@ def all_to_all(
 def remote_send(inp: Tensor, dest_rank: int) -> Tensor:
-"""Send a Tensor to a remote process.
+"""
+Send a Tensor to a remote process.
 :param inp: tensor to send.
 :param dest_rank: destination process rank.
@@ -266,7 +277,8 @@ def remote_send(inp: Tensor, dest_rank: int) -> Tensor:
 def remote_recv(
 src_rank: int, shape: Tuple[int], dtype: type, device: Optional[str] = None
 ) -> Tensor:
-"""Receive a Tensor from a remote process.
+"""
+Receive a Tensor from a remote process.
 :param src_rank: source process rank.
 :param shape: the shape of the tensor to receive.
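
A hedged point-to-point sketch built on the two signatures shown above (rank 0 sends, rank 1 receives); the megengine.distributed.functional path and the pre-initialized two-process group are assumptions:

    import numpy as np
    import megengine as mge
    import megengine.distributed as dist

    if dist.get_rank() == 0:
        payload = mge.tensor(np.ones((2, 3), dtype="float32"))
        dist.functional.remote_send(payload, dest_rank=1)
    else:
        received = dist.functional.remote_recv(src_rank=0, shape=(2, 3), dtype=np.float32)
        print(received.numpy())
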
@@ -81,7 +81,8 @@ def init_process_group(
 device: int,
 backend: Optional[str] = "nccl",
 ) -> None:
-"""Initialize the distributed process group and specify the device used in the current process
+"""
+Initialize the distributed process group and specify the device used in the current process.
 :param master_ip: ip address of the master node.
 :param port: port available for all processes to communicate.
@@ -140,7 +140,8 @@ class TensorFuture(Future):
 def synchronized(func: Callable):
-"""Decorator. Decorated function will synchronize when finished.
+"""
+Decorator. Decorated function will synchronize when finished.
 Specifically, we use this to prevent data race during hub.load"""
 @functools.wraps(func)
@@ -161,7 +162,8 @@ def _get_device_count_worker(queue, device_type):
 def get_device_count_by_fork(device_type: str):
-"""Get device count in fork thread.
+"""
+Get device count in fork thread.
 See https://stackoverflow.com/questions/22950047/cuda-initialization-error-after-fork
 for more information.
 """
@@ -173,7 +175,8 @@ def get_device_count_by_fork(device_type: str):
 def bcast_list_(inps: list, group: Group = WORLD):
-"""Broadcast tensors between given group.
+"""
+Broadcast tensors between given group.
 :param inps: input tensors.
 :param group: communication group.
@@ -183,7 +186,8 @@ def bcast_list_(inps: list, group: Group = WORLD):
 class AllreduceCallback:
-"""Allreduce Callback with tensor fusion optimization.
+"""
+Allreduce Callback with tensor fusion optimization.
 :param reduce_method: the method to reduce gradiants.
 :param group: communication group.
@@ -21,7 +21,8 @@ from .util import get_free_ports
 class Methods:
-"""Distributed Server Method.
+"""
+Distributed Server Method.
 Used for exchange information between distributed nodes.
 :param mm_server_port: multiple machine rpc server port.
@@ -45,7 +46,8 @@ class Methods:
 return self.mm_server_port
 def set_is_grad(self, key, is_grad):
-"""Mark send/recv need gradiants by key.
+"""
+Mark whether send/recv needs gradients by key.
 :param key: key to match send/recv op.
 :param is_grad: whether this op need grad.
@@ -56,7 +58,8 @@ class Methods:
 return True
 def check_is_grad(self, key):
-"""Check whether send/recv need gradiants.
+"""
+Check whether send/recv needs gradients.
 :param key: key to match send/recv op.
 """
@@ -68,7 +71,8 @@ class Methods:
 return ret
 def set_remote_tracer(self, key, tracer_set):
-"""Set tracer dict for tracing send/recv op.
+"""
+Set tracer dict for tracing send/recv op.
 :param key: key to match send/recv op.
 :param tracer_set: valid tracer set.
@@ -79,7 +83,8 @@ class Methods:
 return True
 def check_remote_tracer(self, key):
-"""Get tracer dict for send/recv op.
+"""
+Get tracer dict for send/recv op.
 :param key: key to match send/recv op.
 """
@@ -91,7 +96,8 @@ class Methods:
 return ret
 def group_barrier(self, key, size):
-"""A barrier wait for all group member.
+"""
+A barrier to wait for all group members.
 :param key: group key to match each other.
 :param size: group size.
@@ -114,7 +120,8 @@ class ThreadXMLRPCServer(ThreadingMixIn, SimpleXMLRPCServer):
 def start_server(py_server_port, mm_server_port):
-"""Start python distributed server and multiple machine server.
+"""
+Start python distributed server and multiple machine server.
 :param py_server_port: python server port.
 :param mm_server_port: multiple machine server port.
@@ -125,7 +132,8 @@ def start_server(py_server_port, mm_server_port):
 class Server:
-"""Distributed Server for distributed training.
+"""
+Distributed Server for distributed training.
 Should be running at master node.
 :param port: python server port.
@@ -143,7 +151,8 @@ class Server:
 class Client:
-"""Distributed Client for distributed training.
+"""
+Distributed Client for distributed training.
 :param master_ip: ip address of master node.
 :param port: port of server at master node.
@@ -171,7 +180,8 @@ class Client:
 return self.proxy.get_mm_server_port()
 def set_is_grad(self, key, is_grad):
-"""Mark send/recv need gradiants by key.
+"""
+Mark whether send/recv needs gradients by key.
 :param key: key to match send/recv op.
 :param is_grad: whether this op need grad.
@@ -179,14 +189,16 @@ class Client:
 self.proxy.set_is_grad(key, is_grad)
 def check_is_grad(self, key):
-"""Check whether send/recv need gradiants.
+"""
+Check whether send/recv needs gradients.
 :param key: key to match send/recv op.
 """
 return self.proxy.check_is_grad(key)
 def set_remote_tracer(self, key, tracer_set):
-"""Set tracer dict for tracing send/recv op.
+"""
+Set tracer dict for tracing send/recv op.
 :param key: key to match send/recv op.
 :param tracer_set: valid tracer set.
@@ -194,14 +206,16 @@ class Client:
 self.proxy.set_remote_tracer(key, tracer_set)
 def check_remote_tracer(self, key):
-"""Get tracer dict for send/recv op.
+"""
+Get tracer dict for send/recv op.
 :param key: key to match send/recv op.
 """
 return self.proxy.check_remote_tracer(key)
 def group_barrier(self, key, size):
-"""A barrier wait for all group member.
+"""
+A barrier to wait for all group members.
 :param key: group key to match each other.
 :param size: group size.
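
A hedged sketch of how the Server/Client pair above fits together; only the parameters visible in this diff (Server port, Client master_ip and port, group_barrier(key, size)) are relied on, and the import path plus the concrete IP, port and group size are illustrative assumptions:

    from megengine.distributed.server import Client, Server  # assumed module path

    # On the master node:
    server = Server(port=23456)

    # On every worker (including the master), connect and synchronize:
    client = Client("10.0.0.1", 23456)
    client.group_barrier("init", 4)   # block until all 4 workers reach this point
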
@@ -12,7 +12,8 @@ from typing import List
 def get_free_ports(num: int) -> List[int]:
-"""Get one or more free ports.
+"""
+Get one or more free ports.
 """
 socks, ports = [], []
 for i in range(num):
@@ -12,7 +12,8 @@ _conv_execution_strategy = os.getenv("MEGENGINE_CONV_EXECUTION_STRATEGY", "HEURI
 def get_conv_execution_strategy() -> str:
-"""Returns the execuation strategy of :class:`~.Conv2d`.
+"""
+Returns the execution strategy of :class:`~.Conv2d`.
 See :func:`~.set_conv_execution_strategy` for possible return values
 """
@@ -20,7 +21,8 @@ def get_conv_execution_strategy() -> str:
 def set_conv_execution_strategy(option: str):
-"""Sets the execuation strategy of :class:`~.Conv2d`.
+"""
+Sets the execution strategy of :class:`~.Conv2d`.
 :param option: Decides how :class:`~.Conv2d` algorithm is chosen.
 Available values:
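
A minimal sketch of the setter/getter pair above; the module path is an assumption, and "HEURISTIC" is used because it matches the environment-variable default visible in this hunk (other option strings are not shown in this diff):

    from megengine.functional.debug_param import (  # assumed module path
        get_conv_execution_strategy,
        set_conv_execution_strategy,
    )

    set_conv_execution_strategy("HEURISTIC")   # pick the algorithm heuristically
    print(get_conv_execution_strategy())
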
@@ -100,7 +100,8 @@ def _elemwise_multi_type(*args, mode, **kwargs):
 def add(x, y):
-"""Element-wise `addition`.
+"""
+Element-wise `addition`.
 At least one operand should be tensor.
 Same for sub/mul/div/floor_div/pow/mod/atan2/equal/not_equal/less/less_equal/greater/greater_equal/maximum/minmium.
@@ -193,7 +194,8 @@ def log1p(x):
 def sqrt(x: Tensor) -> Tensor:
-"""Element-wise `sqrt`.
+"""
+Element-wise `sqrt`.
 Returns ``NaN`` for negative input value.
 :param x: input tensor.
@@ -209,7 +211,7 @@ def sqrt(x: Tensor) -> Tensor:
 x = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3))
 out = F.sqrt(x)
-print(out.numpy())
+print(out.numpy().round(decimals=4))
 Outputs:
@@ -239,7 +241,7 @@ def square(x: Tensor) -> Tensor:
 data = mge.tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3))
 out = F.square(data)
-print(out.numpy())
+print(out.numpy().round(decimals=4))
 Outputs:
@@ -281,7 +283,8 @@ def minimum(x, y):
 def cos(x):
-"""Element-wise `cosine`.
+"""
+Element-wise `cosine`.
 :param x: input tensor.
 :return: computed tensor.
@@ -296,7 +299,7 @@ def cos(x):
 x = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3))
 out = F.cos(x)
-print(out.numpy())
+print(out.numpy().round(decimals=4))
 Outputs:
@@ -374,7 +377,8 @@ def atanh(x):
 def left_shift(x, y):
-"""Element-wise `bitwise binary: x << y`.
+"""
+Element-wise `bitwise binary: x << y`.
 :param x: input tensor, should be int.
 :param y: how many bits to be left-shifted.
@@ -435,7 +439,8 @@ def logical_xor(x, y):
 def equal(x, y):
-"""Element-wise `(x == y)`.
+"""
+Element-wise `(x == y)`.
 :param x: input tensor 1.
 :param y: input tensor 2.
@@ -494,7 +499,8 @@ def greater_equal(x, y):
 def hswish(x):
-"""Element-wise `x * relu6(x + 3) / 6`.
+"""
+Element-wise `x * relu6(x + 3) / 6`.
 :param x: input tensor.
 :return: computed tensor.
@@ -509,7 +515,7 @@ def hswish(x):
 x = tensor(np.arange(5).astype(np.float32))
 out = F.hswish(x)
-print(out.numpy())
+print(out.numpy().round(decimals=4))
 .. testoutput::
@@ -540,7 +546,8 @@ def sigmoid(x):
 def clip(x: Tensor, lower=None, upper=None) -> Tensor:
-r"""Clamps all elements in input tensor into the range `[` :attr:`lower`, :attr:`upper` `]` and returns
+r"""
+Clamps all elements in input tensor into the range `[` :attr:`lower`, :attr:`upper` `]` and returns
 a resulting tensor:
 .. math::
@@ -24,7 +24,8 @@ __all__ = [
 def l1_loss(pred: Tensor, label: Tensor) -> Tensor:
-r"""Calculates the mean absolute error (MAE) between
+r"""
+Calculates the mean absolute error (MAE) between
 each element in the pred :math:`x` and label :math:`y`.
 The mean absolute error can be described as:
@@ -70,7 +71,8 @@ def l1_loss(pred: Tensor, label: Tensor) -> Tensor:
 def square_loss(pred: Tensor, label: Tensor) -> Tensor:
-r"""Calculates the mean squared error (squared L2 norm) between
+r"""
+Calculates the mean squared error (squared L2 norm) between
 each element in the pred :math:`x` and label :math:`y`.
 The mean squared error can be described as:
@@ -127,7 +129,8 @@ def cross_entropy(
 with_logits: bool = True,
 label_smooth: float = 0,
 ) -> Tensor:
-r"""Computes the multi-class cross entropy loss (using logits by default).
+r"""
+Computes the multi-class cross entropy loss (using logits by default).
 By default(``with_logitis`` is True), ``pred`` is assumed to be logits,
 class probabilities are given by softmax.
@@ -161,7 +164,7 @@ def cross_entropy(
 pred = tensor(np.array([0, 0], dtype=np.float32).reshape(data_shape))
 label = tensor(np.ones(label_shape, dtype=np.int32))
 loss = F.nn.cross_entropy(pred, label)
-print(loss.numpy())
+print(loss.numpy().round(decimals=4))
 Outputs:
@@ -195,7 +198,8 @@ def cross_entropy(
 def binary_cross_entropy(
 pred: Tensor, label: Tensor, with_logits: bool = True
 ) -> Tensor:
-r"""Computes the binary cross entropy loss (using logits by default).
+r"""
+Computes the binary cross entropy loss (using logits by default).
 By default(``with_logitis`` is True), ``pred`` is assumed to be logits,
 class probabilities are given by sigmoid.
@@ -216,7 +220,7 @@ def binary_cross_entropy(
 pred = tensor(np.array([0, 0], dtype=np.float32).reshape(1, 2))
 label = tensor(np.ones((1, 2), dtype=np.float32))
 loss = F.nn.binary_cross_entropy(pred, label)
-print(loss.numpy())
+print(loss.numpy().round(decimals=4))
 Outputs:
@@ -233,7 +237,8 @@ def binary_cross_entropy(
 def hinge_loss(pred: Tensor, label: Tensor, norm: str = "L1") -> Tensor:
-r"""Caculates the hinge loss which is often used in SVM.
+r"""
+Calculates the hinge loss, which is often used in SVM.
 The hinge loss can be described as:
@@ -43,7 +43,8 @@ __all__ = [ | |||||
def isnan(inp: Tensor) -> Tensor: | def isnan(inp: Tensor) -> Tensor: | ||||
r"""Returns a new tensor representing if each element is ``NaN`` or not. | |||||
r""" | |||||
Returns a new tensor representing if each element is ``NaN`` or not. | |||||
:param inp: input tensor. | :param inp: input tensor. | ||||
:return: result tensor. | :return: result tensor. | ||||
@@ -69,7 +70,8 @@ def isnan(inp: Tensor) -> Tensor: | |||||
def isinf(inp: Tensor) -> Tensor: | def isinf(inp: Tensor) -> Tensor: | ||||
r"""Returns a new tensor representing if each element is ``Inf`` or not. | |||||
r""" | |||||
Returns a new tensor representing if each element is ``Inf`` or not. | |||||
:param inp: input tensor. | :param inp: input tensor. | ||||
:return: result tensor. | :return: result tensor. | ||||
@@ -95,7 +97,8 @@ def isinf(inp: Tensor) -> Tensor: | |||||
def sign(inp: Tensor): | def sign(inp: Tensor): | ||||
r"""Returns a new tensor representing the sign of each element in input tensor. | |||||
r""" | |||||
Returns a new tensor representing the sign of each element in input tensor. | |||||
:param: input tensor. | :param: input tensor. | ||||
:return: the sign of input tensor. | :return: the sign of input tensor. | ||||
@@ -125,7 +128,8 @@ def sum( | |||||
axis: Optional[Union[int, Sequence[int]]] = None, | axis: Optional[Union[int, Sequence[int]]] = None, | ||||
keepdims: bool = False, | keepdims: bool = False, | ||||
) -> Tensor: | ) -> Tensor: | ||||
r"""Returns the sum of input tensor along given axis. If axis is a list of dimensions, | |||||
r""" | |||||
Returns the sum of input tensor along given axis. If axis is a list of dimensions, | |||||
reduce over all of them. | reduce over all of them. | ||||
:param inp: input tensor. | :param inp: input tensor. | ||||
@@ -160,7 +164,8 @@ def sum( | |||||
def prod( | def prod( | ||||
inp: Tensor, axis: Optional[Union[int, Sequence[int]]] = None, keepdims=False | inp: Tensor, axis: Optional[Union[int, Sequence[int]]] = None, keepdims=False | ||||
) -> Tensor: | ) -> Tensor: | ||||
r"""Returns the product of input tensor along given axis. If axis is a list of dimensions, | |||||
r""" | |||||
Returns the product of input tensor along given axis. If axis is a list of dimensions, | |||||
reduce over all of them. | reduce over all of them. | ||||
:param inp: input tensor. | :param inp: input tensor. | ||||
@@ -195,7 +200,8 @@ def mean( | |||||
axis: Optional[Union[int, Sequence[int]]] = None, | axis: Optional[Union[int, Sequence[int]]] = None, | ||||
keepdims: bool = False, | keepdims: bool = False, | ||||
) -> Tensor: | ) -> Tensor: | ||||
"""Returns the mean value of input tensor along | |||||
""" | |||||
Returns the mean value of input tensor along | |||||
given axis. If axis is a list of dimensions, | given axis. If axis is a list of dimensions, | ||||
reduce over all of them. | reduce over all of them. | ||||
@@ -231,7 +237,8 @@ def var( | |||||
axis: Optional[Union[int, Sequence[int]]] = None, | axis: Optional[Union[int, Sequence[int]]] = None, | ||||
keepdims: bool = False, | keepdims: bool = False, | ||||
) -> Tensor: | ) -> Tensor: | ||||
"""Returns the variance value of input tensor along | |||||
""" | |||||
Returns the variance value of input tensor along | |||||
given axis. If axis is a list of dimensions, | given axis. If axis is a list of dimensions, | ||||
reduce over all of them. | reduce over all of them. | ||||
@@ -250,7 +257,7 @@ def var( | |||||
data = tensor(np.arange(1, 7, dtype=np.float32).reshape(2, 3)) | data = tensor(np.arange(1, 7, dtype=np.float32).reshape(2, 3)) | ||||
out = F.var(data) | out = F.var(data) | ||||
print(out.numpy()) | |||||
print(out.numpy().round(decimals=4)) | |||||
Outputs: | Outputs: | ||||
@@ -271,7 +278,8 @@ def std( | |||||
axis: Optional[Union[int, Sequence[int]]] = None, | axis: Optional[Union[int, Sequence[int]]] = None, | ||||
keepdims: bool = False, | keepdims: bool = False, | ||||
) -> Tensor: | ) -> Tensor: | ||||
"""Returns the standard deviation of input tensor along | |||||
""" | |||||
Returns the standard deviation of input tensor along | |||||
given axis. If axis is a list of dimensions, | given axis. If axis is a list of dimensions, | ||||
reduce over all of them. | reduce over all of them. | ||||
@@ -290,7 +298,7 @@ def std( | |||||
data = tensor(np.arange(1, 7, dtype=np.float32).reshape(2, 3)) | data = tensor(np.arange(1, 7, dtype=np.float32).reshape(2, 3)) | ||||
out = F.std(data, axis=1) | out = F.std(data, axis=1) | ||||
print(out.numpy()) | |||||
print(out.numpy().round(decimals=4)) | |||||
Outputs: | Outputs: | ||||
@@ -306,7 +314,8 @@ def min( | |||||
axis: Optional[Union[int, Sequence[int]]] = None, | axis: Optional[Union[int, Sequence[int]]] = None, | ||||
keepdims: bool = False, | keepdims: bool = False, | ||||
) -> Tensor: | ) -> Tensor: | ||||
r"""Returns the min value of input tensor along | |||||
r""" | |||||
Returns the min value of input tensor along | |||||
given axis. If axis is a list of dimensions, | given axis. If axis is a list of dimensions, | ||||
reduce over all of them. | reduce over all of them. | ||||
@@ -342,7 +351,8 @@ def max( | |||||
axis: Optional[Union[int, Sequence[int]]] = None, | axis: Optional[Union[int, Sequence[int]]] = None, | ||||
keepdims: bool = False, | keepdims: bool = False, | ||||
) -> Tensor: | ) -> Tensor: | ||||
r"""Returns the max value of the input tensor along | |||||
r""" | |||||
Returns the max value of the input tensor along | |||||
given axis. If axis is a list of dimensions, | given axis. If axis is a list of dimensions, | ||||
reduce over all of them. | reduce over all of them. | ||||
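A small sketch of ``min``/``max`` with ``axis`` and ``keepdims`` (same import assumptions as the other examples in this file):

.. code-block::

    x = tensor(np.arange(1, 7, dtype=np.float32).reshape(2, 3))
    print(F.max(x).numpy())                         # 6.0
    print(F.min(x, axis=0).numpy())                 # [1. 2. 3.]
    print(F.max(x, axis=1, keepdims=True).numpy())  # [[3.] [6.]], reduced axis kept as size 1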
@@ -376,7 +386,8 @@ def max( | |||||
def norm( | def norm( | ||||
inp: Tensor, ord: float = None, axis: int = None, keepdims=False, | inp: Tensor, ord: float = None, axis: int = None, keepdims=False, | ||||
): | ): | ||||
"""Calculates ``p``-norm of input tensor along | |||||
""" | |||||
Calculates ``p``-norm of input tensor along | |||||
given axis. | given axis. | ||||
:param inp: input tensor. | :param inp: input tensor. | ||||
@@ -395,7 +406,7 @@ def norm( | |||||
x = tensor(np.arange(-3, 3, dtype=np.float32)) | x = tensor(np.arange(-3, 3, dtype=np.float32)) | ||||
out = F.norm(x) | out = F.norm(x) | ||||
print(out.numpy()) | |||||
print(out.numpy().round(decimals=4)) | |||||
Outputs: | Outputs: | ||||
@@ -423,7 +434,8 @@ def argmin( | |||||
axis: Optional[Union[int, Sequence[int]]] = None, | axis: Optional[Union[int, Sequence[int]]] = None, | ||||
keepdims: bool = False, | keepdims: bool = False, | ||||
) -> Tensor: | ) -> Tensor: | ||||
r"""Returns the indices of the minimum values along | |||||
r""" | |||||
Returns the indices of the minimum values along | |||||
given axis. If axis is a list of dimensions, | given axis. If axis is a list of dimensions, | ||||
reduce over all of them. | reduce over all of them. | ||||
@@ -481,7 +493,8 @@ def argmax( | |||||
axis: Optional[Union[int, Sequence[int]]] = None, | axis: Optional[Union[int, Sequence[int]]] = None, | ||||
keepdims: bool = False, | keepdims: bool = False, | ||||
) -> Tensor: | ) -> Tensor: | ||||
r"""Returns the indices of the maximum values along | |||||
r""" | |||||
Returns the indices of the maximum values along | |||||
given axis. If axis is a list of dimensions, | given axis. If axis is a list of dimensions, | ||||
reduce over all of them. | reduce over all of them. | ||||
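A short sketch of ``argmin``/``argmax`` (imports assumed as in the surrounding examples):

.. code-block::

    x = tensor(np.array([[1.0, 5.0, 2.0], [7.0, 0.0, 3.0]], dtype=np.float32))
    print(F.argmax(x, axis=1).numpy())  # [1 0], index of the maximum in each row
    print(F.argmin(x, axis=0).numpy())  # [0 1 0], index of the minimum in each column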
@@ -537,7 +550,8 @@ def argmax( | |||||
def normalize( | def normalize( | ||||
inp: Tensor, ord: float = None, axis: int = None, eps: float = 1e-12, | inp: Tensor, ord: float = None, axis: int = None, eps: float = 1e-12, | ||||
) -> Tensor: | ) -> Tensor: | ||||
r"""Performs :math:`L_p` normalization of input tensor along | |||||
r""" | |||||
Performs :math:`L_p` normalization of input tensor along | |||||
given axis. | given axis. | ||||
For a tensor of shape :math:`(n_0, ..., n_{dim}, ..., n_k)`, each | For a tensor of shape :math:`(n_0, ..., n_{dim}, ..., n_k)`, each | ||||
@@ -559,7 +573,8 @@ def normalize( | |||||
def argsort(inp: Tensor, descending: bool = False) -> Tensor: | def argsort(inp: Tensor, descending: bool = False) -> Tensor: | ||||
r"""Returns the indices that would sort the input tensor. | |||||
r""" | |||||
Returns the indices that would sort the input tensor. | |||||
:param inp: input tensor. If it's 2d, the result would be an array of indices showing how to sort each row of the input tensor. | :param inp: input tensor. If it's 2d, the result would be an array of indices showing how to sort each row of the input tensor. | ||||
:param descending: sort in descending order, where the largest comes first. Default: False | :param descending: sort in descending order, where the largest comes first. Default: False | ||||
@@ -600,7 +615,8 @@ def argsort(inp: Tensor, descending: bool = False) -> Tensor: | |||||
def sort(inp: Tensor, descending: bool = False) -> Tuple[Tensor, Tensor]: | def sort(inp: Tensor, descending: bool = False) -> Tuple[Tensor, Tensor]: | ||||
r"""Returns sorted tensor and the indices would sort the input tensor. | |||||
r""" | |||||
Returns the sorted tensor and the indices that would sort the input tensor. | |||||
:param inp: input tensor. If it's 2d, the result would be sorted by row. | :param inp: input tensor. If it's 2d, the result would be sorted by row. | ||||
:param descending: sort in descending order, where the largest comes first. Default: False | :param descending: sort in descending order, where the largest comes first. Default: False | ||||
@@ -647,7 +663,8 @@ def topk( | |||||
kth_only: bool = False, | kth_only: bool = False, | ||||
no_sort: bool = False, | no_sort: bool = False, | ||||
) -> Tuple[Tensor, Tensor]: | ) -> Tuple[Tensor, Tensor]: | ||||
r"""Selects the ``Top-K``(by default) smallest elements of 2d matrix by row. | |||||
r""" | |||||
Selects the ``Top-K`` (by default) smallest elements of a 2D matrix by row. | |||||
:param inp: input tensor. If input tensor is 2d, each row will be sorted. | :param inp: input tensor. If input tensor is 2d, each row will be sorted. | ||||
:param k: number of elements needed. | :param k: number of elements needed. | ||||
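A sketch of ``topk`` on a one-row matrix; by default the K smallest values and their indices are returned per row (imports assumed as above):

.. code-block::

    x = tensor(np.array([[2.0, 7.0, 1.0, 5.0]], dtype=np.float32))
    values, indices = F.topk(x, 3)  # three smallest values per row, sorted ascending
    print(values.numpy())           # [[1. 2. 5.]]
    print(indices.numpy())          # [[2 0 3]]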
@@ -75,7 +75,8 @@ def expand_hw(x): | |||||
def linear(inp: Tensor, weight: Tensor, bias: Optional[Tensor] = None) -> Tensor: | def linear(inp: Tensor, weight: Tensor, bias: Optional[Tensor] = None) -> Tensor: | ||||
"""Applies a linear transformation to the input tensor. | |||||
""" | |||||
Applies a linear transformation to the input tensor. | |||||
Refer to :class:`~.module.linear.Linear` for more information. | Refer to :class:`~.module.linear.Linear` for more information. | ||||
@@ -101,7 +102,8 @@ def conv2d( | |||||
conv_mode="CROSS_CORRELATION", | conv_mode="CROSS_CORRELATION", | ||||
compute_mode="DEFAULT", | compute_mode="DEFAULT", | ||||
) -> Tensor: | ) -> Tensor: | ||||
"""2D convolution operation. | |||||
""" | |||||
2D convolution operation. | |||||
Refer to :class:`~.Conv2d` for more information. | Refer to :class:`~.Conv2d` for more information. | ||||
@@ -166,7 +168,8 @@ def conv_transpose2d( | |||||
conv_mode="CROSS_CORRELATION", | conv_mode="CROSS_CORRELATION", | ||||
compute_mode="DEFAULT", | compute_mode="DEFAULT", | ||||
) -> Tensor: | ) -> Tensor: | ||||
"""2D transposed convolution operation. | |||||
""" | |||||
2D transposed convolution operation. | |||||
Refer to :class:`~.ConvTranspose2d` for more information. | Refer to :class:`~.ConvTranspose2d` for more information. | ||||
@@ -227,7 +230,8 @@ def local_conv2d( | |||||
dilation: Union[int, Tuple[int, int]] = 1, | dilation: Union[int, Tuple[int, int]] = 1, | ||||
conv_mode="CROSS_CORRELATION", | conv_mode="CROSS_CORRELATION", | ||||
): | ): | ||||
"""Applies spatial 2D convolution over an groupped channeled image with untied kernels. | |||||
""" | |||||
Applies a spatial 2D convolution over a grouped, channeled image with untied kernels. | |||||
""" | """ | ||||
assert conv_mode == "CROSS_CORRELATION" or conv_mode.name == "CROSS_CORRELATION" | assert conv_mode == "CROSS_CORRELATION" or conv_mode.name == "CROSS_CORRELATION" | ||||
@@ -261,7 +265,8 @@ def max_pool2d( | |||||
stride: Optional[Union[int, Tuple[int, int]]] = None, | stride: Optional[Union[int, Tuple[int, int]]] = None, | ||||
padding: Union[int, Tuple[int, int]] = 0, | padding: Union[int, Tuple[int, int]] = 0, | ||||
) -> Tensor: | ) -> Tensor: | ||||
"""Applies a 2D max pooling over an input tensor. | |||||
""" | |||||
Applies a 2D max pooling over an input tensor. | |||||
Refer to :class:`~.MaxPool2d` for more information. | Refer to :class:`~.MaxPool2d` for more information. | ||||
@@ -298,7 +303,8 @@ def avg_pool2d( | |||||
padding: Union[int, Tuple[int, int]] = 0, | padding: Union[int, Tuple[int, int]] = 0, | ||||
mode: str = "AVERAGE_COUNT_EXCLUDE_PADDING", | mode: str = "AVERAGE_COUNT_EXCLUDE_PADDING", | ||||
) -> Tensor: | ) -> Tensor: | ||||
"""Applies 2D average pooling over an input tensor. | |||||
""" | |||||
Applies 2D average pooling over an input tensor. | |||||
Refer to :class:`~.AvgPool2d` for more information. | Refer to :class:`~.AvgPool2d` for more information. | ||||
@@ -332,7 +338,8 @@ def avg_pool2d( | |||||
def adaptive_max_pool2d( | def adaptive_max_pool2d( | ||||
inp: Tensor, oshp: Union[Tuple[int, int], int, Tensor], | inp: Tensor, oshp: Union[Tuple[int, int], int, Tensor], | ||||
) -> Tensor: | ) -> Tensor: | ||||
"""Applies a 2D max adaptive pooling over an input. | |||||
""" | |||||
Applies a 2D max adaptive pooling over an input. | |||||
Refer to :class:`~.MaxAdaptivePool2d` for more information. | Refer to :class:`~.MaxAdaptivePool2d` for more information. | ||||
@@ -353,7 +360,8 @@ def adaptive_max_pool2d( | |||||
def adaptive_avg_pool2d( | def adaptive_avg_pool2d( | ||||
inp: Tensor, oshp: Union[Tuple[int, int], int, Tensor], | inp: Tensor, oshp: Union[Tuple[int, int], int, Tensor], | ||||
) -> Tensor: | ) -> Tensor: | ||||
"""Applies a 2D average adaptive pooling over an input. | |||||
""" | |||||
Applies a 2D average adaptive pooling over an input. | |||||
Refer to :class:`~.AvgAdaptivePool2d` for more information. | Refer to :class:`~.AvgAdaptivePool2d` for more information. | ||||
@@ -390,7 +398,8 @@ def leaky_relu(inp: Tensor, negative_slope: float = 0.01) -> Tensor: | |||||
def softplus(inp: Tensor) -> Tensor: | def softplus(inp: Tensor) -> Tensor: | ||||
r"""Applies the element-wise function: | |||||
r""" | |||||
Applies the element-wise function: | |||||
.. math:: | .. math:: | ||||
\text{softplus}(x) = \log(1 + \exp(x)) | \text{softplus}(x) = \log(1 + \exp(x)) | ||||
@@ -416,7 +425,7 @@ def softplus(inp: Tensor) -> Tensor: | |||||
x = tensor(np.arange(-3, 3, dtype=np.float32)) | x = tensor(np.arange(-3, 3, dtype=np.float32)) | ||||
y = F.softplus(x) | y = F.softplus(x) | ||||
print(y.numpy()) | |||||
print(y.numpy().round(decimals=4)) | |||||
Outputs: | Outputs: | ||||
@@ -429,7 +438,8 @@ def softplus(inp: Tensor) -> Tensor: | |||||
def logsoftmax(inp: Tensor, axis: Union[int, Sequence[int]]) -> Tensor: | def logsoftmax(inp: Tensor, axis: Union[int, Sequence[int]]) -> Tensor: | ||||
r"""Applies the :math:`\log(\text{Softmax}(x))` function to an n-dimensional | |||||
r""" | |||||
Applies the :math:`\log(\text{Softmax}(x))` function to an n-dimensional | |||||
input Tensor. The LogSoftmax formulation can be simplified as: | input Tensor. The LogSoftmax formulation can be simplified as: | ||||
.. math:: | .. math:: | ||||
@@ -456,7 +466,7 @@ def logsoftmax(inp: Tensor, axis: Union[int, Sequence[int]]) -> Tensor: | |||||
x = tensor(np.arange(-5, 5, dtype=np.float32)).reshape(2,5) | x = tensor(np.arange(-5, 5, dtype=np.float32)).reshape(2,5) | ||||
y = F.logsoftmax(x, axis=1) | y = F.logsoftmax(x, axis=1) | ||||
print(y.numpy()) | |||||
print(y.numpy().round(decimals=4)) | |||||
Outputs: | Outputs: | ||||
@@ -470,7 +480,8 @@ def logsoftmax(inp: Tensor, axis: Union[int, Sequence[int]]) -> Tensor: | |||||
def logsigmoid(inp: Tensor) -> Tensor: | def logsigmoid(inp: Tensor) -> Tensor: | ||||
r"""Applies the element-wise function: | |||||
r""" | |||||
Applies the element-wise function: | |||||
.. math:: | .. math:: | ||||
\text{logsigmoid}(x) = \log(\frac{ 1 }{ 1 + \exp(-x)}) | \text{logsigmoid}(x) = \log(\frac{ 1 }{ 1 + \exp(-x)}) | ||||
@@ -490,13 +501,13 @@ def logsigmoid(inp: Tensor) -> Tensor: | |||||
x = tensor(np.arange(-5, 5, dtype=np.float32)) | x = tensor(np.arange(-5, 5, dtype=np.float32)) | ||||
y = F.logsigmoid(x) | y = F.logsigmoid(x) | ||||
print(y.numpy()) | |||||
print(y.numpy().round(decimals=4)) | |||||
Outputs: | Outputs: | ||||
.. testoutput:: | .. testoutput:: | ||||
[-5.0067 -4.0181 -3.0486 -2.1269 -1.3133 -0.6931 -0.3133 -0.1269 -0.0486 | |||||
[-5.0067 -4.0182 -3.0486 -2.1269 -1.3133 -0.6931 -0.3133 -0.1269 -0.0486 | |||||
-0.0181] | -0.0181] | ||||
""" | """ | ||||
@@ -539,7 +550,7 @@ def logsumexp( | |||||
x = tensor(np.arange(-5, 5, dtype=np.float32)).reshape(2,5) | x = tensor(np.arange(-5, 5, dtype=np.float32)).reshape(2,5) | ||||
y = F.logsumexp(x, axis=1, keepdims=False) | y = F.logsumexp(x, axis=1, keepdims=False) | ||||
print(y.numpy()) | |||||
print(y.numpy().round(decimals=4)) | |||||
Outputs: | Outputs: | ||||
@@ -589,7 +600,7 @@ def softmax(inp: Tensor, axis: Optional[int] = None) -> Tensor: | |||||
x = tensor(np.arange(-5, 5, dtype=np.float32)).reshape(2,5) | x = tensor(np.arange(-5, 5, dtype=np.float32)).reshape(2,5) | ||||
out = F.softmax(x) | out = F.softmax(x) | ||||
print(out.numpy()) | |||||
print(out.numpy().round(decimals=4)) | |||||
Outputs: | Outputs: | ||||
@@ -619,7 +630,8 @@ def batch_norm( | |||||
eps: float = 1e-5, | eps: float = 1e-5, | ||||
inplace: bool = True | inplace: bool = True | ||||
): | ): | ||||
r"""Applies batch normalization to the input. | |||||
r""" | |||||
Applies batch normalization to the input. | |||||
Refer to :class:`~.BatchNorm2d` and :class:`~.BatchNorm1d` for more information. | Refer to :class:`~.BatchNorm2d` and :class:`~.BatchNorm1d` for more information. | ||||
@@ -734,7 +746,8 @@ def sync_batch_norm( | |||||
eps_mode="ADDITIVE", | eps_mode="ADDITIVE", | ||||
group=WORLD, | group=WORLD, | ||||
) -> Tensor: | ) -> Tensor: | ||||
r"""Applies synchronized batch normalization to the input. | |||||
r""" | |||||
Applies synchronized batch normalization to the input. | |||||
Refer to :class:`~.BatchNorm2d` and :class:`~.BatchNorm1d` for more information. | Refer to :class:`~.BatchNorm2d` and :class:`~.BatchNorm1d` for more information. | ||||
@@ -835,7 +848,8 @@ def sync_batch_norm( | |||||
def one_hot(inp: Tensor, num_classes: int) -> Tensor: | def one_hot(inp: Tensor, num_classes: int) -> Tensor: | ||||
r"""Performs one-hot encoding for the input tensor. | |||||
r""" | |||||
Performs one-hot encoding for the input tensor. | |||||
:param inp: input tensor. | :param inp: input tensor. | ||||
:param num_classes: number of classes denotes the last dimension of the output tensor. | :param num_classes: number of classes denotes the last dimension of the output tensor. | ||||
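A sketch of ``one_hot`` (imports assumed as in the other examples):

.. code-block::

    idx = tensor(np.array([1, 0, 2], dtype=np.int32))
    print(F.one_hot(idx, num_classes=4).numpy())
    # [[0 1 0 0]
    #  [1 0 0 0]
    #  [0 0 1 0]]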
@@ -878,7 +892,8 @@ def warp_perspective( | |||||
border_val: float = 0.0, | border_val: float = 0.0, | ||||
interp_mode: str = "LINEAR", | interp_mode: str = "LINEAR", | ||||
): | ): | ||||
r"""Applies perspective transformation to batched 2D images. | |||||
r""" | |||||
Applies perspective transformation to batched 2D images. | |||||
The input images are transformed to the output images by the transformation matrix: | The input images are transformed to the output images by the transformation matrix: | ||||
@@ -1094,13 +1109,13 @@ def svd(inp: Tensor, full_matrices=False, compute_uv=True) -> Tensor: | |||||
x = tensor(np.arange(0, 6, dtype=np.float32).reshape(2,3)) | x = tensor(np.arange(0, 6, dtype=np.float32).reshape(2,3)) | ||||
_, y, _ = F.svd(x) | _, y, _ = F.svd(x) | ||||
print(y.numpy()) | |||||
print(y.numpy().round(decimals=3)) | |||||
Outputs: | Outputs: | ||||
.. testoutput:: | .. testoutput:: | ||||
[7.3485 1. ] | |||||
[7.348 1. ] | |||||
""" | """ | ||||
op = builtin.SVD(full_matrices=full_matrices, compute_uv=compute_uv) | op = builtin.SVD(full_matrices=full_matrices, compute_uv=compute_uv) | ||||
@@ -1115,7 +1130,8 @@ def interpolate( | |||||
mode: str = "BILINEAR", | mode: str = "BILINEAR", | ||||
align_corners: bool = None, | align_corners: bool = None, | ||||
) -> Tensor: | ) -> Tensor: | ||||
r"""Down/up samples the input tensor to either the given size or with the given scale_factor. ``size`` can not coexist with ``scale_factor``. | |||||
r""" | |||||
Down/up samples the input tensor to either the given size or with the given scale_factor. ``size`` can not coexist with ``scale_factor``. | |||||
:param inp: input tensor. | :param inp: input tensor. | ||||
:param size: size of the output tensor. Default: None | :param size: size of the output tensor. Default: None | ||||
@@ -1257,7 +1273,8 @@ def interpolate( | |||||
def dropout(inp: Tensor, drop_prob: float, training: bool = True) -> Tensor: | def dropout(inp: Tensor, drop_prob: float, training: bool = True) -> Tensor: | ||||
"""Returns a new tensor where each of the elements are randomly set to zero | |||||
""" | |||||
Returns a new tensor where each of the elements is randomly set to zero | |||||
with probability P = ``drop_prob``. Optionally rescale the output tensor if ``training`` is True. | with probability P = ``drop_prob``. Optionally rescale the output tensor if ``training`` is True. | ||||
:param inp: input tensor. | :param inp: input tensor. | ||||
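A sketch of ``dropout``; kept elements are rescaled by 1 / (1 - drop_prob) while training, and the call leaves the input unchanged when ``training=False`` (imports assumed as above; the first output is random):

.. code-block::

    x = tensor(np.ones((1, 8), dtype=np.float32))
    y = F.dropout(x, drop_prob=0.5)                  # random mask, surviving entries become 2.0
    z = F.dropout(x, drop_prob=0.5, training=False)  # no-op copy of x
    print(y.numpy())
    print(z.numpy())  # [[1. 1. 1. 1. 1. 1. 1. 1.]]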
@@ -1302,7 +1319,8 @@ def embedding( | |||||
max_norm: Optional[float] = None, | max_norm: Optional[float] = None, | ||||
norm_type: Optional[float] = None, | norm_type: Optional[float] = None, | ||||
): | ): | ||||
"""Applies lookup table for embedding. | |||||
""" | |||||
Applies lookup table for embedding. | |||||
:param inp: tensor with indices. | :param inp: tensor with indices. | ||||
:param weight: learnable weights to embed from. | :param weight: learnable weights to embed from. | ||||
@@ -1329,7 +1347,8 @@ def roi_pooling( | |||||
mode: str = "max", | mode: str = "max", | ||||
scale: float = 1.0, | scale: float = 1.0, | ||||
) -> Tensor: | ) -> Tensor: | ||||
"""Applies roi pooling on input feature. | |||||
""" | |||||
Applies roi pooling on input feature. | |||||
:param inp: tensor that represents the input feature, `(N, C, H, W)` images. | :param inp: tensor that represents the input feature, `(N, C, H, W)` images. | ||||
:param rois: `(K, 5)` boxes. First column is the index into N. The other 4 columns are xyxy. | :param rois: `(K, 5)` boxes. First column is the index into N. The other 4 columns are xyxy. | ||||
@@ -1350,7 +1369,7 @@ def roi_pooling( | |||||
inp = tensor(np.random.randn(1, 1, 128, 128)) | inp = tensor(np.random.randn(1, 1, 128, 128)) | ||||
rois = tensor(np.random.random((4, 5))) | rois = tensor(np.random.random((4, 5))) | ||||
y = F.nn.roi_pooling(inp, rois, (2, 2)) | y = F.nn.roi_pooling(inp, rois, (2, 2)) | ||||
print(y.numpy()[0]) | |||||
print(y.numpy()[0].round(decimals=4)) | |||||
Outputs: | Outputs: | ||||
@@ -1382,7 +1401,8 @@ def roi_align( | |||||
sample_points: Union[int, tuple, list] = 2, | sample_points: Union[int, tuple, list] = 2, | ||||
aligned: bool = True, | aligned: bool = True, | ||||
) -> Tensor: | ) -> Tensor: | ||||
"""Applies roi align on input feature. | |||||
""" | |||||
Applies roi align on input feature. | |||||
:param inp: tensor that represents the input feature, shape is `(N, C, H, W)`. | :param inp: tensor that represents the input feature, shape is `(N, C, H, W)`. | ||||
:param rois: `(N, 5)` boxes. First column is the box index. The other 4 columns are ``xyxy``. | :param rois: `(N, 5)` boxes. First column is the box index. The other 4 columns are ``xyxy``. | ||||
@@ -1407,7 +1427,7 @@ def roi_align( | |||||
inp = tensor(np.random.randn(1, 1, 128, 128)) | inp = tensor(np.random.randn(1, 1, 128, 128)) | ||||
rois = tensor(np.random.random((4, 5))) | rois = tensor(np.random.random((4, 5))) | ||||
y = F.nn.roi_align(inp, rois, (2, 2)) | y = F.nn.roi_align(inp, rois, (2, 2)) | ||||
print(y.numpy()[0]) | |||||
print(y.numpy()[0].round(decimals=4)) | |||||
Outputs: | Outputs: | ||||
@@ -1444,7 +1464,8 @@ def roi_align( | |||||
def indexing_one_hot( | def indexing_one_hot( | ||||
src: Tensor, index: Tensor, axis: int = 1, keepdims=False | src: Tensor, index: Tensor, axis: int = 1, keepdims=False | ||||
) -> Tensor: | ) -> Tensor: | ||||
r"""One-hot indexing for some axes. | |||||
r""" | |||||
One-hot indexing for some axes. | |||||
:param src: input tensor. | :param src: input tensor. | ||||
:param index: index tensor. | :param index: index tensor. | ||||
@@ -28,7 +28,8 @@ def conv_bias_activation( | |||||
conv_mode="CROSS_CORRELATION", | conv_mode="CROSS_CORRELATION", | ||||
compute_mode="DEFAULT", | compute_mode="DEFAULT", | ||||
) -> Tensor: | ) -> Tensor: | ||||
"""Convolution bias with activation operation, only for inference. | |||||
""" | |||||
Convolution bias with activation operation, only for inference. | |||||
:param inp: feature map of the convolution operation. | :param inp: feature map of the convolution operation. | ||||
:param weight: convolution kernel. | :param weight: convolution kernel. | ||||
@@ -58,7 +58,8 @@ __all__ = [ | |||||
def eye(N, M=None, *, dtype="float32", device: Optional[CompNode] = None) -> Tensor: | def eye(N, M=None, *, dtype="float32", device: Optional[CompNode] = None) -> Tensor: | ||||
"""Returns a 2D tensor with ones on the diagonal and zeros elsewhere. | |||||
""" | |||||
Returns a 2D tensor with ones on the diagonal and zeros elsewhere. | |||||
:param shape: expected shape of output tensor. | :param shape: expected shape of output tensor. | ||||
:param dtype: data type. Default: None | :param dtype: data type. Default: None | ||||
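A sketch of ``eye`` using the ``(N, M)`` signature shown above (imports assumed as before):

.. code-block::

    print(F.eye(3).numpy())
    # [[1. 0. 0.]
    #  [0. 1. 0.]
    #  [0. 0. 1.]]
    print(F.eye(2, 4, dtype="int32").numpy())
    # [[1 0 0 0]
    #  [0 1 0 0]]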
@@ -100,7 +101,8 @@ def eye(N, M=None, *, dtype="float32", device: Optional[CompNode] = None) -> Ten | |||||
def full(shape, value, dtype="float32", device=None): | def full(shape, value, dtype="float32", device=None): | ||||
"""Returns a tensor with given shape and value. | |||||
""" | |||||
Returns a tensor with given shape and value. | |||||
""" | """ | ||||
if isinstance(shape, int): | if isinstance(shape, int): | ||||
shape = (shape,) | shape = (shape,) | ||||
@@ -113,7 +115,8 @@ def full(shape, value, dtype="float32", device=None): | |||||
def ones(shape, dtype="float32", device=None): | def ones(shape, dtype="float32", device=None): | ||||
"""Returns a ones tensor with given shape. | |||||
""" | |||||
Returns a ones tensor with given shape. | |||||
:param shape: expected shape of output tensor. | :param shape: expected shape of output tensor. | ||||
:return: output ones tensor. | :return: output ones tensor. | ||||
@@ -139,13 +142,15 @@ def ones(shape, dtype="float32", device=None): | |||||
def zeros(shape, dtype="float32", device=None): | def zeros(shape, dtype="float32", device=None): | ||||
"""Returns a zero tensor with given shape. | |||||
""" | |||||
Returns a zero tensor with given shape. | |||||
""" | """ | ||||
return full(shape, 0.0, dtype=dtype, device=device) | return full(shape, 0.0, dtype=dtype, device=device) | ||||
def zeros_like(inp: Tensor) -> Tensor: | def zeros_like(inp: Tensor) -> Tensor: | ||||
"""Returns a zero tensor with the same shape as input tensor. | |||||
""" | |||||
Returns a zero tensor with the same shape as input tensor. | |||||
:param inp: input tensor. | :param inp: input tensor. | ||||
:return: output zero tensor. | :return: output zero tensor. | ||||
@@ -174,13 +179,15 @@ def zeros_like(inp: Tensor) -> Tensor: | |||||
def ones_like(inp: Tensor) -> Tensor: | def ones_like(inp: Tensor) -> Tensor: | ||||
"""Returns a ones tensor with the same shape as input tensor. | |||||
""" | |||||
Returns a ones tensor with the same shape as input tensor. | |||||
""" | """ | ||||
return ones(inp.shape, dtype=inp.dtype, device=inp.device) | return ones(inp.shape, dtype=inp.dtype, device=inp.device) | ||||
def full_like(inp: Tensor, value: Union[int, float]) -> Tensor: | def full_like(inp: Tensor, value: Union[int, float]) -> Tensor: | ||||
"""Returns a tensor filled with given value with the same shape as input tensor. | |||||
""" | |||||
Returns a tensor filled with given value with the same shape as input tensor. | |||||
""" | """ | ||||
return full(inp.shape, value, dtype=inp.dtype, device=inp.device) | return full(inp.shape, value, dtype=inp.dtype, device=inp.device) | ||||
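A sketch of the ``*_like`` helpers defined above (imports assumed as before):

.. code-block::

    x = tensor(np.arange(6, dtype=np.float32).reshape(2, 3))
    print(F.zeros_like(x).numpy())    # [[0. 0. 0.] [0. 0. 0.]]
    print(F.ones_like(x).numpy())     # [[1. 1. 1.] [1. 1. 1.]]
    print(F.full_like(x, 7).numpy())  # [[7. 7. 7.] [7. 7. 7.]]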
@@ -274,7 +281,8 @@ def concat(inps: Iterable[Tensor], axis: int = 0, device=None) -> Tensor: | |||||
def stack(inps, axis=0, device=None): | def stack(inps, axis=0, device=None): | ||||
"""Concats a sequence of tensors along a new axis. | |||||
""" | |||||
Concatenates a sequence of tensors along a new axis. | |||||
The input tensors must have the same shape. | The input tensors must have the same shape. | ||||
:param inps: input tensors. | :param inps: input tensors. | ||||
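A sketch of ``stack``; the inputs must share a shape and a new axis is inserted at position ``axis`` (imports assumed as before):

.. code-block::

    a = tensor(np.array([1.0, 2.0], dtype=np.float32))
    b = tensor(np.array([3.0, 4.0], dtype=np.float32))
    print(F.stack([a, b], axis=0).numpy())  # [[1. 2.] [3. 4.]]
    print(F.stack([a, b], axis=1).numpy())  # [[1. 3.] [2. 4.]]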
@@ -316,7 +324,8 @@ def stack(inps, axis=0, device=None): | |||||
def split(inp, nsplits_or_sections, axis=0): | def split(inp, nsplits_or_sections, axis=0): | ||||
"""Splits the input tensor into several smaller tensors. | |||||
""" | |||||
Splits the input tensor into several smaller tensors. | |||||
When nsplits_or_sections is int, the last tensor may be smaller than others. | When nsplits_or_sections is int, the last tensor may be smaller than others. | ||||
:param inp: input tensor. | :param inp: input tensor. | ||||
@@ -334,7 +343,7 @@ def split(inp, nsplits_or_sections, axis=0): | |||||
x = tensor(np.random.random((2,3,4,5)), dtype=np.float32) | x = tensor(np.random.random((2,3,4,5)), dtype=np.float32) | ||||
out = F.split(x, 2, axis=3) | out = F.split(x, 2, axis=3) | ||||
print(out[0].shape, out[1].shape) | |||||
print(out[0].numpy().shape, out[1].numpy().shape) | |||||
Outputs: | Outputs: | ||||
@@ -400,7 +409,8 @@ def _get_idx(index, axis): | |||||
def gather(inp: Tensor, axis: int, index: Tensor) -> Tensor: | def gather(inp: Tensor, axis: int, index: Tensor) -> Tensor: | ||||
# TODO: rewrite doc | # TODO: rewrite doc | ||||
r"""Gathers data from input tensor on axis using index. | |||||
r""" | |||||
Gathers data from input tensor on axis using index. | |||||
For a 3-D tensor, the output is specified by:: | For a 3-D tensor, the output is specified by:: | ||||
@@ -472,7 +482,8 @@ def gather(inp: Tensor, axis: int, index: Tensor) -> Tensor: | |||||
def scatter(inp: Tensor, axis: int, index: Tensor, source: Tensor) -> Tensor: | def scatter(inp: Tensor, axis: int, index: Tensor, source: Tensor) -> Tensor: | ||||
# TODO: rewrite doc | # TODO: rewrite doc | ||||
r"""Writes all values from the tensor source into input tensor | |||||
r""" | |||||
Writes all values from the tensor source into input tensor | |||||
at the indices specified in the index tensor. | at the indices specified in the index tensor. | ||||
For each value in source, its output index is specified by its index | For each value in source, its output index is specified by its index | ||||
@@ -577,7 +588,8 @@ def scatter(inp: Tensor, axis: int, index: Tensor, source: Tensor) -> Tensor: | |||||
def where(mask: Tensor, x: Tensor, y: Tensor) -> Tensor: | def where(mask: Tensor, x: Tensor, y: Tensor) -> Tensor: | ||||
r"""Selects elements either from Tensor x or Tensor y, according to mask. | |||||
r""" | |||||
Selects elements either from Tensor x or Tensor y, according to mask. | |||||
.. math:: | .. math:: | ||||
@@ -764,7 +776,8 @@ AxisDesc = AxisAddRemove.AxisDesc | |||||
def flatten(inp: Tensor, start_axis: int = 0, end_axis: int = -1) -> Tensor: | def flatten(inp: Tensor, start_axis: int = 0, end_axis: int = -1) -> Tensor: | ||||
r"""Reshapes the tensor by flattening the sub-tensor from dimension ``start_axis`` to dimension ``end_axis``. | |||||
r""" | |||||
Reshapes the tensor by flattening the sub-tensor from dimension ``start_axis`` to dimension ``end_axis``. | |||||
:param inp: input tensor. | :param inp: input tensor. | ||||
:param start_axis: start dimension that the sub-tensor to be flattened. Default: 0 | :param start_axis: start dimension that the sub-tensor to be flattened. Default: 0 | ||||
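A sketch of ``flatten`` over a chosen range of dimensions (imports assumed as before):

.. code-block::

    x = tensor(np.zeros((2, 3, 4, 5), dtype=np.float32))
    print(F.flatten(x).numpy().shape)        # (120,), flatten everything
    print(F.flatten(x, 1).numpy().shape)     # (2, 60), keep the leading batch axis
    print(F.flatten(x, 1, 2).numpy().shape)  # (2, 12, 5), flatten axes 1..2 only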
@@ -819,7 +832,7 @@ def expand_dims(inp: Tensor, axis: Union[int, Sequence[int]]) -> Tensor: | |||||
x = tensor([1, 2]) | x = tensor([1, 2]) | ||||
out = F.expand_dims(x, 0) | out = F.expand_dims(x, 0) | ||||
print(out.shape) | |||||
print(out.numpy().shape) | |||||
Outputs: | Outputs: | ||||
@@ -865,7 +878,7 @@ def squeeze(inp: Tensor, axis: Optional[Union[int, Sequence[int]]] = None) -> Te | |||||
x = tensor(np.array([1, 2], dtype=np.int32).reshape(1, 1, 2, 1)) | x = tensor(np.array([1, 2], dtype=np.int32).reshape(1, 1, 2, 1)) | ||||
out = F.squeeze(x, 3) | out = F.squeeze(x, 3) | ||||
print(out.shape) | |||||
print(out.numpy().shape) | |||||
Outputs: | Outputs: | ||||
@@ -884,7 +897,8 @@ def linspace( | |||||
dtype="float32", | dtype="float32", | ||||
device: Optional[CompNode] = None, | device: Optional[CompNode] = None, | ||||
) -> Tensor: | ) -> Tensor: | ||||
r"""Returns equally spaced numbers over a specified interval. | |||||
r""" | |||||
Returns equally spaced numbers over a specified interval. | |||||
:param start: starting value of the sequence, should be scalar. | :param start: starting value of the sequence, should be scalar. | ||||
:param stop: last value of the sequence, should be scalar. | :param stop: last value of the sequence, should be scalar. | ||||
@@ -928,7 +942,8 @@ def arange( | |||||
dtype="float32", | dtype="float32", | ||||
device: Optional[CompNode] = None, | device: Optional[CompNode] = None, | ||||
) -> Tensor: | ) -> Tensor: | ||||
r"""Returns a tensor with values from start to stop with adjacent interval step. | |||||
r""" | |||||
Returns a tensor with values from start to stop, where step is the interval between adjacent values. | |||||
:param start: starting value of the sequence, should be scalar. | :param start: starting value of the sequence, should be scalar. | ||||
:param stop: ending value of the sequence, should be scalar. | :param stop: ending value of the sequence, should be scalar. | ||||
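A sketch of ``linspace`` and ``arange``; the ``(start, stop, num)`` argument order for ``linspace`` is an assumption based on the usual convention (the hunk above only shows ``dtype`` and ``device``), with ``stop`` included as the "last value" the docstring describes:

.. code-block::

    print(F.linspace(0, 1, 5).numpy())  # [0.   0.25 0.5  0.75 1.  ]
    print(F.arange(0, 5, 1).numpy())    # [0. 1. 2. 3. 4.], stop is excluded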
@@ -11,7 +11,8 @@ import functools | |||||
def get_ndtuple(value, *, n, allow_zero: bool = True): | def get_ndtuple(value, *, n, allow_zero: bool = True): | ||||
r"""Converts possibly 1D tuple to n-dim tuple. | |||||
r""" | |||||
Converts possibly 1D tuple to n-dim tuple. | |||||
:param value: value to be filled in the generated tuple. | :param value: value to be filled in the generated tuple. | ||||
:param n: how many elements will the tuple have. | :param n: how many elements will the tuple have. | ||||
@@ -43,7 +43,8 @@ PROTOCOLS = { | |||||
def _get_megengine_home() -> str: | def _get_megengine_home() -> str: | ||||
"""MGE_HOME setting complies with the XDG Base Directory Specification | |||||
""" | |||||
MGE_HOME setting complies with the XDG Base Directory Specification | |||||
""" | """ | ||||
megengine_home = os.path.expanduser( | megengine_home = os.path.expanduser( | ||||
os.getenv( | os.getenv( | ||||
@@ -94,7 +95,8 @@ def _init_hub( | |||||
commit: str = None, | commit: str = None, | ||||
protocol: str = DEFAULT_PROTOCOL, | protocol: str = DEFAULT_PROTOCOL, | ||||
): | ): | ||||
"""Imports hubmodule like python import. | |||||
""" | |||||
Imports a hub module in the same way as a normal Python import. | |||||
:param repo_info: | :param repo_info: | ||||
a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | ||||
@@ -137,7 +139,8 @@ def list( | |||||
commit: str = None, | commit: str = None, | ||||
protocol: str = DEFAULT_PROTOCOL, | protocol: str = DEFAULT_PROTOCOL, | ||||
) -> List[str]: | ) -> List[str]: | ||||
"""Lists all entrypoints available in repo hubconf. | |||||
""" | |||||
Lists all entrypoints available in repo hubconf. | |||||
:param repo_info: | :param repo_info: | ||||
a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | ||||
@@ -175,7 +178,8 @@ def load( | |||||
protocol: str = DEFAULT_PROTOCOL, | protocol: str = DEFAULT_PROTOCOL, | ||||
**kwargs | **kwargs | ||||
) -> Any: | ) -> Any: | ||||
"""Loads model from github or gitlab repo, with pretrained weights. | |||||
""" | |||||
Loads model from github or gitlab repo, with pretrained weights. | |||||
:param repo_info: | :param repo_info: | ||||
a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | ||||
@@ -215,7 +219,8 @@ def help( | |||||
commit: str = None, | commit: str = None, | ||||
protocol: str = DEFAULT_PROTOCOL, | protocol: str = DEFAULT_PROTOCOL, | ||||
) -> str: | ) -> str: | ||||
"""This function returns docstring of entrypoint ``entry`` by following steps: | |||||
""" | |||||
This function returns the docstring of entrypoint ``entry`` by the following steps: | |||||
1. Pull the repo code specified by git and repo_info. | 1. Pull the repo code specified by git and repo_info. | ||||
2. Load the entry defined in repo's hubconf.py | 2. Load the entry defined in repo's hubconf.py | ||||
@@ -250,7 +255,8 @@ def help( | |||||
def load_serialized_obj_from_url(url: str, model_dir=None) -> Any: | def load_serialized_obj_from_url(url: str, model_dir=None) -> Any: | ||||
"""Loads MegEngine serialized object from the given URL. | |||||
""" | |||||
Loads MegEngine serialized object from the given URL. | |||||
If the object is already present in ``model_dir``, it's deserialized and | If the object is already present in ``model_dir``, it's deserialized and | ||||
returned. If no ``model_dir`` is specified, it will be ``MGE_HOME/serialized``. | returned. If no ``model_dir`` is specified, it will be ``MGE_HOME/serialized``. | ||||
@@ -27,7 +27,8 @@ def load_module(name: str, path: str) -> types.ModuleType: | |||||
def check_module_exists(module: str) -> bool: | def check_module_exists(module: str) -> bool: | ||||
"""Checks whether python module exists or not. | |||||
""" | |||||
Checks whether python module exists or not. | |||||
:param module: name of module. | :param module: name of module. | ||||
""" | """ | ||||
@@ -36,7 +37,8 @@ def check_module_exists(module: str) -> bool: | |||||
@contextmanager | @contextmanager | ||||
def cd(target: str) -> Iterator[None]: | def cd(target: str) -> Iterator[None]: | ||||
"""Changes current directory to target. | |||||
""" | |||||
Changes current directory to target. | |||||
:param target: target directory. | :param target: target directory. | ||||
""" | """ | ||||
@@ -519,7 +519,8 @@ class trace: | |||||
optimize_for_inference=True, | optimize_for_inference=True, | ||||
**kwargs | **kwargs | ||||
): | ): | ||||
r"""Serializes trace to file system. | |||||
r""" | |||||
Serializes trace to file system. | |||||
:param file: output file, could be file object or filename. | :param file: output file, could be file object or filename. | ||||
:param arg_names: names of the input tensors in the traced function. | :param arg_names: names of the input tensors in the traced function. | ||||
@@ -17,7 +17,8 @@ _default_level = logging.getLevelName(_default_level_name.upper()) | |||||
def set_log_file(fout, mode="a"): | def set_log_file(fout, mode="a"): | ||||
r"""Sets log output file. | |||||
r""" | |||||
Sets log output file. | |||||
:type fout: str or file-like | :type fout: str or file-like | ||||
:param fout: file-like object that supports write and flush, or string for | :param fout: file-like object that supports write and flush, or string for | ||||
@@ -38,37 +39,44 @@ class MegEngineLogFormatter(logging.Formatter): | |||||
max_lines = 256 | max_lines = 256 | ||||
def _color_exc(self, msg): | def _color_exc(self, msg): | ||||
r"""Sets the color of message as the execution type. | |||||
r""" | |||||
Sets the color of the message for the exception type. | |||||
""" | """ | ||||
return "\x1b[34m{}\x1b[0m".format(msg) | return "\x1b[34m{}\x1b[0m".format(msg) | ||||
def _color_dbg(self, msg): | def _color_dbg(self, msg): | ||||
r"""Sets the color of message as the debugging type. | |||||
r""" | |||||
Sets the color of message as the debugging type. | |||||
""" | """ | ||||
return "\x1b[36m{}\x1b[0m".format(msg) | return "\x1b[36m{}\x1b[0m".format(msg) | ||||
def _color_warn(self, msg): | def _color_warn(self, msg): | ||||
r"""Sets the color of message as the warning type. | |||||
r""" | |||||
Sets the color of message as the warning type. | |||||
""" | """ | ||||
return "\x1b[1;31m{}\x1b[0m".format(msg) | return "\x1b[1;31m{}\x1b[0m".format(msg) | ||||
def _color_err(self, msg): | def _color_err(self, msg): | ||||
r"""Sets the color of message as the error type. | |||||
r""" | |||||
Sets the color of message as the error type. | |||||
""" | """ | ||||
return "\x1b[1;4;31m{}\x1b[0m".format(msg) | return "\x1b[1;4;31m{}\x1b[0m".format(msg) | ||||
def _color_omitted(self, msg): | def _color_omitted(self, msg): | ||||
r"""Sets the color of message as the omitted type. | |||||
r""" | |||||
Sets the color of message as the omitted type. | |||||
""" | """ | ||||
return "\x1b[35m{}\x1b[0m".format(msg) | return "\x1b[35m{}\x1b[0m".format(msg) | ||||
def _color_normal(self, msg): | def _color_normal(self, msg): | ||||
r"""Sets the color of message as the normal type. | |||||
r""" | |||||
Sets the color of message as the normal type. | |||||
""" | """ | ||||
return msg | return msg | ||||
def _color_date(self, msg): | def _color_date(self, msg): | ||||
r"""Sets the color of message the same as date. | |||||
r""" | |||||
Sets the color of message the same as date. | |||||
""" | """ | ||||
return "\x1b[32m{}\x1b[0m".format(msg) | return "\x1b[32m{}\x1b[0m".format(msg) | ||||
@@ -142,7 +150,8 @@ class MegEngineLogFormatter(logging.Formatter): | |||||
def get_logger(name=None, formatter=MegEngineLogFormatter): | def get_logger(name=None, formatter=MegEngineLogFormatter): | ||||
r"""Gets megengine logger with given name. | |||||
r""" | |||||
Gets megengine logger with given name. | |||||
""" | """ | ||||
logger = logging.getLogger(name) | logger = logging.getLogger(name) | ||||
@@ -161,7 +170,8 @@ def get_logger(name=None, formatter=MegEngineLogFormatter): | |||||
def set_log_level(level, update_existing=True): | def set_log_level(level, update_existing=True): | ||||
"""Sets default logging level. | |||||
""" | |||||
Sets default logging level. | |||||
:type level: int e.g. logging.INFO | :type level: int e.g. logging.INFO | ||||
:param level: logging level given by the Python :mod:`logging` module | :param level: logging level given by the Python :mod:`logging` module | ||||
@@ -198,7 +208,8 @@ try: | |||||
_imperative_rt_logger.set_log_level(_imperative_rt_logger.LogLevel.Debug) | _imperative_rt_logger.set_log_level(_imperative_rt_logger.LogLevel.Debug) | ||||
def set_mgb_log_level(level): | def set_mgb_log_level(level): | ||||
r"""Sets megbrain log level | |||||
r""" | |||||
Sets megbrain log level | |||||
:type level: int e.g. logging.INFO | :type level: int e.g. logging.INFO | ||||
:param level: new log level | :param level: new log level | ||||
@@ -218,7 +229,8 @@ except ImportError as exc: | |||||
@contextlib.contextmanager | @contextlib.contextmanager | ||||
def replace_mgb_log_level(level): | def replace_mgb_log_level(level): | ||||
r"""Replaces megbrain log level in a block and restore after exiting. | |||||
r""" | |||||
Replaces megbrain log level in a block and restores it after exiting. | |||||
:type level: int e.g. logging.INFO | :type level: int e.g. logging.INFO | ||||
:param level: new log level | :param level: new log level | ||||
@@ -231,7 +243,8 @@ def replace_mgb_log_level(level): | |||||
def enable_debug_log(): | def enable_debug_log(): | ||||
r"""Sets logging level to debug for all components. | |||||
r""" | |||||
Sets logging level to debug for all components. | |||||
""" | """ | ||||
set_log_level(logging.DEBUG) | set_log_level(logging.DEBUG) | ||||
set_mgb_log_level(logging.DEBUG) | set_mgb_log_level(logging.DEBUG) |
@@ -27,7 +27,8 @@ class _AdaptivePoolNd(Module): | |||||
class AdaptiveMaxPool2d(_AdaptivePoolNd): | class AdaptiveMaxPool2d(_AdaptivePoolNd): | ||||
r"""Applies a 2D max adaptive pooling over an input. | |||||
r""" | |||||
Applies a 2D max adaptive pooling over an input. | |||||
For instance, given an input of the size :math:`(N, C, H, W)` and | For instance, given an input of the size :math:`(N, C, H, W)` and | ||||
an output shape :math:`(OH, OW)`, this layer generates the output of | an output shape :math:`(OH, OW)`, this layer generates the output of | ||||
@@ -62,7 +63,7 @@ class AdaptiveMaxPool2d(_AdaptivePoolNd): | |||||
.. testoutput:: | .. testoutput:: | ||||
[[[[5. 7.] | |||||
[[[[ 5. 7.] | |||||
[13. 15.]]]] | [13. 15.]]]] | ||||
""" | """ | ||||
@@ -72,7 +73,8 @@ class AdaptiveMaxPool2d(_AdaptivePoolNd): | |||||
class AdaptiveAvgPool2d(_AdaptivePoolNd): | class AdaptiveAvgPool2d(_AdaptivePoolNd): | ||||
r"""Applies a 2D average pooling over an input. | |||||
r""" | |||||
Applies a 2D average adaptive pooling over an input. | |||||
For instance, given an input of the size :math:`(N, C, H, W)` and | For instance, given an input of the size :math:`(N, C, H, W)` and | ||||
an output shape :math:`(OH, OW)`, this layer generates the output of | an output shape :math:`(OH, OW)`, this layer generates the output of | ||||
@@ -105,7 +107,7 @@ class AdaptiveAvgPool2d(_AdaptivePoolNd): | |||||
.. testoutput:: | .. testoutput:: | ||||
[[[[2.5 4.5] | |||||
[[[[ 2.5 4.5] | |||||
[10.5 12.5]]]] | [10.5 12.5]]]] | ||||
""" | """ | ||||
@@ -87,7 +87,8 @@ class _ConvNd(Module): | |||||
class Conv2d(_ConvNd): | class Conv2d(_ConvNd): | ||||
r"""Applies a 2D convolution over an input tensor. | |||||
r""" | |||||
Applies a 2D convolution over an input tensor. | |||||
For instance, given an input of the size :math:`(N, C_{\text{in}}, H, W)`, | For instance, given an input of the size :math:`(N, C_{\text{in}}, H, W)`, | ||||
this layer generates an output of the size | this layer generates an output of the size | ||||
@@ -145,7 +146,7 @@ class Conv2d(_ConvNd): | |||||
m = M.Conv2d(in_channels=3, out_channels=1, kernel_size=3) | m = M.Conv2d(in_channels=3, out_channels=1, kernel_size=3) | ||||
inp = mge.tensor(np.arange(0, 96).astype("float32").reshape(2, 3, 4, 4)) | inp = mge.tensor(np.arange(0, 96).astype("float32").reshape(2, 3, 4, 4)) | ||||
oup = m(inp) | oup = m(inp) | ||||
print(oup.shape) | |||||
print(oup.numpy().shape) | |||||
Outputs: | Outputs: | ||||
@@ -232,7 +233,8 @@ class Conv2d(_ConvNd): | |||||
class ConvTranspose2d(_ConvNd): | class ConvTranspose2d(_ConvNd): | ||||
r"""Applies a 2D transposed convolution over an input tensor. | |||||
r""" | |||||
Applies a 2D transposed convolution over an input tensor. | |||||
This module is also known as a deconvolution or a fractionally-strided convolution. | This module is also known as a deconvolution or a fractionally-strided convolution. | ||||
:class:`ConvTranspose2d` can be seen as the gradient of :class:`Conv2d` operation | :class:`ConvTranspose2d` can be seen as the gradient of :class:`Conv2d` operation | ||||
@@ -340,7 +342,8 @@ class ConvTranspose2d(_ConvNd): | |||||
class LocalConv2d(Conv2d): | class LocalConv2d(Conv2d): | ||||
r"""Applies a spatial convolution with untied kernels over an groupped channeled input 4D tensor. | |||||
r""" | |||||
Applies a spatial convolution with untied kernels over a grouped, channeled input 4D tensor. | |||||
It is also known as the locally connected layer. | It is also known as the locally connected layer. | ||||
:param in_channels: number of input channels. | :param in_channels: number of input channels. | ||||
@@ -11,7 +11,8 @@ from .module import Module | |||||
class Dropout(Module): | class Dropout(Module): | ||||
r"""Randomly sets input elements to zeros with the probability :math:`drop\_prob` during training. | |||||
r""" | |||||
Randomly sets input elements to zeros with the probability :math:`drop\_prob` during training. | |||||
Commonly used in large networks to prevent overfitting. | Commonly used in large networks to prevent overfitting. | ||||
Note that we perform dropout only during training; we also rescale (multiply) the output tensor | Note that we perform dropout only during training; we also rescale (multiply) the output tensor | ||||
by :math:`\frac{1}{1 - drop\_prob}`. During inference :class:`~.Dropout` is equal to :class:`~.Identity`. | by :math:`\frac{1}{1 - drop\_prob}`. During inference :class:`~.Dropout` is equal to :class:`~.Identity`. | ||||
@@ -93,7 +93,7 @@ class Embedding(Module): | |||||
) | ) | ||||
self.reset_parameters() | self.reset_parameters() | ||||
else: | else: | ||||
if initial_weight.shape != (num_embeddings, embedding_dim): | |||||
if initial_weight.numpy().shape != (num_embeddings, embedding_dim): | |||||
raise ValueError( | raise ValueError( | ||||
"The weight shape should match num_embeddings and embedding_dim" | "The weight shape should match num_embeddings and embedding_dim" | ||||
) | ) | ||||
@@ -18,7 +18,8 @@ from ..tensor import Tensor | |||||
def fill_(tensor: Tensor, val: Union[float, int]) -> None: | def fill_(tensor: Tensor, val: Union[float, int]) -> None: | ||||
"""Fills the given ``tensor`` with value ``val``. | |||||
""" | |||||
Fills the given ``tensor`` with value ``val``. | |||||
:param tensor: tensor to be initialized. | :param tensor: tensor to be initialized. | ||||
:param val: value to be filled throughout the tensor. | :param val: value to be filled throughout the tensor. | ||||
@@ -27,7 +28,8 @@ def fill_(tensor: Tensor, val: Union[float, int]) -> None: | |||||
def zeros_(tensor: Tensor) -> None: | def zeros_(tensor: Tensor) -> None: | ||||
"""Fills the given ``tensor`` with scalar value `0`. | |||||
""" | |||||
Fills the given ``tensor`` with scalar value `0`. | |||||
:param tensor: tensor to be initialized. | :param tensor: tensor to be initialized. | ||||
""" | """ | ||||
@@ -35,7 +37,8 @@ def zeros_(tensor: Tensor) -> None: | |||||
def ones_(tensor: Tensor) -> None: | def ones_(tensor: Tensor) -> None: | ||||
"""Fills the given ``tensor`` with the scalar value `1`. | |||||
""" | |||||
Fills the given ``tensor`` with the scalar value `1`. | |||||
:param tensor: tensor to be initialized. | :param tensor: tensor to be initialized. | ||||
""" | """ | ||||
@@ -43,7 +46,8 @@ def ones_(tensor: Tensor) -> None: | |||||
def uniform_(tensor: Tensor, a: float = 0.0, b: float = 1.0) -> None: | def uniform_(tensor: Tensor, a: float = 0.0, b: float = 1.0) -> None: | ||||
r"""Fills the given ``tensor`` with random value sampled from uniform distribution | |||||
r""" | |||||
Fills the given ``tensor`` with random value sampled from uniform distribution | |||||
:math:`\mathcal{U}(\text{a}, \text{b})`. | :math:`\mathcal{U}(\text{a}, \text{b})`. | ||||
:param tensor: tensor to be initialized. | :param tensor: tensor to be initialized. | ||||
@@ -54,7 +58,8 @@ def uniform_(tensor: Tensor, a: float = 0.0, b: float = 1.0) -> None: | |||||
def normal_(tensor: Tensor, mean: float = 0.0, std: float = 1.0) -> None: | def normal_(tensor: Tensor, mean: float = 0.0, std: float = 1.0) -> None: | ||||
r"""Fills the given ``tensor`` with random value sampled from normal distribution | |||||
r""" | |||||
Fills the given ``tensor`` with random value sampled from normal distribution | |||||
:math:`\mathcal{N}(\text{mean}, \text{std}^2)`. | :math:`\mathcal{N}(\text{mean}, \text{std}^2)`. | ||||
:param tensor: tensor to be initialized. | :param tensor: tensor to be initialized. | ||||
@@ -67,7 +72,8 @@ def normal_(tensor: Tensor, mean: float = 0.0, std: float = 1.0) -> None: | |||||
def calculate_gain( | def calculate_gain( | ||||
nonlinearity: str, param: Optional[Union[int, float]] = None | nonlinearity: str, param: Optional[Union[int, float]] = None | ||||
) -> float: | ) -> float: | ||||
r"""Returns a recommended gain value (see the table below) for the given nonlinearity | |||||
r""" | |||||
Returns a recommended gain value (see the table below) for the given nonlinearity | |||||
function. | function. | ||||
================= ==================================================== | ================= ==================================================== | ||||
@@ -168,7 +174,8 @@ def calculate_correct_fan(tensor: Tensor, mode: str) -> float: | |||||
def xavier_uniform_(tensor: Tensor, gain: float = 1.0) -> None: | def xavier_uniform_(tensor: Tensor, gain: float = 1.0) -> None: | ||||
r"""Fills tensor with random values sampled from :math:`\mathcal{U}(-a, a)` | |||||
r""" | |||||
Fills tensor with random values sampled from :math:`\mathcal{U}(-a, a)` | |||||
where | where | ||||
.. math:: | .. math:: | ||||
@@ -188,7 +195,8 @@ def xavier_uniform_(tensor: Tensor, gain: float = 1.0) -> None: | |||||
def xavier_normal_(tensor: Tensor, gain: float = 1.0) -> None: | def xavier_normal_(tensor: Tensor, gain: float = 1.0) -> None: | ||||
r"""Fills tensor with random values sampled from | |||||
r""" | |||||
Fills tensor with random values sampled from | |||||
:math:`\mathcal{N}(0, \text{std}^2)` where | :math:`\mathcal{N}(0, \text{std}^2)` where | ||||
.. math:: | .. math:: | ||||
@@ -209,7 +217,8 @@ def xavier_normal_(tensor: Tensor, gain: float = 1.0) -> None: | |||||
def msra_uniform_( | def msra_uniform_( | ||||
tensor: Tensor, a: float = 0, mode: str = "fan_in", nonlinearity: str = "leaky_relu" | tensor: Tensor, a: float = 0, mode: str = "fan_in", nonlinearity: str = "leaky_relu" | ||||
) -> None: | ) -> None: | ||||
r"""Fills tensor wilth random values sampled from | |||||
r""" | |||||
Fills tensor with random values sampled from | |||||
:math:`\mathcal{U}(-\text{bound}, \text{bound})` where | :math:`\mathcal{U}(-\text{bound}, \text{bound})` where | ||||
.. math:: | .. math:: | ||||
@@ -238,7 +247,8 @@ def msra_uniform_( | |||||
def msra_normal_( | def msra_normal_( | ||||
tensor: Tensor, a: float = 0, mode: str = "fan_in", nonlinearity: str = "leaky_relu" | tensor: Tensor, a: float = 0, mode: str = "fan_in", nonlinearity: str = "leaky_relu" | ||||
) -> None: | ) -> None: | ||||
r"""Fills tensor wilth random values sampled from | |||||
r""" | |||||
Fills tensor with random values sampled from | |||||
:math:`\mathcal{N}(0, \text{std}^2)` where | :math:`\mathcal{N}(0, \text{std}^2)` where | ||||
.. math:: | .. math:: | ||||
@@ -14,7 +14,8 @@ from .module import Module | |||||
class Linear(Module): | class Linear(Module): | ||||
r"""Applies a linear transformation to the input. For instance, if input | |||||
r""" | |||||
Applies a linear transformation to the input. For instance, if input | |||||
is x, then output y is: | is x, then output y is: | ||||
.. math:: | .. math:: | ||||
@@ -39,7 +40,7 @@ class Linear(Module): | |||||
m = M.Linear(in_features=3, out_features=1) | m = M.Linear(in_features=3, out_features=1) | ||||
inp = mge.tensor(np.arange(0, 6).astype("float32").reshape(2, 3)) | inp = mge.tensor(np.arange(0, 6).astype("float32").reshape(2, 3)) | ||||
oup = m(inp) | oup = m(inp) | ||||
print(oup.shape) | |||||
print(oup.numpy().shape) | |||||
Outputs: | Outputs: | ||||
@@ -57,7 +57,8 @@ def _is_module(obj): | |||||
class Module(metaclass=ABCMeta): | class Module(metaclass=ABCMeta): | ||||
"""Base Module class. | |||||
""" | |||||
Base Module class. | |||||
""" | """ | ||||
def __init__(self): | def __init__(self): | ||||
@@ -76,7 +77,8 @@ class Module(metaclass=ABCMeta): | |||||
pass | pass | ||||
def register_forward_pre_hook(self, hook: Callable) -> HookHandler: | def register_forward_pre_hook(self, hook: Callable) -> HookHandler: | ||||
"""Registers a hook to handle forward inputs. `hook` should be a function. | |||||
""" | |||||
Registers a hook to handle forward inputs. `hook` should be a function. | |||||
:param hook: a function that receives `module` and `inputs`, then returns | :param hook: a function that receives `module` and `inputs`, then returns | ||||
a modified `inputs` or `None`. | a modified `inputs` or `None`. | ||||
@@ -85,7 +87,8 @@ class Module(metaclass=ABCMeta): | |||||
return HookHandler(self._forward_pre_hooks, hook) | return HookHandler(self._forward_pre_hooks, hook) | ||||
def register_forward_hook(self, hook: Callable) -> HookHandler: | def register_forward_hook(self, hook: Callable) -> HookHandler: | ||||
"""Registers a hook to handle forward results. `hook` should be a function that | |||||
""" | |||||
Registers a hook to handle forward results. `hook` should be a function that | |||||
receives `module`, `inputs` and `outputs`, then returns a modified `outputs` or `None`. | receives `module`, `inputs` and `outputs`, then returns a modified `outputs` or `None`. | ||||
This method returns a handler with :meth:`~.HookHandler.remove` interface to delete the hook. | This method returns a handler with :meth:`~.HookHandler.remove` interface to delete the hook. | ||||
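A sketch of ``register_forward_hook``; the hook signature and the ``remove()`` handle follow the description above, and ``M.Linear``/``mge.tensor`` are reused from the Linear example earlier in this diff:

.. code-block::

    import numpy as np
    import megengine as mge
    import megengine.module as M

    def print_output_shape(module, inputs, outputs):
        # receives the module, its inputs and its outputs; returning None keeps outputs unchanged
        print(type(module).__name__, outputs.numpy().shape)

    m = M.Linear(in_features=3, out_features=1)
    handle = m.register_forward_hook(print_output_shape)
    m(mge.tensor(np.ones((2, 3), dtype="float32")))  # prints: Linear (2, 1)
    handle.remove()  # the returned handler deletes the hook again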
@@ -118,7 +121,8 @@ class Module(metaclass=ABCMeta): | |||||
predicate: Callable[[Any], bool] = lambda _: True, | predicate: Callable[[Any], bool] = lambda _: True, | ||||
seen: Optional[Set[int]] = None | seen: Optional[Set[int]] = None | ||||
) -> Union[Iterable[Any], Iterable[Tuple[str, Any]]]: | ) -> Union[Iterable[Any], Iterable[Tuple[str, Any]]]: | ||||
"""Scans the module object and returns an iterable for the :class:`~.Tensor` | |||||
""" | |||||
Scans the module object and returns an iterable for the :class:`~.Tensor` | |||||
and :class:`~.Module` attributes that agree with the ``predicate``. For multiple | and :class:`~.Module` attributes that agree with the ``predicate``. For multiple | ||||
calls of this function with same arguments, the order of objects within the | calls of this function with same arguments, the order of objects within the | ||||
returned iterable is guaranteed to be identical, as long as all the involved | returned iterable is guaranteed to be identical, as long as all the involved | ||||
@@ -165,7 +169,8 @@ class Module(metaclass=ABCMeta): | |||||
) | ) | ||||
def parameters(self, recursive: bool = True, **kwargs) -> Iterable[Parameter]: | def parameters(self, recursive: bool = True, **kwargs) -> Iterable[Parameter]: | ||||
r"""Returns an iterable for the :class:`~.Parameter` of the module. | |||||
r""" | |||||
Returns an iterable for the :class:`~.Parameter` of the module. | |||||
:param recursive: If ``True``, returns all :class:`~.Parameter` within this | :param recursive: If ``True``, returns all :class:`~.Parameter` within this | ||||
module, else only returns :class:`~.Parameter` that are direct attributes | module, else only returns :class:`~.Parameter` that are direct attributes | ||||
@@ -190,7 +195,8 @@ class Module(metaclass=ABCMeta): | |||||
def named_parameters( | def named_parameters( | ||||
self, prefix: Optional[str] = None, recursive: bool = True, **kwargs | self, prefix: Optional[str] = None, recursive: bool = True, **kwargs | ||||
) -> Iterable[Tuple[str, Parameter]]: | ) -> Iterable[Tuple[str, Parameter]]: | ||||
"""Returns an iterable for key :class:`~.Parameter` pairs of the module, where | |||||
""" | |||||
Returns an iterable for key :class:`~.Parameter` pairs of the module, where | |||||
``key`` is the dotted path from this module to the :class:`~.Parameter`. | ``key`` is the dotted path from this module to the :class:`~.Parameter`. | ||||
:param prefix: prefix prepended to the keys. | :param prefix: prefix prepended to the keys. | ||||
@@ -219,7 +225,8 @@ class Module(metaclass=ABCMeta): | |||||
) | ) | ||||
def buffers(self, recursive: bool = True, **kwargs) -> Iterable[Tensor]: | def buffers(self, recursive: bool = True, **kwargs) -> Iterable[Tensor]: | ||||
"""Returns an iterable for the buffers of the module. | |||||
""" | |||||
Returns an iterable for the buffers of the module. | |||||
Buffer is defined to be :class:`~.Tensor` excluding :class:`~.Parameter`. | Buffer is defined to be :class:`~.Tensor` excluding :class:`~.Parameter`. | ||||
@@ -234,7 +241,8 @@ class Module(metaclass=ABCMeta): | |||||
def named_buffers( | def named_buffers( | ||||
self, prefix: Optional[str] = None, recursive: bool = True, **kwargs | self, prefix: Optional[str] = None, recursive: bool = True, **kwargs | ||||
) -> Iterable[Tuple[str, Tensor]]: | ) -> Iterable[Tuple[str, Tensor]]: | ||||
"""Returns an iterable for key buffer pairs of the module, where | |||||
""" | |||||
Returns an iterable for key buffer pairs of the module, where | |||||
``key`` is the dotted path from this module to the buffer. | ``key`` is the dotted path from this module to the buffer. | ||||
Buffer is defined to be :class:`~.Tensor` excluding :class:`~.Parameter`. | Buffer is defined to be :class:`~.Tensor` excluding :class:`~.Parameter`. | ||||
@@ -253,7 +261,8 @@ class Module(metaclass=ABCMeta): | |||||
) | ) | ||||
def children(self, **kwargs) -> "Iterable[Module]": | def children(self, **kwargs) -> "Iterable[Module]": | ||||
"""Returns an iterable for all the submodules that are direct attributes of this | |||||
""" | |||||
Returns an iterable for all the submodules that are direct attributes of this | |||||
module. | module. | ||||
""" | """ | ||||
yield from self._flatten( | yield from self._flatten( | ||||
@@ -261,7 +270,8 @@ class Module(metaclass=ABCMeta): | |||||
) | ) | ||||
def named_children(self, **kwargs) -> "Iterable[Tuple[str, Module]]": | def named_children(self, **kwargs) -> "Iterable[Tuple[str, Module]]": | ||||
"""Returns an iterable of key-submodule pairs for all the submodules that are | |||||
""" | |||||
Returns an iterable of key-submodule pairs for all the submodules that are | |||||
direct attributes of this module, where 'key' is the attribute name of | direct attributes of this module, where 'key' is the attribute name of | ||||
submodules. | submodules. | ||||
""" | """ | ||||
@@ -270,7 +280,8 @@ class Module(metaclass=ABCMeta): | |||||
) | ) | ||||
def modules(self, **kwargs) -> "Iterable[Module]": | def modules(self, **kwargs) -> "Iterable[Module]": | ||||
"""Returns an iterable for all the modules within this module, including itself. | |||||
""" | |||||
Returns an iterable for all the modules within this module, including itself. | |||||
""" | """ | ||||
if "with_parent" in kwargs and kwargs["with_parent"]: | if "with_parent" in kwargs and kwargs["with_parent"]: | ||||
yield self, None | yield self, None | ||||
@@ -281,7 +292,8 @@ class Module(metaclass=ABCMeta): | |||||
def named_modules( | def named_modules( | ||||
self, prefix: Optional[str] = None, **kwargs | self, prefix: Optional[str] = None, **kwargs | ||||
) -> "Iterable[Tuple[str, Module]]": | ) -> "Iterable[Tuple[str, Module]]": | ||||
"""Returns an iterable of key-module pairs for all the modules within this | |||||
""" | |||||
Returns an iterable of key-module pairs for all the modules within this | |||||
module, including itself, where 'key' is the dotted path from this module to the | module, including itself, where 'key' is the dotted path from this module to the | ||||
submodules. | submodules. | ||||
@@ -296,7 +308,8 @@ class Module(metaclass=ABCMeta): | |||||
) | ) | ||||
def apply(self, fn: "Callable[[Module], Any]") -> None: | def apply(self, fn: "Callable[[Module], Any]") -> None: | ||||
"""Applies function ``fn`` to all the modules within this module, including | |||||
""" | |||||
Applies function ``fn`` to all the modules within this module, including | |||||
itself. | itself. | ||||
:param fn: the function to be applied on modules. | :param fn: the function to be applied on modules. | ||||
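For example, ``apply`` can drive a simple re-initialization pass; the ``megengine.module.init`` helpers below are assumed to be available:

.. code-block::

    import megengine.module as M
    import megengine.module.init as init

    def reset_linear(m):
        # only touch Linear layers; other module types pass through unchanged
        if isinstance(m, M.Linear):
            init.zeros_(m.bias)

    net = M.Sequential(M.Linear(8, 4), M.Linear(4, 2))
    net.apply(reset_linear)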
@@ -306,14 +319,16 @@ class Module(metaclass=ABCMeta): | |||||
@deprecated(version="1.0") | @deprecated(version="1.0") | ||||
def zero_grad(self) -> None: | def zero_grad(self) -> None: | ||||
"""Sets all parameters' grads to zero | |||||
""" | |||||
Sets all parameters' grads to zero | |||||
""" | """ | ||||
for param in self.parameters(): | for param in self.parameters(): | ||||
if param.grad is not None: | if param.grad is not None: | ||||
param.grad.reset_zero() | param.grad.reset_zero() | ||||
def train(self, mode: bool = True, recursive: bool = True) -> None: | def train(self, mode: bool = True, recursive: bool = True) -> None: | ||||
"""Sets training mode of all the modules within this module (including itself) to | |||||
""" | |||||
Sets training mode of all the modules within this module (including itself) to | |||||
``mode``. This effectively sets the ``training`` attributes of those modules | ``mode``. This effectively sets the ``training`` attributes of those modules | ||||
to ``mode``, but only has effect on certain modules (e.g. | to ``mode``, but only has effect on certain modules (e.g. | ||||
:class:`~.BatchNorm2d`, :class:`~.Dropout`, :class:`~.Observer`) | :class:`~.BatchNorm2d`, :class:`~.Dropout`, :class:`~.Observer`) | ||||
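A short sketch of switching modes; the container below only illustrates the effect on mode-sensitive modules, plain layers are unaffected:

.. code-block::

    import megengine.module as M

    net = M.Sequential(M.BatchNorm2d(4), M.Dropout(0.5))

    net.eval()    # BatchNorm uses running statistics, Dropout is disabled
    print(net.training)

    net.train()   # recursively back to training mode
    print(all(m.training for m in net.modules()))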
@@ -331,7 +346,8 @@ class Module(metaclass=ABCMeta): | |||||
self.apply(fn) | self.apply(fn) | ||||
def eval(self) -> None: | def eval(self) -> None: | ||||
"""Sets training mode of all the modules within this module (including itself) to | |||||
""" | |||||
Sets training mode of all the modules within this module (including itself) to | |||||
``False``. See :meth:`~.Module.train` for details. | ``False``. See :meth:`~.Module.train` for details. | ||||
""" | """ | ||||
self.train(False) | self.train(False) | ||||
@@ -351,7 +367,8 @@ class Module(metaclass=ABCMeta): | |||||
def replace_param( | def replace_param( | ||||
self, params: dict, start_pos: int, seen: Optional[Set[int]] = None | self, params: dict, start_pos: int, seen: Optional[Set[int]] = None | ||||
): | ): | ||||
"""Replaces module's parameters with ``params``, used by :class:`~.ParamPack` to | |||||
""" | |||||
Replaces module's parameters with ``params``, used by :class:`~.ParamPack` to | |||||
speed up multi-machine training. | speed up multi-machine training. | ||||
""" | """ | ||||
offset = 0 | offset = 0 | ||||
@@ -377,7 +394,8 @@ class Module(metaclass=ABCMeta): | |||||
return offset | return offset | ||||
def state_dict(self, rst=None, prefix="", keep_var=False): | def state_dict(self, rst=None, prefix="", keep_var=False): | ||||
r"""Returns a dictionary containing whole states of the module. | |||||
r""" | |||||
Returns a dictionary containing whole states of the module. | |||||
""" | """ | ||||
def is_state(obj): | def is_state(obj): | ||||
@@ -407,7 +425,8 @@ class Module(metaclass=ABCMeta): | |||||
state_dict: Union[dict, Callable[[str, Tensor], Optional[np.ndarray]]], | state_dict: Union[dict, Callable[[str, Tensor], Optional[np.ndarray]]], | ||||
strict=True, | strict=True, | ||||
): | ): | ||||
r"""Loads a given dictionary created by :func:`state_dict` into this module. | |||||
r""" | |||||
Loads a given dictionary created by :func:`state_dict` into this module. | |||||
If ``strict`` is ``True``, the keys of the given ``state_dict`` must exactly match the keys | If ``strict`` is ``True``, the keys of the given ``state_dict`` must exactly match the keys | ||||
returned by this module's :func:`state_dict`. | returned by this module's :func:`state_dict`. | ||||
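A minimal round trip; the partial load with ``strict=False`` assumes missing keys are tolerated in that mode:

.. code-block::

    import megengine.module as M

    net = M.Linear(8, 2)

    state = net.state_dict()          # plain dict keyed by dotted parameter paths
    net.load_state_dict(state)        # strict=True (default): keys must match exactly

    partial = {k: v for k, v in state.items() if "bias" not in k}
    net.load_state_dict(partial, strict=False)   # tolerate the missing key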
@@ -485,7 +504,8 @@ class Module(metaclass=ABCMeta): | |||||
) | ) | ||||
def _load_state_dict_with_closure(self, closure): | def _load_state_dict_with_closure(self, closure): | ||||
"""Advance state_dict load through callable ``closure`` whose signature is | |||||
""" | |||||
Advanced state_dict loading through a callable ``closure`` whose signature is | |||||
``closure(key: str, var: Tensor) -> Union[np.ndarray, None]`` | ``closure(key: str, var: Tensor) -> Union[np.ndarray, None]`` | ||||
""" | """ | ||||
assert callable(closure), "closure must be a function" | assert callable(closure), "closure must be a function" | ||||
@@ -536,7 +556,8 @@ class Module(metaclass=ABCMeta): | |||||
super().__delattr__(name) | super().__delattr__(name) | ||||
def _module_info_string(self) -> str: | def _module_info_string(self) -> str: | ||||
r"""Set the extra representation of the module. | |||||
r""" | |||||
Set the extra representation of the module. | |||||
""" | """ | ||||
return "" | return "" | ||||
@@ -36,7 +36,8 @@ class _PoolNd(Module): | |||||
class MaxPool2d(_PoolNd): | class MaxPool2d(_PoolNd): | ||||
r"""Applies a 2D max pooling over an input. | |||||
r""" | |||||
Applies a 2D max pooling over an input. | |||||
For instance, given an input of the size :math:`(N, C, H, W)` and | For instance, given an input of the size :math:`(N, C, H, W)` and | ||||
:attr:`kernel_size` :math:`(kH, kW)`, this layer generates the output of | :attr:`kernel_size` :math:`(kH, kW)`, this layer generates the output of | ||||
@@ -83,7 +84,8 @@ class MaxPool2d(_PoolNd): | |||||
class AvgPool2d(_PoolNd): | class AvgPool2d(_PoolNd): | ||||
r"""Applies a 2D average pooling over an input. | |||||
r""" | |||||
Applies a 2D average pooling over an input. | |||||
For instance, given an input of the size :math:`(N, C, H, W)` and | For instance, given an input of the size :math:`(N, C, H, W)` and | ||||
:attr:`kernel_size` :math:`(kH, kW)`, this layer generates the output of | :attr:`kernel_size` :math:`(kH, kW)`, this layer generates the output of | ||||
@@ -19,7 +19,8 @@ from .module import QuantizedModule | |||||
class Conv2d(Float.Conv2d, QuantizedModule): | class Conv2d(Float.Conv2d, QuantizedModule): | ||||
r"""Quantized version of :class:`~.qat.conv.Conv2d`.""" | r"""Quantized version of :class:`~.qat.conv.Conv2d`.""" | ||||
r"""Applies a 2D convolution over a quantized input tensor, used for inference only. | |||||
r""" | |||||
Applies a 2D convolution over a quantized input tensor, used for inference only. | |||||
The parameters are the same as :class:`~.Conv2d`. | The parameters are the same as :class:`~.Conv2d`. | ||||
""" | """ | ||||
@@ -11,7 +11,8 @@ from .conv import Conv2d | |||||
class _ConvBnActivation2d(Conv2d): | class _ConvBnActivation2d(Conv2d): | ||||
r"""Applies a 2D convolution over a quantized input tensor, used for inference only. | |||||
r""" | |||||
Applies a 2D convolution over a quantized input tensor, used for inference only. | |||||
The parameters are the same as :class:`~.Conv2d`. | The parameters are the same as :class:`~.Conv2d`. | ||||
""" | """ | ||||
@@ -12,7 +12,8 @@ from .module import Module | |||||
class Sequential(Module): | class Sequential(Module): | ||||
r"""A sequential container. | |||||
r""" | |||||
A sequential container. | |||||
Modules will be added to it in the order they are passed in the constructor. | Modules will be added to it in the order they are passed in the constructor. | ||||
Alternatively, an ordered dict of modules can also be passed in. | Alternatively, an ordered dict of modules can also be passed in. | ||||
@@ -29,10 +30,9 @@ class Sequential(Module): | |||||
from collections import OrderedDict | from collections import OrderedDict | ||||
batch_size = 64 | batch_size = 64 | ||||
data = mge.tensor(np.zeros((batch_size, 1, 28, 28)), dtype=np.float32) | |||||
data = mge.tensor(np.zeros((batch_size, 28 * 28)), dtype=np.float32) | |||||
label = mge.tensor(np.zeros(batch_size,), dtype=np.int32) | label = mge.tensor(np.zeros(batch_size,), dtype=np.int32) | ||||
data = data.reshape(batch_size, -1) | |||||
net0 = M.Sequential( | net0 = M.Sequential( | ||||
M.Linear(28 * 28, 320), | M.Linear(28 * 28, 320), | ||||
M.Linear(320, 10) | M.Linear(320, 10) | ||||
@@ -40,10 +40,9 @@ class Sequential(Module): | |||||
pred0 = net0(data) | pred0 = net0(data) | ||||
modules = OrderedDict() | modules = OrderedDict() | ||||
modules["fc0"] = nn.Linear(28 * 28, 320) | |||||
modules["fc1"] = nn.Linear(320, 10) | |||||
net1 = nn.Sequential(modules) | |||||
modules["fc0"] = M.Linear(28 * 28, 320) | |||||
modules["fc1"] = M.Linear(320, 10) | |||||
net1 = M.Sequential(modules) | |||||
pred1 = net1(data) | pred1 = net1(data) | ||||
""" | """ | ||||
@@ -16,7 +16,8 @@ from .optimizer import Optimizer | |||||
class Adadelta(Optimizer): | class Adadelta(Optimizer): | ||||
r"""Implements Adadelta algorithm. | |||||
r""" | |||||
Implements Adadelta algorithm. | |||||
It has been proposed in `"ADADELTA: An Adaptive Learning Rate Method" <https://arxiv.org/abs/1212.5701>`_. | It has been proposed in `"ADADELTA: An Adaptive Learning Rate Method" <https://arxiv.org/abs/1212.5701>`_. | ||||
@@ -16,7 +16,8 @@ from .optimizer import Optimizer | |||||
class Adagrad(Optimizer): | class Adagrad(Optimizer): | ||||
r"""Implements Adagrad algorithm. | |||||
r""" | |||||
Implements Adagrad algorithm. | |||||
It has been proposed in `"Adaptive Subgradient Methods for Online Learning | It has been proposed in `"Adaptive Subgradient Methods for Online Learning | ||||
and Stochastic Optimization" <http://jmlr.org/papers/v12/duchi11a.html>`_. | and Stochastic Optimization" <http://jmlr.org/papers/v12/duchi11a.html>`_. | ||||
@@ -13,7 +13,8 @@ from .optimizer import Optimizer | |||||
class Adam(Optimizer): | class Adam(Optimizer): | ||||
r"""Implements Adam algorithm proposed in `"Adam: A Method for Stochastic Optimization" <https://arxiv.org/abs/1412.6980>`_. | |||||
r""" | |||||
Implements Adam algorithm proposed in `"Adam: A Method for Stochastic Optimization" <https://arxiv.org/abs/1412.6980>`_. | |||||
:param params: iterable of parameters to optimize or dicts defining | :param params: iterable of parameters to optimize or dicts defining | ||||
parameter groups. | parameter groups. | ||||
@@ -12,7 +12,8 @@ from .optimizer import Optimizer | |||||
class LRScheduler(metaclass=ABCMeta): | class LRScheduler(metaclass=ABCMeta): | ||||
r"""Base class for all learning rate based schedulers. | |||||
r""" | |||||
Base class for all learning rate based schedulers. | |||||
:param optimizer: wrapped optimizer. | :param optimizer: wrapped optimizer. | ||||
:param current_epoch: the index of current epoch. Default: -1 | :param current_epoch: the index of current epoch. Default: -1 | ||||
@@ -44,14 +45,16 @@ class LRScheduler(metaclass=ABCMeta): | |||||
self.step() | self.step() | ||||
def state_dict(self): | def state_dict(self): | ||||
r"""Returns the state of the scheduler as a :class:`dict`. | |||||
r""" | |||||
Returns the state of the scheduler as a :class:`dict`. | |||||
It contains an entry for every variable in self.__dict__ which | It contains an entry for every variable in self.__dict__ which | ||||
is not the optimizer. | is not the optimizer. | ||||
""" | """ | ||||
raise NotImplementedError | raise NotImplementedError | ||||
def load_state_dict(self, state_dict): | def load_state_dict(self, state_dict): | ||||
r"""Loads the schedulers state. | |||||
r""" | |||||
Loads the scheduler's state. | |||||
:type state_dict: dict | :type state_dict: dict | ||||
:param state_dict: scheduler state. | :param state_dict: scheduler state. | ||||
@@ -14,7 +14,8 @@ from .optimizer import Optimizer | |||||
class MultiStepLR(LRScheduler): | class MultiStepLR(LRScheduler): | ||||
r"""Decays the learning rate of each parameter group by gamma once the | |||||
r""" | |||||
Decays the learning rate of each parameter group by gamma once the | |||||
number of epochs reaches one of the milestones. | number of epochs reaches one of the milestones. | ||||
:param optimizer: wrapped optimizer. | :param optimizer: wrapped optimizer. | ||||
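A usage sketch, assuming :class:`MultiStepLR` is exposed from ``megengine.optimizer`` and takes the usual ``milestones``/``gamma`` arguments:

.. code-block::

    import megengine.module as M
    import megengine.optimizer as optim

    net = M.Linear(8, 2)
    opt = optim.SGD(net.parameters(), lr=0.1)
    scheduler = optim.MultiStepLR(opt, milestones=[30, 60], gamma=0.1)

    for epoch in range(90):
        # ... one epoch of training with ``opt`` ...
        scheduler.step()   # lr: 0.1 -> 0.01 after epoch 30 -> 0.001 after epoch 60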
@@ -44,7 +45,8 @@ class MultiStepLR(LRScheduler): | |||||
super().__init__(optimizer, current_epoch) | super().__init__(optimizer, current_epoch) | ||||
def state_dict(self): | def state_dict(self): | ||||
r"""Returns the state of the scheduler as a :class:`dict`. | |||||
r""" | |||||
Returns the state of the scheduler as a :class:`dict`. | |||||
It contains an entry for every variable in self.__dict__ which | It contains an entry for every variable in self.__dict__ which | ||||
is not the optimizer. | is not the optimizer. | ||||
""" | """ | ||||
@@ -55,7 +57,8 @@ class MultiStepLR(LRScheduler): | |||||
} | } | ||||
def load_state_dict(self, state_dict): | def load_state_dict(self, state_dict): | ||||
r"""Loads the schedulers state. | |||||
r""" | |||||
Loads the scheduler's state. | |||||
:type state_dict: dict | :type state_dict: dict | ||||
:param state_dict: scheduler state. | :param state_dict: scheduler state. | ||||
@@ -28,7 +28,8 @@ required = _RequiredParameter() | |||||
class Optimizer(metaclass=ABCMeta): | class Optimizer(metaclass=ABCMeta): | ||||
r"""Base class for all optimizers. | |||||
r""" | |||||
Base class for all optimizers. | |||||
:param params: specifies what Tensors should be optimized. | :param params: specifies what Tensors should be optimized. | ||||
:param defaults: a dict of default parameters of Optimizer, like learning rate or momentum. | :param defaults: a dict of default parameters of Optimizer, like learning rate or momentum. | ||||
@@ -72,7 +73,8 @@ class Optimizer(metaclass=ABCMeta): | |||||
self._create_state(group) | self._create_state(group) | ||||
def add_param_group(self, param_group: dict): | def add_param_group(self, param_group: dict): | ||||
r"""Add a param group to ``param_groups`` of the :class:`~megengine.optim.optimizer.Optimizer`. | |||||
r""" | |||||
Add a param group to ``param_groups`` of the :class:`~megengine.optim.optimizer.Optimizer`. | |||||
This can be useful when fine tuning a pre-trained network as frozen layers can be made | This can be useful when fine tuning a pre-trained network as frozen layers can be made | ||||
trainable and added to the :class:`~megengine.optim.optimizer.Optimizer` as training progresses. | trainable and added to the :class:`~megengine.optim.optimizer.Optimizer` as training progresses. | ||||
@@ -137,7 +139,8 @@ class Optimizer(metaclass=ABCMeta): | |||||
return params | return params | ||||
def step(self): | def step(self): | ||||
r"""Performs a single optimization step. | |||||
r""" | |||||
Performs a single optimization step. | |||||
""" | """ | ||||
for group in self.param_groups: | for group in self.param_groups: | ||||
@@ -158,14 +161,16 @@ class Optimizer(metaclass=ABCMeta): | |||||
param.grad.reset_zero() | param.grad.reset_zero() | ||||
def clear_grad(self): | def clear_grad(self): | ||||
r"""Set the grad attribute to None for all parameters. | |||||
r""" | |||||
Set the grad attribute to None for all parameters. | |||||
""" | """ | ||||
for param_group in self.param_groups: | for param_group in self.param_groups: | ||||
for param in param_group["params"]: | for param in param_group["params"]: | ||||
param.grad = None | param.grad = None | ||||
def state_dict(self) -> Dict: | def state_dict(self) -> Dict: | ||||
r"""Export the optimizer state. | |||||
r""" | |||||
Export the optimizer state. | |||||
:return: optimizer state. Can be loaded by :meth:`load_state_dict`. | :return: optimizer state. Can be loaded by :meth:`load_state_dict`. | ||||
""" | """ | ||||
@@ -191,7 +196,8 @@ class Optimizer(metaclass=ABCMeta): | |||||
return {"param_groups": param_groups, "state": state} | return {"param_groups": param_groups, "state": state} | ||||
def load_state_dict(self, state: dict): | def load_state_dict(self, state: dict): | ||||
r"""Loads the optimizer state. | |||||
r""" | |||||
Loads the optimizer state. | |||||
:param state: optimizer state. Should be an object returned | :param state: optimizer state. Should be an object returned | ||||
from a call to :meth:`state_dict`. | from a call to :meth:`state_dict`. | ||||
@@ -13,7 +13,8 @@ from .optimizer import Optimizer | |||||
class SGD(Optimizer): | class SGD(Optimizer): | ||||
r"""Implements stochastic gradient descent. | |||||
r""" | |||||
Implements stochastic gradient descent. | |||||
Nesterov momentum is based on the formula from | Nesterov momentum is based on the formula from | ||||
`"On the importance of initialization and momentum in deep learning" <http://www.cs.toronto.edu/%7Ehinton/absps/momentum.pdf>`_ . | `"On the importance of initialization and momentum in deep learning" <http://www.cs.toronto.edu/%7Ehinton/absps/momentum.pdf>`_ . | ||||
@@ -174,7 +174,8 @@ class HistogramObserver(MinMaxObserver): | |||||
self.histogram = Tensor([-1] + [0.0] * (bins - 1), dtype="float32") | self.histogram = Tensor([-1] + [0.0] * (bins - 1), dtype="float32") | ||||
def _non_linear_param_search(self): | def _non_linear_param_search(self): | ||||
r"""Non-linear parameter search. | |||||
r""" | |||||
Non-linear parameter search. | |||||
An approximation for L2 error minimization for selecting min/max. | An approximation for L2 error minimization for selecting min/max. | ||||
By selecting new min/max, we filter out outliers in input distribution. | By selecting new min/max, we filter out outliers in input distribution. | ||||
""" | """ | ||||
@@ -43,7 +43,8 @@ def register_method_to_class(cls): | |||||
class QuantMode(Enum): | class QuantMode(Enum): | ||||
"""Quantization mode enumerate class. | |||||
""" | |||||
Quantization mode enumerate class. | |||||
""" | """ | ||||
SYMMERTIC = 1 | SYMMERTIC = 1 | ||||
@@ -63,13 +64,15 @@ qparam_dict = { | |||||
def get_qparam_dict(mode: QuantMode): | def get_qparam_dict(mode: QuantMode): | ||||
"""Return the quantization parameters dictionary according to the mode. | |||||
""" | |||||
Return the quantization parameters dictionary according to the mode. | |||||
""" | """ | ||||
return qparam_dict.get(mode, None) | return qparam_dict.get(mode, None) | ||||
def fake_quant_tensor(inp: Tensor, qmin: int, qmax: int, q_dict: Dict) -> Tensor: | def fake_quant_tensor(inp: Tensor, qmin: int, qmax: int, q_dict: Dict) -> Tensor: | ||||
"""Apply fake quantization to the inp tensor. | |||||
""" | |||||
Apply fake quantization to the inp tensor. | |||||
:param inp: the input tensor which needs to be faked. | :param inp: the input tensor which needs to be faked. | ||||
:param qmin: the lower bound of the quantized integer range. | :param qmin: the lower bound of the quantized integer range. | ||||
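The idea behind fake quantization can be sketched in plain NumPy; this reference helper is generic and does not reproduce the exact ``q_dict`` handling of the MegEngine kernel:

.. code-block::

    import numpy as np

    def fake_quant_reference(x, scale, zero_point, qmin, qmax):
        # quantize: map to the integer grid, shift by the zero point, clamp to [qmin, qmax]
        q = np.clip(np.round(x / scale) + zero_point, qmin, qmax)
        # dequantize back to float so downstream computation stays in floating point
        return (q - zero_point) * scale

    x = np.linspace(-1.0, 1.0, 5, dtype="float32")
    print(fake_quant_reference(x, scale=1 / 127, zero_point=0, qmin=-128, qmax=127))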
@@ -91,7 +94,8 @@ def fake_quant_tensor(inp: Tensor, qmin: int, qmax: int, q_dict: Dict) -> Tensor | |||||
def fake_quant_bias(bias: Tensor, inp: Tensor, w_qat: Tensor) -> Tensor: | def fake_quant_bias(bias: Tensor, inp: Tensor, w_qat: Tensor) -> Tensor: | ||||
"""Apply fake quantization to bias, with the special scale from input tensor | |||||
""" | |||||
Apply fake quantization to bias, with the special scale from input tensor | |||||
and weight tensor; the quantized type is also set to qint32. | and weight tensor; the quantized type is also set to qint32. | ||||
:param bias: the bias tensor which needs to be faked. | :param bias: the bias tensor which needs to be faked. | ||||
@@ -21,7 +21,8 @@ __all__ = ["normal", "uniform"] | |||||
def normal( | def normal( | ||||
mean: float = 0, std: float = 1, size: Optional[Iterable[int]] = None | mean: float = 0, std: float = 1, size: Optional[Iterable[int]] = None | ||||
) -> Tensor: | ) -> Tensor: | ||||
r"""Random variable with Gaussian distribution :math:`N(\mu, \sigma)`. | |||||
r""" | |||||
Random variable with Gaussian distribution :math:`N(\mu, \sigma)`. | |||||
:param size: output tensor size. | :param size: output tensor size. | ||||
:param mean: the mean or expectation of the distribution. | :param mean: the mean or expectation of the distribution. | ||||
@@ -59,7 +60,8 @@ def normal( | |||||
def uniform( | def uniform( | ||||
low: float = 0, high: float = 1, size: Optional[Iterable[int]] = None | low: float = 0, high: float = 1, size: Optional[Iterable[int]] = None | ||||
) -> Tensor: | ) -> Tensor: | ||||
r"""Random variable with uniform distribution $U(0, 1)$. | |||||
r""" | |||||
Random variable with uniform distribution :math:`U(0, 1)`. | |||||
:param size: output tensor size. | :param size: output tensor size. | ||||
:param low: lower range. | :param low: lower range. | ||||
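Typical usage, assuming both samplers are exposed from ``megengine.random``:

.. code-block::

    import megengine.random as rand

    x = rand.normal(mean=0, std=1, size=(2, 3))   # Gaussian N(0, 1) samples
    y = rand.uniform(low=0, high=1, size=(2, 3))  # uniform samples on the given range
    print(x.shape, y.shape)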
@@ -14,7 +14,8 @@ from .utils.max_recursion_limit import max_recursion_limit | |||||
def save(obj, f, pickle_module=pickle, pickle_protocol=pickle.HIGHEST_PROTOCOL): | def save(obj, f, pickle_module=pickle, pickle_protocol=pickle.HIGHEST_PROTOCOL): | ||||
r"""Save an object to disk file. | |||||
r""" | |||||
Save an object to disk file. | |||||
:type obj: object | :type obj: object | ||||
:param obj: object to save. Only ``module`` or ``state_dict`` are allowed. | :param obj: object to save. Only ``module`` or ``state_dict`` are allowed. | ||||
@@ -81,7 +82,8 @@ def _get_callable_map_location(map_location): | |||||
def load(f, map_location=None, pickle_module=pickle): | def load(f, map_location=None, pickle_module=pickle): | ||||
r"""Load an object saved with save() from a file. | |||||
r""" | |||||
Load an object saved with save() from a file. | |||||
:type f: text file object | :type f: text file object | ||||
:param f: a string of file name or a text file object from which to load. | :param f: a string of file name or a text file object from which to load. | ||||
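A save/load round trip for a whole module; passing the device string ``"cpu0"`` as ``map_location`` is an assumed example:

.. code-block::

    import megengine as mge
    import megengine.module as M

    net = M.Linear(8, 2)
    mge.save(net, "net.pkl")                              # a module (or a state_dict) can be saved

    restored = mge.load("net.pkl", map_location="cpu0")   # remap tensors onto cpu0 at load time
    print(type(restored))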
@@ -97,5 +97,6 @@ tensor = Tensor | |||||
class Parameter(Tensor): | class Parameter(Tensor): | ||||
r"""A kind of Tensor that is to be considered a module parameter. | |||||
r""" | |||||
A kind of Tensor that is to be considered a module parameter. | |||||
""" | """ |
@@ -17,7 +17,8 @@ from ..core.tensor.raw_tensor import as_raw_tensor | |||||
def get_dep_vars(var: VarNode, var_type: str = None) -> List[VarNode]: | def get_dep_vars(var: VarNode, var_type: str = None) -> List[VarNode]: | ||||
"""Returns :class:`.tensor.core.megbrain_graph.VarNode` of type ``var_type`` that input ``var`` | |||||
""" | |||||
Returns :class:`.tensor.core.megbrain_graph.VarNode` of type ``var_type`` that input ``var`` | |||||
depends on. If ``var_type`` is None, returns all types. | depends on. If ``var_type`` is None, returns all types. | ||||
""" | """ | ||||
outputs = [] | outputs = [] | ||||
@@ -46,14 +47,16 @@ def get_dep_vars(var: VarNode, var_type: str = None) -> List[VarNode]: | |||||
def get_owner_opr_inputs(var: VarNode) -> List[VarNode]: | def get_owner_opr_inputs(var: VarNode) -> List[VarNode]: | ||||
"""Gets the inputs of owner opr of a variable. | |||||
""" | |||||
Gets the inputs of owner opr of a variable. | |||||
""" | """ | ||||
assert isinstance(var, VarNode) | assert isinstance(var, VarNode) | ||||
return var.owner.inputs | return var.owner.inputs | ||||
def get_owner_opr_type(var: VarNode) -> str: | def get_owner_opr_type(var: VarNode) -> str: | ||||
"""Gets the type of owner opr of a variable. | |||||
""" | |||||
Gets the type of owner opr of a variable. | |||||
""" | """ | ||||
assert isinstance(var, VarNode) | assert isinstance(var, VarNode) | ||||
@@ -61,14 +64,16 @@ def get_owner_opr_type(var: VarNode) -> str: | |||||
def get_opr_type(opr: OperatorNode) -> str: | def get_opr_type(opr: OperatorNode) -> str: | ||||
"""Gets the type of an opr. | |||||
""" | |||||
Gets the type of an opr. | |||||
""" | """ | ||||
assert isinstance(opr, OperatorNode) | assert isinstance(opr, OperatorNode) | ||||
return opr.type | return opr.type | ||||
def graph_traversal(outputs: VarNode): | def graph_traversal(outputs: VarNode): | ||||
"""Helper function to traverse the computing graph and return enough useful information. | |||||
""" | |||||
Helper function to traverse the computing graph and return useful information about it. | |||||
:param outputs: model outputs. | :param outputs: model outputs. | ||||
:return: tuple (map_oprs, map_vars, var2oprs, opr2receivers, indegree2opr, opr2indegree) | :return: tuple (map_oprs, map_vars, var2oprs, opr2receivers, indegree2opr, opr2indegree) | ||||
@@ -124,7 +129,8 @@ def graph_traversal(outputs: VarNode): | |||||
def get_oprs_seq(outputs: List[VarNode], prune_reshape=False) -> List[OperatorNode]: | def get_oprs_seq(outputs: List[VarNode], prune_reshape=False) -> List[OperatorNode]: | ||||
"""Gets oprs in some topological order for a dumped model. | |||||
""" | |||||
Gets oprs in some topological order for a dumped model. | |||||
:param outputs: model outputs. | :param outputs: model outputs. | ||||
:param prune_reshape: whether to prune the useless operators during inference. | :param prune_reshape: whether to prune the useless operators during inference. | ||||
@@ -194,7 +200,8 @@ def get_oprs_seq(outputs: List[VarNode], prune_reshape=False) -> List[OperatorNo | |||||
def replace_vars(dst: VarNode, varmap: Dict[VarNode, VarNode]) -> List[VarNode]: | def replace_vars(dst: VarNode, varmap: Dict[VarNode, VarNode]) -> List[VarNode]: | ||||
"""Replaces vars in the graph. | |||||
""" | |||||
Replaces vars in the graph. | |||||
:param dst: target vars representing the graph. | :param dst: target vars representing the graph. | ||||
:param varmap: the map that specifies how to replace the vars. | :param varmap: the map that specifies how to replace the vars. | ||||
@@ -221,7 +228,8 @@ def replace_vars(dst: VarNode, varmap: Dict[VarNode, VarNode]) -> List[VarNode]: | |||||
def replace_oprs( | def replace_oprs( | ||||
dst: List[VarNode], oprmap: Dict[OperatorNode, OperatorNode] | dst: List[VarNode], oprmap: Dict[OperatorNode, OperatorNode] | ||||
) -> List[VarNode]: | ) -> List[VarNode]: | ||||
"""Replaces operators in the graph. | |||||
""" | |||||
Replaces operators in the graph. | |||||
:param dst: target vars representing the graph. | :param dst: target vars representing the graph. | ||||
:param oprmap: the map that specifies how to replace the operators. | :param oprmap: the map that specifies how to replace the operators. | ||||
@@ -246,7 +254,8 @@ def replace_oprs( | |||||
def set_priority_to_id(dest_vars): | def set_priority_to_id(dest_vars): | ||||
"""For all oprs in the subgraph constructed by dest_vars, | |||||
""" | |||||
For all oprs in the subgraph constructed by dest_vars, | |||||
sets its priority to id if its original priority is zero. | sets its priority to id if its original priority is zero. | ||||
:param dest_vars: target vars representing the graph. | :param dest_vars: target vars representing the graph. | ||||
""" | """ | ||||
@@ -258,7 +267,8 @@ def set_priority_to_id(dest_vars): | |||||
def load_and_inference(file, inp_data_list: List[numpy.ndarray]) -> List[numpy.ndarray]: | def load_and_inference(file, inp_data_list: List[numpy.ndarray]) -> List[numpy.ndarray]: | ||||
"""Loads a serialized computing graph and run inference with input data. | |||||
""" | |||||
Loads a serialized computing graph and runs inference with input data. | |||||
:param file: path or handle of the input file. | :param file: path or handle of the input file. | ||||
:param inp_data_list: list of input data. | :param inp_data_list: list of input data. | ||||
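A usage sketch; the module path ``megengine.utils.comp_graph_tools`` and the dumped file ``model.mge`` are assumptions for illustration:

.. code-block::

    import numpy as np
    from megengine.utils.comp_graph_tools import load_and_inference

    inp = np.random.rand(1, 3, 224, 224).astype("float32")
    # "model.mge" is a hypothetical graph previously dumped for inference
    outputs = load_and_inference("model.mge", [inp])
    print([o.shape for o in outputs])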
@@ -16,7 +16,8 @@ if platform.system() != "Windows": | |||||
class AlternativeRecursionLimit: | class AlternativeRecursionLimit: | ||||
r"""A reentrant context manager for setting global recursion limits. | |||||
r""" | |||||
A reentrant context manager for setting global recursion limits. | |||||
""" | """ | ||||
def __init__(self, new_py_limit): | def __init__(self, new_py_limit): | ||||
@@ -73,6 +74,7 @@ _max_recursion_limit_context_manager = AlternativeRecursionLimit(2 ** 31 - 1) | |||||
def max_recursion_limit(): | def max_recursion_limit(): | ||||
r"""Sets recursion limit to the max possible value. | |||||
r""" | |||||
Sets recursion limit to the max possible value. | |||||
""" | """ | ||||
return _max_recursion_limit_context_manager | return _max_recursion_limit_context_manager |
@@ -12,7 +12,8 @@ import numpy as np | |||||
def load_tensor_binary(fobj): | def load_tensor_binary(fobj): | ||||
"""Load a tensor dumped by the :class:`BinaryOprIODump` plugin; the actual | |||||
""" | |||||
Load a tensor dumped by the :class:`BinaryOprIODump` plugin; the actual | |||||
tensor value dump is implemented by ``mgb::debug::dump_tensor``. | tensor value dump is implemented by ``mgb::debug::dump_tensor``. | ||||
Multiple values can be compared by ``tools/compare_binary_iodump.py``. | Multiple values can be compared by ``tools/compare_binary_iodump.py``. | ||||
@@ -57,7 +57,8 @@ def _tabulate_confluence(tab, **kwargs): | |||||
def main(passed_args=None): # pylint: disable=too-many-statements | def main(passed_args=None): # pylint: disable=too-many-statements | ||||
"""Analyses profile info from :mod:`~.utils.profile_analyzer` . | |||||
""" | |||||
Analyses profile info from :mod:`~.utils.profile_analyzer`. | |||||
Run this file with ``--help`` to get more usage. | Run this file with ``--help`` to get more usage. | ||||
""" | """ | ||||
@@ -15,7 +15,8 @@ import numpy as np | |||||
class NonExistNum: | class NonExistNum: | ||||
"""An object that behaves like a number but means a field does not exist; It is | |||||
""" | |||||
An object that behaves like a number but means a field does not exist; it is | |||||
always greater than any real number. | always greater than any real number. | ||||
""" | """ | ||||
@@ -64,15 +65,18 @@ class OprProfRst: | |||||
"""A dict containing operator info: name, id and type.""" | """A dict containing operator info: name, id and type.""" | ||||
time_dict = None | time_dict = None | ||||
"""A mapping from ``"host"`` or ``"device"`` to list of profiling | |||||
""" | |||||
A mapping from ``"host"`` or ``"device"`` to list of profiling | |||||
results.""" | results.""" | ||||
footprint = None | footprint = None | ||||
"""A mapping from ``"memory"`` or ``"computation"`` to the actual number | |||||
""" | |||||
A mapping from ``"memory"`` or ``"computation"`` to the actual number | |||||
of corresponding operations.""" | of corresponding operations.""" | ||||
def __init__(self, entry: dict): | def __init__(self, entry: dict): | ||||
"""Opr profiling initialization, which sets up name, type and id of opr_info. | |||||
""" | |||||
Opr profiling initialization, which sets up name, type and id of opr_info. | |||||
:param entry: profiling json exec_graph items. | :param entry: profiling json exec_graph items. | ||||
""" | """ | ||||
@@ -84,7 +88,8 @@ class OprProfRst: | |||||
self.footprint = collections.defaultdict(NonExistNum) | self.footprint = collections.defaultdict(NonExistNum) | ||||
def update_device_prof_info(self, dev_time: dict): | def update_device_prof_info(self, dev_time: dict): | ||||
"""Updates device profiling info. | |||||
""" | |||||
Updates device profiling info. | |||||
:param dev_time: device time for single opr, | :param dev_time: device time for single opr, | ||||
is an attribute of profiling result. | is an attribute of profiling result. | ||||
@@ -93,7 +98,8 @@ class OprProfRst: | |||||
self.time_dict["device"].append(copy.deepcopy(dev_time)) | self.time_dict["device"].append(copy.deepcopy(dev_time)) | ||||
def update_host_prof_info(self, host_time: dict): | def update_host_prof_info(self, host_time: dict): | ||||
"""Updates host profiling info. | |||||
""" | |||||
Updates host profiling info. | |||||
:param host_time: host time for single opr, | :param host_time: host time for single opr, | ||||
is an attribute of profiling result. | is an attribute of profiling result. | ||||
@@ -102,7 +108,8 @@ class OprProfRst: | |||||
self.time_dict["host"].append(copy.deepcopy(host_time)) | self.time_dict["host"].append(copy.deepcopy(host_time)) | ||||
def update_footprint(self, footprint: dict): | def update_footprint(self, footprint: dict): | ||||
"""Updates opr footprint. | |||||
""" | |||||
Updates opr footprint. | |||||
:param footprint: footprint for single opr, | :param footprint: footprint for single opr, | ||||
is an attribute of profiling result. | is an attribute of profiling result. | ||||
@@ -128,7 +135,8 @@ class Record: | |||||
] | ] | ||||
def __init__(self, time: float, info: dict, footprint: dict): | def __init__(self, time: float, info: dict, footprint: dict): | ||||
"""Initializes single record. | |||||
""" | |||||
Initializes single record. | |||||
:param time: opr running time, evaluated by applying the user-provided | :param time: opr running time, evaluated by applying the user-provided | ||||
function to OprProfRst. | function to OprProfRst. | ||||
@@ -153,7 +161,8 @@ class Record: | |||||
self.opr_id = int(self.opr_id) | self.opr_id = int(self.opr_id) | ||||
def get_column_by_name(self, name: str = None): | def get_column_by_name(self, name: str = None): | ||||
"""Extracts column value by its column name. | |||||
""" | |||||
Extracts column value by its column name. | |||||
:param name: column name, None for time. | :param name: column name, None for time. | ||||
""" | """ | ||||
@@ -165,7 +174,8 @@ class Record: | |||||
class ProfileAnalyzer: | class ProfileAnalyzer: | ||||
def __init__(self, obj: dict, opr_filter: Callable = lambda opr, inp, out: True): | def __init__(self, obj: dict, opr_filter: Callable = lambda opr, inp, out: True): | ||||
"""Initializes ProfileAnalyzer. | |||||
""" | |||||
Initializes ProfileAnalyzer. | |||||
:param obj: dict dumped from json str. | :param obj: dict dumped from json str. | ||||
:param opr_filter: function that filter oprs. | :param opr_filter: function that filter oprs. | ||||
@@ -202,7 +212,8 @@ class ProfileAnalyzer: | |||||
def _aggregate( | def _aggregate( | ||||
self, records: List[Record], aop: Union[str, Callable], atype: Optional[str] | self, records: List[Record], aop: Union[str, Callable], atype: Optional[str] | ||||
) -> List[Record]: | ) -> List[Record]: | ||||
"""Aggregate operation. | |||||
""" | |||||
Aggregate operation. | |||||
:param records: selected records. | :param records: selected records. | ||||
:param aop: aggregate operation, if aop is str, we would replace it | :param aop: aggregate operation, if aop is str, we would replace it | ||||
@@ -247,7 +258,8 @@ class ProfileAnalyzer: | |||||
return rst | return rst | ||||
def _sort(self, records: List[Record], sort_by: str) -> List[Record]: | def _sort(self, records: List[Record], sort_by: str) -> List[Record]: | ||||
"""Sort operation. | |||||
""" | |||||
Sort operation. | |||||
:param records: the records after aggregate operation. | :param records: the records after aggregate operation. | ||||
:param sort_by: keyword for sorting the list. | :param sort_by: keyword for sorting the list. | ||||
@@ -271,7 +283,8 @@ class ProfileAnalyzer: | |||||
sort_by: str = None, | sort_by: str = None, | ||||
top_k: int = 0, | top_k: int = 0, | ||||
) -> List[Record]: | ) -> List[Record]: | ||||
"""Select operation. | |||||
""" | |||||
Select operation. | |||||
:param time_func: time function provided by the user, applied to every | :param time_func: time function provided by the user, applied to every | ||||
OprProfRst. | OprProfRst. | ||||
@@ -304,7 +317,8 @@ class TimeFuncHelper: | |||||
@staticmethod | @staticmethod | ||||
def _eval_time(prof_type, end_key, func, opr_prof): | def _eval_time(prof_type, end_key, func, opr_prof): | ||||
"""Eval time. | |||||
""" | |||||
Eval time. | |||||
:type prof_type: str | :type prof_type: str | ||||
:param prof_type: 'host' or 'device'. | :param prof_type: 'host' or 'device'. | ||||
@@ -325,7 +339,8 @@ class TimeFuncHelper: | |||||
@staticmethod | @staticmethod | ||||
def eval_time_func(prof_type: str, end_key: str, func: Callable) -> float: | def eval_time_func(prof_type: str, end_key: str, func: Callable) -> float: | ||||
"""Eval oprerator profile time. | |||||
""" | |||||
Eval operator profile time. | |||||
:param prof_type: 'host' or 'device'. | :param prof_type: 'host' or 'device'. | ||||
:param end_key: 'kern' or 'end'. | :param end_key: 'kern' or 'end'. | ||||
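Putting the profiler pieces together, a hypothetical session; ``profile.json`` is an assumed dump, and the ``time``/``opr_id`` fields follow the :class:`Record` description above:

.. code-block::

    import json
    from megengine.utils.profile_analyzer import ProfileAnalyzer, TimeFuncHelper

    with open("profile.json") as f:
        analyzer = ProfileAnalyzer(json.load(f))

    # reduce each operator's device time records with ``max``
    time_func = TimeFuncHelper.eval_time_func("device", "end", max)
    for record in analyzer.select(time_func, top_k=5):
        print(record.opr_id, record.time)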
@@ -338,7 +353,8 @@ class TimeFuncHelper: | |||||
def _min_start( | def _min_start( | ||||
prof_type, end_key, func, opr_prof | prof_type, end_key, func, opr_prof | ||||
): # pylint: disable=unused-argument | ): # pylint: disable=unused-argument | ||||
"""Eval minimum start time. | |||||
""" | |||||
Eval minimum start time. | |||||
:type prof_type: str | :type prof_type: str | ||||
:param prof_type: 'host' or 'device'. | :param prof_type: 'host' or 'device'. | ||||
@@ -360,7 +376,8 @@ class TimeFuncHelper: | |||||
def min_start_func( | def min_start_func( | ||||
prof_type: str, end_key: str, func: Callable | prof_type: str, end_key: str, func: Callable | ||||
) -> float: # pylint: disable=unused-argument | ) -> float: # pylint: disable=unused-argument | ||||
"""Eval oprerator profile min start time. | |||||
""" | |||||
Eval operator profile min start time. | |||||
:param prof_type: 'host' or 'device'. | :param prof_type: 'host' or 'device'. | ||||
:param end_key: 'kern' or 'end'. | :param end_key: 'kern' or 'end'. | ||||
@@ -371,7 +388,8 @@ class TimeFuncHelper: | |||||
@staticmethod | @staticmethod | ||||
def _max_end(prof_type, end_key, func, opr_prof): # pylint: disable=unused-argument | def _max_end(prof_type, end_key, func, opr_prof): # pylint: disable=unused-argument | ||||
"""Eval maximum end time | |||||
""" | |||||
Eval maximum end time. | |||||
:type prof_type: str | :type prof_type: str | ||||
:param prof_type: 'host' or 'device'. | :param prof_type: 'host' or 'device'. | ||||
@@ -391,7 +409,8 @@ class TimeFuncHelper: | |||||
@staticmethod | @staticmethod | ||||
def max_end_func(prof_type: str, end_key: str, func: Callable) -> float: | def max_end_func(prof_type: str, end_key: str, func: Callable) -> float: | ||||
"""Eval oprerator profile max end time. | |||||
""" | |||||
Eval operator profile max end time. | |||||
:param prof_type: 'host' or 'device'. | :param prof_type: 'host' or 'device'. | ||||
:param end_key: 'kern' or 'end'. | :param end_key: 'kern' or 'end'. | ||||
@@ -169,7 +169,7 @@ class Profiler: | |||||
Examples: | Examples: | ||||
.. testcode:: | |||||
.. code-block:: | |||||
import megengine as mge | import megengine as mge | ||||
import megengine.module as M | import megengine.module as M | ||||
@@ -3,7 +3,8 @@ from ..core._imperative_rt.imperative import sync | |||||
class TensorSanityCheck: | class TensorSanityCheck: | ||||
r"""An object that checks whether the input tensors of each operator have changed before and after the operation. | |||||
r""" | |||||
An object that checks whether the input tensors of each operator have changed before and after the operation. | |||||
Examples: | Examples: | ||||
@@ -11,7 +11,8 @@ import functools | |||||
def get_ndtuple(value, *, n, allow_zero=True): | def get_ndtuple(value, *, n, allow_zero=True): | ||||
r"""Converts possibly 1D tuple to nd tuple. | |||||
r""" | |||||
Converts possibly 1D tuple to nd tuple. | |||||
:type allow_zero: bool | :type allow_zero: bool | ||||
:param allow_zero: whether to allow zero tuple value.""" | :param allow_zero: whether to allow zero tuple value.""" | ||||
@@ -1,12 +0,0 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
# | |||||
# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. | |||||
# | |||||
# Unless required by applicable law or agreed to in writing, | |||||
# software distributed under the License is distributed on an | |||||
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
[pytest] | |||||
markers = | |||||
isolated_distributed: marks distributed tests that should runs without cuda use | |||||
in main thread (deselect with '-m "not "isolated_distributed"') |
@@ -1,6 +1,6 @@ | |||||
#!/bin/bash -e | #!/bin/bash -e | ||||
test_dirs="test megengine" | |||||
test_dirs="megengine test" | |||||
TEST_PLAT=$1 | TEST_PLAT=$1 | ||||