From 67013463981d6394fd90f1200fd9b84c7bfedbe2 Mon Sep 17 00:00:00 2001
From: Megvii Engine Team
Date: Tue, 13 Oct 2020 18:37:23 +0800
Subject: [PATCH] docs(mge): fix some docstring format problem

GitOrigin-RevId: cbc5ab04b368246f1ae6d9e797703c92f2e524c2
---
 .../python/megengine/autodiff/grad_manager.py      | 44 +++++++++++-----------
 imperative/python/megengine/data/dataloader.py     |  1 -
 imperative/python/megengine/functional/__init__.py |  2 +-
 imperative/python/megengine/functional/loss.py     | 10 +++--
 imperative/python/megengine/functional/nn.py       |  8 ++--
 .../python/megengine/module/adaptive_pooling.py    | 16 ++++----
 imperative/python/megengine/module/module.py       | 11 +++---
 7 files changed, 46 insertions(+), 46 deletions(-)

diff --git a/imperative/python/megengine/autodiff/grad_manager.py b/imperative/python/megengine/autodiff/grad_manager.py
index 001c9f9d..f63deede 100644
--- a/imperative/python/megengine/autodiff/grad_manager.py
+++ b/imperative/python/megengine/autodiff/grad_manager.py
@@ -20,42 +20,42 @@ class GradManager:
     the forward operations start and when all resources should be released.
     A typical usage of GradManager is as follows:
 
-        .. code-block::
+    .. code-block::
 
-            gm = GradManager()
-            gm.attach(model.parameters())
-            with gm:
-                # forward operations
-                ...
-                # backward gradients
-                gm.backward(loss)
+        gm = GradManager()
+        gm.attach(model.parameters())
+        with gm:
+            # forward operations
+            ...
+            # backward gradients
+            gm.backward(loss)
 
-    You can also use `record()` and `release()` method instead of `with` context:
+    You can also use ``record()`` and ``release()`` methods instead of ``with`` context:
 
-        .. code-block::
+    .. code-block::
 
-            gm = GradManager()
-            gm.attach(model.parameters())
+        gm = GradManager()
+        gm.attach(model.parameters())
 
-            gm.record()
+        gm.record()
 
-            # forward operations
-            ...
-            # backward gradients
-            gm.backward(loss)
+        # forward operations
+        ...
+        # backward gradients
+        gm.backward(loss)
 
-            gm.release()
+        gm.release()
 
     Typically, in data parallel, we would like to average the gradients across
     processes. Users will finally get the averaged gradients if an "AllReduce"
     callback is registered as follows:
 
-        .. code-block::
+    .. code-block::
 
-            import megengine.distributed as dist
+        import megengine.distributed as dist
 
-            gm = GradManager()
-            gm.attach(model.parameters(), callback=dist.make_allreduce_cb("MEAN"))
+        gm = GradManager()
+        gm.attach(model.parameters(), callback=dist.make_allreduce_cb("MEAN"))
     """

diff --git a/imperative/python/megengine/data/dataloader.py b/imperative/python/megengine/data/dataloader.py
index a92dff7a..2a818a29 100644
--- a/imperative/python/megengine/data/dataloader.py
+++ b/imperative/python/megengine/data/dataloader.py
@@ -50,7 +50,6 @@ class DataLoader:
     :param dataset: dataset from which to load the minibatch.
     :type sampler: Sampler
     :param sampler: defines the strategy to sample data from the dataset.
-        If specified, :attr:`shuffle` must be ``False``.
     :type transform: Transform
     :param transform: defined the transforming strategy for a sampled batch.
         Default: None

diff --git a/imperative/python/megengine/functional/__init__.py b/imperative/python/megengine/functional/__init__.py
index 37455891..2d3240fa 100644
--- a/imperative/python/megengine/functional/__init__.py
+++ b/imperative/python/megengine/functional/__init__.py
@@ -17,4 +17,4 @@ from . import distributed  # isort:skip
 
 # delete namespace
 # pylint: disable=undefined-variable
-# del elemwise, graph, loss, math, nn, tensor  # type: ignore[name-defined]
+del elemwise, graph, loss, math, nn, quantized, tensor, utils  # type: ignore[name-defined]
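For readers skimming the grad_manager.py hunk above, here is a minimal end-to-end
sketch of the attach/record/backward cycle that the docstring describes. It is not
part of the patch: the model, data, and optimizer are made up for illustration, and
it assumes the MegEngine 1.x API that these files belong to.

.. code-block:: python

    import numpy as np
    import megengine as mge
    import megengine.functional as F
    import megengine.module as M
    import megengine.optimizer as optim
    from megengine.autodiff import GradManager

    model = M.Linear(4, 2)                       # hypothetical tiny model
    opt = optim.SGD(model.parameters(), lr=0.1)

    gm = GradManager()
    gm.attach(model.parameters())                # only attached tensors receive grads

    x = mge.tensor(np.random.randn(8, 4).astype("float32"))
    y = mge.tensor(np.random.randint(0, 2, size=(8,)).astype("int32"))

    with gm:                                     # equivalent to gm.record() ... gm.release()
        loss = F.cross_entropy(model(x), y)      # forward
        gm.backward(loss)                        # populates .grad on attached parameters

    opt.step()                                   # apply the gradients,
    opt.clear_grad()                             # then clear them for the next iteration

In a data-parallel setting, registering ``dist.make_allreduce_cb("MEAN")`` at
``attach`` time, as the docstring shows, averages the gradients across processes
before they are written back.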
diff --git a/imperative/python/megengine/functional/loss.py b/imperative/python/megengine/functional/loss.py
index 67a29667..0ef622d5 100644
--- a/imperative/python/megengine/functional/loss.py
+++ b/imperative/python/megengine/functional/loss.py
@@ -127,9 +127,10 @@ def cross_entropy(
     with_logits: bool = True,
     label_smooth: float = 0,
 ) -> Tensor:
-    r"""Compute the multi-class cross entropy loss (using logits by default).
+    r"""Computes the multi-class cross entropy loss (using logits by default).
 
-    By default, prediction is assumed to be logits, whose softmax gives probabilities.
+    By default (``with_logits`` is True), ``pred`` is assumed to be logits,
+    and class probabilities are given by softmax.
 
     It has better numerical stability compared with sequential calls to
     :func:`~.softmax` and :func:`~.cross_entropy`.
@@ -194,9 +195,10 @@ def cross_entropy(
 def binary_cross_entropy(
     pred: Tensor, label: Tensor, with_logits: bool = True
 ) -> Tensor:
-    r"""Compute the binary cross entropy loss (using logits by default).
+    r"""Computes the binary cross entropy loss (using logits by default).
 
-    By default, prediction is assumed to be logits, whose sigmoid gives probabilities.
+    By default (``with_logits`` is True), ``pred`` is assumed to be logits,
+    and class probabilities are given by sigmoid.
 
     :param pred: `(N, *)`, where `*` means any number of additional dimensions.
     :param label: `(N, *)`, same shape as the input.

diff --git a/imperative/python/megengine/functional/nn.py b/imperative/python/megengine/functional/nn.py
index 7d120b8e..7a5f8829 100644
--- a/imperative/python/megengine/functional/nn.py
+++ b/imperative/python/megengine/functional/nn.py
@@ -335,8 +335,8 @@ def adaptive_max_pool2d(
 
     Refer to :class:`~.MaxAdaptivePool2d` for more information.
 
-    :param inp: The input tensor.
-    :param oshp: (OH, OW) size of the output shape.
+    :param inp: input tensor.
+    :param oshp: `(OH, OW)` size of the output shape.
     :return: output tensor.
     """
     assert isinstance(inp, (Tensor, megbrain_graph.VarNode)), "inp must be Tensor type"
@@ -356,8 +356,8 @@ def adaptive_avg_pool2d(
 
     Refer to :class:`~.AvgAdaptivePool2d` for more information.
 
-    :param inp: The input tensor.
-    :param oshp: (OH, OW) size of the output shape.
+    :param inp: input tensor.
+    :param oshp: `(OH, OW)` size of the output shape.
     :return: output tensor.
     """
     assert isinstance(inp, (Tensor, megbrain_graph.VarNode)), "inp must be Tensor type"
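To make the ``with_logits`` semantics documented in the loss.py hunks concrete:
passing raw logits and passing explicit softmax probabilities should yield the same
loss, with the logits path being the numerically stabler one. A sketch, not part of
the patch; the tensor values are arbitrary and the 1.x API shown above is assumed.

.. code-block:: python

    import numpy as np
    import megengine.functional as F
    from megengine import tensor

    logits = tensor(np.random.randn(4, 10).astype("float32"))  # (N, C), unnormalized scores
    label = tensor(np.arange(4, dtype="int32"))                # (N,), class indices

    loss_logits = F.cross_entropy(logits, label)               # with_logits=True is the default

    probs = F.softmax(logits)                                  # softmax over the class axis
    loss_probs = F.cross_entropy(probs, label, with_logits=False)

    # Same value either way; the fused logits path avoids overflow in exp().
    np.testing.assert_allclose(loss_logits.numpy(), loss_probs.numpy(), rtol=1e-4)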
""" assert isinstance(inp, (Tensor, megbrain_graph.VarNode)), "inp must be Tensor type" diff --git a/imperative/python/megengine/module/adaptive_pooling.py b/imperative/python/megengine/module/adaptive_pooling.py index 99e7c57d..c0cbf3b2 100644 --- a/imperative/python/megengine/module/adaptive_pooling.py +++ b/imperative/python/megengine/module/adaptive_pooling.py @@ -40,10 +40,10 @@ class AdaptiveMaxPool2d(_AdaptivePoolNd): \text{stride[1]} \times w + n) \end{aligned} - Kernel_size and stride can be inferred from input shape and out shape: - padding: (0, 0) - stride: (floor(IH / OH), floor(IW / OW)) - kernel_size: (IH - (OH - 1) * stride_h, IW - (OW - 1) * stride_w) + ``kernel_size`` and ``stride`` can be inferred from input shape and out shape: + * padding: (0, 0) + * stride: (floor(IH / OH), floor(IW / OW)) + * kernel_size: (IH - (OH - 1) * stride_h, IW - (OW - 1) * stride_w) Examples: @@ -83,10 +83,10 @@ class AdaptiveAvgPool2d(_AdaptivePoolNd): out(N_i, C_j, h, w) = \frac{1}{kH * kW} \sum_{m=0}^{kH-1} \sum_{n=0}^{kW-1} input(N_i, C_j, stride[0] \times h + m, stride[1] \times w + n) - Kernel_size and stride can be inferred from input shape and out shape: - padding: (0, 0) - stride: (floor(IH / OH), floor(IW / OW)) - kernel_size: (IH - (OH - 1) * stride_h, IW - (OW - 1) * stride_w) + ``kernel_size`` and ``stride`` can be inferred from input shape and out shape: + * padding: (0, 0) + * stride: (floor(IH / OH), floor(IW / OW)) + * kernel_size: (IH - (OH - 1) * stride_h, IW - (OW - 1) * stride_w) Examples: diff --git a/imperative/python/megengine/module/module.py b/imperative/python/megengine/module/module.py index 856c0f01..3295d77c 100644 --- a/imperative/python/megengine/module/module.py +++ b/imperative/python/megengine/module/module.py @@ -351,7 +351,7 @@ class Module(metaclass=ABCMeta): def replace_param( self, params: dict, start_pos: int, seen: Optional[Set[int]] = None ): - """Replaces module's parameters with `params`, used by :class:`~.ParamPack` to + """Replaces module's parameters with ``params``, used by :class:`~.ParamPack` to speedup multimachine training. """ offset = 0 @@ -411,7 +411,7 @@ class Module(metaclass=ABCMeta): If ``strict`` is ``True``, the keys of :func:`state_dict` must exactly match the keys returned by :func:`state_dict`. - Users can also pass a closure: `Function[key: str, var: Tensor] -> Optional[np.ndarray]` + Users can also pass a closure: ``Function[key: str, var: Tensor] -> Optional[np.ndarray]`` as a `state_dict`, in order to handle complex situations. For example, load everything except for the final linear classifier: @@ -423,7 +423,7 @@ class Module(metaclass=ABCMeta): for k, v in state_dict.items() }, strict=False) - Here returning `None` means skipping parameter `k`. + Here returning ``None`` means skipping parameter ``k``. To prevent shape mismatch (e.g. load PyTorch weights), we can reshape before loading: @@ -485,9 +485,8 @@ class Module(metaclass=ABCMeta): ) def _load_state_dict_with_closure(self, closure): - """Advance state_dict load through callable `closure` whose signature is - - `closure(key: str, var: Tensor) -> Union[np.ndarry, None]` + """Advance state_dict load through callable ``closure`` whose signature is + ``closure(key: str, var: Tensor) -> Union[np.ndarry, None]`` """ assert callable(closure), "closure must be a function"