@@ -441,22 +441,22 @@ def softplus(inp: Tensor) -> Tensor:
 
 
 def logsoftmax(inp: Tensor, axis: Union[int, Sequence[int]]) -> Tensor:
     r"""
-    Applies the :math:`\log(\text{Softmax}(x))` function to an n-dimensional
-    input Tensor. The LogSoftmax formulation can be simplified as:
+    Applies the :math:`\log(\text{softmax}(x))` function to an n-dimensional
+    input tensor. The :math:`\text{logsoftmax}(x)` formulation can be simplified as:
 
     .. math::
-        \text{LogSoftmax}(x_{i}) = \log(\frac{\exp(x_i) }{ \sum_j \exp(x_j)} )
+        \text{logsoftmax}(x_{i}) = \log(\frac{\exp(x_i) }{ \sum_j \exp(x_j)} )
 
     For numerical stability the implementation follows this transformation:
 
     .. math::
-        \operatorname{logsoftmax}(x)
+        \text{logsoftmax}(x)
         = \log (\frac{\exp (x)}{\sum_{i}(\exp (x_{i}))})
         = x - \log (\sum_{i}(\exp (x_{i})))
-        = x - logsumexp(x)
+        = x - \text{logsumexp}(x)
 
     :param inp: input tensor.
-    :param axis: axis along which logsoftmax will be applied.
+    :param axis: axis along which :math:`\text{logsoftmax}(x)` will be applied.
 
     Examples:
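
A quick NumPy sketch of the `x - logsumexp(x)` transformation above (illustrative only, not MegEngine's kernel): shifting by the per-axis maximum before exponentiating keeps the naive formula from overflowing while giving the same result.

```python
import numpy as np

def logsoftmax_ref(x, axis=-1):
    # logsoftmax(x) = x - logsumexp(x), with the usual max-shift for stability
    b = x.max(axis=axis, keepdims=True)
    return x - (b + np.log(np.exp(x - b).sum(axis=axis, keepdims=True)))

x = np.array([[1000.0, 1001.0, 1002.0]])   # naive exp(x) would overflow here
y = logsoftmax_ref(x, axis=1)
print(y)                                   # finite log-probabilities
print(np.exp(y).sum(axis=1))               # ~[1.], as expected of a softmax
```
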
@@ -487,8 +487,8 @@ def logsigmoid(inp: Tensor) -> Tensor:
 
     .. math::
         \text{logsigmoid}(x) = \log(\frac{ 1 }{ 1 + \exp(-x)})
-        = \log(1/(1 + exp(-x)))
-        = - \log(1 + exp(-x))
+        = \log(1/(1 + \exp(-x)))
+        = - \log(1 + \exp(-x))
         = - \text{softplus}(-x)
 
     :param inp: input tensor.
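
To illustrate why the last identity is the one worth implementing (a NumPy sketch, assuming a stable `softplus` written as `max(x, 0) + log1p(exp(-|x|))`; this is not the code being patched): the naive `log(1 / (1 + exp(-x)))` overflows for very negative inputs, while `-softplus(-x)` stays finite.

```python
import numpy as np

def softplus_ref(x):
    # stable softplus: log(1 + exp(x)) = max(x, 0) + log1p(exp(-|x|))
    return np.maximum(x, 0.0) + np.log1p(np.exp(-np.abs(x)))

def logsigmoid_ref(x):
    return -softplus_ref(-x)        # logsigmoid(x) = -softplus(-x)

x = np.array([-1000.0, -1.0, 0.0, 1.0])
print(logsigmoid_ref(x))            # [-1000.0, -1.3133, -0.6931, -0.3133]
# np.log(1.0 / (1.0 + np.exp(-x))) warns about overflow and returns -inf at -1000.0
```
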
@@ -524,14 +524,14 @@ def logsumexp(
 
 
     .. math::
 
-        \operatorname{logsumexp}(\boldsymbol{x})= \log \sum_{j=1}^{n} \exp \left(x_{j}\right)
+        \text{logsumexp}(x)= \log \sum_{j=1}^{n} \exp \left(x_{j}\right)
 
     For numerical stability, the implementation follows this transformation:
 
     .. math::
 
-        \operatorname{logsumexp}(\boldsymbol{x})= \log \sum_{j=1}^{n} \exp \left(x_{j}\right)
-        = \operatorname{logsumexp}(\boldsymbol{x})=b+\log \sum_{j=1}^{n} \exp \left(x_{j}-b\right)
+        \text{logsumexp}(x)= \log \sum_{j=1}^{n} \exp \left(x_{j}\right)
+        = \text{logsumexp}(x)=b+\log \sum_{j=1}^{n} \exp \left(x_{j}-b\right)
 
     where
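
The shift only pays off for extreme inputs, which is exactly where the naive sum fails. A NumPy sketch of the shifted formula (with the conventional choice `b = max(x_j)` over the reduced axis; illustrative only, not the library code):

```python
import numpy as np

def logsumexp_ref(x, axis=None, keepdims=False):
    # b + log(sum(exp(x - b))), with b = max(x) over the reduced axis
    b = x.max(axis=axis, keepdims=True)
    out = b + np.log(np.exp(x - b).sum(axis=axis, keepdims=True))
    return out if keepdims else np.squeeze(out, axis=axis)

x = np.array([[1000.0, 1000.0], [-1000.0, -1000.0]])
print(logsumexp_ref(x, axis=1))     # [1000.6931, -999.3069]
# the unshifted np.log(np.exp(x).sum(axis=1)) gives [inf, -inf] for the same input
```
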
@@ -578,10 +578,10 @@ def _get_softmax_axis(ndim: int) -> int:
 
 def softmax(inp: Tensor, axis: Optional[int] = None) -> Tensor:
     r"""
-    Applies a softmax function. Softmax is defined as:
+    Applies a :math:`\text{softmax}(x)` function. :math:`\text{softmax}(x)` is defined as:
 
     .. math::
-        \text{Softmax}(x_{i}) = \frac{\exp(x_i)}{\sum_j \exp(x_j)}
+        \text{softmax}(x_{i}) = \frac{\exp(x_i)}{\sum_j \exp(x_j)}
 
     It is applied to all elements along axis, and rescales elements so that
     they stay in the range `[0, 1]` and sum to 1.
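
A NumPy sketch of that definition (illustrative only, not the MegEngine implementation): each element is exponentiated and divided by the sum over the chosen axis, so the outputs land in `[0, 1]` and every slice along that axis sums to 1.

```python
import numpy as np

def softmax_ref(x, axis=-1):
    # subtract the per-axis max first so exp() cannot overflow; the shift cancels out
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

x = np.arange(-5.0, 5.0).reshape(2, 5)
y = softmax_ref(x, axis=1)
print((y >= 0).all() and (y <= 1).all())    # True
print(y.sum(axis=1))                        # [1. 1.]
```
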
@@ -589,8 +589,8 @@ def softmax(inp: Tensor, axis: Optional[int] = None) -> Tensor:
     See :class:`~megengine.module.activation.Softmax` for more details.
 
     :param inp: input tensor.
-    :param axis: an axis along which softmax will be applied. By default,
-        softmax will apply along the highest ranked axis.
+    :param axis: an axis along which :math:`\text{softmax}(x)` will be applied. By default,
+        :math:`\text{softmax}(x)` will apply along the highest ranked axis.
 
     Examples:
 
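
One way to read the `axis` parameter: it selects the dimension that gets normalised, and everything else is treated as batch. A NumPy illustration of that difference (the default-axis rule handled by `_get_softmax_axis` is MegEngine-specific and not reproduced here):

```python
import numpy as np

def softmax_ref(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

x = np.random.rand(2, 3).astype("float32")
print(softmax_ref(x, axis=1).sum(axis=1))   # [1. 1.]      -> rows sum to 1
print(softmax_ref(x, axis=0).sum(axis=0))   # [1. 1. 1.]   -> columns sum to 1
```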