docs(mge): fix docstring in loss and dataset

GitOrigin-RevId: 6b56673415
5 years ago · e6d7c5e99a
--- a/python_module/megengine/data/dataset/vision/imagenet.py
+++ b/python_module/megengine/data/dataset/vision/imagenet.py
@@ -69,20 +69,26 @@ class ImageNet(ImageFolder):
    """

    def __init__(self, root: str = None, train: bool = True, **kwargs):
        r"""initilization
        r"""
        initialization:

        if ``root`` contains ``self.target_folder`` depending on ``train``:
            initialize ImageFolder with target_folder
        else:
            if all raw files are in ``root``:
                parse ``self.target_folder`` from raw files
                initialize ImageFolder with ``self.target_folder``
            else:
                raise error
        * if ``root`` contains ``self.target_folder`` depending on ``train``:

          * initialize ImageFolder with target_folder

        * else:

          * if all raw files are in ``root``:

            * parse ``self.target_folder`` from raw files
            * initialize ImageFolder with ``self.target_folder``

          * else:

            * raise error

        :param root: root directory of imagenet data; if root is ``None``, default_dataset_root is used
        :param train: if ``True``, load the train split, otherwise load the validation split
        :param **kwargs: other keyword arguments for ImageFolder init
        """

        # process the root path
--- a/python_module/megengine/data/transform/vision/transform.py
+++ b/python_module/megengine/data/transform/vision/transform.py
@@ -50,29 +50,25 @@ class VisionTransform(Transform):

    :param order: Input type order. Input is a tuple containing different structures,
        order is used to specify the order of structures. For example, if your input
        is (image, boxes) type, then the order should be ("image", "boxes"). 
        is (image, boxes) type, then the order should be ("image", "boxes").
        Currently available strings & data types are described below:
            "image": 
                input image, with shape of (H, W, C)
            "coords": 
                coordinates, with shape of (N, 2)
            "boxes": 
                bounding boxes, with shape of (N, 4), "xyxy" format,
                the 1st "xy" represents top left point of a box,
                the 2nd "xy" represents right bottom point.
            "mask": 
                map used for segmentation, with shape of (H, W, 1)
            "keypoints": 
                keypoints with shape of (N, K, 3), N for number of instances, and K for number of keypoints in one instance. The first two dimensions
                of the last axis are the coordinates of keypoints and the 3rd dimension is
                the label of keypoints.
            "polygons": A sequence contains numpy array, its length is number of instances.
                Each numpy array represents the polygon coordinates of one instance.
            "category": categories for some data type. For example, "image_category"
                means category of the input image and "boxes_category" means categories of
                bounding boxes.
            "info": 
                information for images such as image shapes and image path.

        * "image": input image, with shape of (H, W, C)
        * "coords": coordinates, with shape of (N, 2)
        * "boxes": bounding boxes, with shape of (N, 4), "xyxy" format,
          the 1st "xy" represents top left point of a box,
          the 2nd "xy" represents right bottom point.
        * "mask": map used for segmentation, with shape of (H, W, 1)
        * "keypoints": keypoints with shape of (N, K, 3), N for number of instances,
          and K for number of keypoints in one instance. The first two dimensions
          of the last axis are the coordinates of keypoints and the 3rd dimension is
          the label of keypoints.
        * "polygons": A sequence contains numpy array, its length is number of instances.
          Each numpy array represents the polygon coordinates of one instance.
        * "category": categories for some data type. For example, "image_category"
          means category of the input image and "boxes_category" means categories of
          bounding boxes.
        * "info": information for images such as image shapes and image path.

        You can also customize your data types only if you implement the corresponding
        _apply_*() methods, otherwise ``NotImplementedError`` will be raised.
@@ -356,7 +352,7 @@ class Resize(VisionTransform):

    :param output_size: Target size of image, with (height, width) shape.
    :param interpolation: Interpolation method. All methods are listed below:
        

        * cv2.INTER_NEAREST – a nearest-neighbor interpolation.
        * cv2.INTER_LINEAR – a bilinear interpolation (used by default).
        * cv2.INTER_AREA – resampling using pixel area relation.
--- a/python_module/megengine/functional/loss.py
+++ b/python_module/megengine/functional/loss.py
@@ -117,8 +117,8 @@ def cross_entropy(
        import numpy as np
        from megengine import tensor
        import megengine.functional as F
        
        


        data_shape = (1, 2)
        label_shape = (1, )

@@ -156,8 +156,10 @@ def cross_entropy_with_softmax(
    It has better numerical stability compared with sequential calls to :func:`~.softmax` and :func:`~.cross_entropy`.

    When using label smoothing, the label distribution is as follows:

    .. math::
        y^{LS}_{k}=y_{k}\left(1-\alpha\right)+\alpha/K

    where :math:`y^{LS}` and :math:`y` are the new label distribution and the original label distribution, respectively.
    k is the index of label distribution. :math:`\alpha` is label_smooth and :math:`K` is the number of classes.

@@ -197,7 +199,7 @@ def triplet_margin_loss(
    Creates a criterion that measures the triplet loss given input tensors.

    .. math::
        L(a, p, n) = max\left\{d\left(a_{i},p_{i}\right)-d\left(a_{i}, n_{i}\right)+margin, 0\right\},\ 
        L(a, p, n) = max\left\{d\left(a_{i},p_{i}\right)-d\left(a_{i}, n_{i}\right)+margin, 0\right\},\
        d\left(x_{i},y_{i}\right)=\left\|x_{i}-y_{i}\right\|_{p}

    :param anchor: The input tensor representing the anchor samples.