diff --git a/imperative/python/megengine/functional/nn.py b/imperative/python/megengine/functional/nn.py
index ce2e1655..90bd8d44 100644
--- a/imperative/python/megengine/functional/nn.py
+++ b/imperative/python/megengine/functional/nn.py
@@ -513,7 +513,22 @@ def local_conv2d(
     dilation: Union[int, Tuple[int, int]] = 1,
     conv_mode="cross_correlation",
 ):
-    r"""Applies spatial 2D convolution over an groupped channeled image with untied kernels."""
+    r"""Applies a spatial convolution with untied kernels over a grouped, channeled 4D input tensor.
+    It is also known as the locally connected layer.
+
+    Args:
+        inp: input feature map.
+        weight: convolution kernel.
+            weight usually has shape ``(out_channels, in_channels, height, width)``.
+        bias: bias added to the result of convolution (if given).
+        stride: stride of the 2D convolution operation. Default: 1
+        padding: size of the paddings added to the input on both sides of its
+            spatial dimensions. Only zero-padding is supported. Default: 0
+        dilation: dilation of the 2D convolution operation. Default: 1
+
+    Returns:
+        output tensor.
+    """
     assert (
         conv_mode.lower() == "cross_correlation"
         or conv_mode.name == "CROSS_CORRELATION"
@@ -529,6 +544,7 @@ def local_conv2d(
     if weight.dtype != dtype:
         weight = weight.astype(dtype)
 
+    # local conv only supports "dense" mode, but the weight may contain a group dimension.
     op = builtin.GroupLocal(
         stride_h=stride_h,
         stride_w=stride_w,
@@ -568,6 +584,11 @@ def conv_transpose3d(
         padding: size of the paddings added to the input on all sides of its
             spatial dimensions. Only zero-padding is supported. Default: 0
         dilation: dilation of the 3D convolution operation. Default: 1
+        groups: number of groups into which the input and output channels are divided,
+            so as to perform a ``grouped convolution``. When ``groups`` is not 1,
+            ``in_channels`` and ``out_channels`` must be divisible by ``groups``,
+            and the shape of weight should be ``(groups, in_channels // groups,
+            out_channels // groups, depth, height, width)``. Default: 1
 
     Returns:
         output tensor.
diff --git a/imperative/python/megengine/module/conv.py b/imperative/python/megengine/module/conv.py
index a4117d46..5baa22ad 100644
--- a/imperative/python/megengine/module/conv.py
+++ b/imperative/python/megengine/module/conv.py
@@ -128,11 +128,11 @@ class Conv1d(_ConvNd):
         padding: size of the paddings added to the input on both sides of its
             spatial dimensions. Only zero-padding is supported. Default: 0
         dilation: dilation of the 1D convolution operation. Default: 1
-        groups: number of groups into which the input and output channels are divided,
+        groups: number of groups to divide input and output channels into,
             so as to perform a "grouped convolution". When ``groups`` is not 1,
             ``in_channels`` and ``out_channels`` must be divisible by ``groups``,
-            and there would be an extra dimension at the beginning of the weight's
-            shape. Default: 1
+            and the shape of weight should be ``(groups, out_channels // groups,
+            in_channels // groups, kernel_size)``. Default: 1
         bias: whether to add a bias onto the result of convolution. Default: True
         conv_mode: Supports `cross_correlation`. Default: `cross_correlation`
         compute_mode: When set to "default", no special requirements will be
@@ -290,10 +290,10 @@ class Conv2d(_ConvNd):
             spatial dimensions. Only zero-padding is supported. Default: 0
         dilation: dilation of the 2D convolution operation. Default: 1
         groups: number of groups into which the input and output channels are divided,
-            so as to perform a "grouped convolution". When ``groups`` is not 1,
+            so as to perform a ``grouped convolution``. When ``groups`` is not 1,
             ``in_channels`` and ``out_channels`` must be divisible by ``groups``,
-            and there would be an extra dimension at the beginning of the weight's
-            shape. Default: 1
+            and the shape of weight should be ``(groups, out_channels // groups,
+            in_channels // groups, height, width)``. Default: 1
         bias: whether to add a bias onto the result of convolution. Default: True
         conv_mode: Supports `cross_correlation`. Default: `cross_correlation`
         compute_mode: When set to "default", no special requirements will be
@@ -436,10 +436,10 @@ class Conv3d(_ConvNd):
             spatial dimensions. Only zero-padding is supported. Default: 0
         dilation: dilation of the 3D convolution operation. Default: 1
         groups: number of groups into which the input and output channels are divided,
-            so as to perform a "grouped convolution". When ``groups`` is not 1,
+            so as to perform a ``grouped convolution``. When ``groups`` is not 1,
             ``in_channels`` and ``out_channels`` must be divisible by ``groups``,
-            and there would be an extra dimension at the beginning of the weight's
-            shape. Default: 1
+            and the shape of weight should be ``(groups, out_channels // groups,
+            in_channels // groups, depth, height, width)``. Default: 1
         bias: whether to add a bias onto the result of convolution. Default: True
         conv_mode: Supports `cross_correlation`. Default: `cross_correlation`
 
@@ -560,10 +560,10 @@ class ConvTranspose2d(_ConvNd):
             spatial dimensions. Only zero-padding is supported. Default: 0
         dilation: dilation of the 2D convolution operation. Default: 1
         groups: number of groups into which the input and output channels are divided,
-            so as to perform a "grouped convolution". When ``groups`` is not 1,
-            ``in_channels`` and ``out_channels`` must be divisible by ``groups``,
-            and there would be an extra dimension at the beginning of the weight's
-            shape. Default: 1
+            so as to perform a ``grouped convolution``. When ``groups`` is not 1,
+            ``in_channels`` and ``out_channels`` must be divisible by ``groups``,
+            and the shape of weight should be ``(groups, in_channels // groups,
+            out_channels // groups, height, width)``. Default: 1
         bias: wether to add a bias onto the result of convolution. Default: True
             conv_mode: Supports `cross_correlation`. Default: `cross_correlation`
         compute_mode: When set to "default", no special requirements will be
@@ -667,6 +667,7 @@ class LocalConv2d(Conv2d):
         stride: stride of the 2D convolution operation. Default: 1
         padding: size of the paddings added to the input on both sides of its
             spatial dimensions. Only zero-padding is supported. Default: 0
+        dilation: dilation of the 2D convolution operation. Default: 1
         groups: number of groups into which the input and output channels are divided,
             so as to perform a "grouped convolution". When ``groups`` is not 1,
             ``in_channels`` and ``out_channels`` must be divisible by ``groups``. Default: 1
@@ -759,10 +760,10 @@ class DeformableConv2d(_ConvNd):
             spatial dimensions. Only zero-padding is supported. Default: 0
         dilation: dilation of the 2D convolution operation. Default: 1
         groups: number of groups into which the input and output channels are divided,
-            so as to perform a "grouped convolution". When ``groups`` is not 1,
-            ``in_channels`` and ``out_channels`` must be divisible by ``groups``,
-            and there would be an extra dimension at the beginning of the weight's
-            shape. Default: 1
+            so as to perform a ``grouped convolution``. When ``groups`` is not 1,
+            ``in_channels`` and ``out_channels`` must be divisible by ``groups``,
+            and the shape of weight should be ``(groups, out_channels // groups,
+            in_channels // groups, height, width)``. Default: 1
         bias: whether to add a bias onto the result of convolution. Default: True
         conv_mode: Supports `cross_correlation`. Default: `cross_correlation`
         compute_mode: When set to "default", no special requirements will be
@@ -875,6 +876,11 @@ class ConvTranspose3d(_ConvNd):
         padding: size of the paddings added to the input on all sides of its
             spatial dimensions. Only zero-padding is supported. Default: 0
         dilation: dilation of the 3D convolution operation. Default: 1
+        groups: number of groups into which the input and output channels are divided,
+            so as to perform a ``grouped convolution``. When ``groups`` is not 1,
+            ``in_channels`` and ``out_channels`` must be divisible by ``groups``,
+            and the shape of weight should be ``(groups, in_channels // groups,
+            out_channels // groups, depth, height, width)``. Default: 1
         bias: wether to add a bias onto the result of convolution. Default: True
 
     Note:
@@ -890,8 +896,8 @@ class ConvTranspose3d(_ConvNd):
         stride: Union[int, Tuple[int, int, int]] = 1,
         padding: Union[int, Tuple[int, int, int]] = 0,
         dilation: Union[int, Tuple[int, int, int]] = 1,
-        bias: bool = True,
         groups: int = 1,
+        bias: bool = True,
     ):
         kernel_size = _triple_nonzero(kernel_size)
         stride = _triple_nonzero(stride)
diff --git a/imperative/python/megengine/tools/network_visualize.py b/imperative/python/megengine/tools/network_visualize.py
index 0b2e148e..d1a6da78 100755
--- a/imperative/python/megengine/tools/network_visualize.py
+++ b/imperative/python/megengine/tools/network_visualize.py
@@ -53,8 +53,11 @@ def visualize(
     Args:
       model_path: dir path for megengine dumped model.
       log_path: dir path for tensorboard graph log.
-      input: user defined input data for running model and calculating stats, alternative with inp_dict, used when the model has only one input.
-      inp_dict: input dict for running model and calculating stats, alternative with input, used when the model has more than one input. When both input and inp_dict are None, a random input will be used.
+      input: user defined input data for running model and calculating stats,
+        an alternative to inp_dict, used when the model has only one input.
+      inp_dict: input dict for running model and calculating stats, an alternative
+        to input, used when the model has more than one input.
+        When both input and inp_dict are None, a random input will be used.
       cal_params: whether calculate and record params size.
       cal_flops: whether calculate and record op flops.
       cal_activations: whether calculate and record op activations.