From a891f9b30cf0bc912f0721e2a7ec4d368e942866 Mon Sep 17 00:00:00 2001
From: Megvii Engine Team
Date: Tue, 31 May 2022 16:07:16 +0800
Subject: [PATCH] docs(api/lite): add megenginelite.network api doc

GitOrigin-RevId: e0b8eb207426d0907f2dd6835d8cd00a20b8d4fa
---
 lite/pylite/megenginelite/network.py | 379 +++++++++++++++++++++++++++++++----
 1 file changed, 339 insertions(+), 40 deletions(-)

diff --git a/lite/pylite/megenginelite/network.py b/lite/pylite/megenginelite/network.py
index 87a7a219..f2b270d3 100644
--- a/lite/pylite/megenginelite/network.py
+++ b/lite/pylite/megenginelite/network.py
@@ -11,7 +11,82 @@ from .tensor import *
 
 class LiteOptions(Structure):
     """
-    the inference options will be used to config a network
+    the inference options which can optimize the network forwarding
+    performance
+
+    Attributes:
+        weight_preprocess: the option to optimize the inference performance by
+            preprocessing the weights of the network ahead of time
+
+        fuse_preprocess: fuse preprocess pattern, like astype + pad_channel +
+            dimshuffle
+
+        fake_next_exec: whether only to perform non-computing tasks (like
+            memory allocation and queue initialization) for the next exec.
+            This will be reset to false when the graph is executed.
+
+        var_sanity_check_first_run: disable var sanity check on the first run.
+            Var sanity check is enabled on the first-time execution by
+            default, and can be used to find potential memory access errors
+            in operators
+
+        const_shape: used to reduce memory usage and improve performance,
+            since some static inference data structures can be omitted and
+            some operators can be computed before forwarding
+
+        force_dynamic_alloc: force dynamic memory allocation for all vars
+
+        force_output_dynamic_alloc: force dynamic memory allocation for output
+            tensors which are used as the input of the CallbackCaller operator
+
+        no_profiling_on_shape_change: do not re-profile to select the best
+            algorithm when the input shape changes (use the previous algorithm)
+
+        jit_level: execute supported operators with JIT (supports MLIR and
+            NVRTC). Can only be used on NVIDIA GPUs and x86 CPUs; this value
+            indicates the JIT level:
+
+            level 1: JIT execution with basic elemwise operators
+
+            level 2: JIT execution with elemwise and reduce operators
+
+        record_level: the option to optimize the inference performance by
+            recording the kernel tasks in the first run; afterwards the
+            inference only needs to execute the recorded tasks.
+
+            level = 0 means normal inference
+
+            level = 1 means use record inference
+
+            level = 2 means record inference and free the extra memory
+
+        graph_opt_level: network optimization level:
+
+            0: disable
+
+            1: level-1: inplace arith transformations during graph construction
+
+            2: level-2: level-1, plus global optimization before graph compiling
+
+            3: also enable JIT
+
+        async_exec_level: level of dispatch on separate threads for different
+            comp nodes.
+
+            0: do not perform async dispatch
+
+            1: dispatch async if there is more than one comp node, with a
+            limited queue
+
+            mask 0b10: async if there are multiple comp nodes
+
+            mask 0b100: always async
+
+    Examples:
+        .. code-block::
+
+            from megenginelite import *
+            options = LiteOptions()
+            options.weight_preprocess = True
+            options.record_level = 1
+            options.fuse_preprocess = True
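+
+            # a further sketch (illustrative, not from the original example):
+            # the other documented fields are set the same way, e.g. enabling
+            # const_shape when the input shape is known to be fixed
+            options.const_shape = True
+            options.graph_opt_level = 2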
     """
 
     _fields_ = [
@@ -39,6 +114,7 @@ class LiteOptions(Structure):
     ]
 
     def __init__(self):
+
         self.weight_preprocess = False
         self.fuse_preprocess = False
         self.fake_next_exec = False
@@ -76,17 +152,34 @@ class LiteOptions(Structure):
 
 class LiteConfig(Structure):
     """
-    Configuration when load and compile the graph
+    configuration when loading and compiling a network
+
+    Attributes:
+        has_compression: flag whether the model is compressed; the compression
+            method is stored in the model
 
-    bare_model_cryption_name: is the bare model cryption method name, bare
-    model is not pack model info inside
+        device_id: configure the device id of a network
 
-    use_loader_dynamic_param: when model forward with device loader of npu,
-    use_loader_dynamic_param used to flag whether the loader use device input or
-    output, if use device input or output it will set Non-zero , else set zero
+        device_type: configure the device type of a network
 
-    has_compression: flag whether the model is compressed, the compress
-    method will used to read the model
+        backend: configure the inference backend of a network, currently only
+            megengine is supported
+
+        bare_model_cryption_name: the name of the encryption method of a bare
+            model; a bare model is not packed with json information, so this
+            name is needed to decrypt the encrypted bare model
+
+        options: the LiteOptions used to optimize the network forwarding
+
+    Examples:
+        .. code-block::
+
+            from megenginelite import *
+            config = LiteConfig()
+            config.has_compression = False
+            config.device_type = LiteDeviceType.LITE_CPU
+            config.backend = LiteBackend.LITE_DEFAULT
+            config.bare_model_cryption_name = "AES_default".encode("utf-8")
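+
+            # a sketch of attaching the options described above; this
+            # combination is illustrative, not required
+            options = LiteOptions()
+            options.weight_preprocess = True
+            config.options = options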
     """
 
     _fields_ = [
@@ -161,23 +254,43 @@ class LiteExtraConfig(Structure):
 
 class LiteIO(Structure):
     """
-    config the network input and output item
+    config the network input and output items; the input and output tensor
+    information is described here
+
+    Attributes:
+        name: the tensor name in the graph corresponding to the IO
+        is_host: used to mark where the input tensor comes from and where the
+            output tensor will be copied to. If is_host is True, the input
+            comes from the host and the output is copied to the host,
+            otherwise to the device. Sometimes the input comes from the device
+            and the output does not need to be copied to the host; default is
+            True.
+
+        io_type: the IO type, it can be SHAPE or VALUE; when SHAPE is set, the
+            input or output tensor value is invalid and only the shape will be
+            set; default is VALUE
+
+        config_layout: the layout configured by the user. If another layout is
+            set before forwarding or gotten after forwarding, this layout will
+            be bypassed. If no other layout is set before forwarding, this
+            layout will take effect. If this layout is not set, the model will
+            forward with its original layout. If set on an output, it will be
+            used for checking.
+
+    Note:
+        if another layout is set on the input tensor before forwarding, this
+        layout will not take effect
 
-    name: the tensor name in the graph corresponding to the IO
+        if no layout is set before forwarding, the model will forward with its
+        original layout
 
-    is_host: Used to mark where the input tensor comes from and the output where copy
-        to, if is_host is true, the input is from host and output copy to host,
-        otherwise device. Sometimes The input is from device and output no need
-        copy to host, default is true.
+
+        if a layout is set on an output tensor, it will be used to check
+        whether the layout computed by the network is correct
 
-    io_type: The IO type, it can be SHAPE or VALUE, when SHAPE is set, the input or
-        output tensor value is invaid, only shape will be set, default is VALUE
+    Examples:
+        .. code-block::
+
+            from megenginelite import *
+            io = LiteIO(
+                "data2",
+                is_host=True,
+                io_type=LiteIOType.LITE_IO_SHAPE,
+                layout=LiteLayout([2, 4, 4]),
+            )
 
-    config_layout: The layout of the config from user, if other layout is set before
-        forward or get after forward, this layout will by pass. if no other
-        layout is set before forward, this layout will work. if this layout is
-        no set, the model will forward with its origin layout. if in output, it
-        will used to check.
     """
 
     _fields_ = [
@@ -205,10 +318,16 @@ class LiteIO(Structure):
 
     @property
     def name(self):
+        """
+        get the name of the IO item
+        """
         return self._name.decode("utf-8")
 
     @name.setter
     def name(self, name):
+        """
+        set the name of the IO item
+        """
         if isinstance(name, str):
             self._name = name.encode("utf-8")
         else:
@@ -229,9 +348,6 @@ class LiteIO(Structure):
 
 class _LiteNetworkIO(Structure):
-    """
-    the input and output information when load the network
-    """
 
     _fields_ = [
         ("inputs", POINTER(LiteIO)),
@@ -249,7 +365,24 @@ class _LiteNetworkIO(Structure):
 
 class LiteNetworkIO(object):
     """
-    the input and output information for user to construct _LiteNetWorkIO
+    the input and output information for the user when loading the network;
+    the NetworkIO will remain in the network until the network is destroyed.
+
+    Attributes:
+        inputs: the information of all input tensors that will be configured
+            to the network
+
+        outputs: the information of all output tensors that will be configured
+            to the network
+
+    Examples:
+        .. code-block::
+
+            from megenginelite import *
+            input_io = LiteIO("data", is_host=False, io_type=LiteIOType.LITE_IO_VALUE)
+            io = LiteNetworkIO()
+            io.add_input(input_io)
+            output_io = LiteIO("out", is_host=True, layout=LiteLayout([1, 1000]))
+            io.add_output(output_io)
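+
+            # equivalently (a sketch), the same IO lists can be passed to the
+            # constructor directly
+            io2 = LiteNetworkIO(inputs=[input_io], outputs=[output_io])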
+
     """
 
     def __init__(self, inputs=None, outputs=None):
@@ -277,6 +410,9 @@ class LiteNetworkIO(object):
     def add_input(
         self, obj, is_host=True, io_type=LiteIOType.LITE_IO_VALUE, layout=None
     ):
+        """
+        add input information into LiteNetworkIO
+        """
         if isinstance(obj, LiteIO):
             self.inputs.append(obj)
         else:
@@ -286,6 +422,9 @@ class LiteNetworkIO(object):
     def add_output(
         self, obj, is_host=True, io_type=LiteIOType.LITE_IO_VALUE, layout=None
     ):
+        """
+        add output information into LiteNetworkIO
+        """
         if isinstance(obj, LiteIO):
             self.outputs.append(obj)
         else:
@@ -397,6 +536,27 @@ class _NetworkAPI(_LiteCObjBase):
 
 class LiteNetwork(object):
     """
     the network to load a model and forward
+
+    Examples:
+
+        .. code-block::
+
+            from megenginelite import *
+            config = LiteConfig()
+            config.device_type = LiteDeviceType.LITE_CPU
+            network = LiteNetwork(config)
+            network.load("model_path")
+
+            input_name = network.get_input_name(0)
+            input_tensor = network.get_io_tensor(input_name)
+            output_name = network.get_output_name(0)
+            output_tensor = network.get_io_tensor(output_name)
+
+            input_tensor.set_data_by_copy(input_data)
+
+            network.forward()
+            network.wait()
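+
+            # a sketch of reading the result back; a tensor-to-numpy helper
+            # such as to_numpy is assumed to be available on LiteTensor
+            output_data = output_tensor.to_numpy()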
+
     """
 
     _api = _NetworkAPI()._lib
@@ -428,18 +588,33 @@ class LiteNetwork(object):
         self._api.LITE_destroy_network(self._network)
 
     def load(self, path):
+        """
+        load the network from the given path
+        """
        c_path = c_char_p(path.encode("utf-8"))
        self._api.LITE_load_model_from_path(self._network, c_path)
 
     def forward(self):
+        """
+        forward the network with the filled input data and fill the output
+        data to the output tensor
+        """
        self._api.LITE_forward(self._network)
 
     def wait(self):
+        """
+        wait until the forward finishes in sync mode
+        """
        self._api.LITE_wait(self._network)
 
     def is_cpu_inplace_mode(self):
        """
        whether the network run in cpu inpalce mode
+
+        Returns:
+            True if the network runs in cpu inplace mode, otherwise False
+
        """
        inplace = c_int()
        self._api.LITE_is_cpu_inplace_mode(self._network, byref(inplace))
@@ -449,13 +624,20 @@
    def set_cpu_inplace_mode(self):
        """
        set cpu forward in inplace mode with which cpu forward only create
        one thread
-        Note: this must be set before the network loaded
+
+        Note:
+            this must be set before the network is loaded
+
        """
        self._api.LITE_set_cpu_inplace_mode(self._network)
 
    def use_tensorrt(self):
        """
-        Note: this must be set before the network loaded
+        use TensorRT
+
+        Note:
+            this must be set before the network is loaded
+
        """
        self._api.LITE_use_tensorrt(self._network)
@@ -463,6 +645,9 @@
    def device_id(self):
        """
        get the device id
+
+        Returns:
+            the device id used by the current network
        """
        device_id = c_int()
        self._api.LITE_get_device_id(self._network, byref(device_id))
@@ -472,7 +657,10 @@
    def device_id(self, device_id):
        """
        set the device id
-        Note: this must be set before the network loaded
+
+        Note:
+            this must be set before the network is loaded
+
        """
        self._api.LITE_set_device_id(self._network, device_id)
@@ -480,6 +668,9 @@
    def stream_id(self):
        """
        get the stream id
+
+        Returns:
+            the value of the stream id set for the network
        """
        stream_id = c_int()
        self._api.LITE_get_stream_id(self._network, byref(stream_id))
@@ -489,7 +680,9 @@
    def stream_id(self, stream_id):
        """
        set the stream id
-        Note: this must be set before the network loaded
+
+        Note:
+            this must be set before the network is loaded
        """
        self._api.LITE_set_stream_id(self._network, stream_id)
@@ -497,6 +690,9 @@
    def threads_number(self):
        """
        get the thread number of the netwrok
+
+        Returns:
+            the number of threads set in the network
        """
        nr_thread = c_size_t()
        self._api.LITE_get_cpu_threads_number(self._network, byref(nr_thread))
@@ -506,13 +702,22 @@
    def threads_number(self, nr_threads):
        """
        set the network forward in multithread mode, and the thread number
+
+        Note:
+            this must be set before the network is loaded
        """
        self._api.LITE_set_cpu_threads_number(self._network, nr_threads)
 
    def get_io_tensor(self, name, phase=LiteTensorPhase.LITE_IO):
        """
        get input or output tensor by its name
+
+        Args:
+            name: the name of the io tensor
+            phase: the type of LiteTensor; this is useful to separate input or
+                output tensors with the same name
+
+        Returns:
+            the tensor with the given name and type
        """
        if type(name) == str:
            c_name = c_char_p(name.encode("utf-8"))
@@ -528,6 +733,12 @@
    def get_input_name(self, index):
        """
        get the input name by the index in the network
+
+        Args:
+            index: the index of the input name
+
+        Returns:
+            the name of the input tensor with the given index
        """
        c_name = c_char_p()
        self._api.LITE_get_input_name(self._network, index, byref(c_name))
@@ -536,6 +747,12 @@
    def get_output_name(self, index):
        """
        get the output name by the index in the network
+
+        Args:
+            index: the index of the output name
+
+        Returns:
+            the name of the output tensor with the given index
        """
        c_name = c_char_p()
        self._api.LITE_get_output_name(self._network, index, byref(c_name))
@@ -544,6 +761,9 @@
    def get_all_input_name(self):
        """
        get all the input tensor name in the network
+
+        Returns:
+            the names of all input tensors in the network
        """
        nr_input = c_size_t()
        self._api.LITE_get_all_input_name(self._network, byref(nr_input), None)
@@ -557,6 +777,9 @@
    def get_all_output_name(self):
        """
        get all the output tensor name in the network
+
+        Returns:
+            the names of all output tensors in the network
        """
        nr_output = c_size_t()
        self._api.LITE_get_all_output_name(self._network, byref(nr_output), None)
@@ -576,6 +799,9 @@
    def share_weights_with(self, src_network):
        """
        share weights with the loaded network
+
+        Args:
+            src_network: the network to share weights with
        """
        assert isinstance(src_network, LiteNetwork)
        self._api.LITE_shared_weight_with_network(self._network, src_network._network)
@@ -583,11 +809,21 @@
    def share_runtime_memroy(self, src_network):
        """
        share runtime memory with the source network
+
+        Args:
+            src_network: the network to share runtime memory with
        """
        assert isinstance(src_network, LiteNetwork)
        self._api.LITE_share_runtime_memroy(self._network, src_network._network)
 
    def async_with_callback(self, async_callback):
+        """
+        set the network forwarding in async mode and set the AsyncCallback
+        callback function
+
+        Args:
+            async_callback: the callback to set for the network
+        """
        callback = wrap_async_callback(async_callback)
        self._api.LITE_set_async_callback(self._network, callback)
@@ -596,6 +832,9 @@
    def set_start_callback(self, start_callback):
        """
        when the network start forward, the callback will be called, the
        start_callback with param mapping from LiteIO to the corresponding
        LiteTensor
+
+        Args:
+            start_callback: the callback to set for the network
        """
        callback = start_finish_callback(start_callback)
        self._api.LITE_set_start_callback(self._network, callback)
@@ -605,28 +844,49 @@
    def set_finish_callback(self, finish_callback):
        """
        when the network finish forward, the callback will be called, the
        finish_callback with param mapping from LiteIO to the corresponding
        LiteTensor
+
+        Args:
+            finish_callback: the callback to set for the network
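+
+        Examples:
+            .. code-block::
+
+                # a minimal sketch; per the description above, the callback
+                # receives a mapping from LiteIO to the corresponding
+                # LiteTensor
+                def print_finish_info(io_map):
+                    for io, tensor in io_map.items():
+                        print(io.name, tensor.layout)
+
+                network.set_finish_callback(print_finish_info)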
set workspace limitation + can save memory but may influence the performance + + Args: + size_limit: the byte size of workspace limitation + """ self._api.LITE_set_network_algo_workspace_limit(self._network, size_limit) def set_network_algo_policy( self, policy, shared_batch_size=0, binary_equal_between_batch=False ): """ - shared_batch_size: the batch size used by fastrun, - Non-zero value means that fastrun use this batch size - regardless of the batch size of the model. Zero means - fastrun use batch size of the model - binary_equal_between_batch: if the content of each input batch is - binary equal,whether the content of each output batch is - promised to be equal + set the network algorithm search policy for fast-run + + Args: + shared_batch_size: the batch size used by fastrun, + Non-zero value means that fastrun use this batch size + regardless of the batch size of the model. Zero means + fastrun use batch size of the model + + binary_equal_between_batch: if the content of each input batch is + binary equal,whether the content of each output batch is + promised to be equal """ self._api.LITE_set_network_algo_policy(self._network, policy) @@ -635,29 +895,68 @@ class LiteNetwork(object): ) def io_txt_dump(self, txt_file): + """ + dump all input/output tensor of all operators to the output file, in txt + format, user can use this function to debug compute error + + Args: + txt_file: the txt file + """ c_file = txt_file.encode("utf-8") self._api.LITE_enable_io_txt_dump(self._network, c_file) def io_bin_dump(self, bin_dir): + """ + dump all input/output tensor of all operators to the output file, in + binary format, user can use this function to debug compute error + + Args: + bin_dir: the binary file directory + """ c_dir = bin_dir.encode("utf-8") self._api.LITE_enable_io_bin_dump(self._network, c_dir) def get_static_memory_alloc_info(self, log_dir="logs/test"): + """ + get static peak memory info showed by Graph visualization + + Args: + log_dir: the directory to save information log + """ c_log_dir = log_dir.encode("utf-8") self._api.LITE_get_static_memory_alloc_info(self._network, c_log_dir) def enable_global_layout_transform(self): + """ + set global layout transform optimization for network, global + layout optimization can auto determine the layout of every operator in + the network by profile, thus it can improve the performance of the + network forwarding + """ self._api.LITE_enable_global_layout_transform(self._network) def dump_layout_transform_model(self, model_file): + """ + dump network after global layout transform optimization to the + specific path + + Args: + model_file: the file path to dump model + """ c_file = model_file.encode("utf-8") self._api.LITE_dump_layout_transform_model(self._network, c_file) def get_model_io_info(model_path, config=None): """ - get the model IO information before create the NetWork, this IO - information can be used to configuration the NetWork. + get the model io information before model loaded by model path. + + Args: + model_path: the model path to get the model IO information + config the model configuration + + Returns: + the input and output information in the network configuration """ api = _NetworkAPI()._lib c_path = c_char_p(model_path.encode("utf-8"))