|
|
@@ -11,7 +11,82 @@ from .tensor import * |
|
|
|
|
|
|
|
class LiteOptions(Structure): |
|
|
|
""" |
|
|
|
the inference options used to configure a network, which can optimize the
network forwarding performance
|
|
|
|
|
|
|
Attributes: |
|
|
|
weight_preprocess: the option to optimize inference performance by
processing the weights of the network ahead of time
|
|
|
|
|
|
|
fuse_preprocess: fuse the preprocess pattern, like astype + pad_channel +
|
|
|
dimshuffle |
|
|
|
|
|
|
|
fake_next_exec: whether only to perform non-computing tasks (like |
|
|
|
memory allocation and queue initialization) for the next execution. This will be
|
|
|
reset to false when the graph is executed. |
|
|
|
|
|
|
|
var_sanity_check_first_run: whether to perform var sanity check on the first run.
Var sanity check is enabled on the first-time execution by default, and can
be used to find some potential memory access errors in the operator
implementation
|
|
|
|
|
|
|
const_shape: used to reduce memory usage and improve performance since some |
|
|
|
static inference data structures can be omitted and some operators can be |
|
|
|
computed before forwarding
|
|
|
|
|
|
|
force_dynamic_alloc: force dynamic memory allocation for all vars
|
|
|
|
|
|
|
force_output_dynamic_alloc: force dynamic memory allocation for output tensors
that are used as the input of the CallbackCaller operator
|
|
|
|
|
|
|
no_profiling_on_shape_change: do not re-profile to select the best
algorithm when the input shape changes (use the previous algorithm)
|
|
|
|
|
|
|
jit_level: execute supported operators with JIT (supports MLIR and
NVRTC). It can only be used on NVIDIA GPUs and x86 CPUs; this value indicates the JIT level:
|
|
|
|
|
|
|
level 1: JIT execution of basic elemwise operators

level 2: JIT execution of elemwise and reduce operators
|
|
|
|
|
|
|
record_level: flag to optimize inference performance by recording the
kernel tasks in the first run; afterwards, all the inference needs to do is
execute the recorded tasks.
|
|
|
|
|
|
|
level = 0 means the normal inference |
|
|
|
|
|
|
|
level = 1 means use record inference |
|
|
|
|
|
|
|
level = 2 means record inference with free the extra memory |
|
|
|
|
|
|
|
|
|
|
|
graph_opt_level: network optimization level: |
|
|
|
|
|
|
|
0: disable |
|
|
|
|
|
|
|
1: level-1: inplace arith transformations during graph construction |
|
|
|
|
|
|
|
2: level-2: level-1, plus global optimization before graph compiling |
|
|
|
|
|
|
|
3: also enable JIT |
|
|
|
|
|
|
|
async_exec_level: level of dispatch on separate threads for different comp_node. |
|
|
|
|
|
|
|
0: do not perform async dispatch |
|
|
|
|
|
|
|
1: dispatch asynchronously if there is more than one comp node with limited queue
|
|
|
|
|
|
|
mask 0b10: async if there are multiple comp nodes with unlimited queue
|
|
|
|
|
|
|
mask 0b100: always async |
|
|
|
|
|
|
|
Examples: |
|
|
|
.. code-block:: |
|
|
|
|
|
|
|
from megenginelite import * |
|
|
|
options = LiteOptions() |
|
|
|
options.weight_preprocess = True

options.record_level = 1

options.fuse_preprocess = True
|
|
|
""" |
|
|
|
|
|
|
|
_fields_ = [ |
|
|
@@ -39,6 +114,7 @@ class LiteOptions(Structure): |
|
|
|
] |
|
|
|
|
|
|
|
def __init__(self): |
|
|
|
|
|
|
|
self.weight_preprocess = False |
|
|
|
self.fuse_preprocess = False |
|
|
|
self.fake_next_exec = False |
|
|
@@ -76,17 +152,34 @@ class LiteOptions(Structure): |
|
|
|
|
|
|
|
class LiteConfig(Structure): |
|
|
|
""" |
|
|
|
Configuration when loading and compiling a network
|
|
|
|
|
|
|
Attributes: |
|
|
|
|
|
|
device_id: configure the device id of a network |
|
|
|
|
|
|
|
use_loader_dynamic_param: when the model forwards with a device loader of NPU,
this flag marks whether the loader uses device input or output; it is set
non-zero when device input or output is used, else zero
|
|
|
device_type: configure the device type of a network |
|
|
|
|
|
|
|
has_compression: flag whether the model is compressed; the compression
method will be used to read the model
|
|
|
backend: configure the inference backend of a network; currently only
megengine is supported
|
|
|
|
|
|
|
bare_model_cryption_name: the bare model encryption method name; a bare
model is not packed with json information, and this encryption method name
is needed to decrypt the encrypted bare model
|
|
|
|
|
|
|
options: the inference options of the network (see LiteOptions)
|
|
|
|
|
|
|
Examples: |
|
|
|
.. code-block:: |
|
|
|
|
|
|
|
from megenginelite import * |
|
|
|
config = LiteConfig() |
|
|
|
config.has_compression = False
|
|
|
config.device_type = LiteDeviceType.LITE_CPU |
|
|
|
config.backend = LiteBackend.LITE_DEFAULT |
|
|
|
config.bare_model_cryption_name = "AES_default".encode("utf-8") |
|
|
|
""" |
|
|
|
|
|
|
|
_fields_ = [ |
|
|
@@ -161,23 +254,43 @@ class LiteExtraConfig(Structure): |
|
|
|
|
|
|
|
class LiteIO(Structure): |
|
|
|
""" |
|
|
|
config the network input and output items; the input and output tensor
information is described here

Attributes:

name: the tensor name in the graph corresponding to the IO

is_host: used to mark where the input tensor comes from and where the output
tensor will be copied to. If is_host is True, the input is from the host and
the output is copied to the host; otherwise they are on the device. Sometimes
the input is from the device and the output does not need to be copied to
the host. Default is True.

io_type: the IO type, which can be SHAPE or VALUE. When SHAPE is set, the
input or output tensor value is invalid and only the shape will be set.
Default is VALUE.

config_layout: the layout configured by the user. If another layout is set
before forward or fetched after forward, this layout is bypassed; if no
other layout is set before forward, this layout takes effect; if this
layout is not set, the model forwards with its original layout. For an
output, it is used for checking.

Note:

if another layout is set on the input tensor before forwarding, this layout will not take effect

if no layout is set before forwarding, the model will forward with its original layout

if a layout is set on the output tensor, it will be used to check whether the layout computed by the network is correct

Examples:

.. code-block::

    from megenginelite import *

    io = LiteIO(
        "data2",
        is_host=True,
        io_type=LiteIOType.LITE_IO_SHAPE,
        layout=LiteLayout([2, 4, 4]),
    )
|
|
|
""" |
|
|
|
|
|
|
|
_fields_ = [ |
|
|
@@ -205,10 +318,16 @@ class LiteIO(Structure): |
|
|
|
|
|
|
|
@property |
|
|
|
def name(self): |
|
|
|
""" |
|
|
|
get the name of IO item |
|
|
|
""" |
|
|
|
return self._name.decode("utf-8") |
|
|
|
|
|
|
|
@name.setter |
|
|
|
def name(self, name): |
|
|
|
""" |
|
|
|
set the name of IO item |
|
|
|
""" |
|
|
|
if isinstance(name, str): |
|
|
|
self._name = name.encode("utf-8") |
|
|
|
else: |
|
|
@@ -229,9 +348,6 @@ class LiteIO(Structure): |
|
|
|
|
|
|
|
|
|
|
|
class _LiteNetworkIO(Structure): |
|
|
|
""" |
|
|
|
the input and output information used when loading the network
|
|
|
""" |
|
|
|
|
|
|
|
_fields_ = [ |
|
|
|
("inputs", POINTER(LiteIO)), |
|
|
@@ -249,7 +365,24 @@ class _LiteNetworkIO(Structure): |
|
|
|
|
|
|
|
class LiteNetworkIO(object): |
|
|
|
""" |
|
|
|
the input and output information for the user to construct _LiteNetworkIO
when loading the network; the NetworkIO will remain in the network until
the network is destroyed.
|
|
|
|
|
|
|
Attributes: |
|
|
|
inputs: all the input tensor information that will be configured to the network

outputs: all the output tensor information that will be configured to the network
|
|
|
|
|
|
|
Examples: |
|
|
|
.. code-block:: |
|
|
|
|
|
|
|
from megenginelite import * |
|
|
|
input_io = LiteIO("data", is_host=False, io_type=LiteIOType.LITE_IO_VALUE) |
|
|
|
io = LiteNetworkIO() |
|
|
|
io.add_input(input_io) |
|
|
|
output_io = LiteIO("out", is_host=True, layout=LiteLayout([1, 1000])) |
|
|
|
io.add_output(output_io) |
|
|
|
|
|
|
|
""" |
|
|
|
|
|
|
|
def __init__(self, inputs=None, outputs=None): |
|
|
@@ -277,6 +410,9 @@ class LiteNetworkIO(object): |
|
|
|
def add_input( |
|
|
|
self, obj, is_host=True, io_type=LiteIOType.LITE_IO_VALUE, layout=None |
|
|
|
): |
|
|
|
""" |
|
|
|
add input information into LiteNetworkIO |
|
|
|
""" |
|
|
|
if isinstance(obj, LiteIO): |
|
|
|
self.inputs.append(obj) |
|
|
|
else: |
|
|
@@ -286,6 +422,9 @@ class LiteNetworkIO(object): |
|
|
|
def add_output( |
|
|
|
self, obj, is_host=True, io_type=LiteIOType.LITE_IO_VALUE, layout=None |
|
|
|
): |
|
|
|
""" |
|
|
|
add output information into LiteNetworkIO |
|
|
|
""" |
|
|
|
if isinstance(obj, LiteIO): |
|
|
|
self.outputs.append(obj) |
|
|
|
else: |
|
|
@@ -397,6 +536,27 @@ class _NetworkAPI(_LiteCObjBase): |
|
|
|
class LiteNetwork(object): |
|
|
|
""" |
|
|
|
the network used to load a model and run forward
|
|
|
|
|
|
|
Examples: |
|
|
|
|
|
|
|
.. code-block:: |
|
|
|
|
|
|
|
from megenginelite import * |
|
|
|
config = LiteConfig() |
|
|
|
config.device_type = LiteDeviceType.LITE_CPU |
|
|
|
network = LiteNetwork(config) |
|
|
|
network.load("model_path") |
|
|
|
|
|
|
|
input_name = network.get_input_name(0) |
|
|
|
input_tensor = network.get_io_tensor(input_name) |
|
|
|
output_name = network.get_output_name(0) |
|
|
|
output_tensor = network.get_io_tensor(output_name) |
|
|
|
|
|
|
|
input_tensor.set_data_by_copy(input_data) |
|
|
|
|
|
|
|
network.forward() |
|
|
|
network.wait() |
|
|
|
|
|
|
|
""" |
|
|
|
|
|
|
|
_api = _NetworkAPI()._lib |
|
|
@@ -428,18 +588,33 @@ class LiteNetwork(object): |
|
|
|
self._api.LITE_destroy_network(self._network) |
|
|
|
|
|
|
|
def load(self, path): |
|
|
|
""" |
|
|
|
load network from given path |
|
|
|
""" |
|
|
|
c_path = c_char_p(path.encode("utf-8")) |
|
|
|
self._api.LITE_load_model_from_path(self._network, c_path) |
|
|
|
|
|
|
|
def forward(self): |
|
|
|
""" |
|
|
|
forward the network with the filled input data and fill the output data
into the output tensor
|
|
|
""" |
|
|
|
self._api.LITE_forward(self._network) |
|
|
|
|
|
|
|
def wait(self): |
|
|
|
""" |
|
|
|
wait until forward finishes in sync mode
|
|
|
""" |
|
|
|
self._api.LITE_wait(self._network) |
|
|
|
|
|
|
|
def is_cpu_inplace_mode(self): |
|
|
|
""" |
|
|
|
whether the network runs in cpu inplace mode

Returns:

True if the network runs in cpu inplace mode, else False
|
|
|
|
|
|
|
|
|
|
|
""" |
|
|
|
inplace = c_int() |
|
|
|
self._api.LITE_is_cpu_inplace_mode(self._network, byref(inplace)) |
|
|
@@ -449,13 +624,20 @@ class LiteNetwork(object): |
|
|
|
""" |
|
|
|
set cpu forward in inplace mode, in which cpu forward creates only one
thread
|
|
|
|
|
|
|
Note: |
|
|
|
this must be set before the network is loaded
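
Examples:

A minimal usage sketch ("model_path" is a placeholder for a real model file):

.. code-block::

    from megenginelite import *

    network = LiteNetwork()
    network.set_cpu_inplace_mode()  # must be called before the model is loaded
    network.load("model_path")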
|
|
|
|
|
|
|
""" |
|
|
|
self._api.LITE_set_cpu_inplace_mode(self._network) |
|
|
|
|
|
|
|
def use_tensorrt(self): |
|
|
|
""" |
|
|
|
|
|
|
use TensorRT |
|
|
|
|
|
|
|
Note: |
|
|
|
this must be set before the network is loaded
|
|
|
|
|
|
|
""" |
|
|
|
self._api.LITE_use_tensorrt(self._network) |
|
|
|
|
|
|
@@ -463,6 +645,9 @@ class LiteNetwork(object): |
|
|
|
def device_id(self): |
|
|
|
""" |
|
|
|
get the device id |
|
|
|
|
|
|
|
Returns: |
|
|
|
the device id used by the current network
|
|
|
""" |
|
|
|
device_id = c_int() |
|
|
|
self._api.LITE_get_device_id(self._network, byref(device_id)) |
|
|
@@ -472,7 +657,10 @@ class LiteNetwork(object): |
|
|
|
def device_id(self, device_id): |
|
|
|
""" |
|
|
|
set the device id |
|
|
|
|
|
|
|
|
|
|
Note: |
|
|
|
this must be set before the network is loaded
|
|
|
|
|
|
|
""" |
|
|
|
self._api.LITE_set_device_id(self._network, device_id) |
|
|
|
|
|
|
@@ -480,6 +668,9 @@ class LiteNetwork(object): |
|
|
|
def stream_id(self): |
|
|
|
""" |
|
|
|
get the stream id |
|
|
|
|
|
|
|
Returns: |
|
|
|
the stream id set for the network
|
|
|
""" |
|
|
|
stream_id = c_int() |
|
|
|
self._api.LITE_get_stream_id(self._network, byref(stream_id)) |
|
|
@@ -489,7 +680,9 @@ class LiteNetwork(object): |
|
|
|
def stream_id(self, stream_id): |
|
|
|
""" |
|
|
|
set the stream id |
|
|
|
|
|
|
|
|
|
|
Note: |
|
|
|
this must be set before the network is loaded
|
|
|
""" |
|
|
|
self._api.LITE_set_stream_id(self._network, stream_id) |
|
|
|
|
|
|
@@ -497,6 +690,9 @@ class LiteNetwork(object): |
|
|
|
def threads_number(self): |
|
|
|
""" |
|
|
|
get the thread number of the network
|
|
|
|
|
|
|
Returns: |
|
|
|
the number of threads set in the network
|
|
|
""" |
|
|
|
nr_thread = c_size_t() |
|
|
|
self._api.LITE_get_cpu_threads_number(self._network, byref(nr_thread)) |
|
|
@@ -506,13 +702,22 @@ class LiteNetwork(object): |
|
|
|
def threads_number(self, nr_threads): |
|
|
|
""" |
|
|
|
set the network to forward in multithread mode, and set the thread number
|
|
|
|
|
|
|
Note: |
|
|
|
this must be set before the network is loaded
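
Examples:

A minimal sketch of configuring the thread number before load ("model_path" is a placeholder):

.. code-block::

    from megenginelite import *

    network = LiteNetwork()
    network.threads_number = 4  # configure threads before loading the model
    network.load("model_path")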
|
|
|
""" |
|
|
|
self._api.LITE_set_cpu_threads_number(self._network, nr_threads) |
|
|
|
|
|
|
|
def get_io_tensor(self, name, phase=LiteTensorPhase.LITE_IO): |
|
|
|
""" |
|
|
|
get input or output tensor by its name |
|
|
|
|
|
|
|
Args: |
|
|
|
name: the name of io tensor |
|
|
|
phase: the phase of the LiteTensor; this is useful to distinguish input and output tensors with the same name
|
|
|
|
|
|
|
Returns: |
|
|
|
the tensor with given name and type |
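
Examples:

A minimal sketch that fetches an input tensor by name ("model_path" is a placeholder):

.. code-block::

    from megenginelite import *

    network = LiteNetwork()
    network.load("model_path")
    input_name = network.get_input_name(0)
    input_tensor = network.get_io_tensor(input_name)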
|
|
|
""" |
|
|
|
if type(name) == str: |
|
|
|
c_name = c_char_p(name.encode("utf-8")) |
|
|
@@ -528,6 +733,12 @@ class LiteNetwork(object): |
|
|
|
def get_input_name(self, index): |
|
|
|
""" |
|
|
|
get the input name by the index in the network |
|
|
|
|
|
|
|
Args: |
|
|
|
index: the index of the input name |
|
|
|
|
|
|
|
Returns: |
|
|
|
the name of the input tensor with the given index
|
|
|
""" |
|
|
|
c_name = c_char_p() |
|
|
|
self._api.LITE_get_input_name(self._network, index, byref(c_name)) |
|
|
@@ -536,6 +747,12 @@ class LiteNetwork(object): |
|
|
|
def get_output_name(self, index): |
|
|
|
""" |
|
|
|
get the output name by the index in the network |
|
|
|
|
|
|
|
Args: |
|
|
|
index: the index of the output name |
|
|
|
|
|
|
|
Returns: |
|
|
|
the name of the output tensor with the given index
|
|
|
""" |
|
|
|
c_name = c_char_p() |
|
|
|
self._api.LITE_get_output_name(self._network, index, byref(c_name)) |
|
|
@@ -544,6 +761,9 @@ class LiteNetwork(object): |
|
|
|
def get_all_input_name(self): |
|
|
|
""" |
|
|
|
get all the input tensor names in the network

Returns:

the names of all input tensors in the network
|
|
|
""" |
|
|
|
nr_input = c_size_t() |
|
|
|
self._api.LITE_get_all_input_name(self._network, byref(nr_input), None) |
|
|
@@ -557,6 +777,9 @@ class LiteNetwork(object): |
|
|
|
def get_all_output_name(self): |
|
|
|
""" |
|
|
|
get all the output tensor names in the network

Returns:

the names of all output tensors in the network
|
|
|
""" |
|
|
|
nr_output = c_size_t() |
|
|
|
self._api.LITE_get_all_output_name(self._network, byref(nr_output), None) |
|
|
@@ -576,6 +799,9 @@ class LiteNetwork(object): |
|
|
|
def share_weights_with(self, src_network): |
|
|
|
""" |
|
|
|
share weights with the loaded network |
|
|
|
|
|
|
|
Args: |
|
|
|
src_network: the loaded network to share weights from
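
Examples:

A minimal sketch, assuming the new network can share weights right after the source network is loaded ("model_path" is a placeholder):

.. code-block::

    from megenginelite import *

    src_network = LiteNetwork()
    src_network.load("model_path")

    # the new instance reuses the weights of the loaded network
    new_network = LiteNetwork()
    new_network.share_weights_with(src_network)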
|
|
|
""" |
|
|
|
assert isinstance(src_network, LiteNetwork) |
|
|
|
self._api.LITE_shared_weight_with_network(self._network, src_network._network) |
|
|
@@ -583,11 +809,21 @@ class LiteNetwork(object): |
|
|
|
def share_runtime_memroy(self, src_network): |
|
|
|
""" |
|
|
|
share runtime memory with the source network
|
|
|
|
|
|
|
Args: |
|
|
|
src_network: the network to share runtime memory |
|
|
|
""" |
|
|
|
assert isinstance(src_network, LiteNetwork) |
|
|
|
self._api.LITE_share_runtime_memroy(self._network, src_network._network) |
|
|
|
|
|
|
|
def async_with_callback(self, async_callback): |
|
|
|
""" |
|
|
|
set the network forwarding in async mode and set the AsyncCallback callback |
|
|
|
function |
|
|
|
|
|
|
|
Args: |
|
|
|
async_callback: the callback to set for network |
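
Examples:

A minimal sketch; "model_path" is a placeholder, and the callback here simply reports completion:

.. code-block::

    from megenginelite import *

    network = LiteNetwork()
    network.load("model_path")

    def async_callback():
        print("forward finished")
        return 0

    network.async_with_callback(async_callback)
    network.forward()
    network.wait()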
|
|
|
""" |
|
|
|
callback = wrap_async_callback(async_callback) |
|
|
|
self._api.LITE_set_async_callback(self._network, callback) |
|
|
|
|
|
|
@@ -596,6 +832,9 @@ class LiteNetwork(object): |
|
|
|
when the network starts forward, the callback will be called with a
parameter mapping from LiteIO to the corresponding LiteTensor
|
|
|
|
|
|
|
Args: |
|
|
|
start_callback: the callback to set for network |
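
Examples:

A minimal sketch; "model_path" is a placeholder, and the callback argument is assumed to map each LiteIO to its LiteTensor as described above:

.. code-block::

    from megenginelite import *

    network = LiteNetwork()
    network.load("model_path")

    def start_callback(ios):
        # ios maps each input LiteIO to its LiteTensor
        for io in ios:
            print(io.name, ios[io].layout)
        return 0

    network.set_start_callback(start_callback)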
|
|
|
""" |
|
|
|
callback = start_finish_callback(start_callback) |
|
|
|
self._api.LITE_set_start_callback(self._network, callback) |
|
|
@@ -605,28 +844,49 @@ class LiteNetwork(object): |
|
|
|
when the network finishes forward, the callback will be called with a
parameter mapping from LiteIO to the corresponding LiteTensor
|
|
|
|
|
|
|
Args: |
|
|
|
finish_callback: the callback to set for network |
|
|
|
""" |
|
|
|
callback = start_finish_callback(finish_callback) |
|
|
|
self._api.LITE_set_finish_callback(self._network, callback) |
|
|
|
|
|
|
|
def enable_profile_performance(self, profile_file): |
|
|
|
""" |
|
|
|
enable profiling of the network performance and save the profiled information into the given file
|
|
|
|
|
|
|
Args: |
|
|
|
profile_file: the file to save profile information |
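
Examples:

A minimal sketch; "model_path" and "profile.json" are placeholders:

.. code-block::

    from megenginelite import *

    network = LiteNetwork()
    network.load("model_path")
    network.enable_profile_performance("profile.json")
    network.forward()
    network.wait()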
|
|
|
""" |
|
|
|
c_file = profile_file.encode("utf-8") |
|
|
|
self._api.LITE_enable_profile_performance(self._network, c_file) |
|
|
|
|
|
|
|
def set_network_algo_workspace_limit(self, size_limit): |
|
|
|
""" |
|
|
|
set the operator workspace limitation in the target network; some operators
may use a large workspace to get good performance, so setting a workspace
limitation can save memory but may influence the performance
|
|
|
|
|
|
|
Args: |
|
|
|
size_limit: the byte size of workspace limitation |
|
|
|
""" |
|
|
|
self._api.LITE_set_network_algo_workspace_limit(self._network, size_limit) |
|
|
|
|
|
|
|
def set_network_algo_policy( |
|
|
|
self, policy, shared_batch_size=0, binary_equal_between_batch=False |
|
|
|
): |
|
|
|
""" |
|
|
|
|
|
|
set the network algorithm search policy for fast-run |
|
|
|
|
|
|
|
Args:

policy: the algorithm search policy to set to the network
|
|
|
shared_batch_size: the batch size used by fastrun. A non-zero value means
that fastrun uses this batch size regardless of the batch size of the
model, while zero means fastrun uses the batch size of the model
|
|
|
|
|
|
|
binary_equal_between_batch: if the content of each input batch is |
|
|
|
binary equal, whether the content of each output batch is
|
|
|
promised to be equal |
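
Examples:

A minimal sketch; "model_path" is a placeholder, and LITE_ALGO_PROFILE is one strategy of LiteAlgoSelectStrategy:

.. code-block::

    from megenginelite import *

    network = LiteNetwork()
    # profile the candidate algorithms, then load and run the model
    network.set_network_algo_policy(LiteAlgoSelectStrategy.LITE_ALGO_PROFILE)
    network.load("model_path")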
|
|
|
|
|
|
|
""" |
|
|
|
self._api.LITE_set_network_algo_policy(self._network, policy) |
|
|
@@ -635,29 +895,68 @@ class LiteNetwork(object): |
|
|
|
) |
|
|
|
|
|
|
|
def io_txt_dump(self, txt_file): |
|
|
|
""" |
|
|
|
dump all input/output tensors of all operators to the output file, in txt
format; users can use this function to debug compute errors
|
|
|
|
|
|
|
Args: |
|
|
|
txt_file: the path of the output txt file
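
Examples:

A minimal sketch; "model_path" and "io_vals.txt" are placeholders:

.. code-block::

    from megenginelite import *

    network = LiteNetwork()
    network.load("model_path")
    network.io_txt_dump("io_vals.txt")
    network.forward()
    network.wait()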
|
|
|
""" |
|
|
|
c_file = txt_file.encode("utf-8") |
|
|
|
self._api.LITE_enable_io_txt_dump(self._network, c_file) |
|
|
|
|
|
|
|
def io_bin_dump(self, bin_dir): |
|
|
|
""" |
|
|
|
dump all input/output tensors of all operators to the output directory, in
binary format; users can use this function to debug compute errors
|
|
|
|
|
|
|
Args: |
|
|
|
bin_dir: the binary file directory |
|
|
|
""" |
|
|
|
c_dir = bin_dir.encode("utf-8") |
|
|
|
self._api.LITE_enable_io_bin_dump(self._network, c_dir) |
|
|
|
|
|
|
|
def get_static_memory_alloc_info(self, log_dir="logs/test"): |
|
|
|
""" |
|
|
|
get the static peak memory info shown by graph visualization
|
|
|
|
|
|
|
Args: |
|
|
|
log_dir: the directory to save information log |
|
|
|
""" |
|
|
|
c_log_dir = log_dir.encode("utf-8") |
|
|
|
self._api.LITE_get_static_memory_alloc_info(self._network, c_log_dir) |
|
|
|
|
|
|
|
def enable_global_layout_transform(self): |
|
|
|
""" |
|
|
|
enable global layout transform optimization for the network; global layout
optimization can automatically determine the layout of every operator in
the network by profiling, and thus can improve the performance of network
forwarding
|
|
|
""" |
|
|
|
self._api.LITE_enable_global_layout_transform(self._network) |
|
|
|
|
|
|
|
def dump_layout_transform_model(self, model_file): |
|
|
|
""" |
|
|
|
dump the network after global layout transform optimization to the
specified path
|
|
|
|
|
|
|
Args: |
|
|
|
model_file: the file path to dump model |
|
|
|
""" |
|
|
|
c_file = model_file.encode("utf-8") |
|
|
|
self._api.LITE_dump_layout_transform_model(self._network, c_file) |
|
|
|
|
|
|
|
|
|
|
|
def get_model_io_info(model_path, config=None): |
|
|
|
""" |
|
|
|
get the model IO information from the model path before creating the
network; this IO information can be used to configure the network
|
|
|
|
|
|
|
Args: |
|
|
|
model_path: the model path to get the model IO information

config: the model configuration
|
|
|
|
|
|
|
Returns: |
|
|
|
the input and output information in the network configuration |
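
Examples:

A minimal sketch; "model_path" is a placeholder, and the returned object is assumed to expose inputs and outputs like LiteNetworkIO:

.. code-block::

    from megenginelite import *

    ios = get_model_io_info("model_path")
    for io in ios.inputs:
        print(io.name)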
|
|
|
""" |
|
|
|
api = _NetworkAPI()._lib |
|
|
|
c_path = c_char_p(model_path.encode("utf-8")) |
|
|
|