#ifndef LITE_C_NETWORK_H_
#define LITE_C_NETWORK_H_

#include "tensor_c.h"

#ifdef __cplusplus
extern "C" {
#endif

/*!
 * \brief the inference options which will be translated to MegEngine
 *
 * \param weight_preprocess the option which optimizes inference performance
 * by preprocessing the const weights
 *
 * \param fuse_preprocess fuse preprocess patterns, like astype + pad_channel +
 * dimshuffle
 *
 * \param fake_next_exec whether to only perform non-computing tasks (like
 * memory allocation and queue initialization) for the next exec. This will be
 * reset to false when the graph is executed.
 *
 * \param var_sanity_check_first_run disable var sanity check on the first run.
 * Var sanity check is enabled on the first-time execution by default, and can
 * be used to find some potential memory access errors in the operator
 * implementation.
 *
 * \param const_shape can be used to reduce memory usage, since some
 * static inference data structures can be omitted
 *
 * \param force_dynamic_alloc force dynamic memory allocation for all vars
 *
 * \param force_output_dynamic_alloc force dynamic memory allocation for
 * output vars which are used as CallbackCaller input when calling the
 * compile() function
 *
 * \param force_output_use_user_specified_memory force the output tensors to
 * be written directly into memory specified by the user
 *
 * \param no_profiling_on_shape_change do not re-profile to select the best
 * impl algo when the input shape changes (use the previous algo)
 *
 * \param jit_level execute supported operators with JIT (supports MLIR,
 * NVRTC). Can only be used on Nvidia GPUs. This value indicates the JIT level:
 * 1 for basic elemwise opr;
 * 2 for including the reduce operator
 *
 * \param comp_node_seq_record_level optimize inference performance by
 * recording the kernel tasks in the first run; afterwards inference only
 * needs to execute the recorded tasks.
 * level = 0 means normal inference,
 * level = 1 means use record inference,
 * level = 2 means record inference and free the extra memory
 *
 * \param graph_opt_level optimization level:
 * 0: disable
 * 1: level-1: inplace arith transformations during graph
 * construction
 * 2: level-2: level-1, plus global optimization before graph
 * compiling
 * 3: also enable JIT
 * <0: corresponding level, with result check for debug
 *
 * \param async_exec_level exec: dispatch on separate threads for different
 * comp_node.
 * 0: do not perform async dispatch
 * 1: dispatch async if there is more than one comp node with limited queue
 * mask 0b10: async if there are multiple comp nodes
 * mask 0b100: always async
 */
typedef struct {
    int weight_preprocess;
    int fuse_preprocess;
    int fake_next_exec;
    int var_sanity_check_first_run;
    int const_shape;
    int force_dynamic_alloc;
    int force_output_dynamic_alloc;
    int force_output_use_user_specified_memory;
    int no_profiling_on_shape_change;
    int jit_level;
    int comp_node_seq_record_level;
    int graph_opt_level;
    int async_exec_level;

    //! layout transform options
    int enable_nchw44;
    int enable_nchw44_dot;
    int enable_nchw88;
    int enable_nhwcd4;
    int enable_nchw4;
    int enable_nchw32;
    int enable_nchw64;
} LiteOptions;

//! define a default Options
extern LITE_API const LiteOptions default_option;
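
/*
 * Usage sketch (illustrative only, not part of the API): start from
 * default_option and enable a few optimizations before building a config.
 *
 * \code
 *     LiteOptions options = default_option;
 *     options.weight_preprocess = 1;  // preprocess const weights once
 *     options.enable_nchw44 = 1;      // request the NCHW44 layout transform
 * \endcode
 */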

/*!
 * \brief Configuration for loading and compiling the graph
 *
 * \param bare_model_cryption_name the name of the cryption method of a bare
 * model; a bare model has no json info packed inside
 *
 * \param has_compression flag of whether the model is compressed; the
 * compression method will be read from the model
 *
 * \param auto_optimize_inference lite will detect the device information and
 * set the options heuristically
 *
 * \param discrete_input_name configures which input is composed of multiple
 * discrete tensors
 */
typedef struct LiteConfig {
    int has_compression;
    int device_id;
    LiteDeviceType device_type;
    LiteBackend backend;
    const char* bare_model_cryption_name;
    LiteOptions options;
    int auto_optimize_inference;
    const char* discrete_input_name;
} LiteConfig;

//! get default config
LITE_API LiteConfig* default_config();
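
/*
 * Usage sketch (illustrative only): build a config from the defaults and
 * target a specific device. LITE_CPU is assumed here to be a member of
 * LiteDeviceType declared in tensor_c.h.
 *
 * \code
 *     LiteConfig config = *default_config();
 *     config.device_type = LITE_CPU;
 *     config.options.weight_preprocess = 1;
 * \endcode
 */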

/*!
 * \brief Extra configuration for a network
 *
 * \param disable_configure_by_model_info disable the configuration dumped
 * with the model; if set to true, none of the configuration in the model
 * will be applied, and users should configure the network themselves
 */
typedef struct LiteExtraConfig {
    int disable_configure_by_model_info;
} LiteExtraConfig;

/*!
 * \brief configuration of a network input or output item
 */
typedef struct {
    //! the tensor name in the graph corresponding to the IO
    const char* name;

    //! Used to mark where the input tensor comes from and where the output
    //! tensor is copied to. If is_host is true, the input comes from the host
    //! and the output is copied to the host; otherwise the device. Sometimes
    //! the input comes from the device and the output need not be copied to
    //! the host. Default is true.
    int is_host;

    //! The IO type, which can be SHAPE or VALUE. When SHAPE is set, the input
    //! or output tensor value is invalid and only the shape will be set.
    //! Default is VALUE.
    LiteIOType io_type;

    //! The layout configured by the user. If another layout is set before
    //! forward (or gotten after forward), this layout is bypassed; if no
    //! other layout is set before forward, this layout takes effect. If this
    //! layout is not set, the model forwards with its original layout. For
    //! outputs, it is used for checking.
    LiteLayout config_layout;
} LiteIO;

//! define a default IO
extern LITE_API const LiteIO default_io;

/*!
 * \brief the input and output information used when loading the network;
 * the NetworkIO will remain in the network until the network is destroyed
 */
typedef struct {
    LiteIO* inputs;
    LiteIO* outputs;
    size_t input_size;   //!< the number of IOs in inputs
    size_t output_size;  //!< the number of IOs in outputs
} LiteNetworkIO;

//! get default NetworkIO
LITE_API LiteNetworkIO* default_network_io();
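
/*
 * Usage sketch (illustrative only): declare one host-side input named
 * "data" (a hypothetical tensor name) on top of the defaults.
 *
 * \code
 *     LiteIO input = default_io;
 *     input.name = "data";
 *     input.is_host = 1;
 *
 *     LiteNetworkIO io = *default_network_io();
 *     io.inputs = &input;
 *     io.input_size = 1;
 * \endcode
 */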

/*!
 * \brief user-implemented allocator functions
 */
//! allocate memory of the given size on the given device with the given
//! alignment
typedef void* (*LiteAllocate)(
        LiteDeviceType device_type, int device_id, size_t size, size_t align);
//! free the memory pointed to by ptr on the given device
typedef void (*LiteFree)(LiteDeviceType device_type, int device_id, void* ptr);
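
/*
 * Usage sketch (illustrative only): a minimal host-memory allocator pair
 * that ignores the device fields; a real implementation would dispatch on
 * device_type. Assumes C11 aligned_alloc, whose size argument must be a
 * multiple of the alignment.
 *
 * \code
 *     #include <stdlib.h>
 *
 *     static void* my_alloc(
 *             LiteDeviceType device_type, int device_id, size_t size,
 *             size_t align) {
 *         (void)device_type;
 *         (void)device_id;
 *         size_t rounded = (size + align - 1) / align * align;
 *         return aligned_alloc(align, rounded);
 *     }
 *
 *     static void my_free(
 *             LiteDeviceType device_type, int device_id, void* ptr) {
 *         (void)device_type;
 *         (void)device_id;
 *         free(ptr);
 *     }
 * \endcode
 *
 * Pass the pair to LITE_set_memory_allocator (declared below) to install it.
 */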

/*!
 * \brief the thread affinity callback type
 * \param thread_id a number from 0 to (nr_threads - 1); the thread with
 * thread_id of (nr_threads - 1) is the main worker thread
 */
typedef int (*LiteThreadAffinityCallback)(int thread_id);

typedef int (*LiteAsyncCallback)();

typedef int (*LiteAsyncCallbackWithData)(void* user_data);

/*!
 * \brief the start/finish callback function types; the callback receives the
 * user-configured IOs, the corresponding real input or output tensors, and
 * the number of IOs
 */

typedef int (*LiteStartCallback)(
        const LiteIO* inputs, const LiteTensor* input_tensors, size_t size);

typedef int (*LiteStartCallbackWithData)(
        const LiteIO* inputs, const LiteTensor* input_tensors, size_t size,
        void* user_data);

typedef int (*LiteFinishCallback)(
        const LiteIO* outputs, const LiteTensor* output_tensors, size_t size);

typedef int (*LiteFinishCallbackWithData)(
        const LiteIO* outputs, const LiteTensor* output_tensors, size_t size,
        void* user_data);

/*!
 * \brief The network is constructed from a model; it implements model
 * loading, initialization and forwarding, and can display some model
 * information
 */
typedef void* LiteNetwork;

/**
 * \brief Create a lite Network object with the default config and networkIO.
 * \param[out] network The network pointer
 * \return int if the return is not zero, an error happened; the error message
 * can be got by LITE_get_last_error
 */
LITE_API int LITE_make_default_network(LiteNetwork* network);

/**
 * \brief Create a lite Network object from the given config and networkIO.
 * \param[in] config The configuration to create the network
 * \param[in] network_io The IO configuration to create the network
 * \param[out] network The network pointer
 */
LITE_API int LITE_make_network(
        LiteNetwork* network, const LiteConfig config, const LiteNetworkIO network_io);

/**
 * \brief Create a lite Network object from the given config.
 * \param[in] config The configuration to create the network
 * \param[out] network The network pointer
 */
LITE_API int LITE_make_network_config(LiteNetwork* network, const LiteConfig config);

/**
 * \brief load the model into the network from memory
 * \param[in] model_mem The model in memory
 * \param[in] size The size of the model memory
 * \param[out] network The network to load the model into
 */
LITE_API int LITE_load_model_from_mem(
        LiteNetwork network, void* model_mem, size_t size);

/**
 * \brief load the model into the network from the given path
 * \param[in] model_path The model path
 * \param[out] network The network to load the model into
 */
LITE_API int LITE_load_model_from_path(LiteNetwork network, const char* model_path);

/**
 * \brief load a new network which will share weights with the source network
 * \param[in] src_network The source network pointer
 * \param[out] dst_network The new network pointer that shares the weights
 */
LITE_API int LITE_shared_weight_with_network(
        LiteNetwork dst_network, const LiteNetwork src_network);

/**
 * \brief Destroy a lite network object.
 * \param[in] network The network pointer
 * \return int if the return is not zero, an error happened; the error message
 * can be got by LITE_get_last_error
 */
LITE_API int LITE_destroy_network(LiteNetwork network);

/**
 * \brief forward the network with the filled input data and fill the output
 * data into the output tensor
 * \param[in] network The loaded model
 */
LITE_API int LITE_forward(const LiteNetwork network);

/**
 * \brief wait until the forward finishes in sync mode
 * \param[in] network The loaded model
 */
LITE_API int LITE_wait(const LiteNetwork network);
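
/*
 * Usage sketch (illustrative only): the typical synchronous inference loop.
 * "model.mge" is a hypothetical model path.
 *
 * \code
 *     LiteNetwork network;
 *     int err = LITE_make_default_network(&network);
 *     if (!err) err = LITE_load_model_from_path(network, "model.mge");
 *     // ... fill the input tensors obtained via LITE_get_io_tensor ...
 *     if (!err) err = LITE_forward(network);
 *     if (!err) err = LITE_wait(network);
 *     if (err) {
 *         // consult LITE_get_last_error for details
 *     }
 *     // ... read the output tensors, then release the network ...
 *     LITE_destroy_network(network);
 * \endcode
 */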

/**
 * \brief get the network input or output tensor, whose layout is got from
 * the model
 * \param[in] network The loaded model
 * \param[in] io_name The input or output name
 * \param[in] phase The tensor phase
 * \param[out] tensor The IO tensor got from the network
 */
LITE_API int LITE_get_io_tensor(
        LiteNetwork network, const char* io_name, LiteTensorPhase phase,
        LiteTensor* tensor);
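
/*
 * Usage sketch (illustrative only): fetch an input tensor and fill it.
 * The tensor name "data" is hypothetical; LITE_INPUT and
 * LITE_get_tensor_memory are assumed to be provided by tensor_c.h.
 *
 * \code
 *     LiteTensor input;
 *     if (LITE_get_io_tensor(network, "data", LITE_INPUT, &input) == 0) {
 *         void* ptr = NULL;
 *         LITE_get_tensor_memory(input, &ptr);
 *         // memcpy the user input data into ptr here
 *     }
 * \endcode
 */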

/**
 * \brief get the n'th tensor of a network input which is named io_name and
 * is composed of multiple discrete tensors, each of layout (1, c, h, w)
 * \param[in] network The loaded model
 * \param[in] io_name The input name
 * \param[in] n_idx The index of the tensor
 * \param[in] phase The tensor phase
 * \param[out] tensor The IO tensor got from the network
 */
LITE_API int LITE_get_discrete_tensor(
        LiteNetwork network, const char* io_name, size_t n_idx, LiteTensorPhase phase,
        LiteTensor* tensor);

/**
 * \brief get the input tensor name at the given index in the loaded model
 * \param[in] network The loaded model
 * \param[in] index The index of the input tensor
 * \param[out] name The input tensor name
 */
LITE_API int LITE_get_input_name(
        const LiteNetwork network, size_t index, const char** name);

/**
 * \brief get the output tensor name at the given index in the loaded model
 * \param[in] network The loaded model
 * \param[in] index The index of the output tensor
 * \param[out] name The output tensor name
 */
LITE_API int LITE_get_output_name(
        const LiteNetwork network, size_t index, const char** name);

/**
 * \brief get all the input tensor names in the order of the loaded model
 * \param[in] network The loaded model
 * \param[out] size The number of input tensors
 * \param[out] name The input tensor names
 */
LITE_API int LITE_get_all_input_name(
        const LiteNetwork network, size_t* size, const char** name);

/**
 * \brief get all the output tensor names in the order of the loaded model
 * \param[in] network The loaded model
 * \param[out] size The number of output tensors
 * \param[out] name The output tensor names
 */
LITE_API int LITE_get_all_output_name(
        const LiteNetwork network, size_t* size, const char** name);
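
/*
 * Usage sketch (illustrative only): enumerate the input names. The two-step
 * query (first call with a NULL name array to obtain the count) is an
 * assumption about this API; assumes <stdio.h> and <stdlib.h>.
 *
 * \code
 *     size_t n = 0;
 *     LITE_get_all_input_name(network, &n, NULL);
 *     const char** names = (const char**)malloc(n * sizeof(const char*));
 *     LITE_get_all_input_name(network, &n, names);
 *     for (size_t i = 0; i < n; ++i)
 *         printf("input[%zu] = %s\n", i, names[i]);
 *     free(names);
 * \endcode
 */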

/**
 * \brief get whether the model is running in cpu inplace mode
 * \param[in] network The loaded model
 * \param[out] is_cpu_inplace_mode whether it is in cpu inplace mode
 */
LITE_API int LITE_is_cpu_inplace_mode(
        const LiteNetwork network, int* is_cpu_inplace_mode);

/**
 * \brief get the number of threads the network will run with
 * \param[in] network The loaded model
 * \param[out] nr_threads the number of threads the network will run with
 */
LITE_API int LITE_get_cpu_threads_number(const LiteNetwork network, size_t* nr_threads);

/**
 * \brief get the device id the network will run with
 * \param[in] network The loaded model
 * \param[out] device_id the device id the network will run with
 */
LITE_API int LITE_get_device_id(const LiteNetwork network, int* device_id);

/**
 * \brief get the stream id the network will run with
 * \param[in] network The loaded model
 * \param[out] stream_id the stream id the network will run with
 */
LITE_API int LITE_get_stream_id(const LiteNetwork network, int* stream_id);

/**
 * \brief get the device type the network will run with
 * \param[in] network The loaded model
 * \param[out] device_type the device type the network will run with
 */
LITE_API int LITE_get_device_type(
        const LiteNetwork network, LiteDeviceType* device_type);

/**
 * \brief get the extra info of the model, in json format
 * \param[in] network The loaded model
 * \param[out] info the json format memory
 * \param[out] info_size the json format memory size
 */
LITE_API int LITE_get_model_extra_info(
        const LiteNetwork network, const char** info, int* info_size);

/**
 * \brief Set cpu inplace mode as the default when the device is CPU. On
 * devices with low computation power or a single core, this mode gives good
 * performance.
 * \param[in] network The loaded model
 */
LITE_API int LITE_set_cpu_inplace_mode(LiteNetwork network);

/**
 * \brief When the device is CPU, configure the to-be-loaded model to run in
 * multi-thread mode with the given number of threads.
 * \param[in] network The loaded model
 * \param[in] nr_threads The number of threads
 */
LITE_API int LITE_set_cpu_threads_number(LiteNetwork network, size_t nr_threads);
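
/*
 * Usage sketch (illustrative only): CPU runtime tuning is applied to the
 * model that is loaded afterwards. "model.mge" is a hypothetical path.
 *
 * \code
 *     LiteNetwork network;
 *     LITE_make_default_network(&network);
 *     LITE_set_cpu_threads_number(network, 4);  // 4 worker threads
 *     LITE_load_model_from_path(network, "model.mge");
 * \endcode
 */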

/**
 * \brief set device id, default device id = 0
 * \param[in] network The loaded model
 * \param[in] device_id The device id to be set
 */
LITE_API int LITE_set_device_id(LiteNetwork network, int device_id);

/**
 * \brief set stream id, default stream id = 0
 * \param[in] network The loaded model
 * \param[in] stream_id The stream id to be set
 */
LITE_API int LITE_set_stream_id(LiteNetwork network, int stream_id);

/**
 * \brief enable tensorrt
 * \param[in] network The loaded model
 */
LITE_API int LITE_use_tensorrt(LiteNetwork network);

/**
 * \brief set the operator algorithm selection strategy in the network
 * \param[in] network The loaded model
 * \param[in] strategy The operator algorithm selection strategy
 */
LITE_API int LITE_set_network_algo_policy(
        LiteNetwork network, LiteAlgoSelectStrategy strategy);

/**
 * \brief set the fastrun configuration for operator algorithm selection
 * \param[in] network The loaded model
 * \param[in] shared_batch_size the batch size used by fastrun; a non-zero
 * value means fastrun uses this batch size regardless of the batch size of
 * the model, and zero means fastrun uses the batch size of the model
 * \param[in] binary_equal_between_batch if the content of each input batch
 * is binary equal, whether the content of each output batch is promised to
 * be equal
 */
LITE_API int LITE_set_network_algo_fastrun_config(
        LiteNetwork network, unsigned int shared_batch_size,
        int binary_equal_between_batch);
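
/*
 * Usage sketch (illustrative only): enable profiling-based (fastrun)
 * algorithm selection. LITE_ALGO_PROFILE is assumed here to be a member of
 * LiteAlgoSelectStrategy declared in tensor_c.h.
 *
 * \code
 *     LITE_set_network_algo_policy(network, LITE_ALGO_PROFILE);
 *     // profile with batch size 1; promise binary-equal outputs for
 *     // binary-equal input batches
 *     LITE_set_network_algo_fastrun_config(network, 1, 1);
 * \endcode
 */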

/**
 * \brief set the workspace limit for operators with multiple algorithms;
 * setting a workspace limit can save memory but may influence performance
 * \param[in] network The loaded model
 * \param[in] workspace_limit The operator algorithm workspace limit
 */
LITE_API int LITE_set_network_algo_workspace_limit(
        LiteNetwork network, size_t workspace_limit);

/**
 * \brief set the network to forward in async mode and set the async callback
 * function
 * \param[in] network The loaded model
 * \param[in] async_callback the callback called when the network finishes
 * forwarding
 */
LITE_API int LITE_set_async_callback(
        LiteNetwork network, const LiteAsyncCallback async_callback);

/**
 * \brief set the network to forward in async mode and set the async callback
 * function
 * \param[in] network The loaded model
 * \param[in] async_callback the callback called when the network finishes
 * forwarding
 * \param[in] user_data user-defined data to be used at the forward-finish
 * stage
 */
LITE_API int LITE_set_async_callback_with_userdata(
        LiteNetwork network, const LiteAsyncCallbackWithData async_callback,
        void* user_data);
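
/*
 * Usage sketch (illustrative only): run the network asynchronously and get
 * notified on completion; the callback follows LiteAsyncCallbackWithData
 * above.
 *
 * \code
 *     static int on_finish(void* user_data) {
 *         int* done = (int*)user_data;
 *         *done = 1;
 *         return 0;
 *     }
 *
 *     int done = 0;
 *     LITE_set_async_callback_with_userdata(network, on_finish, &done);
 *     LITE_forward(network);
 *     // ... do other work; synchronize on `done` as appropriate ...
 * \endcode
 */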

/**
 * \brief set the start callback function, which will be executed before
 * forward; this can be used to check the network input or dump the model
 * inputs for debugging
 * \param[in] network The loaded model
 * \param[in] start_callback the callback called when the network starts
 * forwarding
 */
LITE_API int LITE_set_start_callback(
        LiteNetwork network, const LiteStartCallback start_callback);

/**
 * \brief set the start callback function, which will be executed before
 * forward; this can be used to check the network input or dump the model
 * inputs for debugging
 * \param[in] network The loaded model
 * \param[in] start_callback the callback called when the network starts
 * forwarding
 * \param[in] user_data user-defined data to be used at the forward-start
 * stage
 */
LITE_API int LITE_set_start_callback_with_userdata(
        LiteNetwork network, const LiteStartCallbackWithData start_callback,
        void* user_data);

/**
 * \brief set the finish callback function, which will be executed after
 * forward; this can be used to dump the model outputs for debugging
 * \param[in] network The loaded model
 * \param[in] finish_callback the callback called when the network finishes
 * forwarding
 */
LITE_API int LITE_set_finish_callback(
        LiteNetwork network, const LiteFinishCallback finish_callback);

/**
 * \brief set the finish callback function, which will be executed after
 * forward; this can be used to dump the model outputs for debugging
 * \param[in] network The loaded model
 * \param[in] finish_callback the callback called when the network finishes
 * forwarding
 * \param[in] user_data user-defined data to be used at the finish stage
 */
LITE_API int LITE_set_finish_callback_with_userdata(
        LiteNetwork network, const LiteFinishCallbackWithData finish_callback,
        void* user_data);
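
/*
 * Usage sketch (illustrative only): log the input names before each
 * forward; assumes <stdio.h>.
 *
 * \code
 *     static int log_inputs(
 *             const LiteIO* inputs, const LiteTensor* input_tensors,
 *             size_t size) {
 *         (void)input_tensors;
 *         for (size_t i = 0; i < size; ++i)
 *             printf("input %zu: %s\n", i, inputs[i].name);
 *         return 0;
 *     }
 *
 *     LITE_set_start_callback(network, log_inputs);
 * \endcode
 */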

/**
 * \brief set the thread affinity callback
 * \param[in] network The loaded model
 * \param[in] thread_affinity_callback the callback invoked with each worker
 * thread id
 */
LITE_API int LITE_set_runtime_thread_affinity(
        LiteNetwork network, const LiteThreadAffinityCallback thread_affinity_callback);
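
/*
 * Usage sketch (illustrative only): pin worker threads to cores; the actual
 * core-binding call is platform specific and elided here.
 *
 * \code
 *     static int bind_core(int thread_id) {
 *         // e.g. bind the calling thread to core `thread_id` via
 *         // pthread_setaffinity_np or an OS-specific equivalent
 *         return 0;
 *     }
 *
 *     LITE_set_runtime_thread_affinity(network, bind_core);
 * \endcode
 */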

/**
 * \brief set the network memory allocator; the allocator is defined by the
 * user
 * \param[in] network The loaded model
 * \param[in] allocate_fun The allocate function of the user-defined allocator
 * \param[in] free_fun The free function of the user-defined allocator
 */
LITE_API int LITE_set_memory_allocator(
        LiteNetwork network, const LiteAllocate allocate_fun, const LiteFree free_fun);
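
/*
 * Usage sketch (illustrative only): install the my_alloc/my_free pair from
 * the allocator sketch above.
 *
 * \code
 *     LITE_set_memory_allocator(network, my_alloc, my_free);
 * \endcode
 */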

/**
 * \brief make the dst_network share the runtime memory with the src_network
 * \param[in] src_network The source network
 * \param[in] dst_network The dst network to share memory with src_network
 */
LITE_API int LITE_share_runtime_memroy(
        LiteNetwork src_network, LiteNetwork dst_network);

/**
 * \brief enable profiling of the network; a json format file will be
 * generated
 * \param[in] network The loaded model
 * \param[in] profile_json_file_path The profile result file path
 */
LITE_API int LITE_enable_profile_performance(
        LiteNetwork network, const char* profile_json_file_path);

/**
 * \brief Dump the input/output values of all internal variables to an output
 * file, in text format
 * \param[in] network The loaded model
 * \param[in] io_txt_out_file The dumped txt file name
 */
LITE_API int LITE_enable_io_txt_dump(LiteNetwork network, const char* io_txt_out_file);

/**
 * \brief Dump the input/output values of all internal variables to an output
 * directory, in binary format
 * \param[in] network The loaded model
 * \param[in] io_bin_out_dir The dumped bin file directory
 */
LITE_API int LITE_enable_io_bin_dump(LiteNetwork network, const char* io_bin_out_dir);

/**
 * \brief get the static peak memory info shown by graph visualization
 * \param[in] network The loaded model
 * \param[in] log_dir The directory to dump the json file to
 * \return int if the return is not zero, an error happened; the error message
 * can be got by LITE_get_last_error
 */
LITE_API int LITE_get_static_memory_alloc_info(
        LiteNetwork network, const char* log_dir);

/**
 * \brief enable the global layout transform optimization
 * \param[in] network The loaded model
 * \return int if the return is not zero, an error happened; the error message
 * can be got by LITE_get_last_error
 */
LITE_API int LITE_enable_global_layout_transform(LiteNetwork network);

/**
 * \brief dump the model after the global layout transform optimization
 * \param[in] network The loaded model
 * \param[in] dump_file_path The path to dump the transformed model to
 * \return int if the return is not zero, an error happened; the error message
 * can be got by LITE_get_last_error
 */
LITE_API int LITE_dump_layout_transform_model(
        LiteNetwork network, const char* dump_file_path);
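
/*
 * Usage sketch (illustrative only): search for a better global layout and
 * persist the transformed model; the assumption here is that the transform
 * is enabled before the model is loaded. Paths are hypothetical.
 *
 * \code
 *     LITE_enable_global_layout_transform(network);
 *     LITE_load_model_from_path(network, "model.mge");
 *     LITE_dump_layout_transform_model(network, "model_opt.mge");
 * \endcode
 */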

/**
 * \brief get the model io information before the model is loaded, by model
 * path.
 * \param[in] model_path The model file path
 * \param[in] config The model config for loading
 * \param[out] ios The model io information
 * \return int if the return is not zero, an error happened; the error message
 * can be got by LITE_get_last_error
 */
LITE_API int LITE_get_model_io_info_by_path(
        const char* model_path, const LiteConfig config, LiteNetworkIO* ios);

/**
 * \brief get the model io information before the model is loaded, by model
 * memory.
 * \param[in] model_mem The model memory ptr
 * \param[in] size The model memory length
 * \param[in] config The model config for loading
 * \param[out] ios The model io information
 * \return int if the return is not zero, an error happened; the error message
 * can be got by LITE_get_last_error
 */
LITE_API int LITE_get_model_io_info_by_memory(
        const void* model_mem, size_t size, const LiteConfig config,
        LiteNetworkIO* ios);
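
/*
 * Usage sketch (illustrative only): inspect a model's IOs without loading
 * it; assumes <stdio.h>. "model.mge" is a hypothetical path.
 *
 * \code
 *     LiteNetworkIO ios;
 *     if (LITE_get_model_io_info_by_path(
 *                 "model.mge", *default_config(), &ios) == 0) {
 *         for (size_t i = 0; i < ios.input_size; ++i)
 *             printf("input: %s\n", ios.inputs[i].name);
 *     }
 * \endcode
 */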

/**
 * \brief apply the extra configuration to the network
 * \param[in] network The loaded model
 * \param[in] extra_config the extra configuration to set into the network
 */
LITE_API int LITE_extra_configure(LiteNetwork network, LiteExtraConfig extra_config);

#ifdef __cplusplus
}
#endif
#endif
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}