From c1b7f6ecdefd44a20b8e27fa1442c51087a0135a Mon Sep 17 00:00:00 2001 From: yanghaoran Date: Thu, 2 Dec 2021 11:38:12 +0800 Subject: [PATCH] upgrade Ascend package 2 Dec 21 --- inc/external/acl/acl_base.h | 9 +- inc/external/acl/acl_tdt_queue.h | 46 +- inc/external/acl/ops/acl_dvpp.h | 18 +- inc/external/ge/ge_api.h | 16 +- inc/external/ge/ge_api_error_codes.h | 31 +- inc/external/ge/ge_api_types.h | 5 +- inc/external/ge/ge_error_codes.h | 65 +- inc/external/ge/ge_ir_build.h | 13 +- inc/framework/common/debug/ge_log.h | 16 +- inc/framework/common/debug/log.h | 82 +- inc/framework/common/file_constant_util.h | 60 + inc/framework/common/fmk_error_codes.h | 18 +- inc/framework/common/ge_format_util.h | 2 +- inc/framework/common/ge_inner_error_codes.h | 5 +- inc/framework/common/ge_types.h | 45 +- inc/framework/common/helper/model_helper.h | 61 +- inc/framework/common/op/attr_value_util.h | 10 +- inc/framework/common/op/ge_op_utils.h | 18 +- inc/framework/common/op/op_parser_util.h | 42 +- inc/framework/common/string_util.h | 9 +- inc/framework/common/taskdown_common.h | 2 +- inc/framework/common/types.h | 3 + inc/framework/common/util.h | 10 +- inc/framework/generator/ge_generator.h | 12 +- inc/framework/omg/parser/model_parser.h | 24 +- inc/framework/omg/parser/op_parser.h | 8 +- inc/framework/omg/parser/parser_api.h | 1 - inc/framework/omg/parser/parser_factory.h | 4 +- inc/framework/omg/parser/parser_inner_ctx.h | 1 - inc/framework/omg/parser/weights_parser.h | 3 +- metadef | 2 +- third_party/fwkacllib/inc/cce/aicpu_engine.h | 63 - .../fwkacllib/inc/cce/aicpu_engine_struct.h | 56 - third_party/fwkacllib/inc/cce/blas_struct.h | 31 - third_party/fwkacllib/inc/cce/cce.h | 101 - third_party/fwkacllib/inc/cce/cce_def.hpp | 152 - third_party/fwkacllib/inc/cce/common/attr_list.hpp | 82 - third_party/fwkacllib/inc/cce/common/catch.hpp | 95 - third_party/fwkacllib/inc/cce/compiler_stub.h | 36 - third_party/fwkacllib/inc/cce/customize.h | 60 - third_party/fwkacllib/inc/cce/dnn.h | 23 - third_party/fwkacllib/inc/cce/dnn_base.h | 676 --- third_party/fwkacllib/inc/cce/dnn_base_def.hpp | 994 ---- third_party/fwkacllib/inc/cce/dnn_op.h | 4838 -------------------- third_party/fwkacllib/inc/cce/dnn_struct.hpp | 23 - third_party/fwkacllib/inc/cce/dnn_struct_base.hpp | 894 ---- third_party/fwkacllib/inc/cce/fwk_adpt_struct.h | 155 - third_party/fwkacllib/inc/cce/l2fusion_struct.hpp | 56 - .../fwkacllib/inc/cce/optimizer/fusion_engine.h | 65 - third_party/fwkacllib/inc/cce/taskdown_api.h | 54 - third_party/fwkacllib/inc/cce/taskdown_common.hpp | 108 - third_party/fwkacllib/inc/ops/aipp.h | 2 +- third_party/fwkacllib/inc/ops/all_ops.h | 3 +- third_party/fwkacllib/inc/ops/array_ops.h | 21 +- third_party/fwkacllib/inc/ops/audio_ops.h | 2 +- third_party/fwkacllib/inc/ops/avg_pool_1d_ops.h | 2 +- third_party/fwkacllib/inc/ops/batch_ops.h | 2 +- third_party/fwkacllib/inc/ops/bitwise_ops.h | 2 +- third_party/fwkacllib/inc/ops/boosted_trees_ops.h | 2 +- .../fwkacllib/inc/ops/candidate_sampling_ops.h | 2 +- third_party/fwkacllib/inc/ops/case_condition_ops.h | 2 +- third_party/fwkacllib/inc/ops/cluster.h | 2 +- third_party/fwkacllib/inc/ops/condtake_ops.h | 2 +- third_party/fwkacllib/inc/ops/control_flow_ops.h | 2 +- .../fwkacllib/inc/ops/coordinates_1d_to_2d_ops.h | 2 +- third_party/fwkacllib/inc/ops/correlation.h | 2 +- third_party/fwkacllib/inc/ops/ctc_ops.h | 2 +- third_party/fwkacllib/inc/ops/data_flow_ops.h | 21 +- third_party/fwkacllib/inc/ops/deep_md.h | 59 + .../fwkacllib/inc/ops/elewise_calculation_ops.h | 2 
+- third_party/fwkacllib/inc/ops/functional_ops.h | 2 +- third_party/fwkacllib/inc/ops/get_data_ops.h | 2 +- third_party/fwkacllib/inc/ops/hcom_ops.h | 2 +- third_party/fwkacllib/inc/ops/hvd_ops.h | 2 +- third_party/fwkacllib/inc/ops/image_ops.h | 84 +- third_party/fwkacllib/inc/ops/index_to_addr_ops.h | 2 +- third_party/fwkacllib/inc/ops/internal_ops.h | 2 +- third_party/fwkacllib/inc/ops/linalg_ops.h | 2 +- third_party/fwkacllib/inc/ops/list_ops.h | 2 +- third_party/fwkacllib/inc/ops/logging_ops.h | 2 +- third_party/fwkacllib/inc/ops/lookup_ops.h | 2 +- third_party/fwkacllib/inc/ops/math_ops.h | 4 +- .../fwkacllib/inc/ops/matrix_calculation_ops.h | 2 +- third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h | 10 +- third_party/fwkacllib/inc/ops/nn_calculation_ops.h | 6 +- third_party/fwkacllib/inc/ops/nn_detect_ops.h | 31 +- third_party/fwkacllib/inc/ops/nn_norm_ops.h | 2 +- third_party/fwkacllib/inc/ops/nn_ops.h | 2 +- third_party/fwkacllib/inc/ops/nn_pooling_ops.h | 2 +- third_party/fwkacllib/inc/ops/nn_training_ops.h | 2 +- third_party/fwkacllib/inc/ops/no_op.h | 2 +- third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h | 30 +- third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h | 2 +- third_party/fwkacllib/inc/ops/ocr_ops.h | 2 +- third_party/fwkacllib/inc/ops/outfeed_ops.h | 2 +- third_party/fwkacllib/inc/ops/pad_ops.h | 2 +- third_party/fwkacllib/inc/ops/parsing_ops.h | 2 +- third_party/fwkacllib/inc/ops/quantize_ops.h | 2 +- third_party/fwkacllib/inc/ops/ragged_array_ops.h | 2 +- .../fwkacllib/inc/ops/ragged_conversion_ops.h | 2 +- third_party/fwkacllib/inc/ops/ragged_math_ops.h | 2 +- third_party/fwkacllib/inc/ops/random_ops.h | 2 +- third_party/fwkacllib/inc/ops/reduce_ops.h | 2 +- .../fwkacllib/inc/ops/resource_variable_ops.h | 2 +- third_party/fwkacllib/inc/ops/rnn.h | 2 +- third_party/fwkacllib/inc/ops/rpn_ops.h | 2 +- third_party/fwkacllib/inc/ops/save_ops.h | 2 +- third_party/fwkacllib/inc/ops/sdca_ops.h | 2 +- third_party/fwkacllib/inc/ops/selection_ops.h | 2 +- third_party/fwkacllib/inc/ops/set_ops.h | 2 +- third_party/fwkacllib/inc/ops/slice_write_ops.h | 2 +- third_party/fwkacllib/inc/ops/sparse_ops.h | 2 +- third_party/fwkacllib/inc/ops/spectral_ops.h | 2 +- .../fwkacllib/inc/ops/split_combination_ops.h | 2 +- third_party/fwkacllib/inc/ops/state_ops.h | 2 +- .../fwkacllib/inc/ops/stateful_random_ops.h | 2 +- .../fwkacllib/inc/ops/stateless_random_ops.h | 2 +- third_party/fwkacllib/inc/ops/string_ops.h | 2 +- third_party/fwkacllib/inc/ops/swap_co_ops.h | 2 +- .../fwkacllib/inc/ops/target_crop_and_resize.h | 2 +- third_party/fwkacllib/inc/ops/transformation_ops.h | 2 +- third_party/fwkacllib/inc/ops/vector_search.h | 2 +- .../fwkacllib/inc/ops/warp_perspective_ops.h | 2 +- .../fwkacllib/inc/register/op_kernel_registry.h | 46 - third_party/fwkacllib/inc/register/op_registry.h | 97 - third_party/fwkacllib/inc/runtime/base.h | 96 +- third_party/fwkacllib/inc/runtime/config.h | 26 +- third_party/fwkacllib/inc/runtime/context.h | 68 +- third_party/fwkacllib/inc/runtime/dev.h | 79 +- third_party/fwkacllib/inc/runtime/dvfsprofile.h | 20 +- third_party/fwkacllib/inc/runtime/event.h | 76 +- third_party/fwkacllib/inc/runtime/kernel.h | 126 +- third_party/fwkacllib/inc/runtime/mem.h | 104 +- third_party/fwkacllib/inc/runtime/rt.h | 18 +- third_party/fwkacllib/inc/runtime/rt_dfx.h | 17 +- third_party/fwkacllib/inc/runtime/rt_ffts.h | 30 +- third_party/fwkacllib/inc/runtime/rt_ffts_plus.h | 23 +- .../fwkacllib/inc/runtime/rt_ffts_plus_define.h | 21 +- 
third_party/fwkacllib/inc/runtime/rt_mem_queue.h | 124 +- third_party/fwkacllib/inc/runtime/rt_model.h | 90 +- third_party/fwkacllib/inc/runtime/rt_stars.h | 29 +- .../fwkacllib/inc/runtime/rt_stars_define.h | 21 +- third_party/fwkacllib/inc/runtime/stream.h | 80 +- third_party/fwkacllib/inc/soft_dp/ExternalSoftDp.h | 58 - .../inc/toolchain/adx_datadump_callback.h | 18 +- third_party/fwkacllib/inc/toolchain/prof_acl_api.h | 100 +- .../fwkacllib/inc/toolchain/prof_callback.h | 2 +- .../fwkacllib/inc/toolchain/tuning_tool/tune_api.h | 94 - 148 files changed, 1181 insertions(+), 9878 deletions(-) create mode 100644 inc/framework/common/file_constant_util.h delete mode 100644 third_party/fwkacllib/inc/cce/aicpu_engine.h delete mode 100644 third_party/fwkacllib/inc/cce/aicpu_engine_struct.h delete mode 100644 third_party/fwkacllib/inc/cce/blas_struct.h delete mode 100644 third_party/fwkacllib/inc/cce/cce.h delete mode 100644 third_party/fwkacllib/inc/cce/cce_def.hpp delete mode 100644 third_party/fwkacllib/inc/cce/common/attr_list.hpp delete mode 100644 third_party/fwkacllib/inc/cce/common/catch.hpp delete mode 100644 third_party/fwkacllib/inc/cce/compiler_stub.h delete mode 100644 third_party/fwkacllib/inc/cce/customize.h delete mode 100644 third_party/fwkacllib/inc/cce/dnn.h delete mode 100644 third_party/fwkacllib/inc/cce/dnn_base.h delete mode 100644 third_party/fwkacllib/inc/cce/dnn_base_def.hpp delete mode 100644 third_party/fwkacllib/inc/cce/dnn_op.h delete mode 100644 third_party/fwkacllib/inc/cce/dnn_struct.hpp delete mode 100644 third_party/fwkacllib/inc/cce/dnn_struct_base.hpp delete mode 100644 third_party/fwkacllib/inc/cce/fwk_adpt_struct.h delete mode 100644 third_party/fwkacllib/inc/cce/l2fusion_struct.hpp delete mode 100644 third_party/fwkacllib/inc/cce/optimizer/fusion_engine.h delete mode 100644 third_party/fwkacllib/inc/cce/taskdown_api.h delete mode 100644 third_party/fwkacllib/inc/cce/taskdown_common.hpp create mode 100644 third_party/fwkacllib/inc/ops/deep_md.h delete mode 100644 third_party/fwkacllib/inc/register/op_kernel_registry.h delete mode 100644 third_party/fwkacllib/inc/register/op_registry.h delete mode 100644 third_party/fwkacllib/inc/soft_dp/ExternalSoftDp.h delete mode 100644 third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h diff --git a/inc/external/acl/acl_base.h b/inc/external/acl/acl_base.h index 90da8b8f..1f81b15a 100644 --- a/inc/external/acl/acl_base.h +++ b/inc/external/acl/acl_base.h @@ -113,6 +113,7 @@ static const int ACL_ERROR_PROF_API_CONFLICT = 148047; static const int ACL_ERROR_INVALID_MAX_OPQUEUE_NUM_CONFIG = 148048; static const int ACL_ERROR_INVALID_OPP_PATH = 148049; static const int ACL_ERROR_OP_UNSUPPORTED_DYNAMIC = 148050; +static const int ACL_ERROR_RELATIVE_RESOURCE_NOT_CLEARED = 148051; static const int ACL_ERROR_BAD_ALLOC = 200000; static const int ACL_ERROR_API_NOT_SUPPORT = 200001; @@ -176,10 +177,7 @@ typedef enum { ACL_ERROR = 3, } aclLogLevel; -typedef enum { - ACL_MEMTYPE_DEVICE = 0, - ACL_MEMTYPE_HOST = 1, -} aclMemType; +typedef enum { ACL_MEMTYPE_DEVICE = 0, ACL_MEMTYPE_HOST = 1, ACL_MEMTYPE_HOST_COMPILE_INDEPENDENT = 2 } aclMemType; /** * @ingroup AscendCL @@ -601,7 +599,8 @@ ACL_FUNC_VISIBILITY aclError aclSetTensorConst(aclTensorDesc *desc, void *dataBu * @brief Set tensor memory type specified by the tensor description * * @param desc [OUT] pointer to the instance of aclTensorDesc - * @param memType [IN] ACL_MEMTYPE_DEVICE means device, ACL_MEMTYPE_HOST means host + * @param memType [IN] ACL_MEMTYPE_DEVICE means 
device, ACL_MEMTYPE_HOST or + * ACL_MEMTYPE_HOST_COMPILE_INDEPENDENT means host * * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure diff --git a/inc/external/acl/acl_tdt_queue.h b/inc/external/acl/acl_tdt_queue.h index 95cfdb8c..01f0eebf 100644 --- a/inc/external/acl/acl_tdt_queue.h +++ b/inc/external/acl/acl_tdt_queue.h @@ -86,7 +86,7 @@ ACL_FUNC_VISIBILITY aclError acltdtDestroyQueue(uint32_t qid); * * @param qid [IN] qid * @param buf [IN] acltdtBuf - * @param timeout [IN] timeout + * @param timeout [IN] timeout, -1 means blocking * * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure @@ -101,7 +101,7 @@ ACL_FUNC_VISIBILITY aclError acltdtEnqueue(uint32_t qid, acltdtBuf buf, int32_t * * @param qid [IN] qid * @param buf [OUT] pointer to the acltdtBuf - * @param timeout [IN] timeout + * @param timeout [IN] timeout, -1 means blocking * * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure @@ -112,12 +112,50 @@ ACL_FUNC_VISIBILITY aclError acltdtDequeue(uint32_t qid, acltdtBuf *buf, int32_t /** * @ingroup AscendCL + * @brief enqueue function + * + * @param qid [IN] qid + * @param data [IN] the pointer to data buf + * @param dataSize [IN] the size of data buf + * @param userData [IN] the pointer to user data buf + * @param userDataSize [IN] the size of user data buf + * @param timeout [IN] timeout, -1 means blocking + * @param rsv [IN] reserved param + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtDequeueData + */ +ACL_FUNC_VISIBILITY aclError acltdtEnqueueData(uint32_t qid, const void *data, size_t dataSize, const void *userData, + size_t userDataSize, int32_t timeout, uint32_t rsv); + +/** + * @ingroup AscendCL + * @brief dequeue function + * + * @param qid [IN] qid + * @param data [IN|OUT] the pointer to data buf + * @param dataSize [IN] the size of data buf + * @param retDataSize [OUT] the return size of data buf + * @param userData [IN|OUT] the pointer to user data buf + * @param userDataSize [IN] the size of user data buf + * @param timeout [IN] timeout, -1 means blocking + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtEnqueueData + */ +ACL_FUNC_VISIBILITY aclError acltdtDequeueData(uint32_t qid, void *data, size_t dataSize, size_t *retDataSize, + void *userData, size_t userDataSize, int32_t timeout); + +/** + * @ingroup AscendCL * @brief grant queue to other process * * @param qid [IN] qid * @param pid [IN] pid of dst process * @param permission [IN] permission of queue - * @param timeout [IN] timeout + * @param timeout [IN] timeout, -1 means blocking * * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure @@ -131,7 +169,7 @@ ACL_FUNC_VISIBILITY aclError acltdtGrantQueue(uint32_t qid, int32_t pid, uint32_ * @brief attach queue in current process * * @param qid [IN] qid - * @param timeout [IN] timeout + * @param timeout [IN] timeout, -1 means blocking * @param permission [OUT] permission of queue * * @retval ACL_SUCCESS The function is successfully executed. 
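[Editor's note] The acltdtEnqueueData/acltdtDequeueData pair introduced in the hunk above moves raw byte buffers (plus optional user data) through a TDT queue directly, with timeout = -1 meaning a blocking call. A minimal usage sketch follows; it is illustrative only and not part of the patch. It assumes the queue identified by qid was already created and granted to the calling process (e.g. via acltdtCreateQueue / the acltdtGrantQueue shown above), and it assumes the conventional acl/ include layout.

#include "acl/acl_tdt_queue.h"
#include <cstdio>

// Send one payload through the queue and read it back, blocking on both sides.
static aclError SendAndReceive(uint32_t qid) {
  char payload[64] = "hello";
  char user_meta[8] = {0};  // optional side-band user data; may be left unused
  // timeout = -1: block until the element has been enqueued; rsv is reserved, pass 0
  aclError ret = acltdtEnqueueData(qid, payload, sizeof(payload), user_meta, sizeof(user_meta), -1, 0U);
  if (ret != ACL_SUCCESS) {
    return ret;
  }
  char out[64] = {0};
  size_t out_size = 0U;
  // timeout = -1: block until an element is available; out_size reports the actual payload size
  ret = acltdtDequeueData(qid, out, sizeof(out), &out_size, user_meta, sizeof(user_meta), -1);
  if (ret == ACL_SUCCESS) {
    (void)printf("dequeued %zu bytes\n", out_size);
  }
  return ret;
}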
diff --git a/inc/external/acl/ops/acl_dvpp.h b/inc/external/acl/ops/acl_dvpp.h index a536a23b..b839ae48 100644 --- a/inc/external/acl/ops/acl_dvpp.h +++ b/inc/external/acl/ops/acl_dvpp.h @@ -158,9 +158,21 @@ enum acldvppJpegFormat { ACL_JPEG_CSS_UNKNOWN = 1000 }; -enum acldvppChannelDescParamType { ACL_DVPP_CSC_MATRIX_UINT32 = 0 }; - -enum aclvdecChannelDescParamType { ACL_VDEC_CSC_MATRIX_UINT32 = 0 }; +enum acldvppChannelDescParamType { ACL_DVPP_CSC_MATRIX_UINT32 = 0, ACL_DVPP_MODE_UINT32, ACL_DVPP_CHANNEL_ID_UINT64 }; + +enum aclvdecChannelDescParamType { + ACL_VDEC_CSC_MATRIX_UINT32 = 0, + ACL_VDEC_OUT_MODE_UINT32, + ACL_VDEC_THREAD_ID_UINT64, + ACL_VDEC_CALLBACK_PTR, + ACL_VDEC_CHANNEL_ID_UINT32, + ACL_VDEC_ENCODE_TYPE_UINT32, + ACL_VDEC_OUT_PIC_FORMAT_UINT32, + ACL_VDEC_OUT_PIC_WIDTH_UINT32, + ACL_VDEC_OUT_PIC_HEIGHT_UINT32, + ACL_VDEC_REF_FRAME_NUM_UINT32, + ACL_VDEC_BIT_DEPTH_UINT32 +}; // Csc Matrix can be used both for acldvppChannelDescParamType and aclvdecChannelDescParamType enum acldvppCscMatrix { diff --git a/inc/external/ge/ge_api.h b/inc/external/ge/ge_api.h index c2cbe794..1e80ac4e 100644 --- a/inc/external/ge/ge_api.h +++ b/inc/external/ge/ge_api.h @@ -57,15 +57,15 @@ class GE_FUNC_VISIBILITY Session { /// /// @ingroup client - /// @brief add a graph with a specific graphId - /// @param [in] graphId graph id + /// @brief add a graph with a specific graph id + /// @param [in] graph_id graph id /// @return Status result of function /// - Status AddGraph(uint32_t graphId, const Graph &graph); + Status AddGraph(uint32_t graph_id, const Graph &graph); /// /// @ingroup client - /// @brief add a graph with a specific graphId and graphOptions + /// @brief add a graph with a specific graph id and graphOptions /// @param [in] graphId graph id /// @param [in] graph the graph /// @param [in] options graph options @@ -106,10 +106,10 @@ class GE_FUNC_VISIBILITY Session { /// /// @ingroup ge_graph /// @brief remove a graph of the session with specific session id - /// @param [in] graphId graph id + /// @param [in] graph_d graph id /// @return Status result of function /// - Status RemoveGraph(uint32_t graphId); + Status RemoveGraph(uint32_t graph_id); /// /// @ingroup ge_graph @@ -142,7 +142,7 @@ class GE_FUNC_VISIBILITY Session { /// Status BuildGraph(uint32_t graphId, const std::vector &inputs); - Status BuildGraph(uint32_t graphId, const std::vector &inputs); /*lint !e148*/ + Status BuildGraph(uint32_t graphId, const std::vector &inputs); /*lint !e148*/ /// /// @ingroup ge_graph @@ -189,7 +189,7 @@ class GE_FUNC_VISIBILITY Session { Status RegisterCallBackFunc(const char *key, const session::pCallBackFunc &callback); - bool IsGraphNeedRebuild(uint32_t graphId); + bool IsGraphNeedRebuild(uint32_t graph_id); private: uint64_t sessionId_; diff --git a/inc/external/ge/ge_api_error_codes.h b/inc/external/ge/ge_api_error_codes.h index 2512de0a..5d2d6963 100644 --- a/inc/external/ge/ge_api_error_codes.h +++ b/inc/external/ge/ge_api_error_codes.h @@ -20,6 +20,7 @@ #include #include #include "ge_error_codes.h" +#include "graph/types.h" namespace ge { #ifdef __GNUC__ @@ -35,7 +36,7 @@ class GE_FUNC_VISIBILITY StatusFactory { return &instance; } - void RegisterErrorNo(uint32_t err, const std::string &desc) { + void RegisterErrorNo(const uint32_t err, const std::string &desc) { // Avoid repeated addition if (err_desc_.find(err) != err_desc_.end()) { return; @@ -43,19 +44,19 @@ class GE_FUNC_VISIBILITY StatusFactory { err_desc_[err] = desc; } - void RegisterErrorNo(uint32_t err, const char *desc) 
{ + void RegisterErrorNo(const uint32_t err, const char *const desc) { if (desc == nullptr) { return; } - std::string error_desc = desc; + const std::string error_desc = desc; if (err_desc_.find(err) != err_desc_.end()) { return; } err_desc_[err] = error_desc; } - std::string GetErrDesc(uint32_t err) { - auto iter_find = err_desc_.find(err); + std::string GetErrDesc(const uint32_t err) { + const auto iter_find = err_desc_.find(err); if (iter_find == err_desc_.end()) { return ""; } @@ -72,23 +73,23 @@ class GE_FUNC_VISIBILITY StatusFactory { class GE_FUNC_VISIBILITY ErrorNoRegisterar { public: - ErrorNoRegisterar(uint32_t err, const std::string &desc) { + ErrorNoRegisterar(const uint32_t err, const std::string &desc) noexcept { StatusFactory::Instance()->RegisterErrorNo(err, desc); } - ErrorNoRegisterar(uint32_t err, const char *desc) { + ErrorNoRegisterar(const uint32_t err, const char *const desc) noexcept { StatusFactory::Instance()->RegisterErrorNo(err, desc); } ~ErrorNoRegisterar() {} }; // Code compose(4 byte), runtime: 2 bit, type: 2 bit, level: 3 bit, sysid: 8 bit, modid: 5 bit, value: 12 bit -#define GE_ERRORNO(runtime, type, level, sysid, modid, name, value, desc) \ - constexpr ge::Status name = (static_cast(0xFFU & (static_cast(runtime))) << 30) | \ - (static_cast(0xFFU & (static_cast(type))) << 28) | \ - (static_cast(0xFFU & (static_cast(level))) << 25) | \ - (static_cast(0xFFU & (static_cast(sysid))) << 17) | \ - (static_cast(0xFFU & (static_cast(modid))) << 12) | \ - (static_cast(0x0FFFU) & (static_cast(value))); \ +#define GE_ERRORNO(runtime, type, level, sysid, modid, name, value, desc) \ + constexpr ge::Status name = (static_cast(0xFFU & (static_cast(runtime))) << 30U) | \ + (static_cast(0xFFU & (static_cast(type))) << 28U) | \ + (static_cast(0xFFU & (static_cast(level))) << 25U) | \ + (static_cast(0xFFU & (static_cast(sysid))) << 17U) | \ + (static_cast(0xFFU & (static_cast(modid))) << 12U) | \ + (static_cast(0x0FFFU) & (static_cast(value))); \ const ErrorNoRegisterar g_##name##_errorno(name, desc); #define GE_ERRORNO_EXTERNAL(name, desc) const ErrorNoRegisterar g_##name##_errorno(name, desc); @@ -97,7 +98,7 @@ using Status = uint32_t; // General error code GE_ERRORNO(0, 0, 0, 0, 0, SUCCESS, 0, "success"); -GE_ERRORNO(0b11, 0b11, 0b111, 0xFF, 0b11111, FAILED, 0xFFF, "failed"); /*lint !e401*/ +GE_ERRORNO(0b11, 0b11, 0b111, 0xFFU, 0b11111, FAILED, 0xFFFU, "failed"); /*lint !e401*/ GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_PARAM_INVALID, "Parameter invalid."); GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_NOT_INIT, "GE executor not initialized yet."); diff --git a/inc/external/ge/ge_api_types.h b/inc/external/ge/ge_api_types.h index d2d5bf5d..52881020 100644 --- a/inc/external/ge/ge_api_types.h +++ b/inc/external/ge/ge_api_types.h @@ -24,6 +24,7 @@ #include #include #include "graph/tensor.h" +#include "graph/types.h" namespace ge { // Option key: graph run mode @@ -337,6 +338,8 @@ const std::string MODIFY_MIXLIST = "ge.exec.modify_mixlist"; const std::string OP_PRECISION_MODE = "ge.exec.op_precision_mode"; +const char *const FILE_CONSTANT_PATH = "ge.exec.value_bins"; + // Graph run mode enum GraphRunMode { PREDICTION = 0, TRAIN }; @@ -353,7 +356,7 @@ struct OutputTensorInfo { std::vector dims; // shape description std::unique_ptr data; // tensor data int64_t length; // tensor length - OutputTensorInfo() : data_type(0), dims({}), data(nullptr), length(0) {} + OutputTensorInfo() : data_type(0U), dims({}), data(nullptr), length(0) {} OutputTensorInfo(OutputTensorInfo &&out) : 
data_type(out.data_type), dims(out.dims), data(std::move(out.data)), length(out.length) {} diff --git a/inc/external/ge/ge_error_codes.h b/inc/external/ge/ge_error_codes.h index cafc5a64..027c83ea 100644 --- a/inc/external/ge/ge_error_codes.h +++ b/inc/external/ge/ge_error_codes.h @@ -32,42 +32,43 @@ #endif #include +#include #ifdef __cplusplus extern "C" { #endif -static const uint32_t ACL_ERROR_GE_PARAM_INVALID = 145000; -static const uint32_t ACL_ERROR_GE_EXEC_NOT_INIT = 145001; -static const uint32_t ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID = 145002; -static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ID_INVALID = 145003; -static const uint32_t ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID = 145006; -static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ADDR_INVALID = 145007; -static const uint32_t ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID = 145008; -static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED = 145009; -static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID = 145011; -static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID = 145012; -static const uint32_t ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID = 145013; -static const uint32_t ACL_ERROR_GE_AIPP_BATCH_EMPTY = 145014; -static const uint32_t ACL_ERROR_GE_AIPP_NOT_EXIST = 145015; -static const uint32_t ACL_ERROR_GE_AIPP_MODE_INVALID = 145016; -static const uint32_t ACL_ERROR_GE_OP_TASK_TYPE_INVALID = 145017; -static const uint32_t ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID = 145018; -static const uint32_t ACL_ERROR_GE_PLGMGR_PATH_INVALID = 145019; -static const uint32_t ACL_ERROR_GE_FORMAT_INVALID = 145020; -static const uint32_t ACL_ERROR_GE_SHAPE_INVALID = 145021; -static const uint32_t ACL_ERROR_GE_DATATYPE_INVALID = 145022; -static const uint32_t ACL_ERROR_GE_MEMORY_ALLOCATION = 245000; -static const uint32_t ACL_ERROR_GE_MEMORY_OPERATE_FAILED = 245001; -static const uint32_t ACL_ERROR_GE_INTERNAL_ERROR = 545000; -static const uint32_t ACL_ERROR_GE_LOAD_MODEL = 545001; -static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_PARTITION_FAILED = 545002; -static const uint32_t ACL_ERROR_GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED = 545003; -static const uint32_t ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED = 545004; -static const uint32_t ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED = 545005; -static const uint32_t ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA = 545006; -static const uint32_t ACL_ERROR_GE_COMMAND_HANDLE = 545007; -static const uint32_t ACL_ERROR_GE_GET_TENSOR_INFO = 545008; -static const uint32_t ACL_ERROR_GE_UNLOAD_MODEL = 545009; +static const uint32_t ACL_ERROR_GE_PARAM_INVALID = 145000U; +static const uint32_t ACL_ERROR_GE_EXEC_NOT_INIT = 145001U; +static const uint32_t ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID = 145002U; +static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ID_INVALID = 145003U; +static const uint32_t ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID = 145006U; +static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ADDR_INVALID = 145007U; +static const uint32_t ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID = 145008U; +static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED = 145009U; +static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID = 145011U; +static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID = 145012U; +static const uint32_t ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID = 145013U; +static const uint32_t ACL_ERROR_GE_AIPP_BATCH_EMPTY = 145014U; +static const uint32_t ACL_ERROR_GE_AIPP_NOT_EXIST = 145015U; +static const uint32_t ACL_ERROR_GE_AIPP_MODE_INVALID = 145016U; +static const uint32_t 
ACL_ERROR_GE_OP_TASK_TYPE_INVALID = 145017U; +static const uint32_t ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID = 145018U; +static const uint32_t ACL_ERROR_GE_PLGMGR_PATH_INVALID = 145019U; +static const uint32_t ACL_ERROR_GE_FORMAT_INVALID = 145020U; +static const uint32_t ACL_ERROR_GE_SHAPE_INVALID = 145021U; +static const uint32_t ACL_ERROR_GE_DATATYPE_INVALID = 145022U; +static const uint32_t ACL_ERROR_GE_MEMORY_ALLOCATION = 245000U; +static const uint32_t ACL_ERROR_GE_MEMORY_OPERATE_FAILED = 245001U; +static const uint32_t ACL_ERROR_GE_INTERNAL_ERROR = 545000U; +static const uint32_t ACL_ERROR_GE_LOAD_MODEL = 545001U; +static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_PARTITION_FAILED = 545002U; +static const uint32_t ACL_ERROR_GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED = 545003U; +static const uint32_t ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED = 545004U; +static const uint32_t ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED = 545005U; +static const uint32_t ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA = 545006U; +static const uint32_t ACL_ERROR_GE_COMMAND_HANDLE = 545007U; +static const uint32_t ACL_ERROR_GE_GET_TENSOR_INFO = 545008U; +static const uint32_t ACL_ERROR_GE_UNLOAD_MODEL = 545009U; #ifdef __cplusplus } // namespace ge diff --git a/inc/external/ge/ge_ir_build.h b/inc/external/ge/ge_ir_build.h index cf543315..84f31145 100644 --- a/inc/external/ge/ge_ir_build.h +++ b/inc/external/ge/ge_ir_build.h @@ -36,14 +36,10 @@ #include #include "graph/graph.h" #include "graph/ge_error_codes.h" - -namespace { -const int IR_MAJOR_VERSION = 1; -const int IR_MINOR_VERSION = 0; -const int IR_PATCH_VERSION = 0; -} // namespace - namespace ge { +const int32_t IR_MAJOR_VERSION = 1; +const int32_t IR_MINOR_VERSION = 0; +const int32_t IR_PATCH_VERSION = 0; struct ModelBufferData { std::shared_ptr data = nullptr; @@ -117,7 +113,8 @@ GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const char *output_file, const M * @retval GRAPH_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -GE_FUNC_VISIBILITY graphStatus aclgrphGetIRVersion(int *major_version, int *minor_version, int *patch_version); +GE_FUNC_VISIBILITY graphStatus aclgrphGetIRVersion(int32_t *major_version, int32_t *minor_version, + int32_t *patch_version); /** * @ingroup AscendCL diff --git a/inc/framework/common/debug/ge_log.h b/inc/framework/common/debug/ge_log.h index dbd6f875..b1bf929f 100644 --- a/inc/framework/common/debug/ge_log.h +++ b/inc/framework/common/debug/ge_log.h @@ -40,17 +40,17 @@ enum TraceStatus { TRACE_INIT = 0, TRACE_RUNNING, TRACE_WAITING, TRACE_STOP }; class GE_FUNC_VISIBILITY GeLog { public: - static uint64_t GetTid() { + static const uint64_t GetTid() { #ifdef __GNUC__ - uint64_t tid = static_cast(syscall(__NR_gettid)); + const uint64_t tid = static_cast(syscall(__NR_gettid)); #else - uint64_t tid = static_cast(GetCurrentThreadId()); + const uint64_t tid = static_cast(GetCurrentThreadId()); #endif return tid; } }; -inline bool IsLogEnable(int module_name, int log_level) { +inline bool IsLogEnable(const int32_t module_name, const int32_t log_level) { const int32_t enable = CheckLogLevel(module_name, log_level); // 1:enable, 0:disable return (enable == 1); @@ -92,10 +92,10 @@ inline bool IsLogEnable(int module_name, int log_level) { #define GELOGT(VALUE, fmt, ...) 
\ do { \ TraceStatus stat = VALUE; \ - const char *const TraceStatStr[] = {"INIT", "RUNNING", "WAITING", "STOP"}; \ + const char_t *const TraceStatStr[] = {"INIT", "RUNNING", "WAITING", "STOP"}; \ const int32_t idx = static_cast(stat); \ - char *k = const_cast("status"); \ - char *v = const_cast(TraceStatStr[idx]); \ + char_t *k = const_cast("status"); \ + char_t *v = const_cast(TraceStatStr[idx]); \ KeyValue kv = {k, v}; \ DlogWithKV(GE_MODULE_NAME, DLOG_TRACE, &kv, 1, "%lu %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \ } while (false) @@ -110,7 +110,7 @@ inline bool IsLogEnable(int module_name, int log_level) { // print memory when it is greater than 1KB. #define GE_PRINT_DYNAMIC_MEMORY(FUNC, PURPOSE, SIZE) \ do { \ - if ((SIZE) > 1024) { \ + if (static_cast(SIZE) > 1024UL) { \ GELOGI("MallocMemory, func=%s, size=%zu, purpose=%s", (#FUNC), static_cast(SIZE), (PURPOSE)); \ } \ } while (false) diff --git a/inc/framework/common/debug/log.h b/inc/framework/common/debug/log.h index 3b11ce57..32e915eb 100644 --- a/inc/framework/common/debug/log.h +++ b/inc/framework/common/debug/log.h @@ -26,7 +26,7 @@ #include "external/ge/ge_api_error_codes.h" #if !defined(__ANDROID__) && !defined(ANDROID) -#define DOMI_LOGE(fmt, ...) GE_LOG_ERROR(GE_MODULE_NAME, ge::FAILED, fmt, ##__VA_ARGS__) +#define DOMI_LOGE(fmt, ...) GE_LOG_ERROR(GE_MODULE_NAME, (ge::FAILED), fmt, ##__VA_ARGS__) #else #include #if defined(BUILD_VERSION_PERF) @@ -49,9 +49,9 @@ GELOGW(__VA_ARGS__); \ } -#define GE_LOGE_IF(condition, ...) \ - if ((condition)) { \ - GELOGE(ge::FAILED, __VA_ARGS__); \ +#define GE_LOGE_IF(condition, ...) \ + if ((condition)) { \ + GELOGE((ge::FAILED), __VA_ARGS__); \ } // If expr is not SUCCESS, print the log and return the same value @@ -59,7 +59,7 @@ do { \ const ge::Status _chk_status = (expr); \ if (_chk_status != ge::SUCCESS) { \ - GELOGE(ge::FAILED, __VA_ARGS__); \ + GELOGE((ge::FAILED), __VA_ARGS__); \ return _chk_status; \ } \ } while (false) @@ -69,7 +69,7 @@ do { \ const ge::Status _chk_status = (expr); \ if (_chk_status != ge::SUCCESS) { \ - GELOGE(ge::FAILED, __VA_ARGS__); \ + GELOGE((ge::FAILED), __VA_ARGS__); \ } \ } while (false) @@ -88,7 +88,7 @@ if ((expr) != ge::GRAPH_SUCCESS) { \ REPORT_CALL_ERROR("E19999", "Operator graph failed"); \ GELOGE(ge::FAILED, __VA_ARGS__); \ - return FAILED; \ + return (FAILED); \ } \ } while (false) @@ -105,8 +105,8 @@ const bool b = (expr); \ if (!b) { \ REPORT_INNER_ERROR("E19999", __VA_ARGS__); \ - GELOGE(_status, __VA_ARGS__); \ - return _status; \ + GELOGE((_status), __VA_ARGS__); \ + return (_status); \ } \ } while (false) @@ -115,7 +115,7 @@ do { \ const bool b = (expr); \ if (!b) { \ - return _status; \ + return (_status); \ } \ } while (false) @@ -196,7 +196,7 @@ REPORT_INNER_ERROR("E19999", __VA_ARGS__); \ GELOGE(ge::FAILED, __VA_ARGS__); \ exec_expr; \ - return _status; \ + return (_status); \ } \ } @@ -211,22 +211,22 @@ // -----------------runtime related macro definitions------------------------------- // If expr is not RT_ERROR_NONE, print the log -#define GE_CHK_RT(expr) \ - do { \ - const rtError_t _rt_ret = (expr); \ - if (_rt_ret != RT_ERROR_NONE) { \ - GELOGE(ge::FAILED, "Call rt api failed, ret: 0x%X", _rt_ret); \ - } \ +#define GE_CHK_RT(expr) \ + do { \ + const rtError_t _rt_ret = (expr); \ + if (_rt_ret != RT_ERROR_NONE) { \ + GELOGE(ge::RT_FAILED, "Call rt api failed, ret: 0x%X", _rt_ret); \ + } \ } while (false) // If expr is not RT_ERROR_NONE, print the log and execute the exec_expr expression -#define 
GE_CHK_RT_EXEC(expr, exec_expr) \ - do { \ - const rtError_t _rt_ret = (expr); \ - if (_rt_ret != RT_ERROR_NONE) { \ - GELOGE(ge::FAILED, "Call rt api failed, ret: 0x%X", _rt_ret); \ - exec_expr; \ - } \ +#define GE_CHK_RT_EXEC(expr, exec_expr) \ + do { \ + const rtError_t _rt_ret = (expr); \ + if (_rt_ret != RT_ERROR_NONE) { \ + GELOGE(ge::RT_FAILED, "Call rt api failed, ret: 0x%X", _rt_ret); \ + exec_expr; \ + } \ } while (false) // If expr is not RT_ERROR_NONE, print the log and return @@ -235,7 +235,7 @@ const rtError_t _rt_ret = (expr); \ if (_rt_ret != RT_ERROR_NONE) { \ REPORT_CALL_ERROR("E19999", "Call %s fail, ret: 0x%X", #expr, _rt_ret); \ - GELOGE(ge::FAILED, "Call rt api failed, ret: 0x%X", _rt_ret); \ + GELOGE(ge::RT_FAILED, "Call rt api failed, ret: 0x%X", _rt_ret); \ return RT_ERROR_TO_GE_STATUS(_rt_ret); \ } \ } while (false) @@ -257,26 +257,26 @@ exec_expr1; \ } -#define GE_ERRORLOG_AND_ERRORMSG(_status, errormsg) \ - { \ - GELOGE(_status, "[Check][InnerData]%s", errormsg); \ - REPORT_INNER_ERROR("E19999", "%s", errormsg); \ +#define GE_ERRORLOG_AND_ERRORMSG(_status, errormsg) \ + { \ + GELOGE((_status), "[Check][InnerData]%s", (errormsg)); \ + REPORT_INNER_ERROR("E19999", "%s", (errormsg)); \ } -#define GE_WARNINGLOG_AND_ERRORMSG(errormsg) \ - { \ - GELOGW("%s", errormsg); \ - ErrorManager::GetInstance().ATCReportErrMessage("E19021", {"reason"}, {errormsg}); \ +#define GE_WARNINGLOG_AND_ERRORMSG(errormsg) \ + { \ + GELOGW("%s", (errormsg)); \ + ErrorManager::GetInstance().ATCReportErrMessage("E19021", {"reason"}, {(errormsg)}); \ } -#define GE_CHK_LOG_AND_ERRORMSG(expr, _status, errormsg) \ - do { \ - const bool b = (expr); \ - if (!b) { \ - GELOGE(_status, "%s", errormsg); \ - ErrorManager::GetInstance().ATCReportErrMessage("E19021", {"reason"}, {errormsg}); \ - return _status; \ - } \ +#define GE_CHK_LOG_AND_ERRORMSG(expr, _status, errormsg) \ + do { \ + const bool b = (expr); \ + if (!b) { \ + GELOGE((_status), "%s", (errormsg)); \ + ErrorManager::GetInstance().ATCReportErrMessage("E19021", {"reason"}, {(errormsg)}); \ + return (_status); \ + } \ } while (false) template diff --git a/inc/framework/common/file_constant_util.h b/inc/framework/common/file_constant_util.h new file mode 100644 index 00000000..1ea81960 --- /dev/null +++ b/inc/framework/common/file_constant_util.h @@ -0,0 +1,60 @@ +/** + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_FRAMEWORK_COMMON_FILE_CONSTANT_UTIL_H +#define INC_FRAMEWORK_COMMON_FILE_CONSTANT_UTIL_H + +#include +#include +#include +#include "ge/ge_api_error_codes.h" +#include "nlohmann/json.hpp" +#include "graph/op_desc.h" +#include "graph/types.h" +#include "graph/ge_tensor.h" + +namespace ge { +extern const int64_t kBlockSize; +extern const std::string kBinFileValues; +extern const std::string kBinIdValue; +extern const std::string kBinFilePathValue; + +struct FileConstantInfo { + std::string value_bin_file_id; + std::string value_bin_file_path; +}; + +struct OptionInfo { + std::vector info; +}; + +void from_json(const nlohmann::json &j, FileConstantInfo &info); + +void from_json(const nlohmann::json &j, OptionInfo &option_info); + +Status GetFilePathFromOption(std::map &file_id_and_path_map); + +Status CopyOneWeightFromFile(const void *curr_dev_ptr, const std::string &value, const size_t file_constant_size, + size_t &left_size); + +Status GetFilePath(const OpDescPtr &op_desc, const std::map &file_id_and_path_map, + std::string &file_path); + +Status GetFileConstantElementTotalSize(const GeShape &shape, const DataType data_type, int64_t &mem_size, + const Format format = FORMAT_ND); +} // namespace ge + +#endif // INC_FRAMEWORK_COMMON_FILE_CONSTANT_UTIL_H diff --git a/inc/framework/common/fmk_error_codes.h b/inc/framework/common/fmk_error_codes.h index 139785ae..9f1719ac 100644 --- a/inc/framework/common/fmk_error_codes.h +++ b/inc/framework/common/fmk_error_codes.h @@ -42,27 +42,27 @@ #include "register/register_error_codes.h" // Each module uses the following four macros to define error codes: -#define DECLARE_ERRORNO_OMG(name, value) DECLARE_ERRORNO(SYSID_FWK, MODID_OMG, name, value) -#define DECLARE_ERRORNO_OME(name, value) DECLARE_ERRORNO(SYSID_FWK, MODID_OME, name, value) +#define DECLARE_ERRORNO_OMG(name, value) DECLARE_ERRORNO(SYSID_FWK, MODID_OMG, (name), (value)) +#define DECLARE_ERRORNO_OME(name, value) DECLARE_ERRORNO(SYSID_FWK, MODID_OME, (name), (value)) #define DECLARE_ERRORNO_CALIBRATION(name, value) DECLARE_ERRORNO(SYSID_FWK, MODID_CALIBRATION, name, value) -#define DEF_ERRORNO(name, desc) const ErrorNoRegisterar g_##name##_errorno(name, desc); +#define DEF_ERRORNO(name, desc) const ErrorNoRegisterar g_##name##_errorno((name), (desc)); // Interface for Obtaining Error Code Description #define GET_ERRORNO_STR(value) domi::StatusFactory::Instance()->GetErrDesc(value) -const int MODID_OMG = 1; // OMG module ID -const int MODID_OME = 2; // OME module ID -const int MODID_CALIBRATION = 3; // Calibration module ID - namespace domi { +constexpr int32_t MODID_OMG = 1; // OMG module ID +constexpr int32_t MODID_OME = 2; // OME module ID +constexpr int32_t MODID_CALIBRATION = 3; // Calibration module ID + class GE_FUNC_VISIBILITY StatusFactory { public: static StatusFactory *Instance(); - void RegisterErrorNo(uint32_t err, const std::string &desc); + void RegisterErrorNo(const uint32_t err, const std::string &desc); - std::string GetErrDesc(uint32_t err); + std::string GetErrDesc(const uint32_t err); protected: StatusFactory() {} diff --git a/inc/framework/common/ge_format_util.h b/inc/framework/common/ge_format_util.h index 3c621576..c6decdcc 100644 --- a/inc/framework/common/ge_format_util.h +++ b/inc/framework/common/ge_format_util.h @@ -33,7 +33,7 @@ class GE_FUNC_VISIBILITY GeFormatUtil { /// @param [out] dst_shape destination shape /// @return Status /// - static Status TransShape(const TensorDesc &src_desc, Format dst_format, std::vector &dst_shape); + static 
Status TransShape(const TensorDesc &src_desc, const Format dst_format, std::vector &dst_shape); }; } // namespace ge diff --git a/inc/framework/common/ge_inner_error_codes.h b/inc/framework/common/ge_inner_error_codes.h index 9e9b5d1d..529f6cb4 100644 --- a/inc/framework/common/ge_inner_error_codes.h +++ b/inc/framework/common/ge_inner_error_codes.h @@ -313,7 +313,10 @@ GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, 3, "Graph ma GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_FINALIZE_FAILED, 4, "Graph manager finalize failed."); GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_SAVE_MODEL_FAILED, 5, "Graph manager save model failed."); -#define RT_ERROR_TO_GE_STATUS(RT_ERROR) static_cast(RT_ERROR) +static inline Status TransRtErrorCode(const int32_t error_code) { + return static_cast(error_code); +} +#define RT_ERROR_TO_GE_STATUS(RT_ERROR) TransRtErrorCode(RT_ERROR) } // namespace ge #endif // INC_FRAMEWORK_COMMON_GE_INNER_ERROR_CODES_H_ diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h index 060a7bf0..868debe7 100644 --- a/inc/framework/common/ge_types.h +++ b/inc/framework/common/ge_types.h @@ -30,7 +30,11 @@ namespace ge { enum RuntimeType { HOST = 0, DEVICE = 1 }; -enum PerfLevel { GEN_TASK_WITH_FUSION = -1, GEN_TASK_WITHOUT_L2FUSION = 3, GEN_TASK_WITHOUT_FUSION = 4 }; +enum class PerfLevel : int32_t { + GEN_TASK_WITH_FUSION = -1, + GEN_TASK_WITHOUT_L2FUSION = 3, + GEN_TASK_WITHOUT_FUSION = 4 +}; enum FrameworkType { CAFFE = 0, @@ -40,6 +44,10 @@ enum FrameworkType { ONNX, }; +enum class GraphStage : int64_t { GRAPH_STAGE_FUZZ = 0, GRAPH_STAGE_RESERVED }; + +const char *const kGraphDumpStage = "DumpStage"; + const std::map kFwkTypeToStr = { {"0", "Caffe"}, {"1", "MindSpore"}, {"3", "TensorFlow"}, {"4", "Android_NN"}, {"5", "Onnx"}}; @@ -53,17 +61,18 @@ enum OpEngineType { enum InputAippType { DATA_WITHOUT_AIPP = 0, DATA_WITH_STATIC_AIPP, DATA_WITH_DYNAMIC_AIPP, DYNAMIC_AIPP_NODE }; -const char *const GE_ENGINE_ATTR_MEM_TYPE_HBM = "HBM"; -const char *const GE_OPTION_EXEC_PLACEMENT = "ge.exec.placement"; +const char_t *const GE_ENGINE_ATTR_MEM_TYPE_HBM = "HBM"; +const char_t *const GE_OPTION_EXEC_PLACEMENT = "ge.exec.placement"; // profiling data + const std::string kTaskTypeAicore = "AI_CORE"; const std::string kTaskTypeAicpu = "AI_CPU"; const std::string kTaskTypeInvalid = "TASK_TYPE_INVALID"; const std::string kTaskTypeFftsPlus = "FFTS_PLUS"; // dynamic execute mode -const char *const kLazyRecompile = "lazy_recompile"; +const char_t *const kLazyRecompile = "lazy_recompile"; // Data cache, including data address and length struct DataBuffer { @@ -75,7 +84,7 @@ struct DataBuffer { DataBuffer(void *data_in, uint64_t data_len, bool is_support_mem_share, uint32_t placement = 0U) : data(data_in), length(data_len), isDataSupportMemShare(is_support_mem_share), placement(placement) {} - DataBuffer() : data(nullptr), length(0U), isDataSupportMemShare(false) {} + DataBuffer() : data(nullptr), length(0UL), isDataSupportMemShare(false) {} }; /// @@ -87,7 +96,7 @@ struct InputData { uint32_t timestamp; // Data creation time uint32_t timeout; // Processing timeout uint32_t model_id; // Model ID required for data processing - uint64_t request_id = 0U; // Request ID + uint64_t request_id = 0UL; // Request ID std::vector blobs; // Actual input data, currently only supports one input bool is_dynamic_batch = false; // Whether is dynamic batch size scene, default:false std::string batch_label; // Gear used for current inference in dynamic batch scene @@ 
-114,10 +123,10 @@ struct Command { // The definition of I/O shape description struct ShapeDescription { - int64_t num = 0; - int64_t channel = 0; - int64_t height = 0; - int64_t width = 0; + int64_t num = 0L; + int64_t channel = 0L; + int64_t height = 0L; + int64_t width = 0L; std::vector dims; std::vector> shape_ranges; }; @@ -187,14 +196,14 @@ struct AippConfigInfo { int32_t mean_chn_1; int32_t mean_chn_2; int32_t mean_chn_3; - float min_chn_0; - float min_chn_1; - float min_chn_2; - float min_chn_3; - float var_reci_chn_0; - float var_reci_chn_1; - float var_reci_chn_2; - float var_reci_chn_3; + float32_t min_chn_0; + float32_t min_chn_1; + float32_t min_chn_2; + float32_t min_chn_3; + float32_t var_reci_chn_0; + float32_t var_reci_chn_1; + float32_t var_reci_chn_2; + float32_t var_reci_chn_3; int8_t support_rotation; uint32_t related_input_rank; uint32_t max_src_image_size; diff --git a/inc/framework/common/helper/model_helper.h b/inc/framework/common/helper/model_helper.h index b3cf19a4..8da856e1 100644 --- a/inc/framework/common/helper/model_helper.h +++ b/inc/framework/common/helper/model_helper.h @@ -36,75 +36,66 @@ class GE_FUNC_VISIBILITY ModelHelper { Status SaveToOmModel(const GeModelPtr &ge_model, const SaveParam &save_param, const std::string &output_file, ge::ModelBufferData &model); Status SaveToOmRootModel(const GeRootModelPtr &ge_root_model, const SaveParam &save_param, - const std::string &output_file, ModelBufferData &model, bool is_unknown_shape); + const std::string &output_file, ModelBufferData &model, const bool is_unknown_shape); Status SaveOriginalGraphToOmModel(const ge::Graph &graph, const std::string &output_file); Status LoadModel(const ge::ModelData &model_data); Status LoadRootModel(const ge::ModelData &model_data); - Status GetModelBufferData(ge::ModelBufferData &model); - - const ModelFileHeader *GetFileHeader() const { - return file_header_; - } GeModelPtr GetGeModel(); GeRootModelPtr GetGeRootModel(); - void SetSaveMode(bool val) { + void SetSaveMode(const bool val) { is_offline_ = val; } - bool GetSaveMode(void) const { - return is_offline_; - } + bool GetModelType() const { return is_unknown_shape_model_; - }; + } - Status GetBaseNameFromFileName(const std::string &file_name, std::string &base_name); - Status GetModelNameFromMergedGraphName(const std::string &graph_name, std::string &model_name); + Status GetBaseNameFromFileName(const std::string &file_name, std::string &base_name) const; + Status GetModelNameFromMergedGraphName(const std::string &graph_name, std::string &model_name) const; private: bool is_assign_model_ = false; bool is_offline_ = true; bool is_unknown_shape_model_ = false; ModelFileHeader *file_header_ = nullptr; - // Encrypted model need delete temp model and unencrypted model need not delete model - uint8_t *model_addr_tmp_ = nullptr; - uint32_t model_len_tmp_ = 0; GeModelPtr model_; GeRootModelPtr root_model_; - ModelHelper(const ModelHelper &); - ModelHelper &operator=(const ModelHelper &); + ModelHelper(const ModelHelper &) = default; + ModelHelper &operator=(const ModelHelper &) = default; Status GenerateGeModel(OmFileLoadHelper &om_load_helper); Status GenerateGeRootModel(OmFileLoadHelper &om_load_helper); Status LoadModelData(OmFileLoadHelper &om_load_helper); - void SetModelToGeModel(GeModelPtr &ge_model, Model &model); - Status LoadModelData(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, size_t mode_index); + void SetModelToGeModel(GeModelPtr &ge_model, Model &model) const; + Status LoadModelData(OmFileLoadHelper 
&om_load_helper, GeModelPtr &cur_model, const size_t mode_index) const; Status LoadWeights(OmFileLoadHelper &om_load_helper); - Status LoadWeights(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, size_t mode_index); + Status LoadWeights(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, const size_t mode_index) const; Status LoadTask(OmFileLoadHelper &om_load_helper); - Status LoadTask(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, size_t mode_index); + Status LoadTask(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, const size_t mode_index) const; Status LoadTBEKernelStore(OmFileLoadHelper &om_load_helper); - Status LoadTBEKernelStore(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, size_t mode_index); + Status LoadTBEKernelStore(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, const size_t mode_index) const; Status LoadCustAICPUKernelStore(OmFileLoadHelper &om_load_helper); - Status LoadCustAICPUKernelStore(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, size_t mode_index); - Status ReleaseLocalModelData() noexcept; - Status SaveModelPartition(std::shared_ptr &om_file_save_helper, ModelPartitionType type, - const uint8_t *data, size_t size, size_t model_index); + Status LoadCustAICPUKernelStore(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, + const size_t mode_index) const; + + Status SaveModelPartition(std::shared_ptr &om_file_save_helper, const ModelPartitionType type, + const uint8_t *const data, const size_t size, const size_t model_index) const; Status SaveModelDef(shared_ptr &om_file_save_helper, const GeModelPtr &ge_model, - Buffer &model_buffer, size_t model_index = 0); - Status SaveSizeToModelDef(const GeModelPtr &ge_model); + Buffer &model_buffer, const size_t model_index = 0U) const; + Status SaveSizeToModelDef(const GeModelPtr &ge_model) const; Status SaveModelWeights(shared_ptr &om_file_save_helper, const GeModelPtr &ge_model, - size_t model_index = 0); + const size_t model_index = 0U) const; Status SaveModelTbeKernel(shared_ptr &om_file_save_helper, const GeModelPtr &ge_model, - size_t model_index = 0); + const size_t model_index = 0U) const; Status SaveModelCustAICPU(shared_ptr &om_file_save_helper, const GeModelPtr &ge_model, - size_t model_index = 0); + const size_t model_index = 0U) const; Status SaveModelTaskDef(shared_ptr &om_file_save_helper, const GeModelPtr &ge_model, - Buffer &task_buffer, size_t model_index = 0); + Buffer &task_buffer, const size_t model_index = 0U) const; Status SaveModelHeader(shared_ptr &om_file_save_helper, const GeModelPtr &ge_model, - size_t model_num = 1); + const size_t model_num = 1U) const; Status SaveAllModelPartiton(shared_ptr &om_file_save_helper, const GeModelPtr &ge_model, - Buffer &model_buffer, Buffer &task_buffer, size_t model_index = 0); + Buffer &model_buffer, Buffer &task_buffer, const size_t model_index = 0U) const; }; } // namespace ge #endif // INC_FRAMEWORK_COMMON_HELPER_MODEL_HELPER_H_ diff --git a/inc/framework/common/op/attr_value_util.h b/inc/framework/common/op/attr_value_util.h index cc4e0fcf..27415df8 100644 --- a/inc/framework/common/op/attr_value_util.h +++ b/inc/framework/common/op/attr_value_util.h @@ -165,10 +165,12 @@ GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, AttrDef_ListValu GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, NamedAttrs *&value, AttrDefMap *attr); GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, const NamedAttrs *&value, const AttrDefMap &attr); -GE_FUNC_VISIBILITY bool 
GetAttrDefListValue(const std::string &key, int idx, int32_t *value, const AttrDefMap &attr); -GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int idx, uint32_t *value, const AttrDefMap &attr); -GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int idx, float *value, const AttrDefMap &attr); -GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int idx, double *value, const AttrDefMap &attr); +GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int32_t idx, int32_t *value, + const AttrDefMap &attr); +GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int32_t idx, uint32_t *value, + const AttrDefMap &attr); +GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int32_t idx, float *value, const AttrDefMap &attr); +GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int32_t idx, double *value, const AttrDefMap &attr); } // namespace ge #endif // INC_FRAMEWORK_COMMON_OP_ATTR_VALUE_UTIL_H_ diff --git a/inc/framework/common/op/ge_op_utils.h b/inc/framework/common/op/ge_op_utils.h index 1d12c05b..8b28258a 100644 --- a/inc/framework/common/op/ge_op_utils.h +++ b/inc/framework/common/op/ge_op_utils.h @@ -65,7 +65,7 @@ GE_FUNC_VISIBILITY extern const uint32_t FOR_LIMIT_INPUT; GE_FUNC_VISIBILITY extern const uint32_t FOR_DELTA_INPUT; GE_FUNC_VISIBILITY extern const uint32_t FOR_DATA_INPUT; -GE_FUNC_VISIBILITY extern const int NORMAL_TENSOR_SIZE; +GE_FUNC_VISIBILITY extern const int32_t NORMAL_TENSOR_SIZE; class GE_FUNC_VISIBILITY OpUtils { public: @@ -107,9 +107,10 @@ class GE_FUNC_VISIBILITY OpUtils { static Status SetOutputSliceDataByDataType(void *data, int64_t data_size, const std::vector &input_dims, const std::vector &begin, const std::vector &output_dims, ge::GeTensor *output, const std::vector &stride); - static Status SetOutputSliceData(void *data, int64_t data_size, int32_t data_type, std::vector &input_dims, - std::vector &begin, std::vector &output_dims, ge::GeTensor *output, - std::vector &stride); + static Status SetOutputSliceData(void *data, int64_t data_size, int32_t data_type, + const std::vector &input_dims, const std::vector &begin, + const std::vector &output_dims, ge::GeTensor *const output, + const std::vector &stride); /// /// @ingroup domi_omg @@ -121,7 +122,7 @@ class GE_FUNC_VISIBILITY OpUtils { /// @param [in] K value of K dimension /// @param [out] output Data pointer after conversion. The format is KCHW. /// - static void TransDataHWCK2KCHW(const void *input, int64_t H, int64_t W, int64_t C, int64_t K, void **output); + static void TransDataHWCK2KCHW(const void *input, int64_t h, int64_t w, int64_t c, int64_t k, void **output); /// /// @ingroup domi_omg /// @brief Converts the convolutional weight data from [k, c, h, w] to [h, w, c, k]. @@ -132,15 +133,16 @@ class GE_FUNC_VISIBILITY OpUtils { /// @param [in] W value of W dimension /// @param [out] output Data pointer after conversion. 
The format is HWCK /// - static void TransDataKCHW2HWCK(const void *input, int64_t K, int64_t C, int64_t H, int64_t W, void *output); + static void TransDataKCHW2HWCK(const void *input, int64_t k, int64_t c, int64_t h, int64_t w, void *output); static std::vector GetWeights(const ge::Node &node); static std::vector GetWeights(ge::ConstNodePtr node); static std::vector MutableWeights(const ge::Node &node); static std::vector MutableWeights(const ge::NodePtr node); static Status SetWeights(ge::Node &node, const std::vector &weights); - static Status SetWeights(ge::NodePtr node, const std::vector &weights); - static Status GetShapeDataFromConstTensor(const ConstGeTensorPtr &tensor, DataType type, std::vector &dims); + static Status SetWeights(const ge::NodePtr node, const std::vector &weights); + static Status GetShapeDataFromConstTensor(const ConstGeTensorPtr &tensor, const DataType type, + std::vector &dims); private: static uint32_t GetRealDimCnt(const GeTensorDesc &tensor_desc); diff --git a/inc/framework/common/op/op_parser_util.h b/inc/framework/common/op/op_parser_util.h index 12ce6980..6d33c508 100644 --- a/inc/framework/common/op/op_parser_util.h +++ b/inc/framework/common/op/op_parser_util.h @@ -30,7 +30,7 @@ const uint32_t NORMAL_OUTPUT_NUM = 1; const uint32_t NORMAL_WORKSPACE_NUM = 0; const int32_t NORMAL_1D_DIM_NUM = 1; const int32_t NORMAL_SCALE_DIM_NUM = 0; -const int NORMAL_TENSOR_SIZE = 4; +const int32_t NORMAL_TENSOR_SIZE = 4; const uint32_t DEFAULT_REAL_DIM_CNT = 4; // const @@ -111,8 +111,8 @@ const int32_t ROIPOOLING_DEFAULT_SAMPLING_RATIO = -1; const int32_t DETECTIONOUTPUT_INPUT_SIZE = 3; const int32_t DETECTIONOUTPUT_OUTPUT_SIZE = 2; const int32_t DETECTIONOUTPUT_WORKSPACE_NUM = 1; -const int DETECTIONOUTPUT_CLASS_NUM = 20; // Number of background categories -const int DETECTIONOUTPUT_NUM_CLASSES_DEFAULT_VALUE = 21; +const int32_t DETECTIONOUTPUT_CLASS_NUM = 20; // Number of background categories +const int32_t DETECTIONOUTPUT_NUM_CLASSES_DEFAULT_VALUE = 21; const float DETECTIONOUTPUT_NMS_THRESHOLD_DEFAULT_VALUE = 0.3; const float DETECTIONOUTPUT_CONFIDENCE_THRESHOLD_DEFAULT_VALUE = 0.8; @@ -128,8 +128,8 @@ const float PROPOSAL_SCALE_DIM_0_DEFAULT_VALUE = 8; const float PROPOSAL_SCALE_DIM_1_DEFAULT_VALUE = 16; const float PROPOSAL_SCALE_DIM_2_DEFAULT_VALUE = 32; const float PROPOSAL_MIN_SIZE_DEFAULT_VALUE = 16; -const int PROPOSAL_PRE_NMS_TOPN_DEFAULT_VALUE = 6000; -const int PROPOSAL_POST_NMS_TOPN_DEFAULT_VALUE = 304; +const int32_t PROPOSAL_PRE_NMS_TOPN_DEFAULT_VALUE = 6000; +const int32_t PROPOSAL_POST_NMS_TOPN_DEFAULT_VALUE = 304; const float PROPOSAL_NMS_THRESH_DEFAULT_VALUE = 0.7; const float PROPOSAL_FILTER_THRESH_DEFAULT_VALUE = 0; @@ -150,7 +150,7 @@ const int32_t PERMUTE_WORKSPACE_NUM = 1; const int32_t PERMUTE_ORDER_NUM = 4; // Ssd normalize -const int SSD_NORMALIZE_INPUT_SIZE = 1; +const int32_t SSD_NORMALIZE_INPUT_SIZE = 1; const float SSD_NORMALIZE_EPS_DEFAULT_VALUE = 2e-7; // SsdPriroBox @@ -163,9 +163,9 @@ const double SSD_PRIORBOX_VARIANCE_VALUE = 0.1; const double SSD_PRIORBOX_VARIANCE_SIZE_ONE = 1; const double SSD_PRIORBOX_VARIANCE_SIZE_FOUR = 4; const double SSD_PRIORBOX_ASPECT_RATIO_VALUE = 1.0; -const int SSD_PRIOR_BOX_CODETYPE_CORNER_VALUE = 1; -const int SSD_PRIOR_BOX_CODETYPE_CENTER_SIZE_VALUE = 2; -const int SSD_PRIOR_BOX_CODETYPE_CORNER_SIZE_VALUE = 3; +const int32_t SSD_PRIOR_BOX_CODETYPE_CORNER_VALUE = 1; +const int32_t SSD_PRIOR_BOX_CODETYPE_CENTER_SIZE_VALUE = 2; +const int32_t SSD_PRIOR_BOX_CODETYPE_CORNER_SIZE_VALUE = 3; // Ssd 
DetectionOutput const int32_t SSD_DETECTIONOUTPUT_INPUT_SIZE = 3; @@ -205,8 +205,8 @@ const int32_t CHANNEL_AXPY_INPUT_DIM_SIZE = 4; const int32_t CHANNEL_AXPY_WORKSPACE_NUM = 1; // Psroi pooling -const int PSROI_POOLING_INPUT_COUNT = 2; -const int PSROI_POOLING_WORKSPACE_NUM = 1; +const int32_t PSROI_POOLING_INPUT_COUNT = 2; +const int32_t PSROI_POOLING_WORKSPACE_NUM = 1; // MaxPoolWithArgmax const uint32_t MAX_POOL_WITH_ARGMAX_OUTPUT_NUM = 2; @@ -223,7 +223,7 @@ const int32_t ROIALIGN_DEFAULT_POOLED_W = 1; // Correlation const uint32_t CORRELATION_INPUT_NUM = 2; -const int CORRELATION_WORKSPACE_NUM = 1; +const int32_t CORRELATION_WORKSPACE_NUM = 1; // Detectionpostprocess const int32_t POSTPROCESS_INPUT_SIZE = 4; @@ -394,15 +394,15 @@ const uint32_t ATTENTION_DECODER_WORKSPACE_NUM = 1; const uint32_t ATTENTION_DECODER_INPUT_DECODER_INPUTS = 0; const uint32_t ATTENTION_DECODER_INPUT_DECODER_INITIAL_HIDDEN = 1; -const int ATTENTION_DECODER_ALGO_NORMAL = 0; -const int ATTENTION_DECODER_SYMBOLS = 10000; -const int ATTENTION_DECODER_EMBEDDING_SIZE = 128; -const int ATTENTION_DECODER_ATTENTION_NUM_HIDDEN = 256; -const int ATTENTION_DECODER_DECODER_NUM_HIDDEN = 128; -const int ATTENTION_DECODER_DECODER_NUM_LAYERS = 2; -const int ATTENTION_DECODER_RNN_UNBIDIRECTIONAL = 0; -const int ATTENTION_DECODER_SEQLEN_VALUE = 57; -const int ATTENTION_DECODER_GRU = 3; +const int32_t ATTENTION_DECODER_ALGO_NORMAL = 0; +const int32_t ATTENTION_DECODER_SYMBOLS = 10000; +const int32_t ATTENTION_DECODER_EMBEDDING_SIZE = 128; +const int32_t ATTENTION_DECODER_ATTENTION_NUM_HIDDEN = 256; +const int32_t ATTENTION_DECODER_DECODER_NUM_HIDDEN = 128; +const int32_t ATTENTION_DECODER_DECODER_NUM_LAYERS = 2; +const int32_t ATTENTION_DECODER_RNN_UNBIDIRECTIONAL = 0; +const int32_t ATTENTION_DECODER_SEQLEN_VALUE = 57; +const int32_t ATTENTION_DECODER_GRU = 3; // Logicaland const int32_t LOGICAL_AND_INPUT_NUM = 2; diff --git a/inc/framework/common/string_util.h b/inc/framework/common/string_util.h index 4d808e2e..21f09ffd 100644 --- a/inc/framework/common/string_util.h +++ b/inc/framework/common/string_util.h @@ -45,18 +45,19 @@ class GE_FUNC_VISIBILITY StringUtils { public: static std::string &Ltrim(std::string &s) { #if __cplusplus >= 201103L - (void)s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int c) { return std::isspace(c) == 0; })); + (void)s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int32_t c) { return std::isspace(c) == 0; })); #else - (void)s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun(std::isspace)))); + (void)s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun(std::isspace)))); #endif return s; } // lint -esym(551,*) static std::string &Rtrim(std::string &s) { /*lint !e618*/ #if __cplusplus >= 201103L - (void)s.erase(std::find_if(s.rbegin(), s.rend(), [](int c) { return std::isspace(c) == 0; }).base(), s.end()); + (void)s.erase(std::find_if(s.rbegin(), s.rend(), [](int32_t c) { return std::isspace(c) == 0; }).base(), s.end()); #else - (void)s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun(std::isspace))).base(), s.end()); + (void)s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun(std::isspace))).base(), + s.end()); #endif return s; } diff --git a/inc/framework/common/taskdown_common.h b/inc/framework/common/taskdown_common.h index 1417677c..331bf486 100644 --- a/inc/framework/common/taskdown_common.h +++ b/inc/framework/common/taskdown_common.h @@ -21,7 +21,7 @@ namespace ge { -const int CC_FUSION_OP_MAX = 32; 
+const int32_t CC_FUSION_OP_MAX = 32; typedef enum tagCcStatus { CC_STATUS_SUCCESS = 0, /**< succ */ diff --git a/inc/framework/common/types.h b/inc/framework/common/types.h index 053a3423..e5681ba6 100644 --- a/inc/framework/common/types.h +++ b/inc/framework/common/types.h @@ -318,6 +318,9 @@ REGISTER_OPTYPE_DECLARE(INITDATA, "InitData"); REGISTER_OPTYPE_DECLARE(TRANSSHAPE, "TransShape") REGISTER_OPTYPE_DECLARE(REFIDENTITY, "RefIdentity"); REGISTER_OPTYPE_DECLARE(BITCAST, "Bitcast"); +REGISTER_OPTYPE_DECLARE(GATHERSHAPES, "GatherShapes"); +REGISTER_OPTYPE_DECLARE(FLATTENV2, "FlattenV2"); +REGISTER_OPTYPE_DECLARE(FILECONSTANT, "FileConstant"); // ANN dedicated operator REGISTER_OPTYPE_DECLARE(ANN_MEAN, "AnnMean"); diff --git a/inc/framework/common/util.h b/inc/framework/common/util.h index c854e016..fac7e4ca 100644 --- a/inc/framework/common/util.h +++ b/inc/framework/common/util.h @@ -235,7 +235,7 @@ using google::protobuf::Message; /// @return true success /// @return false fail /// -GE_FUNC_VISIBILITY bool ReadProtoFromArray(const void *data, int size, Message *proto); +GE_FUNC_VISIBILITY bool ReadProtoFromArray(const void *data, int32_t size, Message *proto); /// /// @ingroup domi_proto @@ -264,7 +264,7 @@ GE_FUNC_VISIBILITY extern long GetFileLength(const std::string &input_file); /// @return false fail /// @return true success /// -GE_FUNC_VISIBILITY bool ReadBytesFromBinaryFile(const char *file_name, char **buffer, int &length); +GE_FUNC_VISIBILITY bool ReadBytesFromBinaryFile(const char *const file_name, char **buffer, int32_t &length); GE_FUNC_VISIBILITY bool ReadBytesFromBinaryFile(const char *file_name, std::vector &buffer); @@ -275,7 +275,7 @@ GE_FUNC_VISIBILITY bool ReadBytesFromBinaryFile(const char *file_name, std::vect /// @return 0 success /// @return -1 fail /// -GE_FUNC_VISIBILITY extern int CreateDirectory(const std::string &directory_path); +GE_FUNC_VISIBILITY extern int32_t CreateDirectory(const std::string &directory_path); /// /// @ingroup domi_common @@ -398,10 +398,10 @@ GE_FUNC_VISIBILITY bool CheckOutputPathValid(const std::string &file_path, const /// /// @ingroup domi_common /// @brief Check whether the file path meets the whitelist verification requirements. -/// @param [in] filePath file path +/// @param [in] str file path /// @param [out] result /// -GE_FUNC_VISIBILITY bool ValidateStr(const std::string &filePath, const std::string &mode); +GE_FUNC_VISIBILITY bool ValidateStr(const std::string &str, const std::string &mode); /// /// @ingroup domi_common diff --git a/inc/framework/generator/ge_generator.h b/inc/framework/generator/ge_generator.h index b746dbac..b49fa53b 100644 --- a/inc/framework/generator/ge_generator.h +++ b/inc/framework/generator/ge_generator.h @@ -90,6 +90,10 @@ class GE_FUNC_VISIBILITY GeGenerator { Status BuildSingleOpModel(OpDescPtr &op_desc, const std::vector &inputs, const std::vector &outputs, OpEngineType engine_type, int32_t compile_flag, ModelBufferData &model_buff); + Status BuildSingleOpModel(OpDescPtr &op_desc, const std::vector &inputs, + const std::vector &outputs, OpEngineType engine_type, int32_t compile_flag, + ModelBufferData &model_buff, GraphStage graph_stage, ComputeGraphPtr &compute_graph); + /// /// @ingroup ge /// @brief: Build single Op into model buff. 
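// A hedged sketch of the int32_t-typed helpers declared in util.h above; the ge
// namespace qualification, the include path and the caller-side buffer are
// assumptions for illustration only.
#include <cstdint>
#include <vector>
#include "framework/common/util.h"

bool LoadProtoBlob(const std::vector<char> &blob, google::protobuf::Message *proto) {
  // CreateDirectory now returns int32_t: 0 on success, -1 on failure.
  if (ge::CreateDirectory("./dump") != 0) {
    return false;
  }
  // The size parameter is int32_t rather than int after this change.
  return ge::ReadProtoFromArray(blob.data(), static_cast<int32_t>(blob.size()), proto);
}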
@@ -101,13 +105,19 @@ class GE_FUNC_VISIBILITY GeGenerator { /// @return SUCCESS or FAILED Status BuildSingleOpGraph(OpDescPtr &op_desc, const InOutTensorRef &inputs_outputs, std::string graph_name, Graph &graph, std::vector> &inputs_name_type); + Status BuildOriginalGraphInfo(OpDescPtr &op_desc, const std::vector &inputs, + const std::vector &outputs, const std::string &model_file_name, + bool is_offline, int32_t compile_flag, GraphStage graph_stage, Graph &graph, + ComputeGraphPtr &compute_graph, bool &fuzz_compile_flag, + std::vector> &inputs_name_type); private: Status GenerateModel(const Graph &graph, const std::string &file_name_prefix, const std::vector &inputs, ge::ModelBufferData &model, bool is_offline = true); Status BuildSingleOp(OpDescPtr &op_desc, const std::vector &inputs, const std::vector &outputs, const std::string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, - bool is_offline = true, int32_t compile_flag = 0); + ComputeGraphPtr &compute_graph, bool is_offline = true, int32_t compile_flag = 0, + GraphStage graph_stage = GraphStage::GRAPH_STAGE_RESERVED); bool CheckNoAicore(const ComputeGraphPtr &graph); void RemoveConst(const std::vector &inputs, std::vector &outputs); Status CheckForSingleOp(OpDescPtr &op_desc, const std::vector &inputs, diff --git a/inc/framework/omg/parser/model_parser.h b/inc/framework/omg/parser/model_parser.h index b6ebd8a8..a0415d73 100644 --- a/inc/framework/omg/parser/model_parser.h +++ b/inc/framework/omg/parser/model_parser.h @@ -25,8 +25,6 @@ #include "graph/ge_tensor.h" #include "graph/graph.h" #include "graph/op_desc.h" -#include "graph/operator.h" -#include "graph/range_vistor.h" #include "graph/utils/attr_utils.h" #include "graph/utils/graph_utils.h" #include "graph/utils/op_desc_utils.h" @@ -54,7 +52,7 @@ class GE_FUNC_VISIBILITY ModelParser { * @return SUCCESS * @return Others failed */ - virtual Status Parse(const char *file, ge::Graph &graph) = 0; + virtual domi::Status Parse(const char *file, ge::Graph &graph) = 0; /** * @ingroup domi_omg @@ -66,7 +64,7 @@ class GE_FUNC_VISIBILITY ModelParser { * @return FAILED * @author */ - virtual Status ParseFromMemory(const char *data, uint32_t size, ge::ComputeGraphPtr &graph) = 0; + virtual domi::Status ParseFromMemory(const char *data, uint32_t size, ge::ComputeGraphPtr &graph) = 0; /** * @ingroup domi_omg @@ -78,7 +76,7 @@ class GE_FUNC_VISIBILITY ModelParser { * @return FAILED * @author */ - virtual Status ParseFromMemory(const char *data, uint32_t size, ge::Graph &graph) = 0; + virtual domi::Status ParseFromMemory(const char *data, uint32_t size, ge::Graph &graph) = 0; /** * @ingroup domi_omg @@ -88,7 +86,7 @@ class GE_FUNC_VISIBILITY ModelParser { * @return SUCCESS * @return Others failed */ - virtual Status ParseProto(const google::protobuf::Message *proto, ge::ComputeGraphPtr &graph) = 0; + virtual domi::Status ParseProto(const google::protobuf::Message *proto, ge::ComputeGraphPtr &graph) = 0; /** * @ingroup domi_omg @@ -99,8 +97,8 @@ class GE_FUNC_VISIBILITY ModelParser { * @return SUCCESS * @return Others failed */ - virtual Status ParseProtoWithSubgraph(const google::protobuf::Message *proto, GetGraphCallback callback, - ge::ComputeGraphPtr &graph) = 0; + virtual domi::Status ParseProtoWithSubgraph(const google::protobuf::Message *proto, GetGraphCallback callback, + ge::ComputeGraphPtr &graph) = 0; /** * @ingroup domi_omg * @brief Convert model files to JSON format @@ -109,7 +107,7 @@ class GE_FUNC_VISIBILITY ModelParser { * @return SUCCESS * @return Others 
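// A hedged sketch of driving the extended BuildSingleOpModel overload added above.
// The GeTensor element type of the input/output vectors and the ge namespace
// qualifications are assumptions; GRAPH_STAGE_RESERVED mirrors the default used by
// the private BuildSingleOp declaration.
#include <vector>
#include "framework/generator/ge_generator.h"

ge::Status BuildOneOp(ge::GeGenerator &generator, ge::OpDescPtr &op_desc,
                      const std::vector<ge::GeTensor> &inputs,
                      const std::vector<ge::GeTensor> &outputs,
                      ge::OpEngineType engine_type, ge::ModelBufferData &buff,
                      ge::ComputeGraphPtr &compute_graph) {
  // compile_flag 0 keeps the previous behaviour; the two new trailing parameters
  // expose the graph stage and the intermediate compute graph to the caller.
  return generator.BuildSingleOpModel(op_desc, inputs, outputs, engine_type, 0, buff,
                                      ge::GraphStage::GRAPH_STAGE_RESERVED, compute_graph);
}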
failed */ - virtual Status ToJson(const char *model_file, const char *json_file) { + virtual domi::Status ToJson(const char *model_file, const char *json_file) { return domi::SUCCESS; } @@ -121,7 +119,7 @@ class GE_FUNC_VISIBILITY ModelParser { */ virtual ge::DataType ConvertToGeDataType(const uint32_t type) = 0; - virtual Status ParseAllGraph(const google::protobuf::Message *root_proto, ge::ComputeGraphPtr &root_graph) = 0; + virtual domi::Status ParseAllGraph(const google::protobuf::Message *root_proto, ge::ComputeGraphPtr &root_graph) = 0; /** * @ingroup domi_omg @@ -131,7 +129,7 @@ class GE_FUNC_VISIBILITY ModelParser { * @return SUCCESS * @return Others failed */ - virtual Status ParseProto(const std::string &serialized_proto, ge::ComputeGraphPtr &graph) { + virtual domi::Status ParseProto(const std::string &serialized_proto, ge::ComputeGraphPtr &graph) { return UNSUPPORTED; } @@ -144,8 +142,8 @@ class GE_FUNC_VISIBILITY ModelParser { * @return SUCCESS * @return Others failed */ - virtual Status ParseProtoWithSubgraph(const std::string &serialized_proto, GetGraphCallbackV2 callback, - ge::ComputeGraphPtr &graph) { + virtual domi::Status ParseProtoWithSubgraph(const std::string &serialized_proto, GetGraphCallbackV2 callback, + ge::ComputeGraphPtr &graph) { return UNSUPPORTED; } }; diff --git a/inc/framework/omg/parser/op_parser.h b/inc/framework/omg/parser/op_parser.h index 760b41f5..04731ff3 100644 --- a/inc/framework/omg/parser/op_parser.h +++ b/inc/framework/omg/parser/op_parser.h @@ -50,7 +50,7 @@ class GE_FUNC_VISIBILITY OpParser { * @return SUCCESS * @return FAILED */ - virtual Status ParseParams(const Message *op_src, ge::OpDescPtr &op_desc) = 0; + virtual domi::Status ParseParams(const Message *op_src, ge::OpDescPtr &op_desc) = 0; /** * @ingroup domi_omg @@ -60,7 +60,7 @@ class GE_FUNC_VISIBILITY OpParser { * @return SUCCESS * @return FAILED */ - virtual Status ParseParams(const Message *op_src, ge::Operator &op_dest) = 0; + virtual domi::Status ParseParams(const Message *op_src, ge::Operator &op_dest) = 0; /** * @ingroup domi_omg @@ -70,7 +70,7 @@ class GE_FUNC_VISIBILITY OpParser { * @return SUCCESS * @return FAILED */ - virtual Status ParseWeights(const Message *op_src, ge::NodePtr &node) = 0; + virtual domi::Status ParseWeights(const Message *op_src, ge::NodePtr &node) = 0; /** * @ingroup domi_omg @@ -80,7 +80,7 @@ class GE_FUNC_VISIBILITY OpParser { * @return SUCCESS * @return FAILED */ - virtual Status GetFormat(const Message *op_src, domi::domiTensorFormat_t &format) { + virtual domi::Status GetFormat(const Message *op_src, domi::domiTensorFormat_t &format) { (void)op_src; // Indicates that the op does not provide a value for format format = domi::DOMI_TENSOR_RESERVED; diff --git a/inc/framework/omg/parser/parser_api.h b/inc/framework/omg/parser/parser_api.h index 26c9e051..6840da2b 100644 --- a/inc/framework/omg/parser/parser_api.h +++ b/inc/framework/omg/parser/parser_api.h @@ -17,7 +17,6 @@ #ifndef INC_FRAMEWORK_OMG_PARSER_PARSER_API_H_ #define INC_FRAMEWORK_OMG_PARSER_PARSER_API_H_ -#include #include #include #include "external/ge/ge_api_error_codes.h" diff --git a/inc/framework/omg/parser/parser_factory.h b/inc/framework/omg/parser/parser_factory.h index 5b505d52..7ae286df 100644 --- a/inc/framework/omg/parser/parser_factory.h +++ b/inc/framework/omg/parser/parser_factory.h @@ -63,7 +63,7 @@ class GE_FUNC_VISIBILITY ModelParserFactory { class GE_FUNC_VISIBILITY ModelParserRegisterar { public: - ModelParserRegisterar(const domi::FrameworkType type, 
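// A sketch of a parser override against the interface above, showing the return type
// now spelled as the fully qualified domi::Status. MyModelParser and its trivial body
// are illustrative, and the domi namespace for ModelParser is an assumption.
#include "framework/omg/parser/model_parser.h"

class MyModelParser : public domi::ModelParser {
 public:
  domi::Status Parse(const char *file, ge::Graph &graph) override {
    (void)file;
    (void)graph;
    return domi::SUCCESS;
  }
  // The remaining pure virtuals (ParseFromMemory, ParseProto, ParseAllGraph,
  // ConvertToGeDataType, ...) would be overridden the same way and are elided here,
  // so this sketch is still an abstract class.
};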
MODEL_PARSER_CREATOR_FUN fun) { + ModelParserRegisterar(const domi::FrameworkType type, MODEL_PARSER_CREATOR_FUN const fun) { ModelParserFactory::Instance()->RegisterCreator(type, fun); } ~ModelParserRegisterar() {} @@ -115,7 +115,7 @@ class GE_FUNC_VISIBILITY WeightsParserFactory { class GE_FUNC_VISIBILITY WeightsParserRegisterar { public: - WeightsParserRegisterar(const domi::FrameworkType type, WEIGHTS_PARSER_CREATOR_FUN fun) { + WeightsParserRegisterar(const domi::FrameworkType type, WEIGHTS_PARSER_CREATOR_FUN const fun) { WeightsParserFactory::Instance()->RegisterCreator(type, fun); } ~WeightsParserRegisterar() {} diff --git a/inc/framework/omg/parser/parser_inner_ctx.h b/inc/framework/omg/parser/parser_inner_ctx.h index fd559468..969a94f8 100644 --- a/inc/framework/omg/parser/parser_inner_ctx.h +++ b/inc/framework/omg/parser/parser_inner_ctx.h @@ -21,7 +21,6 @@ #include #include #include -#include #include #include "external/register/register_fmk_types.h" #include "external/register/register_types.h" diff --git a/inc/framework/omg/parser/weights_parser.h b/inc/framework/omg/parser/weights_parser.h index c231fd41..04f09b14 100644 --- a/inc/framework/omg/parser/weights_parser.h +++ b/inc/framework/omg/parser/weights_parser.h @@ -17,13 +17,12 @@ #ifndef INC_FRAMEWORK_OMG_PARSER_WEIGHTS_PARSER_H_ #define INC_FRAMEWORK_OMG_PARSER_WEIGHTS_PARSER_H_ +#include "external/register/register_error_codes.h" #include "graph/graph.h" #include "graph/attr_value.h" #include "graph/compute_graph.h" #include "graph/ge_tensor.h" #include "graph/op_desc.h" -#include "graph/operator.h" -#include "graph/range_vistor.h" #include "graph/utils/attr_utils.h" #include "graph/utils/op_desc_utils.h" #include "graph/utils/tensor_utils.h" diff --git a/metadef b/metadef index fe47d04d..1d99928b 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit fe47d04d75170006fc0d28538dec49a2da426ceb +Subproject commit 1d99928bfcb02e45acc7db73e3ee57304ff1131a diff --git a/third_party/fwkacllib/inc/cce/aicpu_engine.h b/third_party/fwkacllib/inc/cce/aicpu_engine.h deleted file mode 100644 index bc2e415f..00000000 --- a/third_party/fwkacllib/inc/cce/aicpu_engine.h +++ /dev/null @@ -1,63 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
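// A heavily hedged sketch of registering a parser through the registrar above. The
// MODEL_PARSER_CREATOR_FUN signature (a no-argument creator returning
// std::shared_ptr<domi::ModelParser>), the domi::CAFFE framework key and the domi
// namespace of the registrar are all assumptions; the creator is assumed to be
// defined elsewhere next to a concrete parser implementation.
#include <memory>
#include "framework/omg/parser/parser_factory.h"

std::shared_ptr<domi::ModelParser> CreateMyModelParser();  // assumed creator, defined elsewhere

// Constructing the registrar at namespace scope forwards the creator to
// ModelParserFactory::Instance()->RegisterCreator() before main() runs.
static domi::ModelParserRegisterar g_my_model_parser_reg(domi::CAFFE, CreateMyModelParser);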
- */ - -#ifndef AICPU_ENGINE_H__ -#define AICPU_ENGINE_H__ - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -typedef enum { - AE_STATUS_SUCCESS = 0, - AE_STATUS_BAD_PARAM = 1, - AE_STATUS_OPEN_SO_FAILED = 2, - AE_STATUS_GET_KERNEL_NAME_FAILED = 3, - AE_STATUS_INNER_ERROR = 4, - AE_STATUS_KERNEL_API_INNER_ERROR = 5, - AE_STATUS_END_OF_SEQUENCE = 6, - AE_STATUS_DUMP_FAILED = 7, - AE_STATUS_TASK_WAIT = 101, - AE_STATUS_RESERVED -} aeStatus_t; - -/** - * @ingroup aicpu engine - * @brief aeCallInterface: - * a interface to call a function in a op kernfel lib - * @param [in] addr void *, should be STR_KERNEL * format - * @return aeStatus_t - */ -aeStatus_t aeCallInterface(void *addr); - -/** - * @ingroup aicpu engine - * @brief aeBatchLoadKernelSo: - * a interface to load kernel so - * @param [in] loadSoNum load so number - * @param [in] soPaths load so paths - * @param [in] soNames load so names - * @return aeStatus_t - */ -aeStatus_t aeBatchLoadKernelSo(const uint32_t loadSoNum, const char *soPaths[], const char *soNames[]); - -#ifdef __cplusplus -} -#endif - -#endif // AICPU_ENGINE_H__ diff --git a/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h b/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h deleted file mode 100644 index 8c0c1847..00000000 --- a/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h +++ /dev/null @@ -1,56 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef AICPU_ENGINE_STRUCT_H__ -#define AICPU_ENGINE_STRUCT_H__ - -#include "fwk_adpt_struct.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* - The different framwork we adapted for. -*/ -typedef enum { - FMK_KERNEL_TYPE_TF = 0, - FMK_KERNEL_TYPE_CF = 10, - FMK_KERNEL_TYPE_PT = 20, - FMK_KERNEL_TYPE_RESERVED -} FwkkernelType_t; - -#pragma pack(push, 1) -typedef struct { - uint32_t fwkKernelType; // FwkkernelType_t - union { - ::aicpu::FWKAdapter::FWKOperateParam fwk_kernel; - } fwkKernelBase; -} STR_FWK_OP_KERNEL; -#pragma pack(pop) - -#pragma pack(push, 1) -struct SessionInfo { - uint64_t sessionId; - uint64_t kernelId; - bool sessFlag; -}; -#pragma pack(pop) - -#ifdef __cplusplus -} -#endif -#endif // AICPU_ENGINE_STRUCT_H__ diff --git a/third_party/fwkacllib/inc/cce/blas_struct.h b/third_party/fwkacllib/inc/cce/blas_struct.h deleted file mode 100644 index e0bcee4c..00000000 --- a/third_party/fwkacllib/inc/cce/blas_struct.h +++ /dev/null @@ -1,31 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
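// A short sketch of the removed aicpu engine entry points declared above; the .so
// path and name are placeholders, and error handling is reduced to a status check.
#include <cstdint>

int32_t LoadAicpuKernels() {
  const char *so_paths[] = {"./aicpu_kernels/"};    // placeholder search path
  const char *so_names[] = {"libdemo_kernels.so"};  // placeholder library name
  const aeStatus_t ret = aeBatchLoadKernelSo(1U, so_paths, so_names);
  return (ret == AE_STATUS_SUCCESS) ? 0 : -1;
}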
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef CC_BLAS_STRUCT_API__ -#define CC_BLAS_STRUCT_API__ - -#include - -typedef enum { CCBLAS_FILL_MODE_LOWER = 0, CCBLAS_FILL_MODE_UPPER = 1 } ccblasFillMode_t; - -typedef enum { - CCBLAS_OP_N = 0, - CCBLAS_OP_T = 1, -} ccblasOperation_t; - -typedef enum { CCBLAS_DIAG_NON_UNIT = 0, CCBLAS_DIAG_UNIT = 1 } ccblasDiagType_t; - -#endif // CC_BLAS_STRUCT_API__ diff --git a/third_party/fwkacllib/inc/cce/cce.h b/third_party/fwkacllib/inc/cce/cce.h deleted file mode 100644 index 0cd9613a..00000000 --- a/third_party/fwkacllib/inc/cce/cce.h +++ /dev/null @@ -1,101 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef CCE_H__ -#define CCE_H__ - -#include -#include "cce_def.hpp" - -namespace cce { - -/** - * @ingroup cce - * @brief create cc handler - * @param [in|out] handle point of cc handler - * @return ccStatus_t - */ -ccStatus_t ccCreate(ccHandle_t *handle); - -/** - * @ingroup cce - * @brief destroy cc handler - * @param [in] *handle cc handler - * @return ccStatus_t - */ -ccStatus_t ccDestroy(ccHandle_t *handle); - -/** - * @ingroup cce - * @brief bind stream with specified cc handler - * @param [in] handle cc handler - * @param [in] streamId stream - * @return ccStatus_t - */ -ccStatus_t ccSetStream(ccHandle_t handle, rtStream_t streamId); - -/** - * @ingroup cce - * @brief get the stream from cc handler - * @param [in] handle cc handler - * @param [in|out] streamId point of stream - * @return ccStatus_t - */ -ccStatus_t ccGetStream(ccHandle_t handle, rtStream_t *streamId); - -/** - * @ingroup cce - * @brief get the stream from cc handler - * @param [in] dataTypeTransMode mode of data type transform - * @param [in] inputData input data point - * @param [in] inputDataSize input data size - * @param [in|out] outputData output data point - * @param [in] outputDataSize output data size - * @return ccStatus_t - */ -ccStatus_t ccTransDataType(ccDataTypeTransMode_t dataTypeTransMode, const void *inputData, uint32_t inputDataSize, - void *outputData, const uint32_t outputDataSize); -/** - * @ingroup cce - * @brief cce sys init func - */ -void cceSysInit(); - -/** - * @ingroup cce - * @brief cce Log Start up func - */ -void cceLogStartup(); - -/** - * @ingroup cce - * @brief cce Log Shut down func - */ -void cceLogShutdown(); - -/** - * @ingroup cce - * @brief set the profiling on or off - * @param [in] const unsigned char* target: The engine gets it from ENV. Don't need care about it. - * @param const char* job_ctx: identifies profiling job - * @param [in] uint32_t flag: value: 0, on ; 1, off. - * @return ccStatus_t value: 0, success; 1, fail. 
- */ -ccStatus_t CceProfilingConfig(const char *target, const char *job_ctx, uint32_t flag); - -}; // namespace cce - -#endif // CCE_H__ diff --git a/third_party/fwkacllib/inc/cce/cce_def.hpp b/third_party/fwkacllib/inc/cce/cce_def.hpp deleted file mode 100644 index 7b1a1b8a..00000000 --- a/third_party/fwkacllib/inc/cce/cce_def.hpp +++ /dev/null @@ -1,152 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef CCE_DEF_H__ -#define CCE_DEF_H__ - -#include "runtime/rt.h" - -namespace cce { - -/** - * @ingroup cce - * @brief memory configure for fusion - */ -typedef struct TagCceFusionMemCfg { - uint64_t memAddr; /**< memAddr */ - uint32_t memSize; /**< memSize */ - uint32_t addrChangeFlag; /**< op data addr change flag. value:0,valid;1,not valid */ - uint32_t poolFlag; /**< mempool flag : value:0,is valid; value: 1, not valid */ - TagCceFusionMemCfg() { - memAddr = 0; - memSize = 0; - addrChangeFlag = 0; - poolFlag = 0; - } -} CceFusionMemCfg_t; -/** - * @ingroup cce - * @brief return value - */ -typedef enum tagCcStatus { - CC_STATUS_SUCCESS = 0, /**< succ */ - CC_STATUS_NOT_INITIALIZED = 1, /**< not init */ - CC_STATUS_ALLOC_FAILED = 2, /**< alloc mem failed */ - CC_STATUS_BAD_PARAM = 3, /**< para check failed */ - CC_STATUS_INTERNAL_ERROR = 4, /**< internal error */ - CC_STATUS_KERNEL_ERROR = 5, /**< kernel error */ - CC_STATUS_RUNTIME_ERROR = 6, /**< runtime error */ - CC_STATUS_NOT_SUPPORTED = 7, /**< unsupport error */ - CC_STATUS_INVALID_VALUE = 7, /**< invalid value error for blas*/ - CC_STATUS_RESERVED /**< just for check */ -} ccStatus_t; - -/** - * @ingroup cce - * @brief original data type - */ -typedef enum tagCcDataType { - CC_DATA_FLOAT = 0, /**< float type */ - CC_DATA_HALF, /**< fp16 type */ - CC_DATA_INT8, /**< int8 type */ - CC_DATA_INT32, /**< int32 type */ - CC_DATA_UINT8, /**< uint8 type */ - CC_DATA_HALF_UINT16_PROPOSAL, /** -#include - -#define ERROR_CODE() __catch_error_code -#define ERROR_LINE_NO() __catch_error_line_no -#define ERROR_PROC() __catch_error_line_no = __LINE__; - -#define PROC \ - uint32_t __catch_error_code = 0x7FFFFFCC; \ - uint32_t __catch_error_line_no = 0xFFFFFFFF; \ - { -#define END_PROC \ - } \ - __tabErrorCode: -#define THROW(errcode) \ - { \ - __catch_error_code = (errcode); \ - ERROR_PROC(); \ - goto __tabErrorCode; \ - } -#define EXEC(func) \ - { \ - if (0 != (__catch_error_code = (func))) THROW(__catch_error_code) \ - } -#define EXEC_EX1(func, error_code) \ - { \ - if (0 != (func)) THROW(error_code) \ - } -#define EXEC_EX(func, succRet, error_code) \ - { \ - if (succRet != (__catch_error_code = (func))) THROW(error_code) \ - } -#define ASSERT_EXEC(func, succRet) \ - { \ - if (succRet != (__catch_error_code = (func))) /*GO_ASSERT_FALSE();*/ \ - THROW(__catch_error_code) \ - } \ - } -#define NEW_ERROR_EXEC(errcode, func, succRet) \ - { \ - if (succRet != (func)) { \ - THROW(errcode) \ - } \ - } -#define JUDGE(errcode, expr) \ - { \ - if (!(expr)) { \ - 
THROW(errcode) \ - } \ - } -#define ASSERT_JUDGE(errcode, expr) \ - { \ - if (!(expr)) { /*GO_ASSERT_FALSE();*/ \ - THROW(errcode) \ - } \ - } -#define JUDGE_FALSE(errcode, expr) \ - { \ - if (expr) { \ - THROW(errcode) \ - } \ - } -#define JUDGE_CONTINUE(expr) \ - { \ - if (expr) { \ - continue; \ - } \ - } -#define CATCH_ERROR(errcode) if (__catch_error_code == (errcode)) { // ERROR_LOG(); -#define CATCH_ALL_ERROR { -#define END_CATCH_ERROR } -#define FINAL \ - __tabFinal: -#define END_FINAL /*GO_ASSERT_FALSE()*/ ; -#define GOTO_FINAL() goto __tabFinal; -#endif // CATCH_HPP_ diff --git a/third_party/fwkacllib/inc/cce/compiler_stub.h b/third_party/fwkacllib/inc/cce/compiler_stub.h deleted file mode 100644 index 00ea467e..00000000 --- a/third_party/fwkacllib/inc/cce/compiler_stub.h +++ /dev/null @@ -1,36 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef COMPILER_STUB_H__ -#define COMPILER_STUB_H__ - -namespace cce { - -/** - * @ingroup cce - * @brief compiler stub init func - */ -bool compilerStubInit(); - -/** - * @ingroup cce - * @brief compiler stub free func - */ -bool compilerStubFree(); - -}; // namespace cce - -#endif // COMPILER_STUB_H__ diff --git a/third_party/fwkacllib/inc/cce/customize.h b/third_party/fwkacllib/inc/cce/customize.h deleted file mode 100644 index 7dd97af1..00000000 --- a/third_party/fwkacllib/inc/cce/customize.h +++ /dev/null @@ -1,60 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
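// A sketch of how the removed PROC/EXEC/THROW/CATCH_ERROR macros above compose;
// StepA and StepB are placeholder helpers that return 0 on success and a non-zero
// error code on failure.
#include <cstdint>

uint32_t StepA();  // placeholder: returns 0 on success
uint32_t StepB();  // placeholder: returns 0 on success

uint32_t RunPipeline() {
  PROC
    EXEC(StepA())   // a non-zero return records the code and jumps to the error label
    EXEC(StepB())
  END_PROC
  CATCH_ERROR(2U)   // handle one specific error code, then skip to the final label
    GOTO_FINAL()
  END_CATCH_ERROR
  FINAL
  END_FINAL
  return ERROR_CODE();  // 0 when both steps succeeded
}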
- */ - -#ifndef CC_CUSTOMIZE_API__ -#define CC_CUSTOMIZE_API__ - -#include - -#define CC_DEVICE_DIM_MAX 8 -typedef enum tagOpTensorFormat -{ - OP_TENSOR_FORMAT_NC1HWC0 = 0, - OP_TENSOR_FORMAT_ND, - OP_TENSOR_FORMAT_RESERVED, - -} opTensorFormat_t; - - -typedef enum tagOpDataType -{ - OP_DATA_FLOAT = 0, /**< float type */ - OP_DATA_HALF, /**< fp16 type */ - OP_DATA_INT8, /**< int8 type */ - OP_DATA_INT32, /**< int32 type */ - OP_DATA_UINT8, /**< uint8 type */ - OP_DATA_HALF_UINT16_PROPOSAL, /**dimCnt, xDesc->dimCnt) - * @param [in] num the number of outputs - * @param [in] beta scaling factors - * @param [in] yDescArr descriptors of output tensors - * @param [in|out] yArr output data array in device memory - * @return ccStatus_t - */ -ccStatus_t ccSplitForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, - int32_t axis, uint32_t num, const void *beta, const ccTensorDescriptor_t yDescArr[], - void *yArr[]); - -/** - * @ingroup dnn - * @brief get the output dimensions info of split - * @param [in] xDesc descriptor of input tensor - * @param [in] axis the dimension along which to split. Must be in the range [-xDesc->dimCnt, xDesc->dimCnt) - * @param [in] num the number of outputs - * @param [in] sizes Optional, used to specify the sizes of each output tensor along split dim. The tensor x would - * be split evenly along split dim if sizes is NULL - * @param [in|out] nArr point to the first element of batch sizes - * @param [in|out] cArr point to the first element of channels - * @param [in|out] hArr point to the first element of heights of feature map - * @param [in|out] wArr point to the first element of widths of feature map - * @return ccStatus_t - */ -ccStatus_t ccGetSplitForwardOutputDim(const ccTensorDescriptor_t xDesc, int32_t axis, uint32_t num, - const uint32_t sizes[], uint32_t nArr[], uint32_t cArr[], uint32_t hArr[], - uint32_t wArr[]); - -/** - * @ingroup dnn - * @brief Get split output shape(s). - * @param [in] xDesc input tensor, support ND and NC1HWC0 - * @param [in] axis split axis, negtive axis will increased by dimCnt once time. - * @param [in] num splited nums. - * @param [in] sizes splited dim size on axis. if NULL was set, The input will be divided into num equally. - * @param [output] dimCnt splited dimCnt array. One to one correspondence with the splited output. - * @param [output] dim array of splited dim array. One to one correspondence with the splited output. 
- * @param [in| dimlen length of dim(Pass in the length of the entire space pointed to by dim, - not just the length of the dim array, because dim is a level 2 array - dimlen = lengthof dim[][], not just lengthof dim[]) - * @return ccStatus_t - */ -ccStatus_t ccGetSplitForwardOutputDim(const ccTensorDescriptor_t xDesc, int32_t axis, uint32_t num, - const uint32_t sizes[], int32_t *dimCnt, int32_t *dim[], int32_t dimLen); - -/** - * @ingroup dnn - * @brief create weight compress info - * @param [in|out] compressInfo point to CompressInfo - * @return ccStatus_t - */ -ccStatus_t ccCreateWeightCompressInfo(ccWeightCompressInfo_t **compressInfo); - -/** - * @ingroup dnn - * @brief destory weight compress info - * @param [in] *compressInfo point to CompressInfo - * @return ccStatus_t - */ -ccStatus_t ccDestroyWeightCompressInfo(ccWeightCompressInfo_t **compressInfo); - -/** - * @ingroup dnn - * @brief create compress table - * @param [in|out] compressTab point to weight compress table - * @return ccStatus_t - */ -ccStatus_t ccCreateWeightCompressTab(ccWeightCompressTab_t **compressTab); - -/** - * @ingroup dnn - * @brief destory compress table - * @param [in] compressTab point to weight compress table - * @return ccStatus_t - */ -ccStatus_t ccDestroyWeightCompressTab(ccWeightCompressTab_t **compressTab); - -/** - * @ingroup dnn - * @brief get fc compress info - * @param [in] xDesc descriptor of input tensor - * @param [in] wDesc descriptor of weight tensor - * @param [in] biasDesc descriptor of bias tensor - * @param [in] dataTypeTransmode mode of data type transform - * @param [in] weightCompressInfo compress info, compute based on tiling method - * @param [in|out] outputSize output data size in byte - * @param [in|out] infoTabSize compress info table - * @return ccStatus_t - */ -ccStatus_t ccGetCompressedFcWeightInfo(const ccTensorDescriptor_t xDesc, const ccFilterDescriptor_t wDesc, - const ccTensorDescriptor_t biasDesc, ccDataTypeTransMode_t dataTypeTransmode, - ccWeightCompressInfo_t *weightCompressInfo, uint32_t *outputSize, - uint32_t *infoTabSize); -/** - * @ingroup dnn - * @brief compress fc - * @param [in] wDesc descriptor of weight tensor - * @param [in] w filter data in device memory - * @param [in] weightCompressInfo compress info, compute based on tiling method - * @param [in] dataTypeTransmode mode of data type transform - * @param [in|out] y output data in device memory - * @param [in] ySize transformed data size in byte - * @param [in|out] yCompressedSize compressed output data size in byte - * @param [in|out] infoTab compressed info table - * @param [in] infoTabSize compressed info table size in byte - * @return ccStatus_t - */ -ccStatus_t ccCompressWeight(const ccFilterDescriptor_t wDesc, const void *w, - const ccWeightCompressInfo_t *weightCompressInfo, ccDataTypeTransMode_t dataTypeTransmode, - ccFilterDescriptor_t yDesc, void *y, uint32_t ySize, uint32_t *yCompressedSize, - void *infoTab, uint32_t infoTabSize); - -/** - * @ingroup dnn - * @brief restore compressed fc data - * @param [in] x input data in device memory - * @param [in] xSizeInBytes input compressed weight data size in byte - * @param [in|out] y output data in device memory - * @param [in] ySizeInBytes output data size in byte - * @return ccStatus_t - */ -ccStatus_t ccRestoreCompressedWeight(const void *x, uint32_t xSizeInBytes, void *y, uint32_t ySizeInBytes, - rtMemcpyKind_t kind); - -/** - * @ingroup dnn - * @brief create quantize parameters struct - * @param [in|out] quantizeInfo descriptor of quantize 
parameters - * @return ccStatus_t - */ -ccStatus_t ccCreateQuantizeInfoTab(ccQuantizeDescriptor_t *quantizeInfo); - -/** - * @ingroup dnn - * @brief destroy quantize parameters struct - * @param [in] quantizeInfo descriptor of quantize parameters - * @return ccStatus_t - */ -ccStatus_t ccDestoryQuantizeInfoTab(ccQuantizeDescriptor_t *quantizeInfo); - -/** - * @ingroup dnn - * @brief set quantize parameters - * @param [in] quantizeInfo descriptor of quantize parameters - * @param [in] scaleValMode enmu type for quantize scale value type (normal or sqrt) - * @param [in] scale quantize scale value - * @param [in] offset quantize offset(when quantize algorithm is half offset or full offset,this should be - * configed) - * @param [in] offsetPad padding value for load3d (only for half offset or full offset) - * @return ccStatus_t - */ -ccStatus_t ccSetQuantizeFactors(ccQuantizeDescriptor_t quantizeInfo, ccScaleValueMode_t scaleValMode, - const uint16_t *scale, const uint16_t *offset, const uint8_t *offsetPad); - -/** - * @ingroup dnn - * @brief set Requantize parameters - * @param [in] quantizeInfo descriptor of quantize parameters - * @param [in] scaleValMode enmu type for requantize scale value type (normal or sqrt) - * @param [in] scale quantize scale value - * @param [in] offset quantize offset(when quantize algorithm is half offset or full offset,this should be - * configed) - * @param [in] offsetw offset for filter (only config for full offset quantize) - * @return ccStatus_t - */ -ccStatus_t ccSetReQuantizeFactors(ccQuantizeDescriptor_t quantizeInfo, ccScaleValueMode_t scaleValMode, - const uint16_t *scaleRq, const uint16_t *nextLayerOffset, const int32_t *offsetw); - -/** - * @ingroup dnn - * @brief set Dequantize parameters - * @param [in] quantizeInfo descriptor of quantize parameters - * @param [in] scaleValMode enmu type for dequantize scale value type (normal or sqrt) - * @param [in] scaleDq quantize scale value - * @param [in] offsetw offset for filter (only config for full offset quantize) - * @return ccStatus_t - */ -ccStatus_t ccSetDeQuantizeFactors(ccQuantizeDescriptor_t quantizeInfo, ccScaleValueMode_t scaleValMode, - const uint16_t *scaleDq, const int32_t *offsetw); - -/** - * @ingroup dnn - * @brief set convolution desciptor's quantize parameters - * @param [in] convDesc convolution descriptor - * @param [in] quantizeInfo descriptor of quantize parameters - * @return ccStatus_t - */ -ccStatus_t ccSetConvolutionQuantizeInfo(ccConvolutionDescriptor_t convDesc, const ccQuantizeDescriptor_t QuantizeInfo); - -/** - * @ingroup dnn - * @brief set convolution desciptor's all offset quantize parameters - * @param [in] convDesc convolution descriptor - * @param [in] offsetw descriptor of quantize parameters - * @param [in] scaleReq descriptor of quantize parameters - * @param [in] offset_d_next descriptor of quantize parameters - * @return ccStatus_t - */ -ccStatus_t ccSetAllOffsetQuantizeFactors(ccQuantizeDescriptor_t quantizeInfo, const uint8_t *offsetW, - const uint8_t *offsetD, const uint16_t *scaleReq, const uint16_t *offsetDNext); - -/** - * @ingroup dnn - * @brief set full connection desciptor's quantize parameters - * @param [in] fcDesc full connection descriptor - * @param [in] quantizeInfo descriptor of quantize parameters - * @return ccStatus_t - */ -ccStatus_t ccSetFullConnectionQuantizeInfo(ccFullConnectionDescriptor_t fcDesc, - const ccQuantizeDescriptor_t QuantizeInfo); - -/** - * @ingroup dnn - * @brief set pooling desciptor's quantize parameters - * @param [in] 
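// A hedged sketch of the removed quantize-configuration flow declared above: create
// the info table, set the factors, attach it to a convolution descriptor. The cce
// namespace, the pointer-like nature of ccQuantizeDescriptor_t, the caller-supplied
// scale/offset values and passing nullptr for offsetPad (only needed for half/full
// offset modes) are all assumptions.
ccStatus_t ConfigureConvQuant(ccConvolutionDescriptor_t convDesc, ccScaleValueMode_t mode,
                              const uint16_t *scale, const uint16_t *offset) {
  ccQuantizeDescriptor_t quant_info;
  ccStatus_t ret = ccCreateQuantizeInfoTab(&quant_info);
  if (ret != CC_STATUS_SUCCESS) {
    return ret;
  }
  ret = ccSetQuantizeFactors(quant_info, mode, scale, offset, nullptr);
  if (ret == CC_STATUS_SUCCESS) {
    ret = ccSetConvolutionQuantizeInfo(convDesc, quant_info);
  }
  // quant_info is assumed to stay alive while convDesc uses it; release it later
  // with ccDestoryQuantizeInfoTab(&quant_info).
  return ret;
}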
poolingDesc pooling descriptor - * @param [in] quantizeInfo descriptor of quantize parameters - * @return ccStatus_t - */ -ccStatus_t ccSetPoolingQuantizeInfo(ccPoolingDescriptor_t poolingDesc, const ccQuantizeDescriptor_t QuantizeInfo); - -/** - * @ingroup dnn - * @brief set full connection desciptor's info table - * @param [in] fcDesc full connection descriptor - * @param [in] infoTabSize table size - * @param [in] infoTab pointer to info table - * @return ccStatus_t - */ -ccStatus_t ccSetFullConnectionDescriptor(ccFullConnectionDescriptor_t fcDesc, uint32_t infoTabSize, const void *infoTab, - ccFullConnectFwdAlgo_t algo = CC_FULLCONNECT_FWD_ALGO_HALF); - -/** - * @ingroup dnn - * @brief set full connection desciptor's relu flag - * @param [in] fcDesc full connection descriptor - * @param [in] opType operation type for append at convolution operation - * @param [in] opDesc operation descritpor for the opType - * @return ccStatus_t - */ -ccStatus_t ccFullConnectionAppendOp(ccFullConnectionDescriptor_t fcDesc, tagCcOpType opType, const void *opDesc); - -/** - * @ingroup dnn - * @brief check aipp basic info - * @param [in] inputFormat format of input image - * @param [in] loadStartPosH vertical start position in source image - * @param [in] loadStartPosW horizontal start position in source image - * @param [in] srcImageSizeH vertical size of source image - * @param [in] srcImageSizeW horizontal size of source image - * @param [in] cpaddingValue C direction padding value - * @param [in] cscSwitch csc enable or not - * @param [in] rbuvSwapSwitch swap R/U and B/V position of the image - * @param [in] axSwapSwitch swap RGBA->ARGB, YUVA->AYUV - * @param [in] singleLineMode when set this bit to 1, only read 1 line. Under this case, vertical size configuration is - * not useful. 
- * @return ccStatus_t - */ -ccStatus_t ccCheckConvolutionAippCommInfo(ccAippInputFormat_t inputFormat, int32_t loadStartPosW, int32_t loadStartPosH, - int32_t srcImageSizeW, int32_t srcImageSizeH, float cpaddingValue, - bool cscSwitch, bool rbuvSwapSwitch, bool axSwapSwitch, bool singleLineMode); - -/** - * @ingroup dnn - * @brief check aipp dtc info - * @param [in] dtcPixelMeanChnx Mean value for YUV or RGB data channel x - * @param [in] dtcPixelMinChnx Min value for YUV or RGB data channel x - * @param [in] dtcPixelVarReciChnx Reciprocal of variance or (max-min) for YUV or RGB data channel x - * @return ccStatus_t - */ -ccStatus_t ccCheckConvolutionAippDtcInfo(int32_t dtcPixelMeanChn0, int32_t dtcPixelMeanChn1, int32_t dtcPixelMeanChn2, - float dtcPixelMinChn0, float dtcPixelMinChn1, float dtcPixelMinChn2, - float dtcPixelVarReciChn0, float dtcPixelVarReciChn1, - float dtcPixelVarReciChn2); - -/** - * @ingroup dnn - * @brief check aipp pad info - * @param [in] paddingMode padding mode - * @param [in] leftPaddingSize left hblank/padding size - * @param [in] rightPaddingSize right hblank/padding size - * @param [in] topPaddingSize top padding size - * @param [in] bottomPaddingSize bottom padding size - * @return ccStatus_t - */ -ccStatus_t ccCheckConvolutionAippPadInfo(ccAippPaddingMode_t paddingMode, int32_t leftPaddingSize, - int32_t rightPaddingSize, int32_t topPaddingSize, int32_t bottomPaddingSize); - -/** - * @ingroup dnn - * @brief check aipp csc info - * @param [in] cscMatrixRmCn 3x3 CSC matrix for YUV to RGB or RGB to YUV, element of row m and column n - * @param [in] cscOutputBiasm output Bias for RGB to YUV, element of row m - * @param [in] cscInputBiasm input Bias for YUV to RGB, element of row m - * @return ccStatus_t - */ -ccStatus_t ccCheckConvolutionAippCscInfo(int32_t cscMatrixR0C0, int32_t cscMatrixR0C1, int32_t cscMatrixR0C2, - int32_t cscMatrixR1C0, int32_t cscMatrixR1C1, int32_t cscMatrixR1C2, - int32_t cscMatrixR2C0, int32_t cscMatrixR2C1, int32_t cscMatrixR2C2, - int32_t cscOutputBias0, int32_t cscOutputBias1, int32_t cscOutputBias2, - int32_t cscInputBias0, int32_t cscInputBias1, int32_t cscInputBias2); - -/** - * @ingroup dnn - * @brief check aipp scf info - * @param [in] scfSwitch scaling enable or not - * @param [in] scfInputW input width of scaling - * @param [in] scfInputH input height of scaling - * @param [in] scfOutputW output width of scaling - * @param [in] scfOutputH output height of scaling - * @return ccStatus_t - */ -ccStatus_t ccCheckConvolutionAippScfInfo(bool scfSwitch, int32_t scfInputW, int32_t scfInputH, int32_t scfOutputW, - int32_t scfOutputH); - -/** - * @ingroup dnn - * @brief check aipp param - * @param [in] convDesc descriptor of conv operator - * @param [in] xDesc input tensor info - * @param [in] yDesc output tensor info - * @return ccStatus_t - */ -ccStatus_t ccCheckConvFwdAippParam(const ccConvolutionDescriptor_t convDesc, const ccTensorDescriptor_t xDesc, - const ccTensorDescriptor_t yDesc); - -/** - * @ingroup dnn - * @brief init aipp basic info - * @param [in|out] convDesc descriptor of conv operator - * @param [in] inputFormat format of input image - * @param [in] loadStartPosH vertical start position in source image - * @param [in] loadStartPosW horizontal start position in source image - * @param [in] srcImageSizeH vertical size of source image - * @param [in] srcImageSizeW horizontal size of source image - * @param [in] cpaddingValue C direction padding value - * @param [in] cscSwitch csc enable or not - * @param [in] 
rbuvSwapSwitch swap R/U and B/V position of the image - * @param [in] axSwapSwitch swap RGBA->ARGB, YUVA->AYUV - * @param [in] singleLineMode when set this bit to 1, only read 1 line. Under this case, vertical size configuration is - * not useful. - * @return ccStatus_t - */ -ccStatus_t ccSetConvolutionAippCommInfo(ccConvolutionDescriptor_t convDesc, ccAippInputFormat_t inputFormat, - int32_t loadStartPosW, int32_t loadStartPosH, int32_t srcImageSizeW, - int32_t srcImageSizeH, float cpaddingValue, bool cscSwitch, bool rbuvSwapSwitch, - bool axSwapSwitch, bool singleLineMode); -/** - * @ingroup dnn - * @brief init aipp dtc info - * @param [in|out] convDesc descriptor of conv operator - * @param [in] dtcPixelMeanChnx Mean value for YUV or RGB data channel x - * @param [in] dtcPixelMinChnx Min value for YUV or RGB data channel x - * @param [in] dtcPixelVarReciChnx Reciprocal of variance or (max-min) for YUV or RGB data channel x - * @return ccStatus_t - */ -ccStatus_t ccSetConvolutionAippDtcInfo(ccConvolutionDescriptor_t convDesc, int32_t dtcPixelMeanChn0, - int32_t dtcPixelMeanChn1, int32_t dtcPixelMeanChn2, float dtcPixelMinChn0, - float dtcPixelMinChn1, float dtcPixelMinChn2, float dtcPixelVarReciChn0, - float dtcPixelVarReciChn1, float dtcPixelVarReciChn2); -/** - * @ingroup dnn - * @brief init aipp pad info - * @param [in|out] convDesc descriptor of conv operator - * @param [in] paddingMode padding mode - * @param [in] leftPaddingSize left hblank/padding size - * @param [in] rightPaddingSize right hblank/padding size - * @param [in] topPaddingSize top padding size - * @param [in] bottomPaddingSize bottom padding size - * @return ccStatus_t - */ -ccStatus_t ccSetConvolutionAippPadInfo(ccConvolutionDescriptor_t convDesc, ccAippPaddingMode_t paddingMode, - int32_t leftPaddingSize, int32_t rightPaddingSize, int32_t topPaddingSize, - int32_t bottomPaddingSize); - -/** - * @ingroup dnn - * @brief init aipp csc info - * @param [in|out] convDesc descriptor of conv operator - * @param [in] cscMatrixRmCn 3x3 CSC matrix for YUV to RGB or RGB to YUV, element of row m and column n - * @param [in] cscOutputBiasm output Bias for RGB to YUV, element of row m - * @param [in] cscInputBiasm input Bias for YUV to RGB, element of row m - * @return ccStatus_t - */ -ccStatus_t ccSetConvolutionAippCscInfo(ccConvolutionDescriptor_t convDesc, int32_t cscMatrixR0C0, int32_t cscMatrixR0C1, - int32_t cscMatrixR0C2, int32_t cscMatrixR1C0, int32_t cscMatrixR1C1, - int32_t cscMatrixR1C2, int32_t cscMatrixR2C0, int32_t cscMatrixR2C1, - int32_t cscMatrixR2C2, int32_t cscOutputBias0, int32_t cscOutputBias1, - int32_t cscOutputBias2, int32_t cscInputBias0, int32_t cscInputBias1, - int32_t cscInputBias2); - -/** - * @ingroup dnn - * @brief init aipp scf info - * @param [in|out] convDesc descriptor of conv operator - * @param [in] scfSwitch scaling enable or not - * @param [in] scfInputW input width of scaling - * @param [in] scfInputH input height of scaling - * @param [in] scfOutputW output width of scaling - * @param [in] scfOutputH output height of scaling - * @return ccStatus_t - */ -ccStatus_t ccSetConvolutionAippScfInfo(ccConvolutionDescriptor_t convDesc, bool scfSwitch, int32_t scfInputW, - int32_t scfInputH, int32_t scfOutputW, int32_t scfOutputH); - -/** - * @ingroup dnn - * @brief set dynamic aipp parameter address and enflag info - * @param [in|out] convDesc descriptor of conv operator - * @param [in] dyncParaAddr aipp parameter address - * @param [in] dyncAippFlag flag to show whether to use dynamic aipp - * 
@return ccStatus_t - */ -ccStatus_t ccSetConvolutionAippDyncParaAddr(ccConvolutionDescriptor_t convDesc, const void *dyncParaAddr, - bool dyncAippFlag, bool rotationFlag = false); - -/** - * @ingroup dnn - * @brief check dynamic aipp parameter - * @param [in] dyncParaAddr aipp parameter address - * @param [in] dataLength parameter lenght - * @param [in] convolutionDimW convDimW - * @param [in] convolutionDimH convDimH - * @return ccStatus_t - */ -ccStatus_t ccCheckDynamicAippParam(const void *dynamicParamAddr, uint32_t dataLength, int64_t convolutionDimW, - int64_t convolutionDimH); - -/*** @ingroup dnn - * @brief trans mean and var - * @param [in|out] mean' = bnScale/sqrt(var) - * @param [in|out] var' = -bnScale * mean / sqrt(var) + bnBias - * @return ccStatus_t - */ - -ccStatus_t ccTransBatchnormMeanAndVar(void *mean, void *var, const ccTensorDescriptor_t bnScaleBiasMeanVarDesc, - const void *alpha, const void *beta, void *bnScale, void *bnBias, double epsilon); - -/** - * @ingroup dnn - * @brief init deconvolution adj or targetShape info. - * @param [in] convDesc conv descriptor. - * @param [in] adjH, adjust H output. - * @param [in] adjW, adjust W output. - * @param [in] targetShape, values of output shape, if this pointer was set, ignore adj. - * @return ccStatus_t - */ -ccStatus_t ccSetDeconvolutionOutShapeInfo(ccConvolutionDescriptor_t convDesc, uint32_t adjSize, const uint32_t *adj, - uint32_t targetShapeSize, const uint32_t *targetShape); - -/** - * @ingroup dnn - * @brief gather elements according to the indices. - * @param [in] alpha reserved. - * @param [in] xDesc description of the tensor from which to gather elements. - * @param [in] x data point of the tensor from which to gather elements. - * @param [in] indicesDesc description of the tensor of indices. - * @param [in] indices data point of the tensor of indices. - * @param [in] beta reserved. - * @param [in] outputDesc description of the output tensor. - * @param [output] output data point of the output tensor. - * @return ccStatus_t - */ -ccStatus_t ccGatherNdForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, - const ccTensorDescriptor_t indicesDesc, const void *indices, const void *beta, - const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - * @brief get output shape of gather_nd. - * @param [in] xDesc description of the tensor from which to gather elements. - * @param [in] indicesDesc description of the tensor of indices. - * @param [output] n dim-size of n-dim. - * @param [output] c dim-size of c-dim. - * @param [output] h dim-size of h-dim. - * @param [output] w dim-size of w-dim. - * @param [output] realDimCnt real dim. - * @return ccStatus_t - */ -ccStatus_t ccGetGatherNdOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t indicesDesc, int32_t *n, - int32_t *c, int32_t *h, int32_t *w, int32_t *realDimCnt); -/** - * @ingroup dnn - * @brief get output shape of realdiv. - * @param [in] xDesc description of the left operator tensor. - * @param [in] yDesc description of the right operator tensor. - * @param [output] dimCnt dim nums. - * @param [output] dim dim size. - * @param [in| dimlen length of dim - * @return ccStatus_t - */ -ccStatus_t ccGetGatherNdOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t indicesDesc, - int32_t *dimCnt, int32_t *dim, int32_t dimLen); -/** - * @ingroup dnn - * @brief tile tensor by multiples. - * @param [in] alpha reserved. 
- * @param [in] xDesc description of the tensor which to be tiled. - * @param [in] x data point of the tensor which to be tiled. - * @param [in] multiples tile coefficient of each dim. - * @param [in] beta reserved. - * @param [in] outputDesc description of the output tensor. - * @param [output] output data point of the output tensor. - * @return ccStatus_t - */ -ccStatus_t ccTileForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, - const ccIntArray_t *multiples, const void *beta, const ccTensorDescriptor_t outputDesc, - void *output); - -/** - * @ingroup dnn - * @brief get output shape of tile. - * @param [in] xDesc description of the dividend tensor. - * @param [in] multiples multiples of each dim. - * @param [in|out] dimCnt [point to the output dimCnt] - * @param [in|out] dim [arrays to save dims] - * @param [in| dimlen length of dim - * @return ccStatus_t - */ -ccStatus_t ccGetTileOutputDim(const ccTensorDescriptor_t xDesc, const ccIntArray_t *multiples, int32_t *dimCnt, - int32_t dim[], int32_t dimLen); - -/** - * @ingroup dnn - * @brief get output shape of tile. - * @param [in] xDesc description of the dividend tensor. - * @param [in] multiples multiples of each dim. - * @param [output] n dim-size of n-dim. - * @param [output] c dim-size of c-dim. - * @param [output] h dim-size of h-dim. - * @param [output] w dim-size of w-dim. - * @param [output] realDimCnt real dim. - * @return ccStatus_t - */ -ccStatus_t ccGetTileOutputDim(const ccTensorDescriptor_t xDesc, - // const ccIntArrayDescriptor_t multiples, - const ccIntArray_t *multiples, int32_t *n, int32_t *c, int32_t *h, int32_t *w, - int32_t *realDimCnt); -/** - * @ingroup dnn - * @brief get output shape of realdiv. - * @param [in] xDesc description of the left operator tensor. - * @param [in] yDesc description of the right operator tensor. - * @param [output] dimCnt dim nums. - * @param [output] dim dim size. - * @param [in| dimlen length of dim - * @return ccStatus_t - */ -ccStatus_t ccGetRealdivOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt, - int32_t *dim, int32_t dimLen); - -/** - * @ingroup dnn - * @brief realdiv between two tensors. - * @param [in] alpha reserved. - * @param [in] xDesc description of the dividend tensor. - * @param [in] x data point of the dividend tensor. - * @param [in] yDesc description of the divisor tensor. - * @param [in] y data point of the divisor tensor. - * @param [in] beta reserved. - * @param [in] outputDesc description of the output tensor. - * @param [output] output data point of the output tensor. - * @return ccStatus_t - */ -ccStatus_t ccRealdivForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, - const ccTensorDescriptor_t yDesc, const void *y, const void *beta, - const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - * @brief get output shape of realdiv. - * @param [in] xDesc description of the dividend tensor. - * @param [in] yDesc description of the divisor tensor. - * @param [output] n dim-size of n-dim. - * @param [output] c dim-size of c-dim. - * @param [output] h dim-size of h-dim. - * @param [output] w dim-size of w-dim. - * @param [output] realDimCnt real dim. - * @return ccStatus_t - */ -ccStatus_t ccGetRealdivOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *n, - int32_t *c, int32_t *h, int32_t *w, int32_t *realDimCnt); - -/** - * @ingroup dnn - * @brief realdiv between two tensors. 
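// A hedged sketch of the removed tile interface above: query the output shape, then
// run the forward call. The handle, descriptors, multiples array and device buffers
// are assumed to be prepared by the caller; passing nullptr for the reserved
// alpha/beta factors and the capacity of 8 dims are assumptions as well.
ccStatus_t RunTile(ccHandle_t handle, const ccTensorDescriptor_t xDesc, const void *x,
                   const ccIntArray_t *multiples, const ccTensorDescriptor_t outputDesc,
                   void *output) {
  int32_t dim_cnt = 0;
  int32_t dims[8] = {0};
  ccStatus_t ret = ccGetTileOutputDim(xDesc, multiples, &dim_cnt, dims, 8);
  if (ret != CC_STATUS_SUCCESS) {
    return ret;
  }
  // dim_cnt/dims now describe the tiled output that outputDesc is expected to match.
  return ccTileForward(handle, nullptr, xDesc, x, multiples, nullptr, outputDesc, output);
}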
- * @param [in] alpha reserved. - * @param [in] xDesc description of the left operator tensor. - * @param [in] x data point of the left operator tensor. - * @param [in] yDesc description of the right operator tensor. - * @param [in] y data point of the right operator tensor. - * @param [in] beta reserved. - * @param [in] outputDesc description of the output tensor. - * @param [output] output data point of the output tensor. - * @return ccStatus_t - */ -ccStatus_t ccFloordivForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, - const ccTensorDescriptor_t yDesc, const void *y, const void *beta, - const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - * @brief get output shape of realdiv. - * @param [in] xDesc description of the left operator tensor. - * @param [in] yDesc description of the right operator tensor. - * @param [output] realDimCnt real dim. - * @param [in| dimlen length of dim - * @return ccStatus_t - */ -ccStatus_t ccGetFloordivOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt, - int32_t *dim, int32_t dimLen); - -/** - * @ingroup dnn - * @brief realdiv between two tensors. - * @param [in] alpha reserved. - * @param [in] xDesc description of the left operator tensor. - * @param [in] x data point of the left operator tensor. - * @param [in] yDesc description of the right operator tensor. - * @param [in] y data point of the right operator tensor. - * @param [in] beta reserved. - * @param [in] outputDesc description of the output tensor. - * @param [output] output data point of the output tensor. - * @return ccStatus_t - */ -ccStatus_t ccGreaterForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, - const ccTensorDescriptor_t yDesc, const void *y, const void *beta, - const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - * @brief get output shape of realdiv. - * @param [in] xDesc description of the left operator tensor. - * @param [in] yDesc description of the right operator tensor. - * @param [output] dimCnt dim nums. - * @param [output] dim dim size. - * @param [in| dimlen length of dim - * @return ccStatus_t - */ -ccStatus_t ccGetGreaterOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt, - int32_t *dim, int32_t dimLen); - -/** - * @ingroup dnn - * @brief realdiv between two tensors. - * @param [in] alpha reserved. - * @param [in] xDesc description of the left operator tensor. - * @param [in] x data point of the left operator tensor. - * @param [in] yDesc description of the right operator tensor. - * @param [in] y data point of the right operator tensor. - * @param [in] beta reserved. - * @param [in] outputDesc description of the output tensor. - * @param [output] output data point of the output tensor. - * @return ccStatus_t - */ -ccStatus_t ccLessForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, - const ccTensorDescriptor_t yDesc, const void *y, const void *beta, - const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - * @brief get output shape of realdiv. - * @param [in] xDesc description of the left operator tensor. - * @param [in] yDesc description of the right operator tensor. - * @param [output] dimCnt dim nums. - * @param [output] dim dim size. 
- * @param [in| dimlen length of dim - * @return ccStatus_t - */ -ccStatus_t ccGetLessOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt, - int32_t *dim, int32_t dimLen); - -/** - * @ingroup dnn - * @brief get output shape of LogicalOr. - * @param [in] xDesc description of the left operator tensor. - * @param [in] yDesc description of the right operator tensor. - * @param [output] dimCnt dim nums. - * @param [output] dim dim size. - * @param [in| dimlen length of dim - * @return ccStatus_t - */ -ccStatus_t ccGetLogicalOrOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt, - int32_t *dim, int32_t dimLen); - -/** - * @ingroup dnn - * @brief get output shape of LogicalXor. - * @param [in] xDesc description of the left operator tensor. - * @param [in] yDesc description of the right operator tensor. - * @param [output] dimCnt dim nums. - * @param [output] dim dim size. - * @param [in] dimlen length of dim - * @return ccStatus_t - */ -ccStatus_t ccGetLogicalXorOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt, - int32_t *dim, int32_t dimLen); - -/** - * @ingroup dnn - * @brief sqrt forward: - * data type only support bool - * data format only support ND - * @param [in] handle cce handle - * @param [in] alpha common scale factor - * @param [in] xDesc descriptor of input data - * @param [in] x input data in device memory - * @param [in] beta common scale factor - * @param [in] outputDesc descriptor of output data - * @param [in|out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccLogicalNotForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, - const void *beta, const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - * @brief equal between two tensors. - * @param [in] alpha reserved. - * @param [in] xDesc description of the left operator tensor. - * @param [in] x data point of the left operator tensor. - * @param [in] yDesc description of the right operator tensor. - * @param [in] y data point of the right operator tensor. - * @param [in] beta reserved. - * @param [in] outputDesc description of the output tensor. - * @param [output] output data point of the output tensor. - * @return ccStatus_t - */ - -ccStatus_t ccEqualForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, - const ccTensorDescriptor_t yDesc, const void *y, const void *beta, - const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - * @brief dump data during inference, only for eng ver. - * @param [in] handle cce handle - * @return ccStatus_t - */ -ccStatus_t ccDataDumpForward(ccHandle_t handle, const void *buffer, const uint64_t bufLen, const uint32_t taskIndex); - -/** - * @ingroup dnn - * @brief logicaland between two tensors. - * @param [in] alpha reserved. - * @param [in] xDesc description of the left operator tensor. - * @param [in] x data point of the left operator tensor. - * @param [in] yDesc description of the right operator tensor. - * @param [in] y data point of the right operator tensor. - * @param [in] beta reserved. - * @param [in] outputDesc description of the output tensor. - * @param [output] output data point of the output tensor. 
- * @return ccStatus_t - */ -ccStatus_t ccLogicalAndForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, - const ccTensorDescriptor_t yDesc, const void *y, const void *beta, - const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - * @brief logical or between two tensors. - * @param [in] alpha reserved. - * @param [in] xDesc description of the left operator tensor. - * @param [in] x data point of the left operator tensor. - * @param [in] yDesc description of the right operator tensor. - * @param [in] y data point of the right operator tensor. - * @param [in] beta reserved. - * @param [in] outputDesc description of the output tensor. - * @param [output] output data point of the output tensor. - * @return ccStatus_t - */ -ccStatus_t ccLogicalOrForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, - const ccTensorDescriptor_t yDesc, const void *y, const void *beta, - const ccTensorDescriptor_t outputDesc, void *output); -/** - * @ingroup dnn - * @brief logical Xor between two tensors(x ^ y = (x | y) & ~(x & y). - * @param [in] alpha reserved. - * @param [in] xDesc description of the left operator tensor. - * @param [in] x data point of the left operator tensor. - * @param [in] yDesc description of the right operator tensor. - * @param [in] y data point of the right operator tensor. - * @param [in] beta reserved. - * @param [in] outputDesc description of the output tensor. - * @param [output] output data point of the output tensor. - * @return ccStatus_t - */ -ccStatus_t ccLogicalXorForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, - const ccTensorDescriptor_t yDesc, const void *y, const void *beta, - const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - * @brief get output shape of equal. - * @param [in] xDesc description of the left operator tensor. - * @param [in] yDesc description of the right operator tensor. - * @param [output] dimCnt dim nums. - * @param [output] dim dim size. - * @param [in| dimlen length of dim - * @return ccStatus_t - */ -ccStatus_t ccGetEqualOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt, - int32_t *dim, int32_t dimLen); -/** - * @ingroup dnn - * @brief get output shape of logicaland. - * @param [in] xDesc description of the left operator tensor. - * @param [in] yDesc description of the right operator tensor. - * @param [output] dimCnt dim nums. - * @param [output] dim dim size. - * @param [in| dimlen length of dim - * @return ccStatus_t - */ -ccStatus_t ccGetLogicalAndOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt, - int32_t *dim, int32_t dimLen); -/** - * @ingroup dnn - * @brief realdiv between two tensors. - * @param [in] alpha reserved. - * @param [in] xDesc description of the left operator tensor. - * @param [in] x data point of the left operator tensor. - * @param [in] yDesc description of the right operator tensor. - * @param [in] y data point of the right operator tensor. - * @param [in] beta reserved. - * @param [in] outputDesc description of the output tensor. - * @param [output] output data point of the output tensor. 
- * @return ccStatus_t - */ -ccStatus_t ccFloormodForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, - const ccTensorDescriptor_t yDesc, const void *y, const void *beta, - const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - * @brief get output shape of realdiv. - * @param [in] xDesc description of the left operator tensor. - * @param [in] yDesc description of the right operator tensor. - * @param [output] dimCnt dim nums. - * @param [output] dim dim size. - * @param [in| dimlen length of dim - * @return ccStatus_t - */ -ccStatus_t ccGetFloormodOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt, - int32_t *dim, int32_t dimLen); - -/** - * @ingroup dnn - * @brief compare between two tensors. - * @param [in] alpha reserved. - * @param [in] xDesc description of the left operator tensor. - * @param [in] x data point of the left operator tensor. - * @param [in] yDesc description of the right operator tensor. - * @param [in] y data point of the right operator tensor. - * @param [in] beta reserved. - * @param [in] outputDesc description of the output tensor. - * @param [output] output data point of the output tensor. - * @return ccStatus_t - */ -ccStatus_t ccCompareForward(ccHandle_t handle, ccCompareType_t compareType, const void *alpha, - const ccTensorDescriptor_t xDesc, const void *x, const ccTensorDescriptor_t yDesc, - const void *y, const void *beta, const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - * @brief get output shape of realdiv. - * @param [in] xDesc description of the left operator tensor. - * @param [in] yDesc description of the right operator tensor. - * @param [output] dimCnt dim nums. - * @param [output] dim dim size. - * @param [in| dimlen length of dim - * @return ccStatus_t - */ -ccStatus_t ccGetCompareOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt, - int32_t *dim, int32_t dimLen); - -/** - * @ingroup dnn - * @brief create descriptor of FillParam - * @param [in|out] fillParamDesc point to descriptor of fill param - * @return ccStatus_t - */ -ccStatus_t ccCreateFillParamDescriptor(ccFillParamDescriptor_t *fillParamDesc); - -/** - * @ingroup dnn - * @brief destroy descriptor of FillParam - * @param [in] *fillParamDesc point to descriptor of fill param - * @return ccStatus_t - */ -ccStatus_t ccDestroyFillParamDescriptor(ccFillParamDescriptor_t *fillParamDesc); - -/** - * @ingroup dnn - * @brief get output shape of broadcat operations. - * @param [in] inputNum input number of the operation tensors. - * @param [in] xDesc[] description of the input operation tensors list. - * @param [output] dimCnt dim-size of output tensor. - * @param [output] dim dim of output tensor. - * @param [in| dimlen length of dim - * @return ccStatus_t - */ -ccStatus_t ccGetMultiNdBroadcastOpOutputDim(const int32_t inputNum, const ccTensorDescriptor_t xDesc[], int32_t *dimCnt, - int32_t *dim, int32_t dimLen); - -/** - * @ingroup dnn - * @brief get output shape of maximultitensor. - * @param [in] inputNum the num of input operator tensors. - * @param [in] xDesc[] description of the input operator tensors list. - * @param [output] dimCnt dim count of output tensor. - * @param [output] dim array of output tensor. 
- * @param [in| dimlen length of dim - * @return ccStatus_t - */ -ccStatus_t ccGetMaxMultitensorOutputDim(const int32_t inputNum, const ccTensorDescriptor_t xDesc[], int32_t *dimCnt, - int32_t *dim, int32_t dimLen); - -/** - * @ingroup dnn - * @brief get output shape of minmultitensor. - * @param [in] inputNum the num of input operator tensors. - * @param [in] xDesc[] description of the input operator tensors list. - * @param [output] dimCnt dim count of output tensor. - * @param [output] dim array of output tensor. - * @param [in| dimlen length of dim - * @return ccStatus_t - */ -ccStatus_t ccGetMinMultitensorOutputDim(const int32_t inputNum, const ccTensorDescriptor_t xDesc[], int32_t *dimCnt, - int32_t *dim, int32_t dimLen); - -/** - * @ingroup dnn - * @brief MaxMultitensor forward: - * data type only support float float16 and int32 - * data format only support ND - * @param [in] handle cce handle - * @param [in] inputNum input tensor number - * @param [in] alpha common scale factor - * @param [in] xDesc[] descriptor of input tensors list - * @param [in] x[] input data in device memory list - * @param [in] beta common scale factor - * @param [in] outputDesc descriptor of output data - * @param [in|out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccMaxMultitensorForward(const ccHandle_t handle, const int32_t inputNum, const void *alpha, - const ccTensorDescriptor_t xDesc[], const void *x[], const void *beta, - const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - * @brief MinMultitensor forward: - * data type only support float float16 and int32 - * data format only support ND - * @param [in] handle cce handle - * @param [in] inputNum input tensor number - * @param [in] alpha common scale factor - * @param [in] xDesc[] descriptor of input data list - * @param [in] x[] input data in device memory list - * @param [in] beta common scale factor - * @param [in] outputDesc descriptor of output data - * @param [in|out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccMinMultitensorForward(const ccHandle_t handle, const int32_t inputNum, const void *alpha, - const ccTensorDescriptor_t xDesc[], const void *x[], const void *beta, - const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - * @brief create descriptor of StridedSlice - * @param [in|out] stridedSliceDesc point to descriptor of StridedSlice param - * @return ccStatus_t - */ -ccStatus_t ccCreateStridedSliceDescriptor(ccStridedSliceDescriptor_t *stridedSliceDesc); - -/** - * @ingroup dnn - * @brief destroy descriptor of StridedSlice - * @param [in] *stridedSliceDesc point to descriptor of StridedSlice param - * @return ccStatus_t - */ -ccStatus_t ccDestroyStridedSliceDescriptor(ccStridedSliceDescriptor_t *stridedSliceDesc); - -/** - * @ingroup dnn - * @brief init stridedSlice descriptor_t. 
- * @param [out] stridedSliceDesc struct of stridedslice param - * @param [in] dimCnt dimension of the input tensor - * @param [in] begin slice begin(include) - * @param [in] end slice end index(not include) - * @param [in] strides slice stride - * @return ccStatus_t - */ -ccStatus_t ccSetStridedSliceDescriptor(ccStridedSliceDescriptor_t stridedSliceDesc, int32_t dimCnt, int32_t begin[], - int32_t end[], int32_t strides[]); - -/** - * @ingroup dnn - * @brief create descriptor of StridedSlice - * @param [in|out] stridedSliceDesc point to descriptor of StridedSlice attr - * @return ccStatus_t - */ -ccStatus_t ccCreateStridedSliceAttrsDescriptor(ccStridedSliceAttrsDescriptor_t *attrDesc); - -/** - * @ingroup dnn - * @brief destroy descriptor of StridedSlice - * @param [in] *stridedSliceDesc point to descriptor of StridedSlice attr - * @return ccStatus_t - */ -ccStatus_t ccDestroyStridedSliceAttrsDescriptor(ccStridedSliceAttrsDescriptor_t *attrDesc); - -/** - * @ingroup dnn - * @brief init stridedSlice mask attrs desescriptor. - * @param [out] attrDesc struct of stridedslice mask attrs - * @param [in] beginMask begin mask - * @param [in] endMask end mask - * @param [in] ellipsisMask ellipsis mask - * @param [in] newAxisMask new axis mask - * @param [in] shrinkAxisMask shrink axis mask - * @return ccStatus_t - */ -ccStatus_t ccSetStridedSliceAttrsDescriptor(ccStridedSliceAttrsDescriptor_t attrDesc, int32_t beginMask, - int32_t endMask, int32_t ellipsisMask, int32_t newAxisMask, - int32_t shrinkAxisMask); - -/** - * @ingroup dnn - * @brief Extracts a strided slice of a tensor. - * @param [in] xDesc descriptor of input data - * @param [in] stridedSliceDesc specifies the begin, end, strides of slice - * @param [in] attrDesc reserve for optional attributes. - * @param [out] n point to n size - * @param [out] c point to c size - * @param [out] h point to h size - * @param [out] w point to w size - * @return ccStatus_t - */ -ccStatus_t ccGetStridedSliceOutputDim(const ccTensorDescriptor_t xDesc, - const ccStridedSliceDescriptor_t stridedSliceDesc, - const ccStridedSliceAttrsDescriptor_t attrDesc, int32_t *n, int32_t *c, - int32_t *h, int32_t *w, int32_t *realDimCnt); - -/** - * @ingroup dnn - * @brief Extracts a strided slice of a tensor. - * @param [in] handle cce handle - * @param [in] stridedSliceDesc specifies the begin, end, strides of slice - * @param [in] attrDesc reserve for optional attributes. - * @param [in] alpha common scale factor - * @param [in] xDesc descriptor of input data - * @param [in] x input data in device memory - * @param [in] beta common scale factor - * @param [in] yDesc descriptor of output data - * @param [in|out] y output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccStridedSliceForward(ccHandle_t handle, const ccStridedSliceDescriptor_t stridedSliceDesc, - const ccStridedSliceAttrsDescriptor_t attrDesc, const void *alpha, - const ccTensorDescriptor_t xDesc, const void *x, const void *beta, - const ccTensorDescriptor_t yDesc, void *y); - -/** - * @ - * @brief get out put descrition of slice tensor. 
- * @param [in] xDesc descriptor of input data - * @param [in] begin begin position of tensor - * @param [in] size size to slice - * @param [out] n point to n size - * @param [out] c point to c size - * @param [out] h point to h size - * @param [out] w point to w size - * @param [out] realDimCnt realdim count - * @return ccStatus_t - */ -ccStatus_t ccGetSliceOutputDim(const ccTensorDescriptor_t xDesc, const ccIntArray_t *begin, const ccIntArray_t *size, - int32_t *n, int32_t *c, int32_t *h, int32_t *w, int32_t *realDimCnt); - -/** - * @ingroup dnn - * @brief slice of a tensor. - * @param [in] handle cce handle - * @param [in] alpha common scale factor - * @param [in] xDesc descriptor of input data - * @param [in] x input data in device memory - * @param [in] begin begin position of tensor - * @param [in] size size to slice - * @param [in] beta common scale factor - * @param [in] yDesc descriptor of output data - * @param [in|out] y output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccSliceForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, - const ccIntArray_t *begin, const ccIntArray_t *size, const void *beta, - const ccTensorDescriptor_t yDesc, void *y); - -/** - * @ingroup dnn - * @brief gather forward computation - * @param [in] handle cce handle - * @param [in] paramsDesc descriptor of params tensor - * @param [in] params input data in device memory - * @param [in] indicesDesc descriptor of indices tensor - * @param [in] indices indices data in device memory - * @param [in] axis descriptor of roi tensor - * @param [in] alpha reserved - * @param [in] beta reserved - * @param [in] outputDesc descriptor of output tensor - * @param [out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccGatherForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t paramsDesc, - const void *params, const ccTensorDescriptor_t indicesDesc, const void *indices, - const int32_t axis, const void *beta, ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - * @brief gather output dim computation, for NC1HWC0 - * @param [in] paramsDesc descriptor of params tensor - * @param [in] indicesDesc descriptor of indices tensor - * @param [in] axis descriptor of roi tensor - * @param [out] n dim of n - * @param [out] c dim of c - * @param [out] h dim of h - * @param [out] w dim of w - * @param [out] realDimCnt real dim count - * @return ccStatus_t - */ -ccStatus_t ccGetGatherOutputDim(const ccTensorDescriptor_t paramsDesc, const ccTensorDescriptor_t indicesDesc, - int32_t axis, int32_t *n, int32_t *c, int32_t *h, int32_t *w, int32_t *realDimCnt); - -/** - * @ingroup dnn - * @brief gather output dim computation - * @param [in] paramsDesc descriptor of params tensor - * @param [in] indicesDesc descriptor of indices tensor - * @param [in] axis descriptor of roi tensor - * @param [out] dimCnt dimcnt of output - * @param [out] dim dim of output - * @param [in| dimlen length of dim - * @return ccStatus_t - */ -ccStatus_t ccGetGatherOutputDim(const ccTensorDescriptor_t paramsDesc, const ccTensorDescriptor_t indicesDesc, - int32_t axis, int32_t *dimCnt, int32_t dim[], int32_t dimLen); - -/** - * @ingroup dnn - * @brief exp forward computation - * @param [in] handle cce handle - * @param [in] expDesc descriptor of expParam - * @param [in] expParam a ternary array - * @param [in] alpha reserved parameter - * @param [in] xDesc descriptor of input tensor - * @param [in] x input data in device memory - * @param 
[in] beta reserved parameter
- * @param [in] yDesc descriptor of output tensor
- * @param [out] y output data in device memory
- * @return ccStatus_t
- */
-ccStatus_t ccExpForward(ccHandle_t handle, const ccExpDescriptor_t expDesc, const void *expParam, const void *alpha,
-                        const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
-                        const ccTensorDescriptor_t yDesc, void *y);
-
-/**
- * @ingroup dnn
- * @brief expm1 forward:
- *          data type only support float float16 and double
- *          data format only support ND
- * @param [in] handle cce handle
- * @param [in] alpha common scale factor
- * @param [in] xDesc descriptor of input data
- * @param [in] x input data in device memory
- * @param [in] beta common scale factor
- * @param [in] outputDesc descriptor of output data
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
-ccStatus_t ccExpm1Forward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
-                          const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
-
-/**
- * @ingroup dnn
- * @brief log1p forward:
- *          data type only support float float16 and double
- *          data format only support ND
- * @param [in] handle cce handle
- * @param [in] alpha common scale factor
- * @param [in] xDesc descriptor of input data
- * @param [in] x input data in device memory
- * @param [in] beta common scale factor
- * @param [in] outputDesc descriptor of output data
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
-ccStatus_t ccLog1pForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
-                          const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
-
-/**
- * @ingroup dnn
- * @brief init descriptor for parameter of exp function
- * @param [in|out] expDesc descriptor of exp parameter
- * @param [in] dataType data type in device
- * @param [in] paramCnt number of parameters
- * @return ccStatus_t
- */
-ccStatus_t ccSetExpDescriptor(ccExpDescriptor_t expDesc, ccDataType_t dataType, uint32_t paramCnt);
-
-/**
- * @ingroup dnn
- * @brief log forward computation
- * @param [in] handle cce handle
- * @param [in] logDesc descriptor of logParam
- * @param [in] logParam a ternary array
- * @param [in] alpha reserved parameter
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] beta reserved parameter
- * @param [in] yDesc descriptor of output tensor
- * @param [out] y output data in device memory
- * @return ccStatus_t
- */
-ccStatus_t ccLogForward(ccHandle_t handle, const ccLogDescriptor_t logDesc, const void *logParam, const void *alpha,
-                        const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
-                        const ccTensorDescriptor_t yDesc, void *y);
-
-/**
- * @ingroup dnn
- * @brief init descriptor for parameter of log function
- * @param [in|out] logDesc descriptor of log parameter
- * @param [in] dataType data type in device
- * @param [in] paramCnt number of parameters
- * @return ccStatus_t
- */
-ccStatus_t ccSetLogDescriptor(ccLogDescriptor_t logDesc, ccDataType_t dataType, uint32_t paramCnt);
-
-/**
- * @ingroup dnn
- * @brief pow forward computation
- * @param [in] handle cce handle
- * @param [in] powDesc descriptor of powParam
- * @param [in] powParam a ternary array
- * @param [in] alpha reserved parameter
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] beta reserved parameter
- * @param [in] yDesc descriptor of input tensor
- *
@param [in] y input data in device memory - * @param [in] zDesc descriptor of output tensor - * @param [out] z output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccPowForward(ccHandle_t handle, const ccPowDescriptor_t powDesc, const void *powParam, const void *alpha, - const ccTensorDescriptor_t xDesc, const void *x, const ccTensorDescriptor_t yDesc, - const void *y, const void *beta, const ccTensorDescriptor_t zDesc, void *z); - -/** - * @brief init descriptor for parameter of pow function - * @param [in|out] powDesc descriptor of tensor - * @param [in] dataType data type in device - * @param [in] paramCnt number of parameters - * @return ccStatus_t - */ -ccStatus_t ccSetPowDescriptor(ccPowDescriptor_t powDesc, ccDataType_t dataType, uint32_t paramCnt); - -/** - * @ingroup dnn - * @brief non max suppression forward. - * @param [in] handle cce handle - * @param [in] nonmaxParaDesc descriptor of para - * @param [in] nonmaxPara input para in host memory - * @param [in] maxoutputsizex input para in host memory - * @param [in] alpha common scale factor - * @param [in] boxesDesc descriptor of input data boxesDesc - * @param [in] boxes input data boxes in device memory - * @param [in] scoresDesc descriptor of input data boxesDesc - * @param [in] scores input data scores in device memory - * @param [in] workSpaceSizeInBytes workspace size - * @param [in] workSpace input workspace in device memory - * @param [in] beta common scale factor - * @param [in] outputDesc descriptor of output data - * @param [in|out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccNonMaxSuppressionForward(ccHandle_t handle, const ccNonMaxSuppressionDescriptor_t nonmaxParaDesc, - const void *nonmaxPara, const int *maxoutputsize, const void *alpha, - const ccTensorDescriptor_t boxesDesc, const void *boxes, - const ccTensorDescriptor_t scoresDesc, const void *scores, - const uint32_t workSpaceSizeInBytes, void *workSpace, const void *beta, - const ccTensorDescriptor_t outputDesc, void *output); -/** - * @brief init descriptor for parameter of NonMaxSuppression function - * @param [in|out] powDesc descriptor of tensor - * @param [in] dataType data type in device - * @param [in] paramCnt number of parameters - * @return ccStatus_t - */ -ccStatus_t ccSetNonMaxSuppressionDescriptor(ccNonMaxSuppressionDescriptor_t nonMaxSuppressionDesc, - ccDataType_t dataType, uint32_t paramCnt); - -/** - * @ingroup dnn - * @brief get the output dimension info of resizeBilinear op. - * @param [in] xDesc descriptor of input data - * @param [in] resizeBilinearDesc descriptor of resize_bilinear operator - * @param [out] dimCnt - * @param [out] dim[] dim of output - * @param [in| dimlen length of dim - * @return ccStatus_t - */ -ccStatus_t ccGetResizeBilinearOutputDim(const ccTensorDescriptor_t xDesc, - const ccResizeBilinearDescriptor_t resizeBilinearDesc, int32_t *dimCnt, - int32_t dim[], int32_t dimLen); - -/** - * @ingroup dnn - * @brief get the output dimension info of interp op. - * @param [in] xDesc descriptor of input data - * @param [in] resizeBilinearDesc descriptor of resize_bilinear operator - * @param [out] dimCnt - * @param [out] dim[] dim of output - * @param [in| dimlen length of dim - * @return ccStatus_t - */ -ccStatus_t ccGetInterpOutputDim(const ccTensorDescriptor_t xDesc, const ccResizeBilinearDescriptor_t resizeBilinearDesc, - int32_t *dimCnt, int32_t dim[], int32_t dimLen); -/** - * @ingroup dnn - * @brief resize bilinear forward for t network. 
- * @param [in] handle cce handle
- * @param [in] resizeBilinearDesc descriptor of resize_bilinear operator
- * @param [in] alpha common scale factor
- * @param [in] xDesc descriptor of input data
- * @param [in] x input data in device memory
- * @param [in] beta common scale factor
- * @param [in] outputDesc descriptor of output data
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
-ccStatus_t ccResizeBilinearForward(ccHandle_t handle, const ccResizeBilinearDescriptor_t resizeBilinearDesc,
-                                   const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
-                                   const ccTensorDescriptor_t outputDesc, void *output);
-
-/**
- * @ingroup dnn
- * @brief resize bilinear forward for c network.
- * @param [in] handle cce handle
- * @param [in] resizeBilinearDesc descriptor of resize_bilinear operator
- * @param [in] alpha common scale factor
- * @param [in] xDesc descriptor of input data
- * @param [in] x input data in device memory
- * @param [in] beta common scale factor
- * @param [in] outputDesc descriptor of output data
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
-ccStatus_t ccInterpForward(ccHandle_t handle, const ccResizeBilinearDescriptor_t resizeBilinearDesc, const void *alpha,
-                           const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
-                           const ccTensorDescriptor_t outputDesc, void *output);
-
-/**
- * @ingroup dnn
- * @brief create descriptor of ResizeBilinear
- * @param [in|out] resizeBilinearDesc point to descriptor of resizeBilinear attr
- * @return ccStatus_t
- */
-ccStatus_t ccCreateResizeBilinearDescriptor(ccResizeBilinearDescriptor_t *resizeBilinearDesc);
-
-/**
- * @ingroup dnn
- * @brief destroy descriptor of ResizeBilinear
- * @param [in|out] resizeBilinearDesc point to descriptor of resizeBilinear attr
- * @return ccStatus_t
- */
-ccStatus_t ccDestroyResizeBilinearDescriptor(ccResizeBilinearDescriptor_t *resizeBilinearDesc);
-
-/**
- * @ingroup dnn
- * @brief set descriptor of resizeBilinear.
- * @param [in|out] resizeBilinearDesc descriptor of resize_bilinear operator - * @param [in] resizeOutputDimMode way to decide output dimensions - * @param [in] alignCorners whether the centers of input and output are aligned - * @param [in] zoom_factor zoom factor - * @param [in] shrink_factor shrink factor - * @param [in] height height of output - * @param [in] width width of output - * @param [in] pad_begin padding at begin of input - * @param [in] pad_end padding at end of input - * @return ccStatus_t - */ -ccStatus_t ccSetResizeBilinearDescriptor(ccResizeBilinearDescriptor_t resizeBilinearDesc, - ccResizeOutputDimMode_t resizeOutputDimMode, bool alignCorners, - int32_t zoom_factor, int32_t shrink_factor, int32_t height, int32_t width, - int32_t pad_begin, int32_t pad_end); - -/** - * @ingroup dnn - * @brief fill forward computation - * @param [in] handle cce handle - * @param [in] fillParamDesc descriptor of fill parameter - * @param [in] alpha reserved - * @param [in] givenDesc descriptor of given tensor - * @param [in] givenData given data in device memory - * @param [in] workspace space for fill algorithm - * @param [in] workSpaceSizeInBytes space size in byte - * @param [in] beta reserved - * @param [in] outputDesc descriptor of output tensor - * @param [out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccFillForward(ccHandle_t handle, const ccFillParamDescriptor_t fillParamDesc, const void *alpha, - const ccTensorDescriptor_t givenDesc, const void *givenData, const void *workspace, - const uint32_t workSpaceSizeInBytes, const void *beta, const ccTensorDescriptor_t outputDesc, - void *output); - -/** - * @ingroup dnn - *[ccGetFillWorkspaceSize] - *@param fillType [fill type] - *@param givenDesc [given tensor descriptor] - *@param xDesc [input tensor descriptor] - *@param sizeInBytes [output size] - *@return ccStatus_t [status] - */ -ccStatus_t ccGetFillWorkspaceSize(const ccFillOpType_t fillType, const ccTensorDescriptor_t xDesc, - uint32_t *sizeInBytes); - -/** - *[ccCast] - *@param handle [cce handler] - *@param alpha [alpha] - *@param xDesc [tensor Description of tensor x] - *@param x [input tensor x] - *@param beta [beta - *@param yDesc [tensor Description of tensor y] - *@param y [output tensor y] - *@return ccStatus_t [status] - */ -ccStatus_t ccCast(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, - const void *beta, const ccTensorDescriptor_t yDesc, void *y); - -/** - * @ingroup dnn - * @brief round forward: - * data type only support float float16 and int32 - * data format only support ND - * @param [in] handle cce handle - * @param [in] alpha common scale factor - * @param [in] xDesc descriptor of input data - * @param [in] x input data in device memory - * @param [in] beta common scale factor - * @param [in] outputDesc descriptor of output data - * @param [in|out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccRoundForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, - const void *beta, const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - * @brief rint forward: - * data type only support float float16 - * data format only support ND - * @param [in] handle cce handle - * @param [in] alpha common scale factor - * @param [in] xDesc descriptor of input data - * @param [in] x input data in device memory - * @param [in] beta common scale factor - * @param [in] outputDesc descriptor of output data - * @param 
[in|out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccRintForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, - const void *beta, const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - * @brief sqrt forward: - * data type only support float float16 - * data format only support ND - * @param [in] handle cce handle - * @param [in] alpha common scale factor - * @param [in] xDesc descriptor of input data - * @param [in] x input data in device memory - * @param [in] beta common scale factor - * @param [in] outputDesc descriptor of output data - * @param [in|out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccSqrtForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, - const void *beta, const ccTensorDescriptor_t outputDesc, void *output); - -/** - *[ccCast] - *@param filterSrcInfo [cce filtersrc descriptor] - *@param filterSrc [filterSrc address] - *@param filterDstInfo [cce filterdst descriptor] - *@param filterDst [filterdst address] - *@param group [group] - *@param ySizeInBytes [fraczfilter size] - *@param outputDataType [datatype] - *@return ccStatus_t [status] - */ -ccStatus_t ccTransGroupConvFilterInt8(ccFilterDescriptor_t filterSrcInfo, const void *filterSrc, - ccFilterDescriptor_t filterDstInfo, void *filterDst, uint32_t group, - uint32_t ySizeInBytes, ccDataType_t outputDataType); - -/** - *[ccGetConcatOutputDim] - *@param xDesc[] [input tensor descriptor] - *@param axis [concat axis] - *@param inputNum [input tensor numbers] - *@param dim[] [output dim] - *@param [in| dimlen length of dim - *@return ccStatus_t [status] - */ -ccStatus_t ccGetConcatOutputDim(const ccTensorDescriptor_t xDesc[], int32_t axis, int32_t inputNum, int32_t *dimCnt, - int32_t dim[], int32_t dimLen); - -/** - * @ingroup dnn - * @brief get the output dimension info of reduce. - * @param [in] xDesc descriptor of input tensor - * @param [in] axis The dimensions to reduce - * @param [in] keepDims If true, retains reduced dimensions with length 1. - * @param [in|out] dimCnt point to the output dimCnt - * @param [in|out] dim arrays to save dims - * @param [in| dimlen length of dim - * @return ccStatus_t - */ -ccStatus_t ccGetReduceOutputDim(const ccTensorDescriptor_t xDesc, const ccIntArray_t *axis, bool keepDims, - int32_t *dimCnt, int32_t dim[], int32_t dimLen); - -/** - * @ingroup dnn - * @brief reduce sum forward computation - * @param [in] handle cce handle - * @param [in] axis The dimensions to reduce - * @param [in] keepDims If true, retains reduced dimensions with length 1. - * @param [in] alpha scaling factors - * @param [in] xDesc descriptor of input tensor - * @param [in] x input data in device memory - * @param [in] beta bias factors - * @param [in] outputDesc descriptor of output tensor - * @param [in|out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccReduceSumForward(ccHandle_t handle, const ccIntArray_t *axis, bool keepDims, const void *alpha, - const ccTensorDescriptor_t xDesc, const void *x, const void *beta, - const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - * @brief reduce max forward computation - * @param [in] handle cce handle - * @param [in] axis The dimensions to reduce - * @param [in] keepDims If true, retains reduced dimensions with length 1. 
- * @param [in] alpha scaling factors - * @param [in] xDesc descriptor of input tensor - * @param [in] x input data in device memory - * @param [in] beta bias factors - * @param [in] outputDesc descriptor of output tensor - * @param [in|out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccReduceMaxForward(ccHandle_t handle, const ccIntArray_t *axis, bool keepDims, const void *alpha, - const ccTensorDescriptor_t xDesc, const void *x, const void *beta, - const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - * @brief reduce min forward computation - * @param [in] handle cce handle - * @param [in] axis The dimensions to reduce - * @param [in] keepDims If true, retains reduced dimensions with length 1. - * @param [in] alpha scaling factors - * @param [in] xDesc descriptor of input tensor - * @param [in] x input data in device memory - * @param [in] beta bias factors - * @param [in] outputDesc descriptor of output tensor - * @param [in|out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccReduceMinForward(ccHandle_t handle, const ccIntArray_t *axis, bool keepDims, const void *alpha, - const ccTensorDescriptor_t xDesc, const void *x, const void *beta, - const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - * @brief reduce mean forward computation - * @param [in] handle cce handle - * @param [in] axis The dimensions to reduce - * @param [in] keepDims If true, retains reduced dimensions with length 1. - * @param [in] alpha scaling factors - * @param [in] xDesc descriptor of input tensor - * @param [in] x input data in device memory - * @param [in] beta bias factors - * @param [in] outputDesc descriptor of output tensor - * @param [in|out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccReduceMeanForward(ccHandle_t handle, const ccIntArray_t *axis, bool keepDims, const void *alpha, - const ccTensorDescriptor_t xDesc, const void *x, const void *beta, - const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - * @brief reduce prod forward computation - * @param [in] handle cce handle - * @param [in] axis The dimensions to reduce - * @param [in] keepDims If true, retains reduced dimensions with length 1. - * @param [in] alpha scaling factors - * @param [in] xDesc descriptor of input tensor - * @param [in] x input data in device memory - * @param [in] beta bias factors - * @param [in] outputDesc descriptor of output tensor - * @param [in|out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccReduceProdForward(ccHandle_t handle, const ccIntArray_t *axis, bool keepDims, const void *alpha, - const ccTensorDescriptor_t xDesc, const void *x, const void *beta, - const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - * @brief reduce all forward computation - * @param [in] handle cce handle - * @param [in] axis The dimensions to reduce - * @param [in] keepDims If true, retains reduced dimensions with length 1. 
- * @param [in] alpha scaling factors - * @param [in] xDesc descriptor of input tensor - * @param [in] x input data in device memory - * @param [in] beta bias factors - * @param [in] outputDesc descriptor of output tensor - * @param [in|out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccReduceAllForward(ccHandle_t handle, const ccIntArray_t *axis, bool keepDims, const void *alpha, - const ccTensorDescriptor_t xDesc, const void *x, const void *beta, - const ccTensorDescriptor_t outputDesc, void *output); - -/** - *@brief print times stats - *@return ccStatus_t [status] - */ -ccStatus_t ccPrintTimeStat(); - -/** - * @ingroup dnn - * @brief reduce abs sum forward computation - * @param [in] handle cce handle - * @param [in] axis The dimensions to reduce - * @param [in] keepDims If true, retains reduced dimensions with length 1. - * @param [in] alpha scaling factors - * @param [in] xDesc descriptor of input tensor - * @param [in] x input data in device memory - * @param [in] beta bias factors - * @param [in] outputDesc descriptor of output tensor - * @param [in|out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccReduceAbsSumForward(ccHandle_t handle, const ccIntArray_t *axis, const bool keepDims, const void *alpha, - const ccTensorDescriptor_t xDesc, const void *x, const void *beta, - const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - * @brief reduce square sum forward computation - * @param [in] handle cce handle - * @param [in] axis The dimensions to reduce - * @param [in] keepDims If true, retains reduced dimensions with length 1. - * @param [in] alpha scaling factors - * @param [in] xDesc descriptor of input tensor - * @param [in] x input data in device memory - * @param [in] beta bias factors - * @param [in] outputDesc descriptor of output tensor - * @param [in|out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccReduceSquareSumForward(ccHandle_t handle, const ccIntArray_t *axis, const bool keepDims, const void *alpha, - const ccTensorDescriptor_t xDesc, const void *x, const void *beta, - const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - * @brief get the output dimension info of crop and resize - * @param [in] imageDesc descriptor of images - * @param [in] boxesDesc descriptor of boxes - * @param [in] boxidxDesc descriptor of boxidx - * @param [in] resizeHeight resize height - * @param [in] resizeWidth resize width - * @param [out] dimCnt dimcnt of output - * @param [out] dim dim of output - * @param [in| dimlen length of dim - * @return ccStatus_t - */ -ccStatus_t ccGetCropAndResizeOutputDim(const ccTensorDescriptor_t imageDesc, const ccTensorDescriptor_t boxesDesc, - const ccTensorDescriptor_t boxidxDesc, const int32_t resizeHeight, - const int32_t resizeWidth, int32_t *dimCnt, int32_t dim[], int32_t dimLen); - -/** - * @ingroup dnn - * @brief crop and resize forward. 
- * @param [in] handle cce handle - * @param [in] alpha common scale factor - * @param [in] imageDesc descriptor of images - * @param [in] image input data in device memory - * @param [in] boxesDesc descriptor of boxes - * @param [in] boxes input data in device memory - * @param [in] boxidxDesc descriptor of boxidx - * @param [in] boxidx input data in device memory - * @param [in] method enum of resize method - * @param [in] extrapolationValue Value used for extrapolation, when applicable - * @param [in] beta common scale factor - * @param [in] outputDesc descriptor of output data - * @param [out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccCropAndResizeForward(ccHandle_t handle, const ccResizeMethod_t method, const float extrapolationValue, - const void *alpha, const ccTensorDescriptor_t imageDesc, const void *image, - const ccTensorDescriptor_t boxesDesc, const void *boxes, - const ccTensorDescriptor_t boxidxDesc, const void *boxidx, const void *beta, - const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - * @brief select forward computation - * @param [in] handle cce handle - * @param [in] alpha reserved - * @param [in] condDesc descriptor of cond tensor - * @param [in] cond cond data in device memory - * @param [in] xDesc descriptor of x tensor - * @param [in] x x data in device memory - * @param [in] yDesc descriptor of y tensor - * @param [in] y y data in device memory - * @param [in] beta reserved - * @param [in] outputDesc descriptor of output tensor - * @param [out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccSelect(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t condDesc, const void *cond, - const ccTensorDescriptor_t xDesc, const void *x, const ccTensorDescriptor_t yDesc, const void *y, - const void *beta, const ccTensorDescriptor_t outDesc, void *out); - -/** - * @ingroup dnn - * @brief get the output dimension info of where - * @param [in] xDesc descriptor of input tensor - * @param [in|out] dimCnt point to the output dimCnt - * @param [in|out] dim arrays to save dims - * @return ccStatus_t - */ -ccStatus_t ccGetWhereOutputDim(const ccTensorDescriptor_t xDesc, int32_t *dimCnt, int32_t *dim, int32_t dimLen); - -/** - * @ingroup dnn - * @brief where forward computation - * @param [in] handle cce handle - * @param [in] alpha reserved - * @param [in] condDesc descriptor of cond tensor - * @param [in] cond cond data in device memory - * @param [in] xDesc descriptor of x tensor - * @param [in] x x data in device memory - * @param [in] yDesc descriptor of y tensor - * @param [out] y y data in device memory - * @return ccStatus_t - */ -ccStatus_t ccWhere(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, - const void *beta, const ccTensorDescriptor_t yDesc, void *y); - -/** - * @ingroup dnn - * @brief reverse forward. 
- * @param [in] handle cce handle - * @param [in] axis dim that need reverse - * @param [in] alpha common scale factor - * @param [in] xDesc descriptor of input data - * @param [in] x input data in device memory - * @param [in] beta common scale factor - * @param [in] outputDesc descriptor of output data - * @param [in|out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccReverseForward(ccHandle_t handle, const ccIntArray_t *axis, const void *alpha, - const ccTensorDescriptor_t xDesc, const void *x, const void *beta, - const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - * @brief floor forward: - * data type only support float float16 - * data format only support ND - * @param [in] handle cce handle - * @param [in] alpha common scale factor - * @param [in] xDesc descriptor of input data - * @param [in] x input data in device memory - * @param [in] beta common scale factor - * @param [in] outputDesc descriptor of output data - * @param [in|out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccFloorForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, - const void *beta, const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - * @brief ceil forward: - * data type only support float float16 - * data format only support ND - * @param [in] handle cce handle - * @param [in] alpha common scale factor - * @param [in] xDesc descriptor of input data - * @param [in] x input data in device memory - * @param [in] beta common scale factor - * @param [in] outputDesc descriptor of output data - * @param [in|out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccCeilForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, - const void *beta, const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - * @brief get the output dimension info of truncate mod - * @param [in] xDesc descriptor of input tensor - * @param [in] yDesc descriptor of input tensor - * @param [out] dimCnt [dim count of the output tensor] - * @param [out] dim[] [shape of the output tensor] - * @param [in| dimlen length of dim - * @return ccStatus_t - */ -ccStatus_t ccGetTruncatemodOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, - int32_t *dimCnt, int32_t dim[], int32_t dimLen); - -/** - * @ingroup dnn - * @brief truncate mod forward computation - * @param [in] handle cce handle - * @param [in] alpha scaling factors - * @param [in] xDesc descriptor of input tensor - * @param [in] x input data in device memory - * @param [in] yDesc descriptor of input tensor - * @param [in] y input data in device memory - * @param [in] beta bias factors - * @param [in] outputDesc descriptor of output tensor - * @param [out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccTruncatemodForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, - const ccTensorDescriptor_t yDesc, const void *y, const void *beta, - const ccTensorDescriptor_t outputDesc, void *output); -/** - * @ingroup dnn - * @brief Spatial Pyramid Pooling - * @param [in] handle cce handle - * @param [in] alpha reserved - * @param [in] xDesc descriptor of input tensor - * @param [in] x input data in device memory - * @param [in] workspace temp workspace - * @param [in] workspaceSizeInBytes temp workspace size - * @param [in] pyramidHeight pyramid height - * @param [in] 
poolingMode pooling mode - * @param [in] beta reserved - * @param [in] outputDesc descriptor of output tensor - * @param [out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccSPPForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, - void *workspace, const uint32_t workspaceSizeInBytes, const uint32_t pyramidHeight, - const ccPoolingMode_t poolingMode, const void *beta, const ccTensorDescriptor_t outputDesc, - void *output); -/** - * @ingroup dnn - * @brief Get Spatial Pyramid Pooling output dim - * @param [in] xDesc descriptor of input tensor - * @param [in] pyramidHeight pyramid height - * @param [in] dimLen length of dim - * @param [out] dimCnt output tensor dim cnt - * @param [out] dim output tensor dim - * @param [in| dimlen length of dim - * @return ccStatus_t - */ -ccStatus_t ccGetSPPOutputDim(const ccTensorDescriptor_t xDesc, const uint32_t pyramidHeight, int32_t *dimCnt, - int32_t dim[], const int32_t dimLen); -/** - * @ingroup dnn - * @brief Get Spatial Pyramid Pooling workspace size - * @param [in] xDesc descriptor of input tensor - * @param [in] pyramidHeight pyramid height - * @param [out] workspaceSizeInBytes workspace size - * @return ccStatus_t - */ -ccStatus_t ccGetSPPWorkspaceSize(const ccTensorDescriptor_t xDesc, const uint32_t pyramidHeight, - uint32_t *workspaceSizeInBytes); - -/** - * @ingroup dnn - * @brief BNLL forward computation - * @param [in] handle cce handle - * @param [in] alpha scaling factors - * @param [in] xDesc descriptor of input tensor - * @param [in] x input data in device memory - * @param [in] beta bias factors - * @param [in] outputDesc descriptor of output tensor - * @param [in|out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccBNLLForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, - const void *beta, const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - * @brief bias forward. - * @param [in] handle cce handle - * @param [in] axis axis - * @param [in] alpha common scale factor - * @param [in] xDesc descriptor of input data x - * @param [in] x input data x in device memory - * @param [in] biasDesc descriptor of input data bias - * @param [in] bias input data bias in device memory - * @param [in] beta common scale factor - * @param [in] outputDesc descriptor of output data - * @param [in|out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccBiasForward(ccHandle_t handle, const int axis, const void *alpha, const ccTensorDescriptor_t xDesc, - const void *x, const ccTensorDescriptor_t biasDesc, const void *bias, const void *beta, - const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - * @brief threshold forward computation - * @param [in] handle cce handle - * @param [in] threshold threshold - * @param [in] alpha scaling factors - * @param [in] xDesc descriptor of input tensor - * @param [in] x input data in device memory - * @param [in] beta bias factors - * @param [in] outputDesc descriptor of output tensor - * @param [in|out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccThresholdForward(ccHandle_t handle, const void *threshold, const void *alpha, - const ccTensorDescriptor_t xDesc, const void *x, const void *beta, - const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - * @brief shufflechannel forward. 
- * @param [in] handle cce handle - * @param [in] alpha common scale factor - * @param [in] group number of groups - * @param [in] xDesc descriptor of input data - * @param [in] x input data in device memory - * @param [in] beta common scale factor - * @param [in] outputDesc descriptor of output data - * @param [in|out] output output data in device memory - * @return ccStatus_t - */ -// TODO AICPU: please add shufflechannel custom params and comment -ccStatus_t ccShuffleChannelForward(ccHandle_t handle, const void *alpha, uint32_t group, - const ccTensorDescriptor_t xDesc, const void *x, const void *beta, - const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - * @brief mvn forward. - * @param [in] handle cce handle - * @param [in] acrossChannel across channel. true: across, false: not - * @param [in] normalizeVariance normalizeVariance. true: normalizeVariance, false: not - * @param [in] alpha common scale factor - * @param [in] xDesc descriptor of input data - * @param [in] x input data in device memory - * @param [in] beta common scale factor - * @param [in] outputDesc descriptor of output data - * @param [in|out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccMVNForward(ccHandle_t handle, bool acrossChannel, bool normalizeVariance, const void *alpha, - const ccTensorDescriptor_t xDesc, const void *x, void *workSpace, uint32_t workSpaceSizeInBytes, - const void *beta, const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - * @brief get the workspace size of mvn - * @param [in] xDesc descriptor of input data - * @param [in] acrossChannel across channel. true: across, false: not - * @param [in|out] sizeInBytes Workspace size need for whole computation - */ -ccStatus_t ccGetMVNWorkspaceSize(const ccTensorDescriptor_t xDesc, bool acrossChannel, uint32_t *sizeInBytes); - -/** - * @ingroup dnn - * @brief heatmap2coord forward output is hotspot value and corresponding coordinates - * @param [in] handle cce handle - * @param [in] alpha common scale factor - * @param [in] xDesc descriptor of input data - * @param [in] x input data in device memory - * @param [in] coordh calibration high - * @param [in] coordw calibration wide - * @param [in] beta common scale factor - * @param [in] outputDesc descriptor of output data - * @param [in|out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccHeatmap2coordForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, - int32_t coordh, int32_t coordw, const void *beta, - const ccTensorDescriptor_t outputDesc, void *output); -/** - * @ingroup dnn - * @brief get the output dimension info of heatmap2coord - * @param [in] xDesc descriptor of input tensor - * @param [in|out] dimCnt point to the output dimCnt - * @param [in|out] dim arrays to save dims - * @param [in| dimlen length of dim - * @return ccStatus_t - */ -ccStatus_t ccGetHeatmap2coordOutputDim(const ccTensorDescriptor_t xDesc, int32_t *dimCnt, int32_t *dim, int32_t dimLen); - -/** - * @ingroup dnn - * @brief swish forward. 
- * @param [in] handle cce handle
- * @param [in] scale param of swish function, y = x * sigmoid(scale * x) = x / (1 + exp(-scale * x))
- * @param [in] alpha common scale factor
- * @param [in] xDesc descriptor of input data
- * @param [in] x input data in device memory
- * @param [in] beta common scale factor
- * @param [in] outputDesc descriptor of output data
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
-
-ccStatus_t ccSwishForward(ccHandle_t handle, const float scale, const void *alpha, const ccTensorDescriptor_t xDesc,
-                          const void *x, const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
-
-ccStatus_t ccTeForward(ccHandle_t handle, const void *stubFunc, uint32_t coreDim, const void *args, uint32_t argsSize,
-                       const rtL2Ctrl_t *l2ctrl, int32_t inputNum, const ccTensorDescriptor_t xDesc[], const void *x[],
-                       int32_t outputNum, const ccTensorDescriptor_t yDesc[], void *y[], bool isAiCore);
-
-#ifndef DAVINCI_LITE
-ccStatus_t ccAiCpuCustomizeForward(ccHandle_t handle, aicpu_run_func stubFunc, opTensor_t *xOpDesc[], void *x[],
-                                   int32_t inputNum, opTensor_t *yOpDesc[], void *y[], void *op_attr_handle,
-                                   int32_t outputNum, const ccTensorDescriptor_t xDesc[],
-                                   const ccTensorDescriptor_t yDesc[], const void *op_attr_str, uint32_t op_attr_size);
-#endif
-/**
- * @ingroup dnn
- * @brief embedding lookup forward.
- * @param [in] handle cce handle
- * @param [in] alpha common scale factor
- * @param [in] xDesc descriptor of input data x
- * @param [in] x input data x in device memory
- * @param [in] idxDesc descriptor of input data idx
- * @param [in] idx input data idx in device memory
- * @param [in] beta common scale factor
- * @param [in] outputDesc descriptor of output data
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
-ccStatus_t ccEmbeddingLookupForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc,
-                                    const void *x, const ccTensorDescriptor_t idxDesc, const void *idx,
-                                    const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
-
-/**
- * @ingroup dnn
- * @brief embedding lookup forward.
- * @param [in] handle cce handle - * @param [in] alpha common scale factor - * @param [in] inputNum inputNum - * @param [in] xDesc[] descriptor array of input data x - * @param [in] x[] input data x array in device memory - * @param [in] workSpace workSpace addr - * @param [in] workSpaceSizeInBytes workSpace size - * @param [in] idxDesc descriptor of input data idx - * @param [in] idx input data idx in device memory - * @param [in] partitionStrategy partitionStrategy - * @param [in] maxNorm addr of maxNorm - * @param [in] beta common scale factor - * @param [in] outputDesc descriptor of output data - * @param [in|out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccEmbeddingLookupForward(ccHandle_t handle, const void *alpha, const int32_t inputNum, - const ccTensorDescriptor_t xDesc[], const void *x[], void *workSpace, - const uint32_t workSpaceSizeInBytes, const ccTensorDescriptor_t idxDesc, - const void *idx, ccPartitionStrategy_t partitionStrategy, const void *maxNorm, - const void *beta, const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - *[ccGetEmbeddingLookupOutputDim] - *@param inputNum [input tensor numbers] - *@param xDesc[] [input tensor descriptor] - *@param idxDesc [idx tensor descriptor] - *@param dimCnt [output dim count] - *@param dim[] [output dim] - *@param [in| dimlen length of dim - *@return ccStatus_t [status] - */ -ccStatus_t ccGetEmbeddingLookupOutputDim(const int32_t inputNum, const ccTensorDescriptor_t xDesc[], - const ccTensorDescriptor_t idxDesc, int32_t *dimCnt, int32_t dim[], - int32_t dimLen); - -/** - * @ingroup dnn - *[ccGetEmbeddingLookupWorkspaceSize] - *@param inputNum [input tensor numbers] - *@param idxDesc [input tensor descriptor] - *@param isMaxNormExist [isMaxNormExist] - *@param sizeInBytes [output size] - *@return ccStatus_t [status] - */ -ccStatus_t ccGetEmbeddingLookupWorkspaceSize(const int32_t inputNum, const ccTensorDescriptor_t idxDesc, - const bool isMaxNormExist, uint32_t *sizeInBytes); - -/** - * @ingroup dnn - * @brief check if it is the first layer of resnet50 and semecefc - * @param [in] tensorDesc descriptor of input tensor. - * @param [in] convDesc conv descriptor. - * @param [in] filterDesc descriptor of weight tensor. 
- * @return ccStatus_t
- */
-ccStatus_t c04DescParamCheck(const ccTensorDescriptor_t tensorDesc, const ccConvolutionDescriptor_t convDesc,
-                             const ccFilterDescriptor_t filterDesc);
-
-#ifndef DAVINCI_LITE
-/**
- * @ingroup dnn
- * @brief convolution backward filter computation
- * @param [in] handle cce handle
- * @param [in] convDesc descriptor of convolution operator
- * @param [in] alpha scaling factors
- * @param [in] beta scaling factors
- * @param [in] xDesc x descriptor of input tensor
- * @param [in] x x data in device memory
- * @param [in] dyDesc descriptor of dy
- * @param [in] dy dy data in device memory
- * @param [in] dwDesc descriptor of dw
- * @param [out] dw dw data in device memory
- * @param [in] algo algorithm of convolution backward filter
- * @param [in] workSpace temp space, maybe NULL if no need temp space
- * @param [in] workSpaceSizeInBytes sizeof workspace
- * @return ccStatus_t
- */
-ccStatus_t ccConvolutionBackwardFilter(ccHandle_t handle, const ccConvolutionDescriptor_t convDesc, void *alpha,
-                                       void *beta, const ccTensorDescriptor_t xDesc, const void *x,
-                                       const ccTensorDescriptor_t dyDesc, const void *dy,
-                                       const ccFilterDescriptor_t dwDesc, void *dw, ccConvolutionBwdAlgo_t algo,
-                                       void *workSpace, uint32_t workSpaceSizeInBytes);
-#endif
-
-/**
- * @ingroup dnn
- * @brief get the temp space size of convolution backward filter computation, maybe no need temp space
- * @param [in] handle cce handle
- * @param [in] dyDesc descriptor of input tensor dy
- * @param [in] convDesc descriptor of convolution operator
- * @param [in] xDesc descriptor of input tensor
- * @param [in] dwDesc descriptor of filter
- * @param [in] algo algorithm of convolution backward filter
- * @param [in|out] sizeInBytes temp space size needed for the specified algorithm
- * @return ccStatus_t
- */
-ccStatus_t ccGetConvolutionBackwardFilterWorkspaceSize(ccHandle_t handle, const ccTensorDescriptor_t dyDesc,
-                                                       const ccConvolutionDescriptor_t convDesc,
-                                                       const ccTensorDescriptor_t xDesc,
-                                                       const ccFilterDescriptor_t dwDesc, ccConvolutionBwdAlgo_t algo,
-                                                       uint32_t *sizeInBytes);
-
-#ifndef DAVINCI_LITE
-ccStatus_t ccBatchNormalizationBackward(ccHandle_t handle, ccBatchNormMode_t mode, const void *alphaDataDiff,
-                                        const void *betaDataDiff, const void *alphaParamDiff, const void *betaParamDiff,
-                                        const ccTensorDescriptor_t xDesc, const void *x,
-                                        const ccTensorDescriptor_t dyDesc, const void *dy,
-                                        const ccTensorDescriptor_t dxDesc, void *dx,
-                                        const ccTensorDescriptor_t bnScaleBiasDiffDesc, const void *bnScale,
-                                        void *resultBnScaleDiff, void *resultBnBiasDiff, const void *workSpace,
-                                        const uint32_t workSpaceSizeInBytes, double epsilon, const void *SaveMean,
-                                        const void *SaveInvVariance);
-#endif
-
-ccStatus_t ccGetBatchNormalizationBackwardWorkspaceSize(ccHandle_t handle, ccBatchNormMode_t mode,
-                                                        ccTensorDescriptor_t xDesc, ccTensorDescriptor_t dyDesc,
-                                                        ccTensorDescriptor_t dxDesc,
-                                                        ccTensorDescriptor_t bnScaleBiasDesc, uint32_t *sizeInBytes);
-
-#ifndef DAVINCI_LITE
-ccStatus_t ccBatchNormalizationForwardTraining(ccHandle_t handle, ccBatchNormMode_t mode, const void *alpha,
-                                               const void *beta, const ccTensorDescriptor_t xDesc, const void *x,
-                                               const ccTensorDescriptor_t yDesc, void *y,
-                                               const ccTensorDescriptor_t bnScaleBiasMeanVarDesc, const void *bnScale,
-                                               const void *bnBias, double exponentialAverageFactor,
-                                               void *resultRunningMean, void *resultRunningVariance, void *workSpace,
-                                               uint32_t workSpaceSizeInBytes, double epsilon, void *resultSaveMean,
-                                               void *resultSaveInvVariance, const bool isTraining);
-#endif
-
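The workspace-style APIs removed above all share one calling pattern: query the required temporary size, let the caller allocate device memory, then invoke the compute function. The sketch below is not part of the original header; it is a minimal, hypothetical illustration of that pattern for the backward-filter pair, assuming the handle, descriptors, device buffers and chosen algorithm are set up elsewhere, and assuming the success constant is named CC_STATUS_SUCCESS.

/* Hypothetical sketch only: query-then-compute usage of the removed
 * ccConvolutionBackwardFilter API. Descriptors, device buffers and the
 * workspace allocation are assumed to be prepared by the caller; the
 * CC_STATUS_SUCCESS name is an assumption about the cce status enum. */
static ccStatus_t RunConvBackwardFilter(ccHandle_t handle, const ccConvolutionDescriptor_t convDesc,
                                        const ccTensorDescriptor_t xDesc, const void *x,
                                        const ccTensorDescriptor_t dyDesc, const void *dy,
                                        const ccFilterDescriptor_t dwDesc, void *dw,
                                        ccConvolutionBwdAlgo_t algo, void *workSpace, uint32_t workSpaceSize) {
  float alpha = 1.0f;  /* scaling factors are passed through void pointers, as in the header */
  float beta = 0.0f;
  uint32_t needed = 0;
  /* 1. ask how much temporary device memory the chosen algorithm needs */
  ccStatus_t rc = ccGetConvolutionBackwardFilterWorkspaceSize(handle, dyDesc, convDesc, xDesc, dwDesc, algo, &needed);
  if (rc != CC_STATUS_SUCCESS) {
    return rc;
  }
  /* 2. the caller-provided workspace must be at least `needed` bytes
   *    (the device allocation API is runtime-specific and omitted here) */
  /* 3. run the backward-filter computation itself */
  return ccConvolutionBackwardFilter(handle, convDesc, &alpha, &beta, xDesc, x, dyDesc, dy,
                                     dwDesc, dw, algo, workSpace, workSpaceSize);
}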
-ccStatus_t ccGetBatchNormalizationForwardTrainingWorkspaceSize(ccHandle_t handle, ccBatchNormMode_t mode, - ccTensorDescriptor_t xDesc, ccTensorDescriptor_t yDesc, - const ccTensorDescriptor_t bnScaleBiasMeanVarDesc, - uint32_t *sizeInBytes); - -/** - * @ingroup dnn - * @brief generate an random normal Tensor use given on/off scale. - * @param [in] handle Stream handle. - * @param [in] alpha reserved. - * @param [in] meanDesc Mean description of one-hot position. - * @param [in] mean Data pointer of mean. - * @param [in] scaleDesc On/off scale description. - * @param [in] scale Data pointer of on/off scale. - * @param [in] seed random seed used to generate random number - * @param [in] seed2 random seed used to generate random number - * @param [in] beta reserved. - * @param [in] outputDesc Description of the generated one-hot tensor. - * @param [output] output Data pointer of output. - * @return ccStatus_t - */ -ccStatus_t ccRandomNormalForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t meanDesc, - const void *mean, const ccTensorDescriptor_t scaleDesc, const void *scale, - const int64_t seed1, const int64_t seed2, const void *beta, - const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - * @brief generate random uniform tensor. - * @param [in] handle Stream handle. - * @param [in] alpha reserved. - * @param [in] minvalDesc Mean description of one-hot position. - * @param [in] minval Data pointer of mean. - * @param [in] maxvalDesc On/off scale description. - * @param [in] maxval Data pointer of on/off scale. - * @param [in] seed random seed used to generate random number - * @param [in] seed2 random seed used to generate random number - * @param [in] beta reserved. - * @param [in] outputDesc Description of the generated one-hot tensor. - * @param [output] output Data pointer of output. 
- * @return ccStatus_t
- */
-ccStatus_t ccRandomUniformForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t minvalDesc,
-                                  const void *minval, const ccTensorDescriptor_t maxvalDesc, const void *maxval,
-                                  const int64_t seed1, const int64_t seed2, const void *beta,
-                                  const ccTensorDescriptor_t outputDesc, void *output);
-
-/**
- * @ingroup dnn
- * @brief BatchMatMul forward computation.
- * @param [in] handle Stream handle.
- * @param [in] alpha reserved.
- * @param [in] xDesc tensorA Desc.
- * @param [in] x Data pointer of tensorA.
- * @param [in] yDesc tensorB Desc.
- * @param [in] y Data pointer of tensorB.
- * @param [in] beta reserved.
- * @param [in] adj_x tensorA transpose flag
- * @param [in] adj_y tensorB transpose flag
- * @param [in] outDesc Description of the output tensor.
- * @param [output] out Data pointer of output.
- * @return ccStatus_t
- */
-ccStatus_t ccBatchMatMulForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
-                                const ccTensorDescriptor_t yDesc, const void *y, const void *beta, const bool adj_x,
-                                const bool adj_y, const ccTensorDescriptor_t outDesc, void *out);
-
-ccStatus_t ccGetBatchMatMulOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, bool adj_x,
-                                     bool adj_y, int32_t *dimCnt, int32_t dim[], int32_t dimLen);
-
-/**
- * @ingroup dnn
- * @brief generate conv int8 all offset factor
- * @param [in] para the struct for scale and offset of input, filter and output
- * @param [in|out] offsetW offset of filter
- * @param [in|out] offsetPad offset of input
- * @param [in|out] scaledQrq scale computing result of input, filter and output
- * @param [in|out] nextoffsetq offset of output
- * @return ccStatus_t
- */
-ccStatus_t ccGenQuantAllOffsetFactor(const ccQuantAllOffsetPara_t *para, uint8_t &offsetW, uint8_t &offsetPad,
-                                     uint16_t &scaledQrq, uint16_t &nextoffsetq);
-
-/**
- * @ingroup dnn
- * @brief get conv int8 all offset fracZ size
- * @param [in] filterDesc descriptor of filter tensor
- * @param [in|out] size conv int8 all offset fracZ size
- * @param [in] groupNum group conv num
- * @return ccStatus_t
- */
-ccStatus_t ccSetGroupConvScene(const ccFilterDescriptor_t tensorDesc, ccConvolutionDescriptor_t convDesc);
-
-ccStatus_t ccGetInt8AllOffsetFilterFracZSizeInBytes(const ccFilterDescriptor_t filterSrcDesc,
-                                                    const ccFilterDescriptor_t filterDesc, uint32_t &size,
-                                                    uint32_t groupNum);
-
-/**
- * @ingroup dnn
- * @brief transform filter in conv int8 all offset scene
- * @param [in] filterSrcInfo descriptor of filter tensor before fracZ transform
- * @param [in] filterSrc filter addr before fracZ transform
- * @param [in] filterDstInfo descriptor of filter tensor after fracZ transform
- * @param [in] filterDst filter addr after fracZ transform
- * @param [in] quantPara the struct for scale and offset of input, filter and output
- * @param [in] ySizeInBytes filter size after fracZ transform
- * @param [in] outputDataType output data type
- * @param [in] groupNum group conv num
- * @return ccStatus_t
- */
-ccStatus_t ccTransFilterInt8AllOffset(ccFilterDescriptor_t filterSrcInfo, const void *filterSrc,
-                                      ccFilterDescriptor_t filterDstInfo, void *filterDst,
-                                      const ccQuantAllOffsetPara_t *quantPara, uint32_t ySizeInBytes,
-                                      ccDataType_t outputDataType, uint32_t groupNum);
-
-/**
- * @ingroup dnn
- * @brief transform bias in conv int8 all offset
scene - * @param [in] filterDesc descriptor of filter tensor - * @param [in] biasDesc descriptor of bias tensor - * @param [in] quantPara the struct for scale and offset of input, filter and output - * @param [in] w filter addr - * @param [in] bias bias addr - * @return ccStatus_t - */ -ccStatus_t ccTransInt8AllOffsetBias(const ccFilterDescriptor_t filterDesc, const ccTensorDescriptor_t biasDesc, - const ccQuantAllOffsetPara_t *quantPara, const void *w, const void *bias); - -/** - * @ingroup dnn - * @get dequantize - * @param [in] handle handle id - * @param [in] alpha alpha addr - * @param [in] xDesc the input Desc descriptor - * @param [in] x x data addr - * @param [in] beta beta data addr - * @param [in] yDesc the output Desc descriptor - * @param [in] y y data addr - * @return ccStatus_t - */ -ccStatus_t ccDequantizeCoreForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, - const void *x, const void *beta, const ccTensorDescriptor_t yDesc, void *y); -/** - * @ingroup dnn - * @get quantize - * @param [in] handle handle id - * @param [in] alpha alpha addr - * @param [in] xDesc the input Desc descriptor - * @param [in] x x data addr - * @param [in] beta beta data addr - * @param [in] yDesc the output Desc descriptor - * @param [in] y y data addr - * @return ccStatus_t - */ -ccStatus_t ccQuantizeCoreForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, - const void *beta, const ccTensorDescriptor_t yDesc, void *y); - -#ifndef DAVINCI_LITE -ccStatus_t ccActivationBackward(ccHandle_t handle, const ccActivationDescriptor_t activationDesc, const void *alpha, - const ccTensorDescriptor_t dyDesc, const void *dy, const ccTensorDescriptor_t xDesc, - const void *x, const void *beta, const ccTensorDescriptor_t dxDesc, void *dx); -#endif - -ccStatus_t ccL2LossForward(ccHandle_t handle, const ccL2LossDescriptor_t l2lossDesc, const void *alpha, - const ccTensorDescriptor_t xDesc, const void *x, const void *beta, - const ccTensorDescriptor_t yDesc, void *y); - -/** - * @ingroup dnn - * @brief get the output dimension info of top k v2 - * @param [in] xDesc descriptor of input tensor x - * @param [in] yDesc descriptor of input tensor y - * @param [in|out] dimCnt point to the output dimCnt - * @param [in|out] dim arrays to save dims - * @param [in| dimlen length of dim - * @return ccStatus_t - */ -ccStatus_t ccGetTopKV2OutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t kDesc, const void *k, - const int64_t axis, int32_t *dimCnt, int32_t dim[], int32_t dimLen); - -/** - * @ingroup dnn - * @brief top k v2 forward computation - * @param [in] handle cce handle - * @param [in] alpha scaling factors - * @param [in] xDesc descriptor of input tensor x - * @param [in] x input data x in device memory - * @param [in] yDesc descriptor of input tensor y - * @param [in] y input data y in device memory - * @param [in] beta bias factors - * @param [in] outputDesc descriptor of output tensor - * @param [in|out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccTopKV2Forward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, - const ccTensorDescriptor_t kDesc, const void *k, const void *beta, const bool sorted, - const int64_t axis, void *workSpace, const uint32_t workSpaceSizeInBytes, - const ccTensorDescriptor_t outputValuesDesc, void *outputValues, - const ccTensorDescriptor_t outputIndicesDesc, void *outputIndices); - -/** - * @ingroup dnn - * @brief get the workspace 
size of top k v2 - * @param [in] xDesc descriptor of input tensor x - * @param [in] yDesc descriptor of input tensor y - * @param [in] outputDesc descriptor of output tensor - * @param [in|out] sizeInBytes point to workspace size - * @return ccStatus_t - */ -ccStatus_t ccGetTopKV2ForwardWorkspaceSize(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t kDesc, - const ccTensorDescriptor_t indiceDesc, const void *k, const int64_t axis, - uint32_t *sizeInBytes); - -/** - * @ingroup dnn - * @brief Get unsorted segment reduction output dim - * @param [in] xDesc descriptor of input tensor - * @param [in] segmentIdsDesc descriptor of input segmentIds tensor - * @param [in] segmentsNum output slice num - * @param [out] dimCnt output tensor dim cnt - * @param [out] dim output tensor dim - * @param [in| dimlen length of dim - * @return ccStatus_t - */ -ccStatus_t ccGetUnsortedSegmentReductionOutputDim(const ccTensorDescriptor_t xDesc, - const ccTensorDescriptor_t segmentIdsDesc, int32_t segmentsNum, - int32_t *dimCnt, int32_t dim[], int32_t dimLen); - -/** - * @ingroup dnn - * @brief reduce all forward computation - * @param [in] handle cce handle - * @param [in] segmentsNum output slice num - * @param [in] alpha scaling factors - * @param [in] xDesc descriptor of input tensor - * @param [in] x input data in device memory - * @param [in] segmentIdsDesc descriptor of input segmentIds tensor - * @param [in] x input segmentIds data in device memory - * @param [in] beta bias factors - * @param [in] outputDesc descriptor of output tensor - * @param [in|out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccUnsortedSegmentSumForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, - const void *x, const ccTensorDescriptor_t segmentIdsDesc, const void *segmentIds, - const int32_t segmentsNum, const void *beta, - const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - * @brief reverse sequence forward computation - * @param [in] handle cce handle - * @param [in] alpha scaling factors - * @param [in] xDesc descriptor of input tensor x - * @param [in] x input data x in device memory - * @param [in] yDesc descriptor of input tensor y - * @param [in] y input data y in device memory - * @param [in] beta bias factors - * @param [in] outputDesc descriptor of output tensor - * @param [in|out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccReverseSequenceForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t inputDesc, - const void *input, const ccTensorDescriptor_t seqLengthsDesc, - const void *seqLengths, int64_t seqAxis, int64_t batchAxis, const void *beta, - const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - * @brief realdiv between two tensors. - * @param [in] alpha reserved. - * @param [in] xDesc description of the left operator tensor. - * @param [in] x data point of the left operator tensor. - * @param [in] yDesc description of the right operator tensor. - * @param [in] y data point of the right operator tensor. - * @param [in] beta reserved. - * @param [in] outputDesc description of the output tensor. - * @param [output] output data point of the output tensor. 
- * @return ccStatus_t - */ - -ccStatus_t ccEqualForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, - const ccTensorDescriptor_t yDesc, const void *y, const void *beta, - const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - * @brief get output shape of realdiv. - * @param [in] xDesc description of the left operator tensor. - * @param [in] yDesc description of the right operator tensor. - * @param [out] dimCnt output tensor dim cnt - * @param [out] dim output tensor dim - * @param [in| dimlen length of dim - * @return ccStatus_t - */ -ccStatus_t ccGetEqualOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt, - int32_t *dim, int32_t dimLen); - -/** - * @ingroup dnn - * @brief invert permutation forward computation - * @param [in] handle cce handle - * @param [in] alpha scaling factors - * @param [in] xDesc descriptor of input tensor - * @param [in] x input data in device memory - * @param [in] beta bias factors - * @param [in] outputDesc descriptor of output tensor - * @param [in|out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccInvertPermutationForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, - const void *x, const void *beta, const ccTensorDescriptor_t outputDesc, - void *output); - -/** - * @ingroup dnn - * @brief get the workspace size of non max suppression - * @param [in] handle descriptor of handle - * @param [in] scoresDesc descriptor of input tensor scoresDesc - * @param [in] boxesDesc descriptor of input tensor boxesDesc - * @param [in|out] sizeInBytes point to workspace size - * @return ccStatus_t - */ -ccStatus_t ccGetNonMaxSuppressionWorkspaceSize(ccHandle_t handle, const ccTensorDescriptor_t scoresDesc, - const ccTensorDescriptor_t boxesDesc, uint32_t *sizeInBytes); - -/** - * @ingroup dnn - * @brief get the output dim of non max suppression - * @param [in] scoresDesc descriptor of input tensor scoresDesc - * @param [in] maxOutPutSize the max size of output - * @param [in|out] dimCnt point to the count of dim - * @param [in|out] dim[] the array of output dim - * @param [in| dimlen length of dim - * @return ccStatus_t - */ -ccStatus_t ccGetNonMaxSuppressionOutputDim(const ccTensorDescriptor_t scoresDesc, const int32_t maxOutPutSize, - int32_t *dimCnt, int32_t dim[], int32_t dimLen); - -/** - * @ingroup dnn - * @brief multinomial forward. - * @param [in] handle cce handle - * @param [in] alpha common scale factor - * @param [in] xDesc descriptor of input data - * @param [in] x input data in device memory - * @param [in] numSamples number of independent samples to draw for each row slice - * @param [in] seed1 sed to create a random seed for the distribution - * @param [in] seed2 sed to create a random seed for the distribution - * @param [in] workSpace work space for inter access - * @param [in] workSpaceSizeInBytes work space size - * @param [in] beta common scale factor - * @param [in] outputDesc descriptor of output data - * @param [in|out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccMultinomialForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, - int32_t numSamples, int64_t seed1, int64_t seed2, void *workSpace, - uint32_t workSpaceSizeInBytes, const void *beta, const ccTensorDescriptor_t outputDesc, - void *output); -/** - * @ingroup dnn - * @brief get output dim of generated one-hot tensor. 
- * @param [in] indicesDesc Indices description of one-hot position.
- * @param [in] depth depth of the one-hot dimension.
- * @param [in] axis axis to fill with the one-hot dimension.
- * @param [output] dimCnt output dim count.
- * @param [output] dim output dims.
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
-ccStatus_t ccGetOneHotOutputDim(const ccTensorDescriptor_t indicesDesc, int32_t depth, int32_t axis, int32_t *dimCnt,
-                                int32_t *dim, int32_t dimLen);
-
-/**
- * @ingroup dnn
- * @brief generate an one-hot Tensor use given on/off value.
- * @param [in] handle Stream handle.
- * @param [in] alpha reserved.
- * @param [in] indicesDesc Indices description of one-hot position.
- * @param [in] indices Data pointer of indices.
- * @param [in] onDesc On value description.
- * @param [in] on Data pointer of on value.
- * @param [in] offDesc Off value description.
- * @param [in] off Data pointer of off value.
- * @param [in] depth depth of the one-hot dimension.
- * @param [in] axis axis to fill with the one-hot dimension.
- * @param [in] beta reserved.
- * @param [in] outputDesc Description of the generated one-hot tensor.
- * @param [output] output Data pointer of output.
- * @return ccStatus_t
- */
-ccStatus_t ccOneHotForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t indicesDesc,
-                           const void *indices, const ccTensorDescriptor_t onDesc, const void *on,
-                           const ccTensorDescriptor_t offDesc, const void *off, const int32_t depth, const int32_t axis,
-                           const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
-/**
- * @ingroup dnn
- * @brief get the workspaceSize of multinomial
- * @param [in] xDesc descriptor of input tensor
- * @param [in] numSamples number of samples
- * @param [out] sizeInBytes workspace size in bytes
- * @return ccStatus_t
- */
-ccStatus_t ccGetMultinomialWorkspaceSize(const ccTensorDescriptor_t xDesc, uint32_t *sizeInBytes);
-/**
- * @ingroup dnn
- * @brief get the output dimension info of multinomial
- * @param [in] xDesc descriptor of input tensor
- * @param [in] numSample number of independent samples to draw for each row slice
- * @param [in|out] dimCnt point to the output dimCnt
- * @param [in|out] dim arrays to save dims
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
-ccStatus_t ccGetMultinomialOutputDim(const ccTensorDescriptor_t xDesc, int32_t numSample, int32_t *dimCnt,
-                                     int32_t dim[], int32_t dimLen);
-/**
- * @ingroup dnn
- * @brief get the output dimension info of BiasAddBackward
- * @param [in] dyDesc descriptor of input tensor
- * @param [in|out] n outputTensor [N]CHW
- * @param [in|out] c outputTensor N[C]HW
- * @param [in|out] h outputTensor NC[H]W
- * @param [in|out] w outputTensor NCH[W]
- * @return ccStatus_t
- */
-ccStatus_t ccGetBiasAddBackwardOutputDim(const ccTensorDescriptor_t dyDesc, int32_t *n, int32_t *c, int32_t *h,
-                                         int32_t *w);
-
-/**
- * @ingroup dnn
- * @brief biasadd backward.
- * @param [in] handle cce handle - * @param [in] alpha common scale factor - * @param [in] dyDesc descriptor of input data - * @param [in] dy input data in device memory - * @param [in] beta common scale factor - * @param [in] dbDesc descriptor of output data - * @param [in|out] db output data in device memory - * @return ccStatus_t - */ -#ifndef DAVINCI_LITE -ccStatus_t ccBiasAddBackward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t dyDesc, const void *dy, - const void *beta, const ccTensorDescriptor_t dbDesc, void *db); - -ccStatus_t ccMaxPoolWithArgmaxForward(ccHandle_t handle, const ccPoolingDescriptor_t poolingDesc, const void *alpha, - const ccTensorDescriptor_t xDesc, const void *x, const void *beta, - const ccTensorDescriptor_t yDesc, void *y, const ccTensorDescriptor_t argMaskDesc, - void *argMask); -#endif - -ccStatus_t ccCreatePoolingMaskDescriptor(ccTensorDescriptor_t *poolingMaskDesc); - -ccStatus_t ccDestroyPoolingMaskDescriptor(ccTensorDescriptor_t *poolingMaskDesc); - -ccStatus_t ccSetPoolingMaskTensorDescriptor(ccTensorDescriptor_t poolingMaskDesc, ccTensorFormat_t format, - ccDataType_t dataType, int32_t n, int32_t c, int32_t h, int32_t w, - int32_t windowH, int32_t windowW); - -ccStatus_t ccGetPoolingMaskTensorSizeInBytes(ccTensorDescriptor_t poolingMaskDesc, uint32_t *size); - -/** - * @ingroup dnn - * @brief get the mask output dimension info of maxpooling training forward - * @param [in] pooling descriptor of convolution operator - * @param [in] xDesc descriptor of input tensor - * @param [in|out] n point to batch size - * @param [in|out] c point to channels - * @param [in|out] h point to height of feature map - * @param [in|out] w point to width of feature map - * @param [in|out] windowH point to height of window - * @param [in|out] windowW point to width of windowW - * @return ccStatus_t - */ -ccStatus_t ccGetPoolingMaskDim(const ccPoolingDescriptor_t poolingDesc, const ccTensorDescriptor_t xDesc, int32_t *n, - int32_t *c, int32_t *h, int32_t *w, int32_t *windowH, int32_t *windowW); - -#ifndef DAVINCI_LITE -ccStatus_t ccSoftmaxCrossEntropyLoss(ccHandle_t handle, ccSoftmaxAlgo_t algo, ccSoftmaxMode_t mode, - ccCrossEntropyMode_t ceMode, const void *alpha, const void *scale, - const ccTensorDescriptor_t logitsDesc, const void *logits, - const ccTensorDescriptor_t labelsDesc, const void *labels, const void *labelSmooth, - const void *beta, const ccTensorDescriptor_t lossDesc, void *loss); - -ccStatus_t ccSoftmaxCrossEntropyDx(ccHandle_t handle, ccSoftmaxAlgo_t algo, ccSoftmaxMode_t mode, - ccCrossEntropyMode_t ceMode, const void *alpha, const void *scale, - const ccTensorDescriptor_t logitsDesc, const void *logits, - const ccTensorDescriptor_t labelsDesc, const void *labels, const void *labelSmooth, - const void *beta, const ccTensorDescriptor_t dxDesc, void *dx); - -ccStatus_t ccAvgPoolingBackward(ccHandle_t handle, const ccPoolingDescriptor_t poolingDesc, const void *alpha, - const ccTensorDescriptor_t dyDesc, const void *dy, const void *beta, - const ccTensorDescriptor_t dxDesc, const void *dx); - -ccStatus_t ccTrainingAssignOp(ccHandle_t handle, const ccAssignOpMode_t assignOpDesc, const void *alpha, - const void *beta, const ccTensorDescriptor_t aDesc, void *a, - const ccTensorDescriptor_t bDesc, const void *b); - -/** - * @ingroup dnn - * @brief momentum optimizer for variable update - * @param [in] handle cce handle - * @param [in] inputDesc descriptor of input tensor: gradient,accumulation,variable - * @param [in] gradient gradient input - * 
@param [in|out] accumulation accumulation input and updated output - * @param [in|out] variable variable input and updated output - * @param [in] algo indicate whether need FP16 output - * @param [in] momentum scaler to control accumulation - * @param [in] learningRate scaler - * @param [in] lossScaleReciprocal scaler - * @param [in] workSpace additional memory address - * @param [in] workSpaceSizeInBytes additional memory size - * @param [out] variableUpdatedFP16Desc descriptor of FP16 output tensor: variableUpdatedFP16 - * @param [out] variableUpdatedFP16 variableUpdatedFP16 - * @return ccStatus_t - */ -ccStatus_t ccApplyMomentum(ccHandle_t handle, const ccTensorDescriptor_t inputDesc, const void *gradient, - void *accumulation, void *variable, const ccMomentumAlgo_t algo, const void *momentum, - const void *learningRate, const void *lossScaleReciprocal, void *workSpace, - const uint32_t workSpaceSizeInBytes, const ccTensorDescriptor_t variableUpdatedFP16Desc, - void *variableUpdatedFP16); - -ccStatus_t ccSsdClassifyLossTrain(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t labelDesc, - const void *label, const ccTensorDescriptor_t greaterConstDesc, - const void *greaterConst, const ccTensorDescriptor_t subConstDesc, - const void *subConst, const ccTensorDescriptor_t sparseDesc, const void *sparse, - const void *beta, const ccTensorDescriptor_t castoutDesc, const void *castout, - const ccTensorDescriptor_t muloutDesc, const void *mulout); - -#endif - -/** - * @ingroup dnn - * @brief get the workspace size of applymomentum - * @param [in] inputDesc descriptor of input tensor - * @return ccStatus_t - */ -ccStatus_t ccGetApplyMomentumWorkspaceSize(const ccTensorDescriptor_t inputDesc, uint32_t *sizeInBytes); -#ifndef DAVINCI_LITE -ccStatus_t ccHwck2FracZ(ccHandle_t handle, const ccFilterDescriptor_t xDesc, const void *x, - const ccFilterDescriptor_t yDesc, void *y); - -ccStatus_t ccFracZ2Hwck(ccHandle_t handle, const ccFilterDescriptor_t xDesc, const void *x, - const ccFilterDescriptor_t yDesc, void *y); -ccStatus_t ccAddNForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const int32_t inputNum, - const void *x[], const void *beta, void *workSpace, uint32_t workSpaceSizeInBytes, - const ccTensorDescriptor_t yDesc, void *y); -#endif -ccStatus_t ccGetAddNForwardWorkspaceSize(ccHandle_t handle, const ccTensorDescriptor_t xDesc, const int32_t inputNum, - const ccTensorDescriptor_t yDesc, uint32_t *sizeInBytes); -ccStatus_t ccGetAddNForwardOutputDim(const ccTensorDescriptor_t xDesc, int32_t *dimCnt, int32_t *dim, int32_t dimLen); -ccStatus_t ccAddTrainForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, - const ccTensorDescriptor_t wDesc, const void *w, const void *beta, void *workSpace, - uint32_t workSpaceSizeInBytes, const ccTensorDescriptor_t yDesc, void *y); -ccStatus_t ccGetAddTrainForwardWorkspaceSize(ccHandle_t handle, const ccTensorDescriptor_t xDesc, - const ccTensorDescriptor_t wDesc, const ccTensorDescriptor_t yDesc, - uint32_t *sizeInBytes); -ccStatus_t ccGetAddTrainForwardOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t wDesc, - int32_t *dimCnt, int32_t dim[], int32_t dimLen); -ccStatus_t ccMulTrainForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, - const ccTensorDescriptor_t wDesc, const void *w, const void *beta, void *workSpace, - uint32_t workSpaceSizeInBytes, const ccTensorDescriptor_t yDesc, void *y); -ccStatus_t 
ccGetMulTrainForwardWorkspaceSize(ccHandle_t handle, const ccTensorDescriptor_t xDesc, - const ccTensorDescriptor_t wDesc, const ccTensorDescriptor_t yDesc, - uint32_t *sizeInBytes); -ccStatus_t ccGetMulTrainForwardOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t wDesc, - int32_t *dimCnt, int32_t dim[], int32_t dimLen); - -/** - * @ingroup dnn - * @brief get workspace size - * @param [in] xDesc descriptor of input tensor - * @param [in|out] sizeInBytes workspace size - * @return ccStatus_t - */ -ccStatus_t ccGetRandomShuffleWorkspaceSize(const ccTensorDescriptor_t xDesc, uint32_t *sizeInBytes); - -/** - * @ingroup dnn - * @brief random shuffle forward computation - * @param [in] handle cce handle - * @param [in] alpha common scale factor - * @param [in] xDesc descriptor of input data - * @param [in] x input data in device memory - * @param [in] workspace temporary space - * @param [in] workspaceSizeInBytes temporary space size - * @param [in] seed random seed used to generate random number - * @param [in] seed2 random seed used to generate random number - * @param [in] beta common scale factor - * @param [in] outputDesc descriptor of output data - * @param [in|out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccRandomShuffleForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, - void *workspace, const uint32_t workspaceSizeInBytes, const int64_t seed1, - const int64_t seed2, const void *beta, const ccTensorDescriptor_t outputDesc, - void *output); -/** - * @ingroup dnn - * @brief sin forward: - * data type only support float float16 double - * data format only support ND - * @param [in] handle cce handle - * @param [in] alpha common scale factor - * @param [in] xDesc descriptor of input data - * @param [in] input input data in device memory - * @param [in] beta common scale factor - * @param [in] outputDesc descriptor of output data - * @param [in|out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccSinForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *input, - const void *beta, const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - * @brief cos forward: - * data type only support float float16 double - * data format only support ND - * @param [in] handle cce handle - * @param [in] alpha common scale factor - * @param [in] xDesc descriptor of input data - * @param [in] input input data in device memory - * @param [in] beta common scale factor - * @param [in] outputDesc descriptor of output data - * @param [in|out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccCosForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *input, - const void *beta, const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - * @brief tan forward: - * data type only support float float16 double - * data format only support ND - * @param [in] handle cce handle - * @param [in] alpha common scale factor - * @param [in] xDesc descriptor of input data - * @param [in] input input data in device memory - * @param [in] beta common scale factor - * @param [in] outputDesc descriptor of output data - * @param [in|out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccTanForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *input, - const void *beta, const ccTensorDescriptor_t 
outputDesc, void *output); - -/** - * @ingroup dnn - * @brief get the output dimension info of unstack - * @param [in] xDesc descriptor of input tensor - * @param [in] axis the axis to unstack along - * @param [in|out] dimCnt point to the output dimCnt - * @param [in|out] dim arrays to save dims - * @param [in| dimlen length of dim - * @return ccStatus_t - */ -ccStatus_t ccGetUnstackOutputDim(const ccTensorDescriptor_t xDesc, int32_t axis, int32_t *dimCnt, int32_t dim[], - int32_t dimLen); - -/** - * @ingroup dnn - * @brief unstack forward. - * @param [in] handle cce handle - * @param [in] alpha common scale factor - * @param [in] xDesc descriptor of input data - * @param [in] x input data in device memory - * @param [in] num the length of the dimension axis - * @param [in] axis the axis to unstack along - * @param [in] beta common scale factor - * @param [in] outputDesc descriptor of output data - * @param [in|out] output output data in device memory - * @return ccStatus_t - */ - -ccStatus_t ccUnstackForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, - int32_t num, int32_t axis, const void *beta, const ccTensorDescriptor_t outputDesc, - void *output[]); - -ccStatus_t ccResizeNearestNeighborCpuForward(ccHandle_t handle, const ccResizeNearestNeighborDescriptor_t resizeDesc, - const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, - const void *beta, const ccTensorDescriptor_t outputDesc, void *output); -/** - * @ingroup dnn - * @brief get the output dimension info of resize nearest neighbor - * @param [in] resizeDesc descriptor of resize - * @param [in] xDesc descriptor of input tensor - * @param [in|out] dimCnt point to the output dimCnt - * @param [in|out] dim arrays to save dims - * @param [in| dimlen length of dim - * @return ccStatus_t - */ -ccStatus_t ccGetResizeNearestNeighborOutputDim(const ccResizeNearestNeighborDescriptor_t resizeDesc, - const ccTensorDescriptor_t xDesc, int32_t *dimCnt, int32_t dim[], - int32_t dimLen); - -/** - * @ingroup dnn - * @brief create descriptor of ResizeNearestNeighbor - * @param [in|out] resizeDesc point to descriptor of ResizeNearestNeighbor attr - * @return ccStatus_t - */ -ccStatus_t ccCreateResizeNearestNeighborDescriptor(ccResizeNearestNeighborDescriptor_t *resizeDesc); - -/** - * @ingroup dnn - * @brief destroy descriptor of ResizeNearestNeighbor - * @param [in|out] resizeDesc point to descriptor of ResizeNearestNeighbor attr - * @return ccStatus_t - */ -ccStatus_t ccDestroyResizeNearestNeighborDescriptor(ccResizeNearestNeighborDescriptor_t *resizeDesc); - -/** - * @ingroup dnn - * @brief set descriptor of ResizeNearestNeighbor. 
- * @param [in|out] resizeDesc descriptor of resize nearest neighbor operator - * @param [in] alignCorners whether the centers of input and output are aligned - * @param [in] height height of output - * @param [in] width width of output - * @return ccStatus_t - */ -ccStatus_t ccSetResizeNearestNeighborDescriptor(ccResizeNearestNeighborDescriptor_t resizeDesc, bool alignCorners, - int32_t height, int32_t width); - -/** - * @ingroup dnn - * [ccGetPadV2OutputDim] - * @brief get the output dimension info of pad - * @param [in] xDesc descriptor of input tensor x - * @param [in] padDesc descriptor of input paddings - * @param [in|out] dimCnt point to the output dimCnt - * @param [in|out] dim arrays to save dims - * @param [in| dimlen length of dim - * @return ccStatus_t - */ -ccStatus_t ccGetPadV2OutputDim(const ccTensorDescriptor_t xDesc, const ccPadV2Descriptor_t padDesc, int32_t *dimCnt, - int32_t dim[], int32_t dimLen); - -ccStatus_t ccPadV2CpuForward(ccHandle_t handle, const ccPadV2Descriptor_t padDesc, const void *alpha, - const ccTensorDescriptor_t xDesc, const void *x, const void *beta, - const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - * @brief create descriptor of parameters for padv2 function - * @param [in] point to descriptor of parameters for padv2 function - * @return ccStatus_t - */ -ccStatus_t ccCreatePadV2Descriptor(ccPadV2Descriptor_t *padDesc); - -/** - * @ingroup dnn - * @brief destroy descriptor of parameters for padv2 function - * @param [in] point to descriptor of parameters for padv2 function - * @return ccStatus_t - */ -ccStatus_t ccDestroyPadV2Descriptor(ccPadV2Descriptor_t *padDesc); - -/** - * @brief init descriptor for parameter of padv2 function - * @param [in|out] padDesc descriptor of pad - * @param [in] padShapeCnt padshape count - * @param [in] padShapeLow padshape low - * @param [in] padShapeHigh padshape high - * @param [in] padMode pad mode - * @param [in] padValue pad value ptr - * @param [in] padValueType pad value data type - * @return ccStatus_t - */ -ccStatus_t ccSetPadV2Descriptor(ccPadV2Descriptor_t padDesc, const int32_t padShapeCnt, const int32_t padShapeLow[], - const int32_t padShapeHigh[], const ccPadMode_t padMode, const void *padValue, - const ccDataType_t padValueType); -/** - * @ingroup dnn - * @brief create descriptor of batchToSpace - * @param [in|out] batchToSpaceDesc point to descriptor of batchToSpace - * @return ccStatus_t - */ -ccStatus_t ccCreateBatchToSpaceDescriptor(ccBatchToSpaceDescriptor_t *batchToSpaceDesc); - -/** - * @ingroup dnn - * @brief set batchToSpaceDesc - * @param [in|out] batchToSpaceDesc descriptor of batchToSpace - * @param [in] blockShape blockShape of batchToSpace - * @param [in] crops crops of batchToSpace - * @param [in] blockShapeLength blockShapeLength of batchToSpace - * @return ccStatus_t - */ -ccStatus_t ccSetBatchToSpaceDescriptor(ccBatchToSpaceDescriptor_t paramsDesc, const int32_t *blockShape, - const int32_t *crops, const int32_t blockShapeLength); - -/** - * @ingroup dnn - * @brief get batchToSpaceDesc - * @param [in|out] batchToSpaceDesc descriptor of batchToSpace - * @param [in] blockShape blockShape of batchToSpace - * @param [in] crops crops of batchToSpace - * @param [in] blockShapeLength blockShapeLength of batchToSpace - * @return ccStatus_t - */ -ccStatus_t ccGetBatchToSpaceDescriptor(const ccBatchToSpaceDescriptor_t paramsDesc, int32_t *blockShape, int32_t *crops, - int32_t *blockShapeLength); - -/** - * @ingroup dnn - * @brief destroy descriptor of batchToSpace 
- * @param [in] *batchToSpaceDesc descriptor of batchToSpace - * @return ccStatus_t - */ -ccStatus_t ccDestroyBatchToSpaceDescriptor(ccBatchToSpaceDescriptor_t *batchToSpaceDesc); - -/** - * @ingroup dnn - * @brief get the output dimension info of batch to space - * @param [in] xDesc descriptor of input tensor - * @param [in|out] dimCnt point to the output dimCnt - * @param [in|out] dim arrays to save dims - * @param [in| dimlen length of dim - * @return ccStatus_t - */ - -ccStatus_t ccGetBatchToSpaceOutputDim(const ccTensorDescriptor_t xDesc, - const ccBatchToSpaceDescriptor_t batchToSpaceDesc, int32_t *dimCnt, int32_t dim[], - int32_t dimLen); - -/** - * @ingroup dnn - * @brief batch to space forward computation - * @param [in] handle cce handle - * @param [in] paramsDesc descriptor of input params - * @param [in] alpha scaling factors - * @param [in] xDesc descriptor of input tensor - * @param [in] x input data in device memory - * @param [in] beta bias factors - * @param [in] outputDesc descriptor of output tensor - * @param [in|out] output output data in device memory - * @return ccStatus_t - */ - -ccStatus_t ccBatchToSpaceForward(ccHandle_t handle, const ccBatchToSpaceDescriptor_t paramsDesc, const void *alpha, - const ccTensorDescriptor_t xDesc, const void *x, const void *beta, - const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - * @brief create descriptor of spaceToBatch - * @param [in|out] spaceToBatchDesc point to descriptor of spaceToBatch - * @return ccStatus_t - */ -ccStatus_t ccCreateSpaceToBatchDescriptor(ccSpaceToBatchDescriptor_t *spaceToBatchDesc); - -/** - * @ingroup dnn - * @brief set spaceToBatchDesc - * @param [in|out] spaceToBatchDesc descriptor of spaceToBatch - * @param [in] blockShape blockShape of spaceToBatch - * @param [in] paddings paddings of spaceToBatch - * @param [in] blockShapeLength blockShapeLength of spaceToBatch - * @return ccStatus_t - */ -ccStatus_t ccSetSpaceToBatchDescriptor(ccSpaceToBatchDescriptor_t paramsDesc, const int32_t *blockShape, - const int32_t *paddings, const int32_t blockShapeLength); - -/** - * @ingroup dnn - * @brief get spaceToBatchDesc - * @param [in|out] spaceToBatchDesc descriptor of spaceToBatch - * @param [in] blockShape blockShape of spaceToBatch - * @param [in] paddings paddings of spaceToBatch - * @param [in] blockShapeLength blockShapeLength of spaceToBatch - * @return ccStatus_t - */ -ccStatus_t ccGetSpaceToBatchDescriptor(const ccSpaceToBatchDescriptor_t paramsDesc, int32_t *blockShape, - int32_t *paddings, int32_t *blockShapeLength); - -/** - * @ingroup dnn - * @brief destroy descriptor of spaceToBatch - * @param [in] *spaceToBatchDesc descriptor of spaceToBatch - * @return ccStatus_t - */ -ccStatus_t ccDestroySpaceToBatchDescriptor(ccSpaceToBatchDescriptor_t *spaceToBatchDesc); - -/** - * @ingroup dnn - * @brief get the output dimension info of space to batch - * @param [in] xDesc descriptor of input tensor - * @param [in|out] dimCnt point to the output dimCnt - * @param [in|out] dim arrays to save dims - * @param [in| dimlen length of dim - * @return ccStatus_t - */ - -ccStatus_t ccGetSpaceToBatchOutputDim(const ccTensorDescriptor_t xDesc, - const ccSpaceToBatchDescriptor_t spaceToBatchDesc, int32_t *dimCnt, int32_t dim[], - int32_t dimLen); - -/** - * @ingroup dnn - * @brief space to batch forward computation - * @param [in] handle cce handle - * @param [in] paramsDesc descriptor of input params - * @param [in] alpha scaling factors - * @param [in] xDesc descriptor of input tensor - * 
@param [in] x input data in device memory - * @param [in] beta bias factors - * @param [in] outputDesc descriptor of output tensor - * @param [in|out] output output data in device memory - * @return ccStatus_t - */ - -ccStatus_t ccSpaceToBatchForward(ccHandle_t handle, const ccSpaceToBatchDescriptor_t paramsDesc, const void *alpha, - const ccTensorDescriptor_t xDesc, const void *x, const void *beta, - const ccTensorDescriptor_t outputDesc, void *output); - -ccStatus_t ccTransFilterDesc2TensorDesc(ccFilterDescriptor_t wDesc, ccTensorDescriptor_t tensorDesc); - -/* - * @brief get the output dimension info of extractImagePatches - * @param [in] xDesc descriptor of input tensor x - * @param [in] ksizes ksizes array - * @param [in] strides strides array - * @param [in] rates rates array - * @param [in] padding padding type - * @param [in|out] dimCnt point to the output dimCnt - * @param [in|out] dim arrays to save dims - * @return ccStatus_t - */ -ccStatus_t ccGetExtractImagePatchesOutputDim(const ccTensorDescriptor_t xDesc, const ccIntArray_t *ksizes, - const ccIntArray_t *strides, const ccIntArray_t *rates, - const ccExtractImagePatchesPadType_t padding, int32_t *dimCnt, - int32_t dim[], const int32_t dimLen); - -/** - * @ingroup dnn - * @brief cum forward. - * @param [in] handle cce handle - * @param [in] alpha common scale factor - * @param [in] xDesc descriptor of input data, dimCnt:1~8 - * @param [in] x input data in device memory - * @param [in] axisDesc scale factor, dimCnt:0 - * @param [in] axis which axis to cum calc, device memory - * @param [in] beta common scale factor - * @param [in] opType calc type, eg. sum, prod.... - * @param [in] exclusive cum flag, true or false - * @param [in] reverse cum flag, true or false - * @param [in] outputDesc descriptor of output data - * @param [in|out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccCumForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, - const ccTensorDescriptor_t axisDesc, const void *axis, const void *beta, const CumOpType opType, - const bool exclusive, const bool reverse, const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @ingroup dnn - * @brief ExtractImagePatches forward. 
- * @param [in] handle cce handle
- * @param [in] ksizes ksizes array
- * @param [in] strides strides array
- * @param [in] rates rates array
- * @param [in] padding padding type
- * @param [in] alpha common scale factor
- * @param [in] xDesc descriptor of input data x
- * @param [in] x input data x in device memory
- * @param [in] beta common scale factor
- * @param [in] outputDesc descriptor of output data
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
-ccStatus_t ccExtractImagePatchesForward(ccHandle_t handle, const ccIntArray_t *ksizes, const ccIntArray_t *strides,
-                                        const ccIntArray_t *rates, const ccExtractImagePatchesPadType_t padding,
-                                        const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
-                                        const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
-
-/**
- * @brief get argmax output dim info
- * @param [in] argDesc argmaxmin descriptor
- * @param [in] xDesc descriptor of input tensor
- * @param [in|out] dimCnt output dim count
- * @param [in|out] dim output dim
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
-ccStatus_t ccGetArgMaxOutputDim(const ccArgmaxminDescriptor_t argDesc, const ccTensorDescriptor_t xDesc,
-                                int32_t *dimCnt, int32_t dim[], int32_t dimLen);
-
-/**
- * @ingroup dnn
- * @brief argmax forward computation
- * @param [in] handle cce handle
- * @param [in] argDesc argmaxmin descriptor
- * @param [in] alpha scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] workSpace workspace pointer
- * @param [in] workSpaceSizeInBytes workspace size in bytes
- * @param [in] beta bias factors
- * @param [in] outputDesc descriptor of output tensor
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
-ccStatus_t ccArgMaxForward(ccHandle_t handle, const ccArgmaxminDescriptor_t argDesc, const void *alpha,
-                           const ccTensorDescriptor_t xDesc, const void *x, void *workSpace,
-                           const uint32_t workSpaceSizeInBytes, const void *beta, const ccTensorDescriptor_t outputDesc,
-                           void *output);
-
-/**
- * @ingroup dnn
- * @brief get the workspace size of argmax
- * @param [in] argDesc descriptor of tagCcArgmaxmin
- * @param [in] xDesc descriptor of input tensor
- * @param [in|out] sizeInBytes workspace size
- * @return ccStatus_t
- */
-ccStatus_t ccGetArgMaxWorkspaceSize(const ccArgmaxminDescriptor_t argDesc, const ccTensorDescriptor_t xDesc,
-                                    uint32_t *sizeInBytes);
-
-/**
- * @ingroup dnn
- * @brief create descriptor of Argmaxmin
- * @param [in|out] argDesc point to descriptor of Argmaxmin attr
- * @return ccStatus_t
- */
-ccStatus_t ccCreateArgmaxminDescriptor(ccArgmaxminDescriptor_t *argDesc);
-
-/**
- * @ingroup dnn
- * @brief destroy descriptor of Argmaxmin
- * @param [in|out] argDesc point to descriptor of Argmaxmin attr
- * @return ccStatus_t
- */
-ccStatus_t ccDestroyArgmaxminDescriptor(ccArgmaxminDescriptor_t *argDesc);
-
-/**
- * @ingroup dnn
- * @brief set descriptor of Argmaxmin
- * @param [in|out] argDesc descriptor of tagCcArgmaxmin
- * @param [in] axisType
- * @param [in] outMaxVal whether to return the maximum value
- * @param [in] topK number that returns the maximum index or maximum value
- * @param [in] axis Describes which axis of the input Tensor to reduce across
- * @param [in] keepDims whether to keep reduced dim
- * @param [in] reduceSize the num of elements to be reduced to get topK elements, reduceSize=-1 means the total num
- * of elements in axis dimension
- * @param
[in] reduceStride the stride for reduce operation, reduceStride=1 means the layout of target data is - * continuous - * @return ccStatus_t - */ -ccStatus_t ccSetArgmaxminDescriptor(ccArgmaxminDescriptor_t argDesc, int32_t axisType, bool outMaxVal, int64_t topK, - int64_t axis, bool keepDims, int64_t reduceSize = -1, int64_t reduceDStride = 1); - -ccStatus_t ccArgMinForward(ccHandle_t handle, const ccArgmaxminDescriptor_t argDesc, const void *alpha, - const ccTensorDescriptor_t xDesc, const void *x, const void *beta, - const ccTensorDescriptor_t outputDesc, void *output); - -ccStatus_t ccGetArgMinOutputDim(const ccArgmaxminDescriptor_t argDesc, const ccTensorDescriptor_t xDesc, - int32_t *dimCnt, int32_t dim[], const int32_t dimLen); -/** - * @ingroup dnn - * @brief lsh projection forward computation - * @param [in] handle cce handle - * @param [in] alpha scaling factors - * @param [in] hashDesc descriptor of input tensor hashDesc - * @param [in] hash input data hash in device memory - * @param [in] weightDesc descriptor of input tensor weightDesc - * @param [in] weight input data weight in device memory - * @param [in] inputDesc descriptor of input tensor inputDesc - * @param [in] lookup input data lookup in device memory - * @param [in] type 1:SPARSE 2.DENSE - * @param [in] beta bias factors - * @param [in] workSpace workSpace data in device memory - * @param [in] workSpaceSizeInBytes workSpace length - * @param [in] outputDesc descriptor of output tensor - * @param [in|out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccLshProjectionForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t hashDesc, - const void *hash, const ccTensorDescriptor_t weightDesc, const void *weight, - const ccTensorDescriptor_t inputDesc, const void *input, const LSHProjectionType type, - const void *beta, void *workSpace, const uint32_t workSpaceSizeInBytes, - const ccTensorDescriptor_t outputDesc, void *output); -/** - * @ingroup dnn - * @brief get the workspace size of lsh projection - * @param [in] inputDesc descriptor of input tensor input - * @param [in] hashDataType data type of hash - * @param [in|out] sizeInBytes workspace size - * @return ccStatus_t - */ -ccStatus_t ccGetLshProjectionForwardWorkspaceSize(const ccTensorDescriptor_t inputDesc, const ccDataType_t hashDataType, - uint32_t *sizeInBytes); -/** - * @ingroup dnn - * @brief get the output dimension info of LshProjection, - * @param [in] hashDesc descriptor of hash - * @param [in] type type of mode - * @param [in|out] dimCnt point to the output dimCnt - * @param [in|out] dim arrays to save dims - * @param [in] dimLen dim length - * @return ccStatus_t - */ -ccStatus_t ccGetLshProjectionOutputDim(const ccTensorDescriptor_t hashDesc, const LSHProjectionType type, - int32_t *dimCnt, int32_t dim[], const int32_t dimLen); -/** - * @ingroup dnn - * @brief get the weight dimension info of LshProjection, - * @param [in] inputDesc descriptor of input - * @param [in|out] dimCnt point to the weight dimCnt - * @param [in|out] dim arrays to save dims - * @param [in] dimLen dim length - * @return ccStatus_t - */ -ccStatus_t ccGetLshProjectionWeightDim(const ccTensorDescriptor_t inputDesc, int32_t *dimCnt, int32_t dim[], - const int32_t dimLen); - -/** - * @ingroup dnn - * @brief init descriptor for parameter of upsample function - * @param [in] handle cce handle - * @param [in] upsamplePara input para in host memory - * @param [in] alpha common scale factor - * @param [in] bottomDesc descriptor of input data 
bottomDesc - * @param [in] bottom input data bottom in device memory - * @param [in] bottomMaskDesc descriptor of input data bottomMaskDesc - * @param [in] bottomMask input data bottomMask in device memory - * @param [in] beta common scale factor - * @param [in] outputDesc descriptor of output data - * @param [in|out] output output data in device memory - * @return ccStatus_t - */ -ccStatus_t ccUpsampleForward(ccHandle_t handle, const ccUpsampleParaDescriptor_t upsamplePara, const void *alpha, - const ccTensorDescriptor_t bottomDesc, const void *bottom, - const ccTensorDescriptor_t bottomMaskDesc, const void *bottomMask, const void *beta, - const ccTensorDescriptor_t outputDesc, void *output); - -/** - * @brief creat descriptor for parameter of usample function - * @param [in|out] upsampleDesc descriptor of upsamplepara - * @return ccStatus_t - */ -ccStatus_t ccCreateUpsampleDescriptor(ccUpsampleParaDescriptor_t *upsampleDesc); - -/** - * @brief destroy descriptor for parameter of upsample function - * @param [in|out] upsampleDesc descriptor of upsamplepara - * @return ccStatus_t - */ -ccStatus_t ccDestroyUpsampleDescriptor(ccUpsampleParaDescriptor_t *upsampleDesc); - -/** - * @brief set descriptor for parameter of upsample function - * @param [in|out] upsampleDesc descriptor of upsamplepara - * @param [in] scale the scale of height and width - * @param [in] scaleHeight the scale of height - * @param [in] scaleWidth the scale of Width - * @param [in] upsampleHeight the height of output - * @param [in] upsampleWidth the width of output - * @param [in] padOutHeight pad value height - * @param [in] padOutWidth pad value width - * @return ccStatus_t - */ -ccStatus_t ccSetUpsampleDescriptor(ccUpsampleParaDescriptor_t upsampleDesc, const int32_t scale, - const int32_t scaleHeight, const int32_t scaleWidth, const int32_t upsampleHeight, - const int32_t upsampleWidth, const bool padOutHeight, const bool padOutWidth); -/** - * @ingroup dnn - * @brief get the output dimension info of upsample - * @param [in] upsamplePara para of upsample - * @param [in] bottomDesc descriptor of input bottom tensor - * @param [in|out] dimCnt point to the output dimCnt - * @param [in|out] dim arrays to save dims - * @param [in] dimLen the len of dim array - * @return ccStatus_t - */ -ccStatus_t ccGetUpsampleOutputDim(const ccUpsampleParaDescriptor_t upsamplePara, const ccTensorDescriptor_t bottomDesc, - int32_t *dimCnt, int32_t dim[], const int32_t dimLen); - -#ifndef DAVINCI_LITE -ccStatus_t ccMatmul(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, - const ccTensorDescriptor_t wDesc, const void *w, const ccTensorDescriptor_t biasDesc, - const void *bias, const ccFullConnectFwdAlgo_t algo, void *workSpace, - const uint32_t workSpaceSizeInBytes, const void *beta, const ccTensorDescriptor_t yDesc, void *y, - const bool transposeA, const bool transposeB); -ccStatus_t ccGetMatmulOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t wDesc, int32_t *n, - int32_t *c, int32_t *h, int32_t *w, bool transposeA, bool transposeB); -ccStatus_t ccGetMatmulWorkspaceSize(ccHandle_t handle, const ccFullConnectFwdAlgo_t algo, - const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t wDesc, - const ccTensorDescriptor_t yDesc, uint32_t *sizeInBytes, bool transposeA, - bool transposeB); -#endif - -/** - * @ingroup dnn - * @brief gather_v2 function - * @param [in] handle cce handle - * @param [in] alpha common scale factor - * @param [in] paramsDesc descriptor - * @param [in] 
params device memory - * @param [in] indicesDesc descriptor - * @param [in] indices device memory - * @param [in] axisDesc descriptor - * @param [in] axis device memory - * @param [in] beta common scale factor - * @param [in] outputDesc descriptor - * @param [in|out] output device memory - * @return ccStatus_t - */ -ccStatus_t ccGatherV2(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t paramsDesc, const void *params, - const ccTensorDescriptor_t indicesDesc, const void *indices, const ccTensorDescriptor_t axisDesc, - const void *axis, const void *beta, const ccTensorDescriptor_t outputDesc, const void *output); - -/** - * @ingroup dnn - * @brief memory_clear function - * @param [in] handle cce handle - * @param [in] addrSpaceSizeInBytes addr space size - * @param [in|out] addr device memory - * @return ccStatus_t - */ -ccStatus_t ccMemoryClear(ccHandle_t handle, const uint64_t addrSpaceSizeInBytes, const void *addr); - -/** - * @ingroup dnn - * @brief check input is overflow - * @param [in] handle cce handle - * @param [in] alpha scaling factors - * @param [in] xDesc descriptor of input tensor - * @param [in] x input data in device memory - * @param [in] yDesc descriptor of output tensor - * @param [in|out] y output data in device memory - * @param [in] beta scaling factors - * @return ccStatus_t - */ -ccStatus_t ccIsFinite(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, - const ccTensorDescriptor_t yDesc, const void *y, const void *beta); -}; // namespace cce - -#endif // DNN_OP_H__ diff --git a/third_party/fwkacllib/inc/cce/dnn_struct.hpp b/third_party/fwkacllib/inc/cce/dnn_struct.hpp deleted file mode 100644 index 96566074..00000000 --- a/third_party/fwkacllib/inc/cce/dnn_struct.hpp +++ /dev/null @@ -1,23 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef DNN_STRUCT_HPP__ -#define DNN_STRUCT_HPP__ - -#include "dnn.h" -#include "dnn_struct_base.hpp" - -#endif // DNN_STRUCT_HPP__ diff --git a/third_party/fwkacllib/inc/cce/dnn_struct_base.hpp b/third_party/fwkacllib/inc/cce/dnn_struct_base.hpp deleted file mode 100644 index dd75e9ea..00000000 --- a/third_party/fwkacllib/inc/cce/dnn_struct_base.hpp +++ /dev/null @@ -1,894 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef DNN_STRUCT_BASE_HPP__ -#define DNN_STRUCT_BASE_HPP__ - -#include "cce/cce_def.hpp" - -namespace cce { - -/** - * @ingroup dnn - * @brief max number of dimensions - */ -#define CC_DIM_MAX (8) - -/** - * @ingroup dnn - * @brief max number of dimensions when use NC1HWC0 format - */ -#define CC_REALDIM_MAX (4) - -/** - * @ingroup dnn - * @brief max input count of MscnnBoxOutput - */ -#define CC_MAX_INPUT_CNT (10) - -/** - * @ingroup dnn - * @brief image dimensions of aipp input - */ -#define CC_AIPP_IMG_DIM (2) - -/** - * @ingroup dnn - * @brief image channel number of aipp input - */ -#define CC_AIPP_IMG_CHN_NUM (4) - -/** - * @ingroup dnn - * @brief element number of aipp color space convertion matrix - */ -#define CC_AIPP_CSC_MATRIX_DIM (9) - -/** - * @ingroup dnn - * @brief element number of aipp color space convertion bias - */ -#define CC_AIPP_CSC_BIAS_DIM (3) - -/** - * @ingroup dnn - * @brief parameter number of op exp/log/pow - */ -#define PARAM_CNT_THREE (3) - -/** - * @ingroup dnn - * @brief parameter number of op nonmaxsuppression - */ -#define PARAM_CNT_TWO (2) -#define DIMCNT_NUMBER_ONE (1) -#define DIMCNT_NUMBER_TWO (2) -#define DIMCNT_NUMBER_FOUR (4) - -#define COMMON_FORMAT_NCHW_N_INDEX (0) -#define COMMON_FORMAT_NCHW_C_INDEX (1) -#define COMMON_FORMAT_NCHW_H_INDEX (2) -#define COMMON_FORMAT_NCHW_W_INDEX (3) - -/** - * @ingroup dnn - * @brief parameter number of op upsample - */ -#define UPSAMPLE_SCAL_DEFAULT_TWO (2) -#define UPSAMPLE_ILLEGAL_VALUE_1 (1) - -/** - * @ingroup dnn - * @brief struct define of StridedSlice required params. - */ - -typedef struct tagCcStridedSlice { - uint32_t dimCnt; - int32_t begin[CC_DIM_MAX]; - int32_t end[CC_DIM_MAX]; - int32_t strides[CC_DIM_MAX]; -} ccStridedSlice_t; - -/** - * @ingroup dnn - * @brief struct define of Strided_slice attrs - */ -typedef struct tagCcStridedSliceAttrs { - uint32_t beginMask; - uint32_t endMask; - uint32_t ellipsisMask; - uint32_t newAxisMask; - uint32_t shrinkAxisMask; -} ccStridedSliceAttrs_t; - -/** - * @ingroup dnn - * @brief params of batchToSpace - */ -typedef struct tagCcBatchToSpace { - int32_t blockShapeLength; - int32_t blockShape[CC_DIM_MAX]; - int32_t crops[2 * CC_DIM_MAX]; -} ccBatchToSpace_t; - -/** - * @ingroup dnn - * @brief params of spaceToBatch - */ -typedef struct tagCcSpaceToBatch { - int32_t blockShapeLength; - int32_t blockShape[CC_DIM_MAX]; - int32_t paddings[2 * CC_DIM_MAX]; -} ccSpaceToBatch_t; - -/** - * @ingroup dnn - * @brief struct define of tensor - */ -typedef struct tagCcTensor { - ccTensorFormat_t format; - ccDataType_t dataType; - int32_t dimCnt; - int32_t realDimCnt; - uint32_t dataSize; - int32_t dim[CC_DIM_MAX]; - int32_t stride[CC_DIM_MAX]; - ccVecQuantizePara_t vecQuantizePara; -} ccTensor_t; - -/** - * @ingroup dnn - * @brief struct define of filter tensor - */ -typedef struct tagCcFilter { - ccTensorFormat_t format; - ccDataType_t dataType; - int32_t dimCnt; - uint32_t dataSize; - int32_t dim[CC_DIM_MAX]; -} ccFilter_t; - -/** - * @ingroup dnn - * @brief struct define of convolution operator - */ -typedef struct tagCcConvolution { - ccConvolutionMode_t mode; - ccPaddingMode_t padMode; - int32_t dimCnt; - int32_t padding[2 * (CC_DIM_MAX - 2)]; - int32_t filterStride[CC_DIM_MAX - 2]; - int32_t dilation[CC_DIM_MAX - 2]; - int32_t group; - ccQuantizeDescriptor_t quantInfo; - ccConvolutionAipp_t aippInfo; - int32_t adj[CC_DIM_MAX - 2]; - int32_t targetShape[CC_DIM_MAX - 2]; - int32_t beforePadding[2 * (CC_DIM_MAX - 2)]; // pad before conv - uint32_t reluFlag; - 
int64_t concatBatchSize; -} ccConvolution_t; - -#define ccCorrelation_t ccConvolution_t -typedef struct tagCcFullConnection_t { - ccQuantizeDescriptor_t quantInfo; - uint32_t infoTabSize; - const void *infoTab; - bool reluFlag; - ccFullConnectFwdAlgo_t algo; -} ccFullConnection_t; - -typedef struct tagCcConcatFour2Five_t { - uint32_t branchNum; // how many branch for box or class - uint32_t classNum; // box branch's classNum is four, class branch's classNum is class number -} ccConcatFour2Five_t; - -typedef struct tagCcTransdata_t { - uint64_t scaleQAddr; - uint8_t scaleQValueMode; - uint64_t offsetQAddr; - uint8_t quantAlgo; - uint8_t quantize8bitFlag; -} ccTransdata_t; -/** - * @ingroup dnn - * @brief struct define of pooling operator - */ -typedef struct tagCcPooling { - ccPoolingMode_t mode; - ccPaddingMode_t padMode; - ccNanPropagation_t maxpoolingNanOpt; - int32_t dimCnt; - int32_t windowDim[CC_DIM_MAX - 2]; - int32_t padding[CC_DIM_MAX - 2]; - int32_t stride[CC_DIM_MAX - 2]; - int32_t dataMode; - int32_t ceilMode; - ccQuantizeDescriptor_t quantInfo; - ccPooingFwdAlgo_t algo; -} ccPooling_t; - -/** - * @ingroup dnn - * @brief struct define of activation operator - */ -typedef struct tagCcActivation { - ccActivationMode_t mode; - ccNanPropagation_t reluNanOpt; - double coef; /* ceiling for clipped RELU, alpha for ELU */ - ccActivationPara_u activationPara; -} ccActivation_t; - -/** - * @ingroup dnn - * @brief struct define of svdf operator - */ -typedef struct tagCcSvdf { - ccTensorFormat_t format; - ccDataType_t dataType; - uint32_t batches; - uint32_t features; - uint32_t rank; - uint32_t inputSize; - uint32_t memorySize; -} ccSvdf_t; - -/** - * @ingroup dnn - * @brief struct define of svdf operator - */ -typedef struct tagCcHashTableLookup { - ccTensorFormat_t format; - ccDataType_t lookupType; - ccDataType_t keyType; - ccDataType_t valueType; - ccDataType_t outputType; - ccDataType_t hitsType; - uint32_t lookups; - uint32_t keys; - uint32_t rows; - uint32_t features; - uint16_t valueScale; - uint16_t outputScale; - uint16_t valueOffset; - uint16_t outputOffset; -} ccHashTableLookup_t; - -/** - * @ingroup dnn - * @brief struct define of prelu operator - */ -typedef struct tagCcPRelu { - ccNanPropagation_t reluNanOpt; - int32_t slopeCount; - bool channelShared; -} ccPRelu_t; - -/** - * @ingroup dnn - * @brief struct define of crop operator - */ -typedef struct tagCcCrop { - int32_t startAxis; - int32_t offset[CC_DIM_MAX]; - int32_t offsetCnt; -} ccCrop_t; - -/** - * @ingroup dnn - * @brief struct define of SpatialTransformer operator - */ -typedef struct tagCcSpatialTransformer { - ccSamplerType_t samplerType; - ccDataType_t dataType; - int32_t dimCnt; - uint64_t dim[CC_DIM_MAX]; - uint64_t alignCorner; -} ccSpatialTransformer_t; - -/** - * @ingroup dnn - * @brief struct define of ShiftTransformer operator - */ -typedef struct tagCcShiftTransformer { - ccSamplerType_t samplerType; - double xPreDefined; - double yPreDefined; - bool xShift; - bool yShift; - int32_t gridH; - int32_t gridW; -} ccShiftTransformer_t; - -/** - * @ingroup dnn - * @brief struct define of FasterRcnnProposal operator - */ -typedef struct tagCcFasterRcnnProposal { - int32_t preNMStopK; - int32_t postNMStopK; - float nmsTresh; - float minSize; - float featStride; - float baseSize; - int32_t ratioCnt; - int32_t scaleCnt; - float *ratio; - float *scale; - int32_t imgH; - int32_t imgW; -} ccFasterRcnnProposal_t; - -/** - * @ingroup dnn - * @brief struct define of LRN operator - */ -typedef struct tagCcLRN { - 
ccLRNMode_t lrnMode; - int32_t lrnN; - double lrnAlpha; - double lrnBeta; - double lrnK; -} ccLRN_t; - -/** - * @ingroup dnn - * @brief struct define of instanceNorm - */ -typedef struct tagCcInstancenorm { - ccInstanceNormMode_t mode; - double epsilon; -} ccInstancenorm_t; - -/** - * @ingroup dnn - * @brief struct define of assignOp operator - */ -typedef struct tagCcAssignOp { - ccAssignOpMode_t assignOpMode; -} ccAssignOp_t; - -/** - * @ingroup dnn - * @brief struct define of arcSinCos operator - */ -typedef struct tagCcArcSinCos { - ccArcSinCosMode_t arcSinCosMode; -} ccArcSinCos_t; - -/** - * @ingroup dnn - * @brief struct define of Detectpostprocess operator - */ -typedef struct tagCcDetectpostprocess { - int32_t numClasses; - float confThreshold; - float nmsThreshold; - int32_t outTopK; - float bboxRegWeightsDx; - float bboxRegWeightsDy; - float bboxRegWeightsDw; - float bboxRegWeightsDh; -} ccDetectpostprocess_t; -/** - * @ingroup dnn - * @brief struct define of FasterRcnnDetectionOutput operator - */ -typedef struct tagCcFasterRcnnDetectionOutput { - int32_t numClasses; - float nmsThreshold; - float postConfThreshold; - int32_t imgH; - int32_t imgW; - int32_t batchSize; -} ccFasterRcnnDetectionOutput_t; - -/** - * @ingroup dnn - * @brief struct define of SsdDetectionOutput operator - */ -typedef struct tagCcSsdDetectionOutput { - int32_t numClasses; - int32_t backgroundLabelId; - double preConfThreshold; - int32_t preTopK; - double nmsThreshold; - double nmsEta; - ccBoxCodeType_t codeType; - int32_t outTopK; - bool shareLocation; - bool varianceEncodedInTarget; - uint32_t boxTypeNum; - float var[4]; - uint32_t variance_num; -} ccSsdDetectionOutput_t; - -/** - * @ingroup dnn - * @brief struct define of RefinedetDetectionOutput operator - */ -typedef struct tagCcRefinedetDetectionOutput { - int32_t numClasses; - int32_t backgroundLabelId; - double preConfThreshold; - int32_t preTopK; - double nmsThreshold; - double nmsEta; - ccBoxCodeType_t codeType; - int32_t outTopK; - bool shareLocation; - bool varianceEncodedInTarget; - uint32_t boxTypeNum; - float var[4]; - uint32_t variance_num; - double objectness_score; -} ccRefinedetDetectionOutput_t; - -/** - * @ingroup dnn - * @brief struct define of MsrGenerateRpnProposals operator - */ -typedef struct tagCcMsrGenerateRpnProposals { - int32_t preNmsTopK; - int32_t postNmsTopK; - float nmsThreshold; - float rpnMiniSize; - int32_t imgH; - int32_t imgW; - uint32_t boxTypeNum; - float scoreThreshold; -} ccMsrGenerateRpnProposals_t; - -/** - * @ingroup dnn - * @brief struct define of RetinaPostprocessor operator - */ -typedef struct tagCcRetinaPostprocessor { - int32_t numClasses; - int32_t maxDetections; - float nmsThreshold; - float scoreThreshold; - int32_t imgH; - int32_t imgW; - uint32_t boxTypeNum; - float mean[4]; - int32_t meanNum; - float std[4]; - int32_t stdNum; - int32_t outputNum; - bool ocrFlag; -} ccRetinaPostprocessor_t; - -/** - * @ingroup dnn - * @brief struct define of GenerateSsdAnchors operator - */ -typedef struct tagCcGenerateSsdAnchors { - int32_t featureMapShapeList[20]; - uint32_t featureMapShapeListSize; - int32_t boxSpecsNum[10]; - uint32_t boxSpecsNumSize; - float scales[10]; - uint32_t scalesNum; - float aspectRatios[10]; - uint32_t aspectRatiosNum; - int32_t baseAnchorSize[2]; - uint32_t baseAnchorSizeNum; - int32_t anchorStride[2]; - uint32_t anchorStrideNum; - int32_t anchorOffset[2]; - uint32_t anchorOffsetNum; - bool reduceBoxesInLowestLayer; - float minScale; - float maxScale; - int32_t imgH; - int32_t 
imgW; -} ccGenerateSsdAnchors_t; - -/** - * @ingroup dnn - * @brief struct define of MscnnBoxOutput operator - */ -typedef struct tagCcMscnnBoxOutput { - double fgThreshold; - double nmsThreshold; - ccNmsType_t nmsType; - int32_t fieldH[CC_MAX_INPUT_CNT]; - int32_t fieldW[CC_MAX_INPUT_CNT]; - int32_t downsampleRate[CC_MAX_INPUT_CNT]; - int32_t defaultBoxCnt; - double fieldWhr; - double fieldXyr; - int32_t maxNmsNum; - int32_t maxPostNmsNum; - double minSize; -} ccMscnnBoxOutput_t; - -/** - * @ingroup dnn - * @brief struct define of NMS operator - */ -typedef struct tagCcNms { - int32_t numClasses; - int32_t backgroundLabelId; - double preConfThreshold; - int32_t preTopK; - double nmsThreshold; - double nmsEta; - int32_t postTopK; - int32_t outTopK; - double postConfThreshold; - bool shareLocation; -} ccNms_t; - -/** - * @ingroup dnn - * @brief struct define of NMS/MultiClassNMS operator - */ -typedef struct tagCcMultiClassNms { - uint64_t numClasses; - float objThreshold; - float nmsThreshold; - float clsThreshold; - bool normal; - uint64_t coorType; -} ccCcMultiClassNms_t; - -/** - * @ingroup dnn - * @brief struct define of YoloDetectionOutput operator - */ -typedef struct tagCcYoloDetectionOutput { - ccYoloVersion_t yoloVersion; - uint32_t netH; - uint32_t netW; - uint32_t postTopK; - uint32_t classes; - float nmsThreshold; - float iouThreDecay; - float coorScaleFactor; - bool relative; - float objThreshold; - float clsThreshold; - uint32_t biasNum; - float *bias; -} ccYoloDetectionOutput_t; - -/** - * @ingroup dnn - * @brief struct define of GetRegionBox operator - */ -#ifndef CC_MAX_YOLO_BIAS_NUM -#define CC_MAX_YOLO_BIAS_NUM (16) -#endif - -typedef struct tagCcGetRegionBox { - uint32_t biasNum; - uint32_t H; - uint32_t W; - float bias[CC_MAX_YOLO_BIAS_NUM]; -} ccGetRegionBox_t; - -/** - * @ingroup dnn - * @brief struct define of CorrectBoxes operator - */ -typedef struct tagCorrectBoxes { - uint32_t netW; - uint32_t netH; - bool relative; -} ccCorrectBoxes_t; - -/** - * @ingroup dnn - * @brief struct define of ClsProb operator - */ -typedef struct tagClsProb { - float objThreshold; -} ccClsProb_t; - -/** - * @ingroup dnn - * @brief struct define of SsdPriorBox operator - */ -typedef struct tagCcSsdPriorBox { - ccBoxCodeType_t codeType; - double *minSize; - int32_t minSizeNum; - double *maxSize; - int32_t maxSizeNum; - double *aspectRatio; - int32_t aspectRatioNum; - double *variance; - int32_t varianceNum; - int32_t imgH; - int32_t imgW; - double stepH; - double stepW; - double offset; - bool flip; - bool clip; -} ccSsdPriorBox_t; - -/** - * @ingroup dnn - * @brief struct define of Yolo2Region operator - */ -typedef struct tagCcYolo2Region { - ccSoftmaxTree_t softmaxTree; - bool softmax; - bool background; - bool treeSoftmax; -} ccYolo2Region_t; - -/** - * @ingroup dnn - * @brief struct define of YoloRegion operator - */ -typedef struct tagCcYoloRegion { - ccSoftmaxTree_t softmaxTree; - bool softmax; - bool background; - bool treeSoftmax; - int32_t classes; - int32_t coords; - int32_t boxes; - ccYoloVersion_t yoloV; -} ccYoloRegion_t; - -/** - * @ingroup dnn - * @brief struct define of power operator - */ -typedef struct tagCcPower { - float scale; - float shift; - float power; -} ccPower_t; - -/** - * @ingroup dnn - * @brief struct define of exp operator - */ -typedef struct tagCcExp { - ccDataType_t dataType; - uint32_t paramCnt; -} ccExp_t; - -/** - * @ingroup dnn - * @brief struct define of exp operator - */ -typedef struct tagCcLog { - ccDataType_t dataType; - uint32_t paramCnt; 
-} ccLog_t; - -/** - * @ingroup dnn - * @brief struct define of pow operator - */ -typedef struct tagCcPow { - ccDataType_t dataType; - uint32_t paramCnt; -} ccPow_t; - -/** - * @ingroup dnn - * @brief struct define of padv2 operator - */ -typedef struct tagCcPadV2 { - ccPadMode_t padMode; - void *padValue; - ccDataType_t padValueType; - int32_t padDimCnt; - int32_t padShapeLow[CC_DIM_MAX]; - int32_t padShapeHigh[CC_DIM_MAX]; -} ccPadV2_t; - -/** - * @ingroup dnn - * @brief struct define of psROIPooling operator - */ -typedef struct tagCcPsRoiPooling { - ccPoolingMode_t poolingMode; - int32_t pooledH; - int32_t pooledW; - float spatialScale; - float padRatio; - int32_t groupSize; - int32_t outputDim; -} ccPsRoiPooling_t; - -/** - * @ingroup dnn - * @brief struct define of RoIAlign operator - */ -typedef struct tagCcRoiAlign { - int32_t pooledH; - int32_t pooledW; - float spatialScale; - int32_t samplingRatio; -} ccRoiAlign_t; - -/** - * @ingroup dnn - * @brief struct define of RoiInterpPooling operator - */ -typedef struct tagCcRoiInterpPooling { - int32_t pooledH; - int32_t pooledW; - int32_t poolKernelH; - int32_t poolKernelW; - int32_t pooledTailH; - int32_t pooledTailW; - float spatialScaleH; - float spatialScaleW; -} ccRoiInterpPooling_t; - -/** - * @ingroup dnn - * @brief struct define of DetectionFull3DOutput operator - */ -typedef struct tagCcDetectionFull3DOutput { - int32_t imageWidth; - int32_t imageHeight; - int32_t numAngleBins; - float trcMarginRatioX; - float trcMarginRatioY; - int32_t pitchRangeD; - int32_t pitchPresetD; - float mountHeight; - int32_t visiblenessBins; - float meanVisibleness; - bool discreteVisibleness; -} ccDetectionFull3DOutput_t; - -/** - * @ingroup dnn - * @brief struct define of MsrFastRcnnPredictions operator - */ -typedef struct tagMsrFastRcnnPredictions { - int32_t numClasses; // num of classes - float scoreThreshold; // the threshold of the score - double nmsThreshold; // the threshold of nms - int32_t postTopK; - int32_t outTopK; - int32_t imgH; // the height of image - int32_t imgW; // the width of image -} ccMsrFastRcnnPredictions_t; - -typedef struct tagCcResizeBilinear { - ccResizeOutputDimMode_t resizeOutputDimMode; - bool alignCorners; - int32_t zoom_factor; - int32_t shrink_factor; - int32_t height; - int32_t width; - int32_t pad_begin; - int32_t pad_end; -} ccResizeBilinear_t; - -typedef struct tagCcResizeNearestNeighbor { - bool alignCorners; - int32_t height; - int32_t width; -} ccResizeNearestNeighbor_t; - -typedef struct tagCcEltwise { - ccQuantize_t *quantInfo; - bool reluFlag; -} ccEltwise_t; - -typedef struct tagCcBatchNorm { - bool reluFlag; -} ccBatchNorm_t; - -typedef struct tagCcPad { - ccPadMode_t padMode; - float padValue; - int32_t htoppad; // padLow[0] - int32_t hbottompad; // padHigh[0] - int32_t wleftpad; // padLow[1] - int32_t wrightpad; // padHigh[1] -} ccPad_t; - -typedef struct tagCcSubCondition { - uint32_t BaseCondValue[4]; - ccCMPType_t condType[4]; - ccResultType_t resultType; -} ccSubCondition; - -typedef struct tagCcShapeClassifyCond { - uint32_t subConditionNum; - ccResultType_t resultType; - uint32_t true_value; - ccSubCondition subCond[2]; -} ccShapeClassifyCond; - -#ifndef CC_SHAPE_CLASSIFY_CONDITION_NUM -#define CC_SHAPE_CLASSIFY_CONDITION_NUM (8) -#endif - -typedef struct tagCcShapeClassify { - uint32_t shapeClassifyConditionNum; - uint32_t defaultValue; - ccShapeClassifyCond shapeClassifyCond[CC_SHAPE_CLASSIFY_CONDITION_NUM]; -} ccShapeClassify_t; - -/** - * @ingroup dnn - * @bref struct define of square 
operator - */ -typedef struct tagCcSquare { - ccSquareMode_t mode; -} ccSquare_t; - -/* - * @ingroup dnn - * @brief operation of segment reduction - */ -typedef enum { - CC_SEGMENT_REDUCTION_OP_SUM = 0, /**< sum */ - CC_SEGMENT_REDUCTION_OP_INVALID -} ccSegmentReductionOpType_t; - -typedef struct tagCcFillParam { - // The filler type. - ccFillOpType_t fillType; - ccDataType_t valueDatatype; - const void *value; // the value in constant fill - const void *min; // the min value in uniform fill - const void *max; // the max value in uniform fill - const void *mean; // the mean value in Gaussian fill - const void *std; // the std value in Gaussian fill - // the seed used to generate data in Gaussian and uniform fill - int64_t seed1; - int64_t seed2; -} ccFillParam_t; - -typedef struct tagNonMaxSuppression { - ccDataType_t dataType; - uint32_t paraCount; -} ccNonMaxSuppression_t; - -typedef struct tagCcArgmaxmin { - int32_t axisType; - bool outMaxVal; - int64_t topK; - int64_t reduceSize; - int64_t reduceStride; - int64_t axis; - bool keepDims; -} ccArgmaxmin_t; - -typedef struct tagUpsamplePara { - int32_t scale; - int32_t scaleHeight; - int32_t scaleWidth; - int32_t upsampleHeight; - int32_t upsampleWidth; - bool padOutHeight; - bool padOutWidth; -} ccUpsamplePara_t; - -typedef struct tagCcConcatFive2Four_t { - ccTransForLossMode_t mode; - uint32_t classNum; -} ccConcatFive2Four_t; - -}; // namespace cce -#endif // DNN_STRUCT_BASE_HPP__ diff --git a/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h b/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h deleted file mode 100644 index 5733d68f..00000000 --- a/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h +++ /dev/null @@ -1,155 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef FWK_ADPT_STRUCT_H__ -#define FWK_ADPT_STRUCT_H__ - -#include - -namespace aicpu { -namespace FWKAdapter { - -// API RETURN CODE -enum FWKAdptAPIRetCode { - FWK_ADPT_SUCCESS = 0, // success - FWK_ADPT_NOT_INIT = 1, // not init - FWK_ADPT_ALLOC_FAILED = 2, // allocate memory failed - FWK_ADPT_PARAM_INVALID = 3, // invalid input param - FWK_ADPT_PARAM_PARSE_FAILED = 4, // parase input param failed - FWK_ADPT_NATIVE_ERROR = 5, // error code - FWK_ADPT_NOT_SUPPORT_OPTYPE = 6, // unsupport operate type - FWK_ADPT_INTERNAL_ERROR = 7, // adpter internal error - FWK_ADPT_NOT_SUPPORT_DATATYPE = 8, // unsupport input/output data type - FWK_ADPT_KERNEL_ALREADY_RUNING = 9, // kernel already runing, not support parallel run - FWK_ADPT_SESSION_NOT_EXIST = 10, // session id not exist - FWK_ADPT_SESSION_ALREADY_EXIST = 11, // session id alread exist for create session - FWK_ADPT_NATIVE_END_OF_SEQUENCE = 12, // end of sequence - FWK_ADPT_EXTEND_TYPE_NOT_EXIST = 13, // extend info type not exist - FWK_ADPT_UNKNOWN_ERROR = 99 // unknown error code -}; - -// FWKAdapter operate type -// Notice: add new operate type need check with OMM, and make sure append to the end line. 
-enum FWKOperateType { - FWK_ADPT_SESSION_CREATE = 0, - FWK_ADPT_KERNEL_RUN, - FWK_ADPT_KERNEL_DESTROY, - FWK_ADPT_SESSION_DESTROY, - FWK_ADPT_SINGLE_OP_RUN, - FWK_ADPT_KERNEL_RUN_NO_SESS, -}; - -// Extend Info type for task -enum FWKTaskExtInfoType { - FWK_ADPT_EXT_SHAPE_TYPE = 0, - FWK_ADPT_EXT_INPUT_SHAPE, - FWK_ADPT_EXT_OUTPUT_SHAPE, - FWK_ADPT_EXT_UPDATE_ADDR, - FWK_ADPT_EXT_OP_NAME, - FWK_ADPT_EXT_SESSION_INFO, - FWK_ADPT_EXT_BITMAP, - FWK_ADPT_EXT_TOPIC_TYPE, - FWK_ADPT_EXT_ASYNCWAIT, - FWK_ADPT_EXT_INVALID -}; - -enum FWKExtTopicType { - FWK_ADPT_TOPIC_DEVICE_ONLY = 0, - FWK_ADPT_TOPIC_DEVICE_FIRST, - FWK_ADPT_TOPIC_HOST_ONLY, - FWK_ADPT_TOPIC_HOST_FIRST, - FWK_ADPT_TOPIC_INVALID -}; - -enum FWKExtUpdateAddrType { - FWK_ADPT_UPDATE_NULL = 0, - FWK_ADPT_UPDATE_INPUT, - FWK_ADPT_UPDATE_OUTPUT, - FWK_ADPT_UPDATE_INPUT_OUTPUT -}; - -enum FWKExtWaitType { - FWK_ADPT_WAIT_TYPE_NULL = 0, - FWK_ADPT_WAIT_TYPE_EVENT, - FWK_ADPT_WAIT_TYPE_INVALID -}; - -#pragma pack(push, 1) -// API Parameter Structure -struct StrFWKKernel { - FWKOperateType opType; - uint64_t sessionID; // unique - - uint64_t stepIDAddr; // step id addr - uint64_t kernelID; // run kernel id, unique in session - uint64_t nodeDefLen; // nodeDef protobuf len - uint64_t nodeDefBuf; // NodeDef protobuf offset addr, need convert to void* - uint64_t funDefLibLen; // FunctionDefLibrary protobuf len - uint64_t funDefLibBuf; // FunctionDefLibrary protobuf addr which use in NodeDef, need convert to void* - - uint64_t inputOutputLen; // InputOutput shap protobuf len - uint64_t inputOutputBuf; // InputOutput shap protobuf addr, need convert to void* - uint64_t workspaceBaseAddr; // Workspace base addr, need convert to void* - uint64_t inputOutputAddr; // InputOutput addr, need convert to void* - - uint64_t extInfoLen; // extend info total length - uint64_t extInfoAddr; // extend info addr, ExtInfo structure -}; -#pragma pack(pop) - -typedef StrFWKKernel FWKOperateParam; - -// Extent info ShapeAndType -const uint32_t kMaxShapeDims = 8; -#pragma pack(push, 1) -struct ShapeAndType { - int32_t type; - int64_t dims[kMaxShapeDims]; -}; -#pragma pack(pop) - -// Extend info structure for extInfoAddr -const uint32_t kExtInfoHeadSize = 8; - -#pragma pack(push, 1) -struct ExtInfo { - int32_t infoType; // extend type - uint32_t infoLen; // length for infoMsg - char infoMsg[0]; // extend value -}; -#pragma pack(pop) - -#pragma pack(push, 1) -struct ResultSummary { - uint64_t shape_data_ptr; // shape data addr, need convert to void* - uint64_t shape_data_size; // num of dims - uint64_t raw_data_ptr; // raw data addr, need convert to void* - uint64_t raw_data_size; // size of raw data -}; -#pragma pack(pop) - -#pragma pack(push, 1) -struct AsyncWait { - uint8_t waitType; // wait type, FWK_ADPT_WAIT_TYPE_EVENT: event wait - uint32_t waitId; // wait id, GE refresh - uint32_t timeOut; // reserved - uint64_t reserved; -}; -#pragma pack(pop) -} // end namespace FWKAdapter -} // namespace aicpu - -#endif // FWK_ADPT_STRUCT_H__ diff --git a/third_party/fwkacllib/inc/cce/l2fusion_struct.hpp b/third_party/fwkacllib/inc/cce/l2fusion_struct.hpp deleted file mode 100644 index fa5a95c9..00000000 --- a/third_party/fwkacllib/inc/cce/l2fusion_struct.hpp +++ /dev/null @@ -1,56 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef L2FUSION_STRUCT_HPP_ -#define L2FUSION_STRUCT_HPP_ - -#include -#include -#include "runtime/kernel.h" - -#define L2_DYNAMIC_SPLIT_NUM - -using namespace std; - -namespace fusion { - -typedef struct tagL2Data { - uint32_t l2Index; - uint64_t l2Addr; - uint64_t l2PageNum; -} L2Data_t; - -typedef std::map L2DataMap_t; // the key is ddr addr -typedef std::pair L2DataPair_t; // the key is ddr addr - -typedef struct TagTaskL2Info { - string nodeName; - rtL2Ctrl_t l2ctrl; - - L2DataMap_t input; - L2DataMap_t output; - uint32_t isUsed; -} TaskL2Info_t; - -typedef std::map TaskL2InfoMap_t; // the key is nodeId -typedef std::pair TaskL2InfoPair_t; // the key is nodeId - -typedef std::map TaskL2InfoFEMap_t; // the key is nodeName -typedef std::pair TaskL2InfoFEPair_t; // the key is nodeName - -} // namespace fusion - -#endif // L2FUSION_STRUCT_HPP_ diff --git a/third_party/fwkacllib/inc/cce/optimizer/fusion_engine.h b/third_party/fwkacllib/inc/cce/optimizer/fusion_engine.h deleted file mode 100644 index 299998e3..00000000 --- a/third_party/fwkacllib/inc/cce/optimizer/fusion_engine.h +++ /dev/null @@ -1,65 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef FUSION_ENGINE_HPP_ -#define FUSION_ENGINE_HPP_ - -#include "cce/cce.h" -#include "graph/compute_graph.h" -#include "proto/task.pb.h" - -#include -#include - -using namespace domi; -using namespace std; - -namespace fusion { -enum { - FUSION_STATUS_SUCCESS = 0, - FUSION_STATUS_FAIL = 1, -}; - -typedef struct { - uint64_t weightSize; - uint64_t memorySize; - uint8_t *dataMemBase; - uint8_t *weightMemBase; - uint32_t l2Enable; // 1 //1 - enable l2 buffer allocation, 0 - disable l2 buffer allocation - uint32_t fusionEnable; // 1 // 1 - enable buffer fusion, 0 - disable buffer fusion -} ModelRes; - -static const std::string SCOPE_ID_ATTR = "fusion_scope"; -static const std::string L2FUSION_DYNAMIC_CONVERGE_OP = "l2fusion_dynamic_converge_op"; -static const std::string L2FUSION_DYNAMIC_SPLIT_NUM = "l2fusion_dynamic_split_num"; -static const std::string FUSION_VIRTUAL_OP = "fusion_virtual_op"; -static const std::string FUSION_MULTI_BATCH_STRIDE = "fusion_multi_bathc_stride"; - -#define TVM_TYPE 1 - -typedef std::map> kScopeNodeMap_t; -typedef std::pair> kScopeNodePair_t; - -uint32_t BufferFusion(ge::ComputeGraphPtr origGraph, ge::ComputeGraphPtr fusionGraph, bool enable_l2dynamic = true); -uint32_t BufferFusionTrain(ge::ComputeGraphPtr origGraph, ge::ComputeGraphPtr fusionGraph); -uint32_t GraphFusion(ge::ComputeGraphPtr origGraph, ge::ComputeGraphPtr fusionGraph); -uint32_t FusionTaskBuild(cce::ccHandle_t ccHandle, ge::ComputeGraphPtr fusionGraph, ge::Buffer &buffer, - ModelRes &modelRes, std::vector &task_def_list_); -void FusionTaskBuildComplete(std::vector cchandleList); -uint32_t GraphFusionTrain(ge::ComputeGraphPtr origGraph, ge::ComputeGraphPtr fusionGraph); -} // namespace fusion - -#endif // FUSION_ENGINE_HPP_ diff --git a/third_party/fwkacllib/inc/cce/taskdown_api.h b/third_party/fwkacllib/inc/cce/taskdown_api.h deleted file mode 100644 index 2323aaa7..00000000 --- a/third_party/fwkacllib/inc/cce/taskdown_api.h +++ /dev/null @@ -1,54 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef TASKDOWN_API_H_ -#define TASKDOWN_API_H_ - -#include -#include -#include "cce/cce.h" -#include "l2fusion_struct.hpp" -#include "taskdown_common.hpp" - -namespace cce { - -#define CC_FUSION_OP_MAX 32 - -typedef struct tagOpAddrsInfo { - void *addrPos; - uintptr_t addrData; -} ccOpAddrsInfo; - -#ifdef __cplusplus -extern "C" { -#endif - -ccStatus_t ccUpdateKernelArgs(ccOpContext &opContext, uint64_t dataBaseAddr, uint64_t weightBaseAddr, - uint64_t variableBaseAddr, void *argsAddr, uint64_t argsSize, void *l2ctrlAddr); - -#ifdef __cplusplus -} -#endif - -ccStatus_t ccGetKernelArgsAddrs(ccOpContext &opContext, void *argsAddr, uint64_t argsSize, void *l2ctrlAddr, - std::vector &opAddrsInfo); - -ccStatus_t ccSetKernelArgs(std::vector &dateInfo); - -ccStatus_t ccGetKernelTypeByOpId(uint32_t opId, ccKernelType &kernelType); - -} // namespace cce -#endif // TASKDOWN_API_H_ diff --git a/third_party/fwkacllib/inc/cce/taskdown_common.hpp b/third_party/fwkacllib/inc/cce/taskdown_common.hpp deleted file mode 100644 index 7954162e..00000000 --- a/third_party/fwkacllib/inc/cce/taskdown_common.hpp +++ /dev/null @@ -1,108 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef TASKDOWN_COMMON_H_ -#define TASKDOWN_COMMON_H_ - -#include -#include "cce/cce_def.hpp" -#include "common/attr_list.hpp" -#include "l2fusion_struct.hpp" - -namespace cce { - -#define CC_FUSION_OP_MAX 32 - -typedef enum tagccKernelType { - CCE_AI_CORE = 0, /* cce aicore */ - CCE_AI_CPU = 1, /* cce aicpu */ - TE = 2, /* te operator*/ - CUSTOMIZED = 3, /* customized operator */ - TE_AI_CORE = 4, /* te aicore operator*/ - TE_AI_CPU = 5, /* te aicpu operator */ - AI_CPU = 6, /* aicpu */ - CUST_AI_CPU = 7, /* custom aicpu*/ - HOST_CPU = 8, /* host cpu */ - INVALID = 10000 /* unknown kernel type */ -} ccKernelType; - -typedef struct tagOpContext { - ccKernelType kernelType; - uint32_t opId; - uint32_t kernelFuncId; - uint32_t opIndex; - uint32_t opCount; - uint32_t opIndex2[CC_FUSION_OP_MAX]; - bool isFlowtable; - uint16_t *argsOffset; - uint32_t argsCount; - uint64_t genDataBaseAddr; - uint64_t genDataBaseSize; - uint64_t genWeightBaseAddr; - uint64_t genWeightBaseSize; - uint64_t genVariableBaseAddr; - uint64_t genVariableBaseSize; - uint64_t l2ctrlSize; -} ccOpContext; - -typedef struct tagOpReadCount { - bool isEnable; - std::map tensorRc; -} ccOpReadCount; - -typedef enum tagTaskDownKernelIdMode { - CC_TASKDOWN_RESERVED = 0, - CC_TASKDOWN_ROIPOOLING, - CC_TASKDOWN_ROIPOOLING_PERF, - CC_TASKDOWN_ROIALIGN, - CC_TASKDOWN_ROIALIGN_PERF, - CC_TASKDOWN_FC, - CC_TASKDOWN_FC_COMPRESS, - CC_TASKDOWN_SOFTMAX_LOWEST, - CC_TASKDOWN_ROIALIGN_FP16, - CC_TASKDOWN_RESIZE_NEAREST_NEIGHBOR, - CC_TASKDOWN_RESIZE_NEAREST_NEIGHBOR_COMMON, -} ccTaskDownKernelIdMode_t; - -ccStatus_t GetStream(ccHandle_t handle, rtStream_t *streamId); - -ccStatus_t ccClearOpMap(ccHandle_t handle); - -ccStatus_t ccSetKernelOpMap(ccHandle_t handle); - -ccStatus_t 
ccSetKernelContext(ccHandle_t handle, uint32_t opId, AttrList &attrList, bool isFlowtable, - ccKernelType kernelType, void *pgraph); - -ccStatus_t ccGetKernelContext(rtStream_t streamId, ccOpContext &opContext); - -ccStatus_t ccGetKernelTypeByOpId(uint32_t opId, ccKernelType &kernelType); - -ccStatus_t ccSetStreamL2Map(ccHandle_t handle, fusion::TaskL2InfoMap_t &l2AllocRes); - -ccStatus_t ccGetStreamL2Map(rtStream_t streamId, uint32_t opIndex, fusion::TaskL2Info_t *&l2Data); - -ccStatus_t ccSetOpIndex(ccHandle_t handle, uint32_t opIndex); - -ccStatus_t ccGetOpIndex(ccHandle_t handle, uint32_t &opIndex); - -ccStatus_t ccGetOpIndexByStream(rtStream_t streamId, uint32_t &opIndex); - -ccStatus_t ccClearStreamL2Map(ccHandle_t handle); - -ccStatus_t ccGetKernelReadCount(rtStream_t streamId, ccOpReadCount &rc); - -} // namespace cce -#endif // TASKDOWN_COMMON_H_ diff --git a/third_party/fwkacllib/inc/ops/aipp.h b/third_party/fwkacllib/inc/ops/aipp.h index 6db4d783..86805f72 100644 --- a/third_party/fwkacllib/inc/ops/aipp.h +++ b/third_party/fwkacllib/inc/ops/aipp.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/all_ops.h b/third_party/fwkacllib/inc/ops/all_ops.h index b67ead37..6c4d615d 100644 --- a/third_party/fwkacllib/inc/ops/all_ops.h +++ b/third_party/fwkacllib/inc/ops/all_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -78,4 +78,5 @@ #include "condtake_ops.h" #include "warp_perspective_ops.h" #include "vector_search.h" +#include "deep_md.h" #endif // OPS_BUILT_IN_OP_PROTO_INC_ALL_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/array_ops.h b/third_party/fwkacllib/inc/ops/array_ops.h index c02537cd..d56ac5bb 100644 --- a/third_party/fwkacllib/inc/ops/array_ops.h +++ b/third_party/fwkacllib/inc/ops/array_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -1015,6 +1015,25 @@ REG_OP(Shape) .OP_END_FACTORY_REG(Shape) /** +*@brief Gather selected dims of input which returns the shape of tensor shape after gathershapes.\n + +*@par Inputs: +*x: A list of input tensors. It's a dynamic input. \n + +*@par Attributes: +*axes: Select some dims of input. \n + +*@par Outputs: +*shape: The shape of tensor shape after gathershapes. \n +*/ +REG_OP(GatherShapes) + .DYNAMIC_INPUT(x, TensorType::ALL()) + .OUTPUT(shape, TensorType({DT_INT32, DT_INT64})) + .REQUIRED_ATTR(axes, ListListInt) + .ATTR(dtype, Int, DT_INT32) + .OP_END_FACTORY_REG(GatherShapes) + +/** *@brief Returns shape of tensors. 
\n *@par Inputs: diff --git a/third_party/fwkacllib/inc/ops/audio_ops.h b/third_party/fwkacllib/inc/ops/audio_ops.h index d9883253..f05135d1 100644 --- a/third_party/fwkacllib/inc/ops/audio_ops.h +++ b/third_party/fwkacllib/inc/ops/audio_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/avg_pool_1d_ops.h b/third_party/fwkacllib/inc/ops/avg_pool_1d_ops.h index 9583eff9..d0800a08 100644 --- a/third_party/fwkacllib/inc/ops/avg_pool_1d_ops.h +++ b/third_party/fwkacllib/inc/ops/avg_pool_1d_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/batch_ops.h b/third_party/fwkacllib/inc/ops/batch_ops.h index 4b78951d..ca4fe1db 100644 --- a/third_party/fwkacllib/inc/ops/batch_ops.h +++ b/third_party/fwkacllib/inc/ops/batch_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/bitwise_ops.h b/third_party/fwkacllib/inc/ops/bitwise_ops.h index d032476d..dac78118 100644 --- a/third_party/fwkacllib/inc/ops/bitwise_ops.h +++ b/third_party/fwkacllib/inc/ops/bitwise_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/boosted_trees_ops.h b/third_party/fwkacllib/inc/ops/boosted_trees_ops.h index 550e8b7d..08e54824 100644 --- a/third_party/fwkacllib/inc/ops/boosted_trees_ops.h +++ b/third_party/fwkacllib/inc/ops/boosted_trees_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h b/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h index e20607bf..890c52ae 100644 --- a/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h +++ b/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/case_condition_ops.h b/third_party/fwkacllib/inc/ops/case_condition_ops.h index 85064845..85dba609 100644 --- a/third_party/fwkacllib/inc/ops/case_condition_ops.h +++ b/third_party/fwkacllib/inc/ops/case_condition_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/third_party/fwkacllib/inc/ops/cluster.h b/third_party/fwkacllib/inc/ops/cluster.h index f3242a13..19b4ea05 100644 --- a/third_party/fwkacllib/inc/ops/cluster.h +++ b/third_party/fwkacllib/inc/ops/cluster.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/condtake_ops.h b/third_party/fwkacllib/inc/ops/condtake_ops.h index 5e91eb07..029cffbf 100644 --- a/third_party/fwkacllib/inc/ops/condtake_ops.h +++ b/third_party/fwkacllib/inc/ops/condtake_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/control_flow_ops.h b/third_party/fwkacllib/inc/ops/control_flow_ops.h index 53a213f7..cd993599 100644 --- a/third_party/fwkacllib/inc/ops/control_flow_ops.h +++ b/third_party/fwkacllib/inc/ops/control_flow_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/coordinates_1d_to_2d_ops.h b/third_party/fwkacllib/inc/ops/coordinates_1d_to_2d_ops.h index 79a64c2c..f52c90b0 100644 --- a/third_party/fwkacllib/inc/ops/coordinates_1d_to_2d_ops.h +++ b/third_party/fwkacllib/inc/ops/coordinates_1d_to_2d_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/correlation.h b/third_party/fwkacllib/inc/ops/correlation.h index c7262cbb..caebba50 100644 --- a/third_party/fwkacllib/inc/ops/correlation.h +++ b/third_party/fwkacllib/inc/ops/correlation.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/ctc_ops.h b/third_party/fwkacllib/inc/ops/ctc_ops.h index 7729432e..6e908091 100644 --- a/third_party/fwkacllib/inc/ops/ctc_ops.h +++ b/third_party/fwkacllib/inc/ops/ctc_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/data_flow_ops.h b/third_party/fwkacllib/inc/ops/data_flow_ops.h index 492a58ae..3034730d 100644 --- a/third_party/fwkacllib/inc/ops/data_flow_ops.h +++ b/third_party/fwkacllib/inc/ops/data_flow_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -2415,6 +2415,23 @@ REG_OP(AdpGetNext) .ATTR(output_shapes, ListListInt, {{}, {}}) .ATTR(queue_name, String, "") .OP_END_FACTORY_REG(AdpGetNext) -} // namespace ge +/** +*@brief GetNextV2 +*@par Outputs: +*y: the data in iterator, all types are available +*@par Attributes: +*output_types: types of all outputs +*output_shapes: shapes of all outputs +*queue_name: cdqm queue name +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(GetNextV2) + .DYNAMIC_OUTPUT(y, TensorType::ALL()) + .ATTR(output_types, ListType, {}) + .ATTR(output_shapes, ListListInt, {{}, {}}) + .ATTR(channel_name, String, "") + .OP_END_FACTORY_REG(GetNextV2) +} // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_DATA_FLOW_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/deep_md.h b/third_party/fwkacllib/inc/ops/deep_md.h new file mode 100644 index 00000000..fadfe128 --- /dev/null +++ b/third_party/fwkacllib/inc/ops/deep_md.h @@ -0,0 +1,59 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file deep_md.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_DEEP_MD_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_DEEP_MD_H_ + +#include "graph/operator_reg.h" + +namespace ge { +/** +* @brief Calculate ProdForceSeA. \n +* +* @par Inputs: +* Five inputs, including: +* @li net_deriv: A Tensor. Must be one of the following types: float16, float32, float64. +* @li in_deriv: A Tensor. Must be one of the following types: float16, float32, float64. +* @li nlist: A Tensor. dtype is int32. +* @li natoms: A Tensor. dtype is int32. \n +* +* @par Outputs: +* atom_force: A Tensor. Must be one of the following types: float16, float32, float64. \n +* +* @par Attributes: +* Two attributes, including: +* @li n_a_sel: A Scalar. +* @li n_r_sel: A Scalar. \n +* +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(ProdForceSeA) + .INPUT(net_deriv, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(in_deriv, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(nlist, TensorType({DT_INT32})) + .INPUT(natoms, TensorType({DT_INT32})) + .OUTPUT(atom_force, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .REQUIRED_ATTR(n_a_sel, Int) + .REQUIRED_ATTR(n_r_sel, Int) + .OP_END_FACTORY_REG(ProdForceSeA) +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_DEEP_MD_H_ diff --git a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h index 4cb3d961..be201579 100644 --- a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/third_party/fwkacllib/inc/ops/functional_ops.h b/third_party/fwkacllib/inc/ops/functional_ops.h index e1fbe6b3..7cfe39c4 100644 --- a/third_party/fwkacllib/inc/ops/functional_ops.h +++ b/third_party/fwkacllib/inc/ops/functional_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/get_data_ops.h b/third_party/fwkacllib/inc/ops/get_data_ops.h index 33dc4f14..e5518ef8 100644 --- a/third_party/fwkacllib/inc/ops/get_data_ops.h +++ b/third_party/fwkacllib/inc/ops/get_data_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/hcom_ops.h b/third_party/fwkacllib/inc/ops/hcom_ops.h index 6db276a9..497f6a68 100644 --- a/third_party/fwkacllib/inc/ops/hcom_ops.h +++ b/third_party/fwkacllib/inc/ops/hcom_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/hvd_ops.h b/third_party/fwkacllib/inc/ops/hvd_ops.h index a49ec5ed..00299ef7 100644 --- a/third_party/fwkacllib/inc/ops/hvd_ops.h +++ b/third_party/fwkacllib/inc/ops/hvd_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/image_ops.h b/third_party/fwkacllib/inc/ops/image_ops.h index e771d67c..dc3a96b6 100644 --- a/third_party/fwkacllib/inc/ops/image_ops.h +++ b/third_party/fwkacllib/inc/ops/image_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -586,6 +586,40 @@ REG_OP(ResizeNearestNeighborV2GradD) channels], The image tensor that was resized . \n *@par Attributes: +*@li align_corners: An optional bool. Defaults to False. If true, the centers of +the 4 corner pixels of the input and grad tensors are aligned. Defaults to +false . +*@li half_pixel_centers: indicates if the offset coordinates are normalized. Defaults +to false . \n + +*@par Outputs: +*y: A Tensor. Has the same type as original_image . \n + +*@attention Constraints: +*Input grads must be a 4-D tensor . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow ResizeBilinearV2Grad operator. +*/ + +REG_OP(ResizeBilinearV2Grad) + .INPUT(grads, TensorType({DT_FLOAT})) + .INPUT(original_image, TensorType::FloatingDataType()) + .OUTPUT(y, TensorType({DT_FLOAT})) + .ATTR(align_corners, Bool, false) + .ATTR(half_pixel_centers, Bool, false) + .OP_END_FACTORY_REG(ResizeBilinearV2Grad) + +/** +*@brief Computes the gradient of bilinear interpolation . \n + +*@par Inputs: +*Input grads must be a 4-D tensor. Inputs include: +*@li grads: A Tensor of type float32. 
Must set the format, supported format list ["NCHW, NHWC"] +*@li original_image: A Tensor. 4-D shape. Must set the format, supported format list ["NCHW, NHWC"] +channels], The image tensor that was resized . \n + +*@par Attributes: *@li size: An optional listint. Defaults to {}. *@par Attributes: *@li ori_image_size: An optional listint. Defaults to {}. @@ -607,10 +641,10 @@ to false . \n *Input grads must be a 4-D tensor . \n *@par Third-party framework compatibility -*Compatible with tensorflow ResizeBilinearV2Grad operator. +*Compatible with mindspore ResizeBilinearV2Grad operator. */ -REG_OP(ResizeBilinearV2Grad) +REG_OP(SyncResizeBilinearV2Grad) .INPUT(grads, TensorType({DT_FLOAT})) .INPUT(original_image, TensorType::FloatingDataType()) .OUTPUT(y, TensorType({DT_FLOAT})) @@ -620,7 +654,7 @@ REG_OP(ResizeBilinearV2Grad) .ATTR(dst_start_w, Int, 0) .ATTR(align_corners, Bool, false) .ATTR(half_pixel_centers, Bool, false) - .OP_END_FACTORY_REG(ResizeBilinearV2Grad) + .OP_END_FACTORY_REG(SyncResizeBilinearV2Grad) /** *@brief Resize images to size using bilinear interpolation . \n @@ -636,10 +670,6 @@ size for the images . \n output tensors are aligned, preserving the values at the corner pixels. Defaults to false . * @li half_pixel_centers: An optional bool. Defaults to False . \n -*@li ori_image_size: An optional listint. Defaults to {}. -*@li split_size: An optional listint. Defaults to {}. -*@li src_start_w: An optional int. Defaults to 0. -*@li dst_start_w: An optional int. Defaults to 0. *@par Outputs: *y: 4-D with shape [batch, new_height, new_width, channels] . \n @@ -655,13 +685,49 @@ REG_OP(ResizeBilinearV2) DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) .INPUT(size, TensorType({DT_INT32})) .OUTPUT(y, TensorType({DT_FLOAT})) + .ATTR(align_corners, Bool, false) + .ATTR(half_pixel_centers, Bool, false) + .OP_END_FACTORY_REG(ResizeBilinearV2) + +/** +*@brief Resize images to size using bilinear interpolation . \n + +*@par Inputs: +*Input images must be a 4-D tensor. Inputs include: +*@li x: 4-D tensor. Must set the format, supported format list ["NCHW, NHWC"] +*@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new +size for the images . \n + +*@par Attributes: +* @li align_corners: If true, the centers of the 4 corner pixels of the input and +output tensors are aligned, preserving the values at the corner pixels. +Defaults to false . +* @li half_pixel_centers: An optional bool. Defaults to False . \n +*@li ori_image_size: An optional listint. Defaults to {}. +*@li split_size: An optional listint. Defaults to {}. +*@li src_start_w: An optional int. Defaults to 0. +*@li dst_start_w: An optional int. Defaults to 0. +*@par Outputs: +*y: 4-D with shape [batch, new_height, new_width, channels] . \n + +*@attention Constraints: +*Input images can be of different types but output images are always float . \n + +*@par Third-party framework compatibility +*Compatible with mindspore ResizeBilinearV2 operator. +*/ + +REG_OP(SyncResizeBilinearV2) + .INPUT(x, TensorType({DT_FLOAT})) + .INPUT(size, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT})) .ATTR(ori_image_size, ListInt, {}) .ATTR(split_size, ListInt, {}) .ATTR(src_start_w, Int, 0) .ATTR(dst_start_w, Int, 0) .ATTR(align_corners, Bool, false) .ATTR(half_pixel_centers, Bool, false) - .OP_END_FACTORY_REG(ResizeBilinearV2) + .OP_END_FACTORY_REG(SyncResizeBilinearV2) /** *@brief Converts one or more images from RGB to HSV . 
\n diff --git a/third_party/fwkacllib/inc/ops/index_to_addr_ops.h b/third_party/fwkacllib/inc/ops/index_to_addr_ops.h index c6bbaaa8..3af17a45 100644 --- a/third_party/fwkacllib/inc/ops/index_to_addr_ops.h +++ b/third_party/fwkacllib/inc/ops/index_to_addr_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/internal_ops.h b/third_party/fwkacllib/inc/ops/internal_ops.h index 9dde14a5..bcc3f1c3 100644 --- a/third_party/fwkacllib/inc/ops/internal_ops.h +++ b/third_party/fwkacllib/inc/ops/internal_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/linalg_ops.h b/third_party/fwkacllib/inc/ops/linalg_ops.h index 0f362d31..5e31bebd 100644 --- a/third_party/fwkacllib/inc/ops/linalg_ops.h +++ b/third_party/fwkacllib/inc/ops/linalg_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/list_ops.h b/third_party/fwkacllib/inc/ops/list_ops.h index 53024878..0aa94e73 100644 --- a/third_party/fwkacllib/inc/ops/list_ops.h +++ b/third_party/fwkacllib/inc/ops/list_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/logging_ops.h b/third_party/fwkacllib/inc/ops/logging_ops.h index a20370fd..dd565657 100644 --- a/third_party/fwkacllib/inc/ops/logging_ops.h +++ b/third_party/fwkacllib/inc/ops/logging_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/lookup_ops.h b/third_party/fwkacllib/inc/ops/lookup_ops.h index 3fdc01fe..b1fc254f 100644 --- a/third_party/fwkacllib/inc/ops/lookup_ops.h +++ b/third_party/fwkacllib/inc/ops/lookup_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/math_ops.h b/third_party/fwkacllib/inc/ops/math_ops.h index 48867203..d3e8c0bf 100644 --- a/third_party/fwkacllib/inc/ops/math_ops.h +++ b/third_party/fwkacllib/inc/ops/math_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -691,7 +691,7 @@ REG_OP(Conj) REG_OP(NLLLoss) .INPUT(x, TensorType({DT_FLOAT})) .INPUT(target, TensorType({DT_INT32})) - .INPUT(weight, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(weight, TensorType({DT_FLOAT})) .OUTPUT(y, TensorType({DT_FLOAT})) .OUTPUT(total_weight, TensorType({DT_FLOAT})) .ATTR(reduction, String, "mean") diff --git a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h index 289a33a6..0d9a8424 100644 --- a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h index ccafa01f..398c6568 100644 --- a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -113,7 +113,8 @@ if input "x" is with format NC1HWC0. Specifies the mean of "x". Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x". *@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D if input "x" is with format NC1HWC0. Specifies the mean of "x" for gradient computation. Pass "None" to skip this output. -*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. \n +*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. +*@li reserve_space_3: An optional Tensor of type float32. For compatibility with tensorflow, only has one useless emement. \n *@attention Constraints: *@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available, @@ -135,6 +136,7 @@ REG_OP(BatchNorm) .OUTPUT(batch_variance, TensorType({DT_FLOAT})) .OUTPUT(reserve_space_1, TensorType({DT_FLOAT})) .OUTPUT(reserve_space_2, TensorType({DT_FLOAT})) + .OUTPUT(reserve_space_3, TensorType({DT_FLOAT})) .ATTR(epsilon, Float, 0.0001) .ATTR(data_format, String, "NHWC") .ATTR(is_training, Bool, true) @@ -309,7 +311,8 @@ REG_OP(BatchNormExt2) *@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0. *@li scale: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. *@li reserve_space_1: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm. -*@li reserve_space_2: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm . \n +*@li reserve_space_2: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm . +*@li reserve_space_3: A 1D optional Tensor of type float32. It is an output of BatchNorm . \n *@par Attributes: *@li epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x". 
@@ -336,6 +339,7 @@ REG_OP(BatchNormGrad) .INPUT(scale, TensorType({DT_FLOAT})) .INPUT(reserve_space_1, TensorType({DT_FLOAT})) .INPUT(reserve_space_2, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(reserve_space_3, TensorType({DT_FLOAT})) .OUTPUT(x_backprop, TensorType({DT_FLOAT16,DT_FLOAT})) .OUTPUT(scale_backprop, TensorType({DT_FLOAT})) .OUTPUT(offset_backprop, TensorType({DT_FLOAT})) diff --git a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h index 025f669c..6f58f028 100644 --- a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -314,9 +314,9 @@ REG_OP(DepthwiseConv2DBackpropInputD) REG_OP(DepthwiseConv2D) .INPUT(x, TensorType({DT_FLOAT16, DT_INT8, DT_INT4})) .INPUT(filter, TensorType({DT_FLOAT16, DT_INT8, DT_INT4})) - .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_INT32})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT})) .OPTIONAL_INPUT(offset_w, TensorType({DT_FLOAT16, DT_INT8, DT_INT4})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT})) .REQUIRED_ATTR(strides, ListInt) .ATTR(dilations, ListInt, {1, 1, 1, 1}) .REQUIRED_ATTR(pads, ListInt) diff --git a/third_party/fwkacllib/inc/ops/nn_detect_ops.h b/third_party/fwkacllib/inc/ops/nn_detect_ops.h index e960234e..108a20a7 100644 --- a/third_party/fwkacllib/inc/ops/nn_detect_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_detect_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -1667,6 +1667,8 @@ REG_OP(DecodeBboxV2) * @li y1: A Tensor. Must have the same type as x. * @li y2: A Tensor. Indices of y1 in x. Dtype must be int32. * +*@attention Constraints: +* The operator depends on the unstable sorting algorithm. */ REG_OP(Sort) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT16, DT_INT8, @@ -2058,6 +2060,33 @@ REG_OP(GIoUGrad) .ATTR(is_cross, Bool, true) .ATTR(mode, String, "iou") .OP_END_FACTORY_REG(GIoUGrad) + +/** +*@brief RotatedOverlaps . \n + +*@par Inputs: +*@li boxes : data of grad increment, a 3D Tensor of type float32 with +* shape (B, 5, N). "N" indicates the number of boxes, and the value +* "5" refers to [x1, y1, x2, y2, theta] or [x, y, w, h, theta]. +*@li query_boxes: Bounding boxes, a 3D Tensor of type float32 with +* shape (B, 5, K). "K" indicates the number of boxes, and the value +* "5" refers to [x1, y1, x2, y2, theta] or [x, y, w, h, theta]. + +*@par Attributes: +* trans: An optional attr, true for 'xyxyt', false for 'xywht'. + +*@par Outputs: +* overlaps: A 3D Tensor of type float16 or float32 with shape [B, N, K]. + +*@attention Constraints: +* In each batch, the invalid box cannot appear before the valid box. 
+*/ +REG_OP(RotatedOverlaps) + .INPUT(boxes, TensorType({DT_FLOAT})) + .INPUT(query_boxes, TensorType({DT_FLOAT})) + .OUTPUT(overlaps, TensorType({DT_FLOAT})) + .ATTR(trans, Bool, false) + .OP_END_FACTORY_REG(RotatedOverlaps) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/nn_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_norm_ops.h index d66c8948..9ce7abfd 100644 --- a/third_party/fwkacllib/inc/ops/nn_norm_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_norm_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/nn_ops.h b/third_party/fwkacllib/inc/ops/nn_ops.h index a08b610b..5b1a4dd0 100644 --- a/third_party/fwkacllib/inc/ops/nn_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h index 978c480c..ee599a76 100644 --- a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/nn_training_ops.h b/third_party/fwkacllib/inc/ops/nn_training_ops.h index 39234057..bc75cfb7 100644 --- a/third_party/fwkacllib/inc/ops/nn_training_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_training_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/no_op.h b/third_party/fwkacllib/inc/ops/no_op.h index 7834591c..b27b1fa0 100644 --- a/third_party/fwkacllib/inc/ops/no_op.h +++ b/third_party/fwkacllib/inc/ops/no_op.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h index a4f2fe80..d5960395 100644 --- a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h +++ b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -1127,6 +1127,34 @@ REG_OP(Shrink) .ATTR(lambd, Float, 0.5) .ATTR(bias, Float, 0.0) .OP_END_FACTORY_REG(Shrink) + +/** +* @brief Thresholds each element of the input Tensor: y = (x > threshold) ? 
x : value \n + +* @par Inputs: +* Three inputs, including: +* @li x: A Tensor. +* Must be one of the following types on Ascend310: float16, int8, int32, uint8. +* Must be one of the following types on Ascend710 or Ascend910: float16, float32, int8, int32, uint8. \n +* @li threshold: A Tensor which should have the shape (1,), the value to threshold at. +* Must be one of the following types on Ascend310: float16, int8, int32, uint8. +* Must be one of the following types on Ascend710 or Ascend910: float16, float32, int8, int32, uint8. \n +* @li value: A Tensor which should have the shape (1,), the value to replace with. default value is 0. +* Must be one of the following types on Ascend310: float16, int8, int32, uint8. +* Must be one of the following types on Ascend710 or Ascend910: float16, float32, int8, int32, uint8. \n + +* @par Outputs: +* y: A Tensor which has the same shape and type as the input x. \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator Threshold. +*/ +REG_OP(ThresholdV2) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_INT8, DT_INT32, DT_UINT8})) + .INPUT(threshold, TensorType({DT_FLOAT16, DT_FLOAT32, DT_INT8, DT_INT32, DT_UINT8})) + .OPTIONAL_INPUT(value, TensorType({DT_FLOAT16, DT_FLOAT32, DT_INT8, DT_INT32, DT_UINT8})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32, DT_INT8, DT_INT32, DT_UINT8})) + .OP_END_FACTORY_REG(ThresholdV2) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_NONLINEAR_FUC_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h b/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h index 8d7ef9f9..f36d2935 100644 --- a/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h +++ b/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/ocr_ops.h b/third_party/fwkacllib/inc/ops/ocr_ops.h index baab5af2..a5755659 100644 --- a/third_party/fwkacllib/inc/ops/ocr_ops.h +++ b/third_party/fwkacllib/inc/ops/ocr_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/outfeed_ops.h b/third_party/fwkacllib/inc/ops/outfeed_ops.h index e0b783bc..53b9d701 100644 --- a/third_party/fwkacllib/inc/ops/outfeed_ops.h +++ b/third_party/fwkacllib/inc/ops/outfeed_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/pad_ops.h b/third_party/fwkacllib/inc/ops/pad_ops.h index a9a3b0f0..6d4bcd5e 100644 --- a/third_party/fwkacllib/inc/ops/pad_ops.h +++ b/third_party/fwkacllib/inc/ops/pad_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
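The ThresholdV2 registration above states the elementwise rule y = (x > threshold) ? x : value, with the optional value input defaulting to 0 when it is omitted. A minimal sketch of that rule in plain C++ follows; it is independent of the GE operator registration, and the function name is illustrative.

#include <cstddef>
#include <vector>

// Elementwise ThresholdV2 reference: keep x[i] where it exceeds the threshold,
// otherwise substitute the replacement value (0 when the optional input is omitted).
std::vector<float> ApplyThresholdV2(const std::vector<float>& x,
                                    float threshold,
                                    const float* value /* optional, defaults to 0 */) {
  const float replacement = (value != nullptr) ? *value : 0.0f;
  std::vector<float> y(x.size());
  for (std::size_t i = 0; i < x.size(); ++i) {
    y[i] = (x[i] > threshold) ? x[i] : replacement;
  }
  return y;
}

This matches the behaviour of the PyTorch Threshold operator named in the compatibility note: every element not exceeding the threshold is replaced with the given value.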
diff --git a/third_party/fwkacllib/inc/ops/parsing_ops.h b/third_party/fwkacllib/inc/ops/parsing_ops.h index 03024f96..e578997c 100644 --- a/third_party/fwkacllib/inc/ops/parsing_ops.h +++ b/third_party/fwkacllib/inc/ops/parsing_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/quantize_ops.h b/third_party/fwkacllib/inc/ops/quantize_ops.h index e4b1075b..fd855734 100644 --- a/third_party/fwkacllib/inc/ops/quantize_ops.h +++ b/third_party/fwkacllib/inc/ops/quantize_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/ragged_array_ops.h b/third_party/fwkacllib/inc/ops/ragged_array_ops.h index 9d116760..5af2dd74 100644 --- a/third_party/fwkacllib/inc/ops/ragged_array_ops.h +++ b/third_party/fwkacllib/inc/ops/ragged_array_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h b/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h index 2e253ed4..ceaa64e4 100644 --- a/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h +++ b/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/ragged_math_ops.h b/third_party/fwkacllib/inc/ops/ragged_math_ops.h index b2caa0be..4376437f 100644 --- a/third_party/fwkacllib/inc/ops/ragged_math_ops.h +++ b/third_party/fwkacllib/inc/ops/ragged_math_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/random_ops.h b/third_party/fwkacllib/inc/ops/random_ops.h index 28fbb7f2..ad7f9003 100644 --- a/third_party/fwkacllib/inc/ops/random_ops.h +++ b/third_party/fwkacllib/inc/ops/random_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/reduce_ops.h b/third_party/fwkacllib/inc/ops/reduce_ops.h index 4e4c74af..fa572b66 100644 --- a/third_party/fwkacllib/inc/ops/reduce_ops.h +++ b/third_party/fwkacllib/inc/ops/reduce_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/third_party/fwkacllib/inc/ops/resource_variable_ops.h b/third_party/fwkacllib/inc/ops/resource_variable_ops.h index a5d7f9c3..156f2f34 100644 --- a/third_party/fwkacllib/inc/ops/resource_variable_ops.h +++ b/third_party/fwkacllib/inc/ops/resource_variable_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/rnn.h b/third_party/fwkacllib/inc/ops/rnn.h index 691f1e9f..b374fa5c 100644 --- a/third_party/fwkacllib/inc/ops/rnn.h +++ b/third_party/fwkacllib/inc/ops/rnn.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/rpn_ops.h b/third_party/fwkacllib/inc/ops/rpn_ops.h index 90707602..850b3e5a 100644 --- a/third_party/fwkacllib/inc/ops/rpn_ops.h +++ b/third_party/fwkacllib/inc/ops/rpn_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/save_ops.h b/third_party/fwkacllib/inc/ops/save_ops.h index 0ce473b7..5ce6c2e0 100644 --- a/third_party/fwkacllib/inc/ops/save_ops.h +++ b/third_party/fwkacllib/inc/ops/save_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/sdca_ops.h b/third_party/fwkacllib/inc/ops/sdca_ops.h index e8f3e6b6..601b360b 100644 --- a/third_party/fwkacllib/inc/ops/sdca_ops.h +++ b/third_party/fwkacllib/inc/ops/sdca_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/selection_ops.h b/third_party/fwkacllib/inc/ops/selection_ops.h index 8812a14f..b09d08b0 100644 --- a/third_party/fwkacllib/inc/ops/selection_ops.h +++ b/third_party/fwkacllib/inc/ops/selection_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/set_ops.h b/third_party/fwkacllib/inc/ops/set_ops.h index 1d02fa15..04e04f1b 100644 --- a/third_party/fwkacllib/inc/ops/set_ops.h +++ b/third_party/fwkacllib/inc/ops/set_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/third_party/fwkacllib/inc/ops/slice_write_ops.h b/third_party/fwkacllib/inc/ops/slice_write_ops.h index 994f197c..0c161b2d 100644 --- a/third_party/fwkacllib/inc/ops/slice_write_ops.h +++ b/third_party/fwkacllib/inc/ops/slice_write_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/sparse_ops.h b/third_party/fwkacllib/inc/ops/sparse_ops.h index d9fb4d0a..8eb7b521 100644 --- a/third_party/fwkacllib/inc/ops/sparse_ops.h +++ b/third_party/fwkacllib/inc/ops/sparse_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/spectral_ops.h b/third_party/fwkacllib/inc/ops/spectral_ops.h index d17cbfdd..ab9e1dec 100644 --- a/third_party/fwkacllib/inc/ops/spectral_ops.h +++ b/third_party/fwkacllib/inc/ops/spectral_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/split_combination_ops.h b/third_party/fwkacllib/inc/ops/split_combination_ops.h index cba8e648..08726080 100644 --- a/third_party/fwkacllib/inc/ops/split_combination_ops.h +++ b/third_party/fwkacllib/inc/ops/split_combination_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/state_ops.h b/third_party/fwkacllib/inc/ops/state_ops.h index 49029317..d1ec00b5 100644 --- a/third_party/fwkacllib/inc/ops/state_ops.h +++ b/third_party/fwkacllib/inc/ops/state_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/stateful_random_ops.h b/third_party/fwkacllib/inc/ops/stateful_random_ops.h index a3d18922..f4eb763c 100644 --- a/third_party/fwkacllib/inc/ops/stateful_random_ops.h +++ b/third_party/fwkacllib/inc/ops/stateful_random_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/stateless_random_ops.h b/third_party/fwkacllib/inc/ops/stateless_random_ops.h index dad3c379..ff9daaa3 100644 --- a/third_party/fwkacllib/inc/ops/stateless_random_ops.h +++ b/third_party/fwkacllib/inc/ops/stateless_random_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/third_party/fwkacllib/inc/ops/string_ops.h b/third_party/fwkacllib/inc/ops/string_ops.h index a2699315..a78d63a1 100644 --- a/third_party/fwkacllib/inc/ops/string_ops.h +++ b/third_party/fwkacllib/inc/ops/string_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/swap_co_ops.h b/third_party/fwkacllib/inc/ops/swap_co_ops.h index a1bf4f8b..6e8eaac3 100644 --- a/third_party/fwkacllib/inc/ops/swap_co_ops.h +++ b/third_party/fwkacllib/inc/ops/swap_co_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/target_crop_and_resize.h b/third_party/fwkacllib/inc/ops/target_crop_and_resize.h index 9c61f2c9..9bef1d7b 100644 --- a/third_party/fwkacllib/inc/ops/target_crop_and_resize.h +++ b/third_party/fwkacllib/inc/ops/target_crop_and_resize.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/transformation_ops.h b/third_party/fwkacllib/inc/ops/transformation_ops.h index 775dd4a9..2bbab7a2 100644 --- a/third_party/fwkacllib/inc/ops/transformation_ops.h +++ b/third_party/fwkacllib/inc/ops/transformation_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/vector_search.h b/third_party/fwkacllib/inc/ops/vector_search.h index 8f2201af..1e8c574b 100644 --- a/third_party/fwkacllib/inc/ops/vector_search.h +++ b/third_party/fwkacllib/inc/ops/vector_search.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/warp_perspective_ops.h b/third_party/fwkacllib/inc/ops/warp_perspective_ops.h index e19cbd7c..8ef69d8b 100644 --- a/third_party/fwkacllib/inc/ops/warp_perspective_ops.h +++ b/third_party/fwkacllib/inc/ops/warp_perspective_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/register/op_kernel_registry.h b/third_party/fwkacllib/inc/register/op_kernel_registry.h deleted file mode 100644 index 35fcc857..00000000 --- a/third_party/fwkacllib/inc/register/op_kernel_registry.h +++ /dev/null @@ -1,46 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef INC_REGISTER_OP_KERNEL_REGISTRY_H_ -#define INC_REGISTER_OP_KERNEL_REGISTRY_H_ -#include -#include -#include "register/register_types.h" -#include "register.h" - -namespace ge { -class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpKernelRegistry { - public: - using CreateFn = HostCpuOp* (*)(); - ~OpKernelRegistry(); - - static OpKernelRegistry& GetInstance(); - - bool IsRegistered(const std::string &op_type); - - void RegisterHostCpuOp(const std::string &op_type, CreateFn create_fn); - - std::unique_ptr CreateHostCpuOp(const std::string &op_type); - - private: - OpKernelRegistry(); - class OpKernelRegistryImpl; - /*lint -e148*/ - std::unique_ptr impl_; -}; -} // namespace ge - -#endif // INC_REGISTER_OP_KERNEL_REGISTRY_H_ diff --git a/third_party/fwkacllib/inc/register/op_registry.h b/third_party/fwkacllib/inc/register/op_registry.h deleted file mode 100644 index f7e37390..00000000 --- a/third_party/fwkacllib/inc/register/op_registry.h +++ /dev/null @@ -1,97 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef INC_REGISTER_OP_REGISTRY_H_ -#define INC_REGISTER_OP_REGISTRY_H_ - -#include -#include -#include -#include -#include -#include - -#include "register/register.h" - -namespace domi { -enum RemoveInputType { - OMG_MOVE_TYPE_DTYPE = 0, - OMG_MOVE_TYPE_VALUE, - OMG_MOVE_TYPE_SHAPE, - OMG_MOVE_TYPE_FORMAT, - OMG_MOVE_TYPE_AXIS, - OMG_MOVE_TYPE_SCALAR_VALUE, - OMG_REMOVE_TYPE_WITH_COND = 1000, - OMG_REMOVE_INPUT_WITH_ORIGINAL_TYPE, - OMG_INPUT_REORDER, -}; - -struct RemoveInputConfigure { - int inputIdx = INT_MAX; - std::string attrName; - RemoveInputType moveType; - bool attrValue = false; - std::string originalType; - std::vector input_order; -}; - -class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistry { - public: - static OpRegistry *Instance(); - - std::vector registrationDatas; - - bool Register(const OpRegistrationData ®_data); - - domi::ImplyType GetImplyType(const std::string &op_type); - - void GetOpTypeByImplyType(std::vector &vec_op_type, const domi::ImplyType &imply_type); - - domi::ParseParamFunc GetParseParamFunc(const std::string &op_type, const std::string &ori_type); - - domi::ParseParamByOpFunc GetParseParamByOperatorFunc(const std::string &ori_type); - - domi::FusionParseParamFunc GetFusionParseParamFunc(const std::string &op_type, const std::string &ori_type); - - domi::FusionParseParamByOpFunc GetFusionParseParamByOpFunc(const std::string &op_type, - const std::string &ori_type); - - domi::ParseSubgraphFunc GetParseSubgraphPostFunc(const std::string &op_type); - - Status GetParseSubgraphPostFunc(const std::string &op_type, domi::ParseSubgraphFuncV2 &parse_subgraph_func); - - domi::ImplyType GetImplyTypeByOriOpType(const std::string &ori_optype); - - const std::vector &GetRemoveInputConfigure(const std::string &ori_optype) const; - - bool GetOmTypeByOriOpType(const std::string &ori_optype, std::string &om_type); - - ParseOpToGraphFunc GetParseOpToGraphFunc(const std::string &op_type, const std::string &ori_type); - - private: - std::unordered_map op_run_mode_map_; - std::unordered_map op_parse_params_fn_map_; - std::unordered_map parse_params_by_op_func_map_; - std::unordered_map fusion_op_parse_params_fn_map_; - std::unordered_map fusion_parse_params_by_op_fn_map_; - std::unordered_map op_types_to_parse_subgraph_post_func_; - std::unordered_map> remove_input_configure_map_; - std::map origin_type_to_om_type_; - std::unordered_map parse_op_to_graph_fn_map_; - std::unordered_map op_types_to_parse_subgraph_post_func_v2_; -}; -} // namespace domi -#endif // INC_REGISTER_OP_REGISTRY_H_ diff --git a/third_party/fwkacllib/inc/runtime/base.h b/third_party/fwkacllib/inc/runtime/base.h index 39301554..aa98ed9a 100644 --- a/third_party/fwkacllib/inc/runtime/base.h +++ b/third_party/fwkacllib/inc/runtime/base.h @@ -1,17 +1,7 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved. 
+ * Description: base.h + * Create: 2020-01-01 */ #ifndef CCE_RUNTIME_BASE_H @@ -95,7 +85,7 @@ typedef enum tagRtSwitchDataType { typedef enum tagRtStreamFlagType { RT_HEAD_STREAM = 0, // first stream - RT_INVALID_FLAG = 0xFFFFFFFF, + RT_INVALID_FLAG = 0x7FFFFFFF, } rtStreamFlagType_t; typedef enum tagRtLimitType { @@ -196,13 +186,13 @@ RTS_API rtError_t rtProfilerConfig(uint16_t profConfig); * @ingroup profiling_base * @brief ts send keypoint profiler log. */ -RTS_API rtError_t rtProfilerTrace(uint64_t id, bool notify, uint32_t flags, rtStream_t stream); +RTS_API rtError_t rtProfilerTrace(uint64_t id, bool notify, uint32_t flags, rtStream_t stm); /** * @ingroup profiling_base * @brief ts send keypoint profiler log. */ -RTS_API rtError_t rtProfilerTraceEx(uint64_t id, uint64_t modelId, uint16_t tagId, rtStream_t stream); +RTS_API rtError_t rtProfilerTraceEx(uint64_t id, uint64_t modelId, uint16_t tagId, rtStream_t stm); /** * @ingroup profiling_base @@ -306,134 +296,134 @@ typedef void *rtNotify_t; /** * @ingroup dvrt_base * @brief create label instance - * @param [out] label created label + * @param [out] lbl created label * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtLabelCreate(rtLabel_t *label); +RTS_API rtError_t rtLabelCreate(rtLabel_t *lbl); /** * @ingroup dvrt_base * @brief create label instance - * @param [out] label created label - * @param [in] model label set model + * @param [out] lbl created label + * @param [in] mdl label set model * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtLabelCreateV2(rtLabel_t *label, rtModel_t model); +RTS_API rtError_t rtLabelCreateV2(rtLabel_t *lbl, rtModel_t mdl); /** * @ingroup dvrt_base * @brief set label and stream instance - * @param [in] label set label - * @param [in] stream set stream + * @param [in] lbl set label + * @param [in] stm set stream * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtLabelSet(rtLabel_t label, rtStream_t stream); +RTS_API rtError_t rtLabelSet(rtLabel_t lbl, rtStream_t stm); /** * @ingroup dvrt_base * @brief destroy label instance - * @param [in] label label to destroy + * @param [in] lbl label to destroy * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtLabelDestroy(rtLabel_t label); +RTS_API rtError_t rtLabelDestroy(rtLabel_t lbl); /** * @ingroup dvrt_base * @brief label switch instance * @param [in] ptr address to get value compared * @param [in] condition - * @param [in] value to compare + * @param [in] val to compare * @param [in] true_label goto label - * @param [in] stream to submit label_switch task + * @param [in] stm to submit label_switch task * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtLabelSwitch(void *ptr, rtCondition_t condition, uint32_t value, rtLabel_t trueLabel, - rtStream_t stream); +RTS_API rtError_t rtLabelSwitch(void *ptr, rtCondition_t condition, uint32_t val, rtLabel_t trueLabel, + rtStream_t stm); /** * @ingroup dvrt_base * @brief goto label instance - * @param [in] label goto label - * @param [in] stream to submit label_goto task + * @param [in] lbl goto label + * @param [in] stm to submit label_goto task * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtLabelGoto(rtLabel_t label, rtStream_t stream); +RTS_API rtError_t rtLabelGoto(rtLabel_t lbl, rtStream_t 
stm); /** * @ingroup dvrt_base * @brief name label instance - * @param [in] label instance + * @param [in] lbl instance * @param [in] name label name * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtNameLabel(rtLabel_t label, const char_t *name); +RTS_API rtError_t rtNameLabel(rtLabel_t lbl, const char_t *name); /** * @ingroup dvrt_base * @brief label switch by index * @param [in] ptr index value ptr - * @param [in] max index max value + * @param [in] maxValue index max value * @param [in] labelInfoPtr label content info ptr - * @param [in] stream set stream + * @param [in] stm set stream * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtLabelSwitchByIndex(void *ptr, uint32_t max, void *labelInfoPtr, rtStream_t stream); +RTS_API rtError_t rtLabelSwitchByIndex(void *ptr, uint32_t maxValue, void *labelInfoPtr, rtStream_t stm); /** * @ingroup dvrt_base * @brief stream goto label - * @param [in] label goto label - * @param [in] stream stream to submit label_goto task + * @param [in] lbl goto label + * @param [in] stm stream to submit label_goto task * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtLabelGotoEx(rtLabel_t label, rtStream_t stream); +RTS_API rtError_t rtLabelGotoEx(rtLabel_t lbl, rtStream_t stm); /** * @ingroup dvrt_base * @brief labels to dev info - * @param [in] label model label list + * @param [in] lbl model label list * @param [in] labelNumber label number * @param [in] dst device ptr * @param [in] dstMax dst size * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtLabelListCpy(rtLabel_t *label, uint32_t labelNumber, void *dst, uint32_t dstMax); +RTS_API rtError_t rtLabelListCpy(rtLabel_t *lbl, uint32_t labelNumber, void *dst, uint32_t dstMax); /** * @ingroup dvrt_base * @brief labels to dev info - * @param [out] label created label handle - * @param [in] stream label bind stream + * @param [out] lbl created label handle + * @param [in] stm label bind stream * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtLabelCreateEx(rtLabel_t *label, rtStream_t stream); +RTS_API rtError_t rtLabelCreateEx(rtLabel_t *lbl, rtStream_t stm); /** * @ingroup dvrt_base * @brief labels to dev info - * @param [out] label created label handle - * @param [in] model label bind model - * @param [in] stream label bind stream + * @param [out] lbl created label handle + * @param [in] mdl label bind model + * @param [in] stm label bind stream * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtLabelCreateExV2(rtLabel_t *label, rtModel_t model, rtStream_t stream); +RTS_API rtError_t rtLabelCreateExV2(rtLabel_t *lbl, rtModel_t mdl, rtStream_t stm); /** * @ingroup dvrt_base * @brief get current thread last stream id and task id - * @param [out] stream id and task id + * @param [out] stm id and task id * @param [in] null * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for input null ptr @@ -443,4 +433,4 @@ RTS_API rtError_t rtGetTaskIdAndStreamID(uint32_t *taskId, uint32_t *streamId); } #endif -#endif // CCE_RUNTIME_BASE_H \ No newline at end of file +#endif // CCE_RUNTIME_BASE_H diff --git a/third_party/fwkacllib/inc/runtime/config.h b/third_party/fwkacllib/inc/runtime/config.h index f9e6a49e..c83c1521 100644 --- a/third_party/fwkacllib/inc/runtime/config.h +++ 
b/third_party/fwkacllib/inc/runtime/config.h @@ -1,17 +1,7 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved. + * Description: config.h + * Create: 2020-01-01 */ #ifndef CCE_RUNTIME_CONFIG_H @@ -193,10 +183,10 @@ RTS_API rtError_t rtGetMemoryConfig(rtMemoryConfig_t *memoryConfig); /** * @ingroup * @brief get l2 buffer Info,virtual baseaddr,Size - * @param [in] stream + * @param [in] stm * @return RT_ERROR_NONE for ok, errno for failed */ -RTS_API rtError_t rtMemGetL2Info(rtStream_t stream, void **ptr, uint32_t *size); +RTS_API rtError_t rtMemGetL2Info(rtStream_t stm, void **ptr, uint32_t *size); /** * @ingroup @@ -215,11 +205,11 @@ RTS_API rtError_t rtGetRuntimeVersion(uint32_t *runtimeVersion); * @param [in] deviceId * @param [in] moduleType * @param [in] featureType - * @param [out] value + * @param [out] val * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtGetDeviceCapability(int32_t deviceId, int32_t moduleType, int32_t featureType, int32_t *value); +RTS_API rtError_t rtGetDeviceCapability(int32_t deviceId, int32_t moduleType, int32_t featureType, int32_t *val); /** * @ingroup diff --git a/third_party/fwkacllib/inc/runtime/context.h b/third_party/fwkacllib/inc/runtime/context.h index bc8dda52..e920a939 100644 --- a/third_party/fwkacllib/inc/runtime/context.h +++ b/third_party/fwkacllib/inc/runtime/context.h @@ -1,17 +1,7 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved. + * Description: context.h + * Create: 2020-01-01 */ #ifndef CCE_RUNTIME_CONTEXT_H @@ -53,57 +43,57 @@ typedef struct tagRtGroupInfo { /** * @ingroup rt_context * @brief create context and associates it with the calling thread - * @param [out] ctx created context + * @param [out] createCtx created context * @param [in] flags context creation flag. set to 0. 
- * @param [in] device device to create context on + * @param [in] devId device to create context on * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtCtxCreate(rtContext_t *ctx, uint32_t flags, int32_t device); +RTS_API rtError_t rtCtxCreate(rtContext_t *createCtx, uint32_t flags, int32_t devId); /** * @ingroup rt_context * @brief create context and associates it with the calling thread - * @param [out] ctx created context + * @param [out] createCtx created context * @param [in] flags context creation flag. set to 0. - * @param [in] device device to create context on + * @param [in] devId device to create context on * @param [in] deviceMode the device mode * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtCtxCreateV2(rtContext_t *ctx, uint32_t flags, int32_t device, rtDeviceMode deviceMode); +RTS_API rtError_t rtCtxCreateV2(rtContext_t *createCtx, uint32_t flags, int32_t devId, rtDeviceMode deviceMode); /** * @ingroup rt_context * @brief create context and associates it with the calling thread - * @param [out] ctx created context + * @param [out] createCtx created context * @param [in] flags context creation flag. set to 0. - * @param [in] device device to create context on + * @param [in] devId device to create context on * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtCtxCreateEx(rtContext_t *ctx, uint32_t flags, int32_t device); +RTS_API rtError_t rtCtxCreateEx(rtContext_t *createCtx, uint32_t flags, int32_t devId); /** * @ingroup rt_context * @brief destroy context instance - * @param [in] ctx context to destroy + * @param [in] destroyCtx context to destroy * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtCtxDestroy(rtContext_t ctx); +RTS_API rtError_t rtCtxDestroy(rtContext_t destroyCtx); /** * @ingroup rt_context * @brief destroy context instance - * @param [in] ctx context to destroy + * @param [in] destroyCtx context to destroy * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtCtxDestroyEx(rtContext_t ctx); +RTS_API rtError_t rtCtxDestroyEx(rtContext_t destroyCtx); /** * @ingroup rt_context * @brief binds context to the calling CPU thread. - * @param [in] ctx context to bind. if NULL, unbind current context. + * @param [in] currentCtx context to bind. if NULL, unbind current context. * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtCtxSetCurrent(rtContext_t ctx); +RTS_API rtError_t rtCtxSetCurrent(rtContext_t currentCtx); /** * @ingroup rt_context @@ -115,26 +105,26 @@ RTS_API rtError_t rtCtxSynchronize(void); /** * @ingroup rt_context * @brief returns the context bound to the calling CPU thread. - * @param [out] ctx returned context + * @param [out] currentCtx returned context * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtCtxGetCurrent(rtContext_t *ctx); +RTS_API rtError_t rtCtxGetCurrent(rtContext_t *currentCtx); /** * @ingroup rt_context * @brief returns the primary context of device. 
- * @param [out] ctx returned context + * @param [out] primaryCtx returned context * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtGetPriCtxByDeviceId(int32_t device, rtContext_t *ctx); +RTS_API rtError_t rtGetPriCtxByDeviceId(int32_t devId, rtContext_t *primaryCtx); /** * @ingroup rt_context * @brief returns the device ID for the current context - * @param [out] device returned device id + * @param [out] devId returned device id * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtCtxGetDevice(int32_t *device); +RTS_API rtError_t rtCtxGetDevice(int32_t *devId); /** * @ingroup @@ -158,19 +148,19 @@ RTS_API rtError_t rtGetGroupInfo(int32_t groupId, rtGroupInfo_t *groupInfo, uint * @param [in] groupid count * @return RT_ERROR_NONE for ok, errno for failed */ -RTS_API rtError_t rtGetGroupCount(uint32_t *count); +RTS_API rtError_t rtGetGroupCount(uint32_t *cnt); /** * @ingroup rt_context * @brief set context INF mode - * @param [in] mode + * @param [in] infMode * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtSetCtxINFMode(bool mode); +RTS_API rtError_t rtSetCtxINFMode(bool infMode); #if defined(__cplusplus) } #endif -#endif // CCE_RUNTIME_CONTEXT_H \ No newline at end of file +#endif // CCE_RUNTIME_CONTEXT_H diff --git a/third_party/fwkacllib/inc/runtime/dev.h b/third_party/fwkacllib/inc/runtime/dev.h index 75d01f36..e5b741c7 100644 --- a/third_party/fwkacllib/inc/runtime/dev.h +++ b/third_party/fwkacllib/inc/runtime/dev.h @@ -1,17 +1,7 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved. 
+ * Description: dev.h + * Create: 2020-01-01 */ #ifndef CCE_RUNTIME_DEVICE_H @@ -146,30 +136,30 @@ RTS_API rtError_t rtGetDeviceIDs(uint32_t *devices, uint32_t len); INFO_TYPE_IP, INFO_TYPE_ENDIAN, } DEV_INFO_TYPE; - * @param [out] value the device info + * @param [out] val the device info * @return RT_ERROR_NONE for ok * @return RT_ERROR_DRV_ERR for error */ -RTS_API rtError_t rtGetDeviceInfo(uint32_t deviceId, int32_t moduleType, int32_t infoType, int64_t *value); +RTS_API rtError_t rtGetDeviceInfo(uint32_t deviceId, int32_t moduleType, int32_t infoType, int64_t *val); /** * @ingroup dvrt_dev * @brief set target device for current thread - * @param [int] device the device id + * @param [int] devId the device id * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtSetDevice(int32_t device); +RTS_API rtError_t rtSetDevice(int32_t devId); /** * @ingroup dvrt_dev * @brief set target device for current thread - * @param [int] device the device id + * @param [int] devId the device id * @param [int] deviceMode the device mode * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtSetDeviceV2(int32_t device, rtDeviceMode deviceMode); +RTS_API rtError_t rtSetDeviceV2(int32_t devId, rtDeviceMode deviceMode); /** * @ingroup dvrt_dev @@ -201,11 +191,11 @@ RTS_API rtError_t rtGetDie(int32_t *die); /** * @ingroup dvrt_dev * @brief set target device for current thread - * @param [int] device the device id + * @param [int] devId the device id * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtSetDeviceEx(int32_t device); +RTS_API rtError_t rtSetDeviceEx(int32_t devId); /** * @ingroup dvrt_dev @@ -250,13 +240,13 @@ RTS_API rtError_t rtDisableP2P(uint32_t devIdDes, uint32_t phyIdSrc); /** * @ingroup dvrt_dev * @brief get cability of P2P omemry copy betwen device and peeredevic. - * @param [in] device the logical device id + * @param [in] devId the logical device id * @param [in] peerDevice the physical device id * @param [outv] *canAccessPeer 1:enable 0:disable * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtDeviceCanAccessPeer(int32_t *canAccessPeer, uint32_t device, uint32_t peerDevice); +RTS_API rtError_t rtDeviceCanAccessPeer(int32_t *canAccessPeer, uint32_t devId, uint32_t peerDevice); /** * @ingroup dvrt_dev @@ -280,11 +270,11 @@ RTS_API rtError_t rtDeviceGetBareTgid(uint32_t *pid); /** * @ingroup dvrt_dev * @brief get target device of current thread - * @param [in|out] device the device id + * @param [in|out] devId the device id * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtGetDevice(int32_t *device); +RTS_API rtError_t rtGetDevice(int32_t *devId); /** * @ingroup dvrt_dev @@ -292,7 +282,7 @@ RTS_API rtError_t rtGetDevice(int32_t *device); * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtDeviceReset(int32_t device); +RTS_API rtError_t rtDeviceReset(int32_t devId); /** * @ingroup dvrt_dev @@ -300,19 +290,19 @@ RTS_API rtError_t rtDeviceReset(int32_t device); * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtDeviceResetEx(int32_t device); +RTS_API rtError_t rtDeviceResetEx(int32_t devId); /** * @ingroup dvrt_dev * @brief get total device infomation. 
- * @param [in] device the device id + * @param [in] devId the device id * @param [in] type limit type RT_LIMIT_TYPE_LOW_POWER_TIMEOUT=0 - * @param [in] value limit value + * @param [in] val limit value * @param [out] info the device info * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtDeviceSetLimit(int32_t device, rtLimitType_t type, uint32_t value); +RTS_API rtError_t rtDeviceSetLimit(int32_t devId, rtLimitType_t type, uint32_t val); /** * @ingroup dvrt_dev @@ -334,15 +324,6 @@ RTS_API rtError_t rtDeviceGetStreamPriorityRange(int32_t *leastPriority, int32_t /** * @ingroup dvrt_dev - * @brief Set exception handling callback function - * @param [in] callback rtExceptiontype - * @return RT_ERROR_NONE for ok - * @return RT_ERROR_INVALID_VALUE for error input - */ -RTS_API rtError_t rtSetExceptCallback(rtErrorCallback callback); - -/** - * @ingroup dvrt_dev * @brief Setting Scheduling Type of Graph * @param [in] tsId the ts id * @return RT_ERROR_NONE for ok @@ -373,14 +354,14 @@ RTS_API rtError_t rtGetAicpuDeploy(rtAicpuDeployType_t *deployType); * @brief set chipType * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtSetSocVersion(const char_t *version); +RTS_API rtError_t rtSetSocVersion(const char_t *ver); /** * @ingroup dvrt_dev * @brief get chipType * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtGetSocVersion(char_t *version, const uint32_t maxLen); +RTS_API rtError_t rtGetSocVersion(char_t *ver, const uint32_t maxLen); /** * @ingroup dvrt_dev @@ -388,10 +369,10 @@ RTS_API rtError_t rtGetSocVersion(char_t *version, const uint32_t maxLen); * @param [in] devId the logical device id * @param [in] otherDevId the other logical device id * @param [in] infoType info type - * @param [in|out] value pair info + * @param [in|out] val pair info * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtGetPairDevicesInfo(uint32_t devId, uint32_t otherDevId, int32_t infoType, int64_t *value); +RTS_API rtError_t rtGetPairDevicesInfo(uint32_t devId, uint32_t otherDevId, int32_t infoType, int64_t *val); /** * @ingroup dvrt_dev @@ -406,19 +387,19 @@ RTS_API rtError_t rtGetPairDevicesInfo(uint32_t devId, uint32_t otherDevId, int3 MEMCPY_INFO_SUPPORT_ZEROCOPY = 0, MEMCPY_INFO _RSV, } rtMemcpyInfo_t; - * @param [out] value the capability info RT_CAPABILITY_SUPPORT or RT_CAPABILITY_NOT_SUPPORT + * @param [out] val the capability info RT_CAPABILITY_SUPPORT or RT_CAPABILITY_NOT_SUPPORT * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtGetRtCapability(rtFeatureType_t featureType, int32_t featureInfo, int64_t *value); +RTS_API rtError_t rtGetRtCapability(rtFeatureType_t featureType, int32_t featureInfo, int64_t *val); /** * @ingroup dvrt_dev * @brief set target device for current thread - * @param [int] device the device id + * @param [int] devId the device id * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtSetDeviceWithoutTsd(int32_t device); +RTS_API rtError_t rtSetDeviceWithoutTsd(int32_t devId); /** * @ingroup dvrt_dev @@ -426,7 +407,7 @@ RTS_API rtError_t rtSetDeviceWithoutTsd(int32_t device); * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtDeviceResetWithoutTsd(int32_t device); +RTS_API rtError_t rtDeviceResetWithoutTsd(int32_t devId); /** * @ingroup dvrt_dev diff --git a/third_party/fwkacllib/inc/runtime/dvfsprofile.h b/third_party/fwkacllib/inc/runtime/dvfsprofile.h index c610bbb6..7cb8c8a6 100644 --- 
a/third_party/fwkacllib/inc/runtime/dvfsprofile.h +++ b/third_party/fwkacllib/inc/runtime/dvfsprofile.h @@ -1,17 +1,7 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved. + * Description: dvfsprofile.h + * Create: 2020-01-01 */ #ifndef CCE_RUNTIME_DVFSPROFILE_H @@ -60,4 +50,4 @@ RTS_API rtError_t rtGetDvfsProfile(DvfsProfileMode *pmode); } #endif -#endif // CCE_RUNTIME_DVFSPROFILE_H \ No newline at end of file +#endif // CCE_RUNTIME_DVFSPROFILE_H diff --git a/third_party/fwkacllib/inc/runtime/event.h b/third_party/fwkacllib/inc/runtime/event.h index f6141d42..024ff3e3 100644 --- a/third_party/fwkacllib/inc/runtime/event.h +++ b/third_party/fwkacllib/inc/runtime/event.h @@ -1,17 +1,7 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved. 
+ * Description: event.h + * Create: 2020-01-01 */ #ifndef CCE_RUNTIME_EVENT_H @@ -48,7 +38,7 @@ typedef enum rtEventWaitStatus { * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtEventCreate(rtEvent_t *event); +RTS_API rtError_t rtEventCreate(rtEvent_t *evt); /** * @ingroup dvrt_event @@ -57,103 +47,103 @@ RTS_API rtError_t rtEventCreate(rtEvent_t *event); * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtEventCreateWithFlag(rtEvent_t *event, uint32_t flag); +RTS_API rtError_t rtEventCreateWithFlag(rtEvent_t *evt, uint32_t flag); /** * @ingroup dvrt_event * @brief destroy event instance - * @param [in] event event to destroy + * @param [in] evt event to destroy * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtEventDestroy(rtEvent_t event); +RTS_API rtError_t rtEventDestroy(rtEvent_t evt); /** * @ingroup dvrt_event * @brief get event id - * @param [in] event_ event to be get + * @param [in] evt event to be get * @param [in|out] event_id event_id id * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtGetEventID(rtEvent_t event, uint32_t *eventId); +RTS_API rtError_t rtGetEventID(rtEvent_t evt, uint32_t *evtId); /** * @ingroup dvrt_event * @brief event record * @param [int] event event to record - * @param [int] stream stream handle + * @param [int] stm stream handle * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtEventRecord(rtEvent_t event, rtStream_t stream); +RTS_API rtError_t rtEventRecord(rtEvent_t evt, rtStream_t stm); /** * @ingroup dvrt_event * @brief event reset * @param [int] event event to reset - * @param [int] stream stream handle + * @param [int] stm stream handle * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtEventReset(rtEvent_t event, rtStream_t stream); +RTS_API rtError_t rtEventReset(rtEvent_t evt, rtStream_t stm); /** * @ingroup dvrt_event * @brief wait event to be complete - * @param [in] event event to wait + * @param [in] evt event to wait * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtEventSynchronize(rtEvent_t event); +RTS_API rtError_t rtEventSynchronize(rtEvent_t evt); /** * @ingroup dvrt_event * @brief Queries an event's status - * @param [in] event event to query + * @param [in] evt event to query * @return RT_ERROR_NONE for complete * @return RT_ERROR_EVENT_NOT_COMPLETE for not complete */ -RTS_API rtError_t rtEventQuery(rtEvent_t event); +RTS_API rtError_t rtEventQuery(rtEvent_t evt); /** * @ingroup dvrt_event * @brief Queries an event's wait status - * @param [in] event event to query + * @param [in] evt event to query * @param [in out] EVENT_WAIT_STATUS status * @return EVENT_STATUS_COMPLETE for complete * @return EVENT_STATUS_NOT_READY for not complete */ -RTS_API rtError_t rtEventQueryWaitStatus(rtEvent_t event, rtEventWaitStatus_t *status); +RTS_API rtError_t rtEventQueryWaitStatus(rtEvent_t evt, rtEventWaitStatus_t *status); /** * @ingroup dvrt_event * @brief computes the elapsed time between events. 
- * @param [in] time time between start and end in ms - * @param [in] start starting event - * @param [in] end ending event + * @param [in] timeInterval time between start and end in ms + * @param [in] startEvent starting event + * @param [in] endEvent ending event * @return RT_ERROR_NONE for ok, errno for failed */ -RTS_API rtError_t rtEventElapsedTime(float32_t *time, rtEvent_t start, rtEvent_t end); +RTS_API rtError_t rtEventElapsedTime(float32_t *timeInterval, rtEvent_t startEvent, rtEvent_t endEvent); /** * @ingroup dvrt_event * @brief get the elapsed time from a event after event recorded. - * @param [in] time time in ms - * @param [in] event event handle + * @param [in] timeStamp time in ms + * @param [in] evt event handle * @return RT_ERROR_NONE for ok, errno for failed */ -RTS_API rtError_t rtEventGetTimeStamp(uint64_t *time, rtEvent_t event); +RTS_API rtError_t rtEventGetTimeStamp(uint64_t *timeStamp, rtEvent_t evt); /** * @ingroup dvrt_event * @brief name an event - * @param [in] event event to be named + * @param [in] evt event to be named * @param [in] name identification name * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input of event, name * @return RT_ERROR_DRV_ERR for driver error */ -RTS_API rtError_t rtNameEvent(rtEvent_t event, const char_t *name); +RTS_API rtError_t rtNameEvent(rtEvent_t evt, const char_t *name); /** * @ingroup dvrt_event @@ -184,7 +174,7 @@ RTS_API rtError_t rtNotifyDestroy(rtNotify_t notify); * @return RT_ERROR_INVALID_VALUE for error input * @return RT_ERROR_STREAM_CONTEXT for stream is not in current ctx */ -RTS_API rtError_t rtNotifyRecord(rtNotify_t notify, rtStream_t stream); +RTS_API rtError_t rtNotifyRecord(rtNotify_t notify, rtStream_t stm); /** * @ingroup dvrt_event @@ -195,19 +185,19 @@ RTS_API rtError_t rtNotifyRecord(rtNotify_t notify, rtStream_t stream); * @return RT_ERROR_INVALID_VALUE for error input * @return RT_ERROR_STREAM_CONTEXT for stream is not in current ctx */ -RTS_API rtError_t rtNotifyWait(rtNotify_t notify, rtStream_t stream); +RTS_API rtError_t rtNotifyWait(rtNotify_t notify, rtStream_t stm); /** * @ingroup dvrt_event * @brief Wait for a notify with time out * @param [in] notify notify to be wait - * @param [in] stream input stream + * @param [in] stm input stream * @param [in] timeOut input timeOut * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input * @return RT_ERROR_STREAM_CONTEXT for stream is not in current ctx */ -RTS_API rtError_t rtNotifyWaitWithTimeOut(rtNotify_t notify, rtStream_t stream, uint32_t timeOut); +RTS_API rtError_t rtNotifyWaitWithTimeOut(rtNotify_t notify, rtStream_t stm, uint32_t timeOut); /** * @ingroup dvrt_event diff --git a/third_party/fwkacllib/inc/runtime/kernel.h b/third_party/fwkacllib/inc/runtime/kernel.h index 2bd7f284..08860ccd 100644 --- a/third_party/fwkacllib/inc/runtime/kernel.h +++ b/third_party/fwkacllib/inc/runtime/kernel.h @@ -1,17 +1,7 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
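The event interfaces above only rename their parameters (event becomes evt, stream becomes stm); the declared behavior is unchanged. A minimal host-side timing sketch against the new signatures, assuming the header is reachable as "runtime/event.h" on the include path and that rtStream_t, float32_t and RT_ERROR_NONE come in through the runtime's base definitions:

#include "runtime/event.h"  /* assumed include path; adjust to the build's -I settings */

/* Measure the work recorded on stm between two events, result in milliseconds. */
static rtError_t time_region(rtStream_t stm, float32_t *ms)
{
    rtEvent_t startEvt;
    rtEvent_t endEvt;
    rtError_t ret = rtEventCreate(&startEvt);
    if (ret != RT_ERROR_NONE) {
        return ret;
    }
    ret = rtEventCreate(&endEvt);
    if (ret != RT_ERROR_NONE) {
        (void)rtEventDestroy(startEvt);
        return ret;
    }
    (void)rtEventRecord(startEvt, stm);
    /* ... enqueue the work to be timed on stm here ... */
    (void)rtEventRecord(endEvt, stm);
    ret = rtEventSynchronize(endEvt);                    /* wait for the end event */
    if (ret == RT_ERROR_NONE) {
        ret = rtEventElapsedTime(ms, startEvt, endEvt);  /* elapsed time in ms */
    }
    (void)rtEventDestroy(startEvt);
    (void)rtEventDestroy(endEvt);
    return ret;
}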
- * See the License for the specific language governing permissions and - * limitations under the License. +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved. + * Description: kernel.h + * Create: 2020-01-01 */ #ifndef CCE_RUNTIME_KERNEL_H @@ -154,7 +144,7 @@ typedef enum tagRtDumpKind { * @ingroup rt_kernel * @brief report callback */ -typedef rtError_t (*rtKernelReportCallback)(rtStream_t stream, rtKernelInfo_t kernelInfo); +typedef rtError_t (*rtKernelReportCallback)(rtStream_t stm, rtKernelInfo_t kernelInfo); /** * @ingroup rt_kernel @@ -237,49 +227,49 @@ typedef void (*rtCallback_t)(void *fnData); * @ingroup rt_kernel * @brief register device binary * @param [in] bin device binary description - * @param [out] handle device binary handle + * @param [out] hdl device binary handle * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtDevBinaryRegister(const rtDevBinary_t *bin, void **handle); +RTS_API rtError_t rtDevBinaryRegister(const rtDevBinary_t *bin, void **hdl); /** * @ingroup rt_kernel * @brief register device binary with all kernel * @param [in] bin device binary description - * @param [out] handle device binary handle + * @param [out] hdl device binary handle * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtRegisterAllKernel(const rtDevBinary_t *bin, void **handle); +RTS_API rtError_t rtRegisterAllKernel(const rtDevBinary_t *bin, void **hdl); /** * @ingroup rt_kernel * @brief register fast memeory device binary - * @param [in] handle device binary handle + * @param [in] hdl device binary handle * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtBinaryRegisterToFastMemory(void *handle); +RTS_API rtError_t rtBinaryRegisterToFastMemory(void *hdl); /** * @ingroup rt_kernel * @brief unregister device binary - * @param [in] handle device binary handle + * @param [in] hdl device binary handle * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtDevBinaryUnRegister(void *handle); +RTS_API rtError_t rtDevBinaryUnRegister(void *hdl); /** * @ingroup rt_kernel * @brief register device binary metadata - * @param [in] handle device binary description + * @param [in] hdl device binary description * @param [in] metadata device binary metadata * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtMetadataRegister(void *handle, const char_t *metadata); +RTS_API rtError_t rtMetadataRegister(void *hdl, const char_t *metadata); /** * @ingroup rt_kernel @@ -343,7 +333,7 @@ RTS_API rtError_t rtQueryFunctionRegistered(const char_t *stubName); * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtKernelConfigDump(uint32_t kind, uint32_t dumpSizePerBlock, uint32_t blockDim, void **dumpBaseAddr, - rtStream_t stream); + rtStream_t stm); /** * @ingroup rt_kernel @@ -353,28 +343,28 @@ RTS_API rtError_t rtKernelConfigDump(uint32_t kind, uint32_t dumpSizePerBlock, u * @param [in] args argments address for kernel function * @param [in] argsSize argements size * @param [in] smDesc shared memory description - * @param [in] stream associated stream + * @param [in] stm associated stream * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void *args, uint32_t argsSize, - rtSmDesc_t *smDesc, rtStream_t 
stream); + rtSmDesc_t *smDesc, rtStream_t stm); /** * @ingroup rt_kernel * @brief launch kernel with handle to device - * @param [in] handle program + * @param [in] hdl program * @param [in] devFunc device function description. * @param [in] blockDim block dimentions * @param [in] args argments address for kernel function * @param [in] argsSize argements size * @param [in] smDesc shared memory description - * @param [in] stream associated stream + * @param [in] stm associated stream * @param [in] kernelInfo kernel info * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, +RTS_API rtError_t rtKernelLaunchWithHandle(void *hdl, const void *devFunc, uint32_t blockDim, void *args, uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stream_, const void *kernelInfo); @@ -386,13 +376,13 @@ RTS_API rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, ui * @param [in] args argments address for kernel function * @param [in] argsSize argements size * @param [in] smDesc shared memory description - * @param [in] stream associated stream + * @param [in] stm associated stream * @param [in] flag dump flag * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtKernelLaunchWithFlag(const void *stubFunc, uint32_t blockDim, void *args, uint32_t argsSize, - rtSmDesc_t *smDesc, rtStream_t stream, uint32_t flags); + rtSmDesc_t *smDesc, rtStream_t stm, uint32_t flags); /** * @ingroup rt_kernel(abandoned) @@ -400,11 +390,11 @@ RTS_API rtError_t rtKernelLaunchWithFlag(const void *stubFunc, uint32_t blockDim * @param [in] args argments address for kernel function * @param [in] argsSize argements size * @param [in] flags launch flags - * @param [in] stream associated stream + * @param [in] stm associated stream * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtKernelLaunchEx(void *args, uint32_t argsSize, uint32_t flags, rtStream_t stream); +RTS_API rtError_t rtKernelLaunchEx(void *args, uint32_t argsSize, uint32_t flags, rtStream_t stm); /** * @ingroup rt_kernel(in use) @@ -413,7 +403,7 @@ RTS_API rtError_t rtKernelLaunchEx(void *args, uint32_t argsSize, uint32_t flags * @param [in] args argments address for kernel function * @param [in] argsSize argements size * @param [in] flags launch flags - * @param [in] stream associated stream + * @param [in] stm associated stream * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ @@ -429,12 +419,12 @@ RTS_API rtError_t rtKernelLaunchFwk(const char_t *opName, void *args, uint32_t a * @param [in] args argments address for kernel function * @param [in] argsSize argments size * @param [in] smDesc shared memory description - * @param [in] stream associated stream + * @param [in] stm associated stream * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtCpuKernelLaunch(const void *soName, const void *kernelName, uint32_t blockDim, const void *args, - uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stream); + uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stm); /** * @ingroup rt_kernel(in use) @@ -444,12 +434,12 @@ RTS_API rtError_t rtCpuKernelLaunch(const void *soName, const void *kernelName, * @param [in] args argments address for kernel function * @param [in] argsSize argments size * @param [in] smDesc shared memory description - * @param [in] stream 
associated stream + * @param [in] stm associated stream * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtAicpuKernelLaunch(const rtKernelLaunchNames_t *launchNames, - uint32_t blockDim, const void *args, uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stream); + uint32_t blockDim, const void *args, uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stm); /** * @ingroup rt_kernel(abandoned) @@ -460,13 +450,13 @@ RTS_API rtError_t rtAicpuKernelLaunch(const rtKernelLaunchNames_t *launchNames, * @param [in] args argments address for kernel function * @param [in] argsSize argments size * @param [in] smDesc shared memory description - * @param [in] stream associated stream + * @param [in] stm associated stream * @param [in] flag dump flag or others function flag * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtCpuKernelLaunchWithFlag(const void *soName, const void *kernelName, uint32_t blockDim, - const void *args, uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stream, + const void *args, uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stm, uint32_t flags); /** @@ -477,25 +467,25 @@ RTS_API rtError_t rtCpuKernelLaunchWithFlag(const void *soName, const void *kern * @param [in] args argments address for kernel function * @param [in] argsSize argments size * @param [in] smDesc shared memory description - * @param [in] stream associated stream + * @param [in] stm associated stream * @param [in] flag dump flag or others function flag * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtAicpuKernelLaunchWithFlag(const rtKernelLaunchNames_t *launchNames, uint32_t blockDim, - const void *args, uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stream, uint32_t flags); + const void *args, uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stm, uint32_t flags); /** * @ingroup rt_kernel * @brief L1 fusion dump addr transfered to device - * @param [in] model handle info + * @param [in] mdl handle info * @param [in] addr ddr address of L1 Fusion Dump * @param [in] dumpSize memory size * @param [in] flag memory flag * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtDumpAddrSet(rtModel_t model, void *addr, uint32_t dumpSize, uint32_t flag); +RTS_API rtError_t rtDumpAddrSet(rtModel_t mdl, void *addr, uint32_t dumpSize, uint32_t flag); /** * @ingroup rt_kernel @@ -514,14 +504,14 @@ RTS_API rtError_t rtDatadumpInfoLoad(const void *dumpInfo, uint32_t length); * @brief configure call argment for next rtLaunch in current thread * @param [in] numBlocks block dimentions * @param [in] smDesc shared memory description - * @param [in] stream associated stream + * @param [in] stm associated stream * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ #ifdef __cplusplus -RTS_API rtError_t rtConfigureCall(uint32_t numBlocks, rtSmDesc_t *smDesc = nullptr, rtStream_t stream = nullptr); +RTS_API rtError_t rtConfigureCall(uint32_t numBlocks, rtSmDesc_t *smDesc = nullptr, rtStream_t stm = nullptr); #else -RTS_API rtError_t rtConfigureCall(uint32_t numBlocks, rtSmDesc_t *smDesc, rtStream_t stream); +RTS_API rtError_t rtConfigureCall(uint32_t numBlocks, rtSmDesc_t *smDesc, rtStream_t stm); #endif #endif // __CLANG_CCE_RUNTIME_H__ @@ -563,20 +553,20 @@ RTS_API rtError_t rtKernelConfigTransArg(const void *ptr, uint64_t size, uint32_ /** * @ingroup rt_kernel * @brief start fusion kernels. 
- * @param [in] stream stream for fusion kernels + * @param [in] stm stream for fusion kernels * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtKernelFusionStart(rtStream_t stream); +RTS_API rtError_t rtKernelFusionStart(rtStream_t stm); /** * @ingroup rt_kernel * @brief end fusion kernels. - * @param [in] stream stream for fusion kernels + * @param [in] stm stream for fusion kernels * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtKernelFusionEnd(rtStream_t stream); +RTS_API rtError_t rtKernelFusionEnd(rtStream_t stm); /** * @ingroup rt_kernel @@ -591,22 +581,22 @@ RTS_API rtError_t rtSetKernelReportCallback(rtKernelReportCallback callBack); * @ingroup rt_kernel * @brief subscribe stream callback report. * @param [in] threadId thread id for stream - * @param [in] stream stream for subscribe + * @param [in] stm stream for subscribe * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtSubscribeReport(uint64_t threadId, rtStream_t stream); +RTS_API rtError_t rtSubscribeReport(uint64_t threadId, rtStream_t stm); /** * @ingroup rt_kernel * @brief add callback launch task in stream. * @param [in] callBackFunc app callback function * @param [in] fnData user data - * @param [in] stream subscribed stream + * @param [in] stm subscribed stream * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtCallbackLaunch(rtCallback_t callBackFunc, void *fnData, rtStream_t stream, bool isBlock); +RTS_API rtError_t rtCallbackLaunch(rtCallback_t callBackFunc, void *fnData, rtStream_t stm, bool isBlock); /** * @ingroup rt_kernel @@ -621,11 +611,11 @@ RTS_API rtError_t rtProcessReport(int32_t timeout); * @ingroup rt_kernel * @brief unsubscribe callback report. 
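The subscribe/callback interfaces above follow the same stream-to-stm rename. A sketch of the subscribe, launch-callback, process, unsubscribe flow; the 1000 ms timeout, the caller-supplied threadId and the meaning given to isBlock are illustrative assumptions, not taken from this header:

#include <stdbool.h>
#include <stdint.h>
#include "runtime/kernel.h"  /* assumed include path */

static void on_done(void *fnData)
{
    *(int32_t *)fnData = 1;   /* runs when rtProcessReport drains the report */
}

static rtError_t run_callback_once(uint64_t threadId, rtStream_t stm)
{
    int32_t done = 0;
    rtError_t ret = rtSubscribeReport(threadId, stm);
    if (ret != RT_ERROR_NONE) {
        return ret;
    }
    ret = rtCallbackLaunch(on_done, &done, stm, true);   /* isBlock = true */
    if (ret == RT_ERROR_NONE) {
        ret = rtProcessReport(1000);                     /* assumed ms timeout */
    }
    (void)rtUnSubscribeReport(threadId, stm);
    return ret;
}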
* @param [in] threadId thread id for stream - * @param [in] stream stream for subscribe + * @param [in] stm stream for subscribe * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtUnSubscribeReport(uint64_t threadId, rtStream_t stream); +RTS_API rtError_t rtUnSubscribeReport(uint64_t threadId, rtStream_t stm); /** * @ingroup profiling_base @@ -633,7 +623,7 @@ RTS_API rtError_t rtUnSubscribeReport(uint64_t threadId, rtStream_t stream); * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtStartOnlineProf(rtStream_t stream, uint32_t sampleNum); +RTS_API rtError_t rtStartOnlineProf(rtStream_t stm, uint32_t sampleNum); /** * @ingroup profiling_base @@ -641,7 +631,7 @@ RTS_API rtError_t rtStartOnlineProf(rtStream_t stream, uint32_t sampleNum); * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtStopOnlineProf(rtStream_t stream); +RTS_API rtError_t rtStopOnlineProf(rtStream_t stm); /** * @ingroup profiling_base @@ -649,7 +639,7 @@ RTS_API rtError_t rtStopOnlineProf(rtStream_t stream); * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtGetOnlineProfData(rtStream_t stream, rtProfDataInfo_t *pProfData, uint32_t profDataNum); +RTS_API rtError_t rtGetOnlineProfData(rtStream_t stm, rtProfDataInfo_t *pProfData, uint32_t profDataNum); /** * @ingroup profiling_base @@ -674,28 +664,28 @@ RTS_API rtError_t rtStopMDCProfiler(void *addr); * @param [in] blockDim block dimentions * @param [in] argsInfo argments info address for kernel function * @param [in] smDesc shared memory description - * @param [in] stream associated stream + * @param [in] stm associated stream * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtKernelLaunchWithTiling(const void *stubFunc, uint32_t blockDim, - rtArgsWithTiling_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stream); + rtArgsWithTiling_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stm); /** * @ingroup rt_kernel * @brief launch kernel with handle and tiling data to device - * @param [in] handle program + * @param [in] hdl program * @param [in] devFunc device function description. * @param [in] blockDim block dimentions * @param [in] argsInfo argments info address for kernel function * @param [in] smDesc shared memory description - * @param [in] stream associated stream + * @param [in] stm associated stream * @param [in] kernelInfo kernel info * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtKernelLaunchWithHandleAndTiling(void *handle, const void *devFunc, uint32_t blockDim, - rtArgsWithTiling_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stream, const void* kernelInfo); +RTS_API rtError_t rtKernelLaunchWithHandleAndTiling(void *hdl, const void *devFunc, uint32_t blockDim, + rtArgsWithTiling_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stm, const void* kernelInfo); #if defined(__cplusplus) } diff --git a/third_party/fwkacllib/inc/runtime/mem.h b/third_party/fwkacllib/inc/runtime/mem.h index d095ef0c..c086da10 100644 --- a/third_party/fwkacllib/inc/runtime/mem.h +++ b/third_party/fwkacllib/inc/runtime/mem.h @@ -1,17 +1,7 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
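rtKernelLaunch keeps its argument order; only stream becomes stm. A launch sketch against the renamed signature; the my_kernel_args_t layout and the already-registered stub are hypothetical, and passing NULL for smDesc (no shared-memory description) is an assumption:

#include <stdint.h>
#include <stddef.h>
#include "runtime/kernel.h"  /* assumed include path */

typedef struct {
    uint64_t inputAddr;    /* device addresses packed the way the kernel expects */
    uint64_t outputAddr;
    uint32_t len;
} my_kernel_args_t;        /* hypothetical argument block, not from this header */

/* Launch an already-registered kernel stub on stm. */
static rtError_t launch_my_kernel(const void *stubFunc, rtStream_t stm,
                                  uint64_t inAddr, uint64_t outAddr, uint32_t len)
{
    my_kernel_args_t args = { inAddr, outAddr, len };
    const uint32_t blockDim = 1U;
    return rtKernelLaunch(stubFunc, blockDim, &args, (uint32_t)sizeof(args),
                          NULL /* smDesc, assumed optional */, stm);
}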
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved. + * Description: mem.h + * Create: 2020-01-01 */ #ifndef CCE_RUNTIME_MEM_H @@ -59,12 +49,13 @@ extern "C" { * @brief memory Policy */ #define RT_MEMORY_POLICY_NONE (0x0U) // Malloc mem prior huge page, then default page -#define RT_MEMORY_POLICY_HUGE_PAGE_FIRST (0x1U << 10U) // Malloc mem prior huge page, then default page -#define RT_MEMORY_POLICY_HUGE_PAGE_ONLY (0x1U << 11U) // Malloc mem only use huge page -#define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY (0x1U << 12U) // Malloc mem only use default page -#define RT_MEMORY_POLICY_HUGE_PAGE_FIRST_P2P (0x1U << 13U) // Malloc mem prior huge page, then default page, for p2p -#define RT_MEMORY_POLICY_HUGE_PAGE_ONLY_P2P (0x1U << 14U) // Malloc mem only use huge page, use for p2p -#define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY_P2P (0x1U << 15U) // Malloc mem only use default page, use for p2p +#define RT_MEMORY_POLICY_HUGE_PAGE_FIRST (0x400U) // Malloc mem prior huge page, then default page, 0x1U << 10U +#define RT_MEMORY_POLICY_HUGE_PAGE_ONLY (0x800U) // Malloc mem only use huge page, 0x1U << 11U +#define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY (0x1000U) // Malloc mem only use default page, 0x1U << 12U +// Malloc mem prior huge page, then default page, for p2p, 0x1U << 13U +#define RT_MEMORY_POLICY_HUGE_PAGE_FIRST_P2P (0x2000U) +#define RT_MEMORY_POLICY_HUGE_PAGE_ONLY_P2P (0x4000U) // Malloc mem only use huge page, use for p2p, 0x1U << 14U +#define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY_P2P (0x8000U) // Malloc mem only use default page, use for p2p, 0x1U << 15U #define MEM_ALLOC_TYPE_BIT (0x3FFU) // mem type bit in <0, 9> @@ -130,8 +121,8 @@ typedef enum tagRtDataType { RT_DATA_TYPE_BFP16 = 6, // bfp16 RT_DATA_TYPE_BFP32 = 7, // bfp32 RT_DATA_TYPE_UINT8 = 8, // uint8 - RT_DATA_TYPE_UINT16= 9, // uint16 - RT_DATA_TYPE_UINT32= 10,// uint32 + RT_DATA_TYPE_UINT16 = 9, // uint16 + RT_DATA_TYPE_UINT32 = 10, // uint32 RT_DATA_TYPE_END = 11, } rtDataType_t; @@ -188,19 +179,19 @@ typedef struct tagRtPointerAttributes { } rtPointerAttributes_t; -typedef struct rtMallocHostSharedMemoryIn { +typedef struct { const char_t *name; const uint64_t size; uint32_t flag; } rtMallocHostSharedMemoryIn; -typedef struct rtMallocHostSharedMemoryOut { +typedef struct { int32_t fd; void *ptr; void *devPtr; } rtMallocHostSharedMemoryOut; -typedef struct rtFreeHostSharedMemoryIn { +typedef struct { const char_t *name; const uint64_t size; int32_t fd; @@ -311,17 +302,6 @@ RTS_API rtError_t rtMemFreeManaged(void *ptr); /** * @ingroup dvrt_mem - * @brief Specifies how memory is use - * @param [in] devPtr memory pointer - * @param [in] size memory size - * @param [in] advise reserved, set to 1 - * @return RT_ERROR_NONE for ok - * @return others for error - */ -RTS_API rtError_t rtMemAdvise(void *devPtr, uint64_t size, uint32_t advise); - -/** - * @ingroup dvrt_mem * @brief alloc cached device memory * @param [in| devPtr memory pointer * @param [in] size memory size @@ -354,12 +334,12 @@ RTS_API rtError_t rtInvalidCache(void *base, size_t len); * @param [in] dst destination address pointer * 
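The memory-policy macros above only change their spelling from the shifted form to a hex literal; a C11 compile-time check (not part of the header) confirms the values are identical:

#include "runtime/mem.h"  /* assumed include path */

_Static_assert(RT_MEMORY_POLICY_HUGE_PAGE_FIRST       == (0x1U << 10U), "0x400U");
_Static_assert(RT_MEMORY_POLICY_HUGE_PAGE_ONLY        == (0x1U << 11U), "0x800U");
_Static_assert(RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY     == (0x1U << 12U), "0x1000U");
_Static_assert(RT_MEMORY_POLICY_HUGE_PAGE_FIRST_P2P   == (0x1U << 13U), "0x2000U");
_Static_assert(RT_MEMORY_POLICY_HUGE_PAGE_ONLY_P2P    == (0x1U << 14U), "0x4000U");
_Static_assert(RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY_P2P == (0x1U << 15U), "0x8000U");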
@param [in] Max length of destination address memory * @param [in] src source address pointer - * @param [in] count the number of byte to copy + * @param [in] cnt the number of byte to copy * @param [in] kind memcpy type * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtMemcpy(void *dst, uint64_t destMax, const void *src, uint64_t count, rtMemcpyKind_t kind); +RTS_API rtError_t rtMemcpy(void *dst, uint64_t destMax, const void *src, uint64_t cnt, rtMemcpyKind_t kind); /** * @ingroup dvrt_mem @@ -367,14 +347,14 @@ RTS_API rtError_t rtMemcpy(void *dst, uint64_t destMax, const void *src, uint64_ * @param [in] dst destination address pointer * @param [in] Max length of destination address memory * @param [in] src source address pointer - * @param [in] count the number of byte to copy + * @param [in] cnt the number of byte to copy * @param [in] kind memcpy type - * @param [in] stream asynchronized task stream + * @param [in] stm asynchronized task stream * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtMemcpyAsync(void *dst, uint64_t destMax, const void *src, uint64_t count, rtMemcpyKind_t kind, - rtStream_t stream); +RTS_API rtError_t rtMemcpyAsync(void *dst, uint64_t destMax, const void *src, uint64_t cnt, rtMemcpyKind_t kind, + rtStream_t stm); /** * @ingroup dvrt_mem @@ -382,15 +362,15 @@ RTS_API rtError_t rtMemcpyAsync(void *dst, uint64_t destMax, const void *src, ui * @param [in] dst destination address pointer * @param [in] Max length of destination address memory * @param [in] src source address pointer - * @param [in] count the number of byte to copy + * @param [in] cnt the number of byte to copy * @param [in] kind memcpy type * @param [in] type data type - * @param [in] stream asynchronized task stream + * @param [in] stm asynchronized task stream * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtReduceAsync(void *dst, uint64_t destMax, const void *src, uint64_t count, rtRecudeKind_t kind, - rtDataType_t type, rtStream_t stream); +RTS_API rtError_t rtReduceAsync(void *dst, uint64_t destMax, const void *src, uint64_t cnt, rtRecudeKind_t kind, + rtDataType_t type, rtStream_t stm); /** * @ingroup dvrt_mem @@ -418,12 +398,12 @@ RTS_API rtError_t rtMemcpy2d(void *dst, uint64_t dstPitch, const void *src, uint * @param [in] width width of matrix transfer * @param [in] height height of matrix transfer * @param [in] kind memcpy type - * @param [in] stream asynchronized task stream + * @param [in] stm asynchronized task stream * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtMemcpy2dAsync(void *dst, uint64_t dstPitch, const void *src, uint64_t srcPitch, uint64_t width, - uint64_t height, rtMemcpyKind_t kind, rtStream_t stream); + uint64_t height, rtMemcpyKind_t kind, rtStream_t stm); /** * @ingroup dvrt_mem @@ -449,25 +429,25 @@ RTS_API rtError_t rtSetAiCoreMemorySizes(rtAiCoreMemorySize_t *aiCoreMemorySize) * @brief set memory with uint32_t value * @param [in] devPtr * @param [in] Max length of destination address memory - * @param [in] value - * @param [in] count byte num + * @param [in] val + * @param [in] cnt byte num * @return RT_ERROR_NONE for ok, errno for failed * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtMemset(void *devPtr, uint64_t destMax, uint32_t value, uint64_t count); +RTS_API rtError_t rtMemset(void *devPtr, uint64_t destMax, uint32_t 
val, uint64_t cnt); /** * @ingroup dvrt_mem * @brief set memory with uint32_t value async * @param [in] devPtr * @param [in] Max length of destination address memory - * @param [in] value - * @param [in] count byte num - * @param [in] stream + * @param [in] val + * @param [in] cnt byte num + * @param [in] stm * @return RT_ERROR_NONE for ok, errno for failed * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtMemsetAsync(void *ptr, uint64_t destMax, uint32_t value, uint64_t count, rtStream_t stream); +RTS_API rtError_t rtMemsetAsync(void *ptr, uint64_t destMax, uint32_t val, uint64_t cnt, rtStream_t stm); /** * @ingroup dvrt_mem @@ -494,11 +474,11 @@ RTS_API rtError_t rtMemGetInfoEx(rtMemInfoType_t memInfoType, size_t *freeSize, * @brief set memory with uint32_t value * @param [in] devPtr * @param [in] len - * @param [in] device + * @param [in] devId * @return RT_ERROR_NONE for ok, errno for failed * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtMemPrefetchToDevice(void *devPtr, uint64_t len, int32_t device); +RTS_API rtError_t rtMemPrefetchToDevice(void *devPtr, uint64_t len, int32_t devId); /** * @ingroup dvrt_mem @@ -557,14 +537,14 @@ RTS_API rtError_t rtIpcCloseMemory(const void *ptr); /** * @ingroup dvrt_mem * @brief HCCL Async memory cpy - * @param [in] index sq index + * @param [in] sqIndex sq index * @param [in] wqeIndex moudle index - * @param [in] stream asynchronized task stream + * @param [in] stm asynchronized task stream * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input * @return RT_ERROR_DRV_ERR for driver error */ -RTS_API rtError_t rtRDMASend(uint32_t index, uint32_t wqeIndex, rtStream_t stream); +RTS_API rtError_t rtRDMASend(uint32_t sqIndex, uint32_t wqeIndex, rtStream_t stm); /** * @ingroup dvrt_mem @@ -583,12 +563,12 @@ RTS_API rtError_t rtSetIpcMemPid(const char_t *name, int32_t pid[], int32_t num) * @brief HCCL Async memory cpy * @param [in] dbindex single device 0 * @param [in] dbinfo doorbell info - * @param [in] stream asynchronized task stream + * @param [in] stm asynchronized task stream * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input * @return RT_ERROR_DRV_ERR for driver error */ -RTS_API rtError_t rtRDMADBSend(uint32_t dbIndex, uint64_t dbInfo, rtStream_t stream); +RTS_API rtError_t rtRDMADBSend(uint32_t dbIndex, uint64_t dbInfo, rtStream_t stm); #if defined(__cplusplus) } diff --git a/third_party/fwkacllib/inc/runtime/rt.h b/third_party/fwkacllib/inc/runtime/rt.h index 8c3e339f..10b763b2 100644 --- a/third_party/fwkacllib/inc/runtime/rt.h +++ b/third_party/fwkacllib/inc/runtime/rt.h @@ -1,17 +1,7 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved. 
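rtMemcpyAsync and rtMemsetAsync keep their destMax-guarded signatures with the renamed cnt/val/stm parameters. An upload sketch; devPtr and devCap are assumed to come from the runtime's device allocator, and RT_MEMCPY_HOST_TO_DEVICE is assumed to be the host-to-device member of rtMemcpyKind_t (the enum values sit outside this hunk):

#include <stdint.h>
#include "runtime/mem.h"  /* assumed include path */

/* Zero a device buffer, then copy cnt bytes of host data into it on stm. */
static rtError_t fill_and_upload(void *devPtr, uint64_t devCap,
                                 const void *hostBuf, uint64_t cnt, rtStream_t stm)
{
    rtError_t ret = rtMemsetAsync(devPtr, devCap, 0U, devCap, stm);
    if (ret != RT_ERROR_NONE) {
        return ret;
    }
    /* destMax is the destination capacity, cnt the number of bytes to copy */
    return rtMemcpyAsync(devPtr, devCap, hostBuf, cnt, RT_MEMCPY_HOST_TO_DEVICE, stm);
}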
+ * Description: rt.h + * Create: 2020-01-01 */ #ifndef CCE_RUNTIME_RT_H diff --git a/third_party/fwkacllib/inc/runtime/rt_dfx.h b/third_party/fwkacllib/inc/runtime/rt_dfx.h index 7e0bc280..f96c0859 100644 --- a/third_party/fwkacllib/inc/runtime/rt_dfx.h +++ b/third_party/fwkacllib/inc/runtime/rt_dfx.h @@ -1,17 +1,6 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. + * Description: dfx interface */ #ifndef CCE_RUNTIME_RT_DFX_H diff --git a/third_party/fwkacllib/inc/runtime/rt_ffts.h b/third_party/fwkacllib/inc/runtime/rt_ffts.h index ded78d25..5c0ab971 100644 --- a/third_party/fwkacllib/inc/runtime/rt_ffts.h +++ b/third_party/fwkacllib/inc/runtime/rt_ffts.h @@ -1,17 +1,6 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2021-2021. All rights reserved. 
+ * Description: ffts interface */ #ifndef CCE_RUNTIME_RT_FFTS_H @@ -91,8 +80,11 @@ typedef struct tagAutoThreadPrefetch { typedef struct tagAutoThreadAicAivInfo { uint64_t taskParamAddr; // device mem uint16_t taskParamOffset; - // when satMode=1 and FP16 computation with none INF inputs overflows/underflows, results will be +/-INF of FP16 - // when satMode=0 and FP16 computation with none INF inputs overflows/underflows, results will be saturated to +/-MAX of FP16 + /* + * when satMode=1 and FP16 computation with none INF inputs overflows/underflows, results will be +/-INF of FP16 + * when satMode=0 and FP16 computation with none INF inputs overflows/underflows, results will be saturated to + * +/-MAX of FP16 + */ uint8_t satMode; uint8_t scheduleMode; // 0:normal mode, 1:batch mode, 2:sync mode 3:reserved uint8_t iCachePrefetchCnt; // units is 2K @@ -187,12 +179,12 @@ typedef struct tagFftsTaskInfo { rtTicketCache_t ticketCache[RT_FFTS_MAX_TICKET_CACHE_NUM]; } rtFftsTaskInfo_t; -RTS_API rtError_t rtFftsTaskLaunch(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stream); +RTS_API rtError_t rtFftsTaskLaunch(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stm); RTS_API rtError_t rtGetC2cCtrlAddr(uint64_t *addr, uint32_t *len); -RTS_API rtError_t rtFftsTaskLaunchWithFlag(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stream, uint32_t flag); +RTS_API rtError_t rtFftsTaskLaunchWithFlag(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stm, uint32_t flag); #if defined(__cplusplus) } #endif -#endif // CCE_RUNTIME_RT_FFTS_H \ No newline at end of file +#endif // CCE_RUNTIME_RT_FFTS_H diff --git a/third_party/fwkacllib/inc/runtime/rt_ffts_plus.h b/third_party/fwkacllib/inc/runtime/rt_ffts_plus.h index 53f3e60a..18b25d36 100644 --- a/third_party/fwkacllib/inc/runtime/rt_ffts_plus.h +++ b/third_party/fwkacllib/inc/runtime/rt_ffts_plus.h @@ -1,17 +1,6 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. 
+ * Description: ffts plus interface */ #ifndef CCE_RUNTIME_RT_FFTS_PLUS_H @@ -36,11 +25,11 @@ typedef struct tagFftsPlusTaskInfo { #pragma pack(pop) -RTS_API rtError_t rtGetAddrAndPrefCntWithHandle(void *handle, const void *devFunc, void **addr, uint32_t *prefetchCnt); +RTS_API rtError_t rtGetAddrAndPrefCntWithHandle(void *hdl, const void *devFunc, void **addr, uint32_t *prefetchCnt); -RTS_API rtError_t rtFftsPlusTaskLaunch(rtFftsPlusTaskInfo_t *fftsPlusTaskInfo, rtStream_t stream); +RTS_API rtError_t rtFftsPlusTaskLaunch(rtFftsPlusTaskInfo_t *fftsPlusTaskInfo, rtStream_t stm); -RTS_API rtError_t rtFftsPlusTaskLaunchWithFlag(rtFftsPlusTaskInfo_t *fftsPlusTaskInfo, rtStream_t stream, +RTS_API rtError_t rtFftsPlusTaskLaunchWithFlag(rtFftsPlusTaskInfo_t *fftsPlusTaskInfo, rtStream_t stm, uint32_t flag); #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) diff --git a/third_party/fwkacllib/inc/runtime/rt_ffts_plus_define.h b/third_party/fwkacllib/inc/runtime/rt_ffts_plus_define.h index 8956e009..b1a6232a 100644 --- a/third_party/fwkacllib/inc/runtime/rt_ffts_plus_define.h +++ b/third_party/fwkacllib/inc/runtime/rt_ffts_plus_define.h @@ -1,17 +1,6 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. 
+ * Description: the definition of ffts plus */ #ifndef CCE_RUNTIME_RT_FFTS_PLUS_DEFINE_H @@ -370,8 +359,8 @@ typedef struct tagFftsPlusWriteValueCtx { // 80-83 uint32_t writeAddressBaseL; // 84-87 - uint32_t writeAddressBaseH: 17; - uint32_t res10: 15; + uint32_t writeAddressBaseH : 17; + uint32_t res10 : 15; // 88-91 uint32_t writeAddressOffset; // 92-95 diff --git a/third_party/fwkacllib/inc/runtime/rt_mem_queue.h b/third_party/fwkacllib/inc/runtime/rt_mem_queue.h index c6f56064..49a272f0 100644 --- a/third_party/fwkacllib/inc/runtime/rt_mem_queue.h +++ b/third_party/fwkacllib/inc/runtime/rt_mem_queue.h @@ -187,107 +187,107 @@ typedef enum rtGroupType { /** * @ingroup rt_mem_queue * @brief init queue schedule - * @param [in] device the logical device id + * @param [in] devId the logical device id * @param [in] grpName the name of group, can be nullptr * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtMemQueueInitQS(int32_t device, const char* grpName); +RTS_API rtError_t rtMemQueueInitQS(int32_t devId, const char_t *grpName); /** * @ingroup rt_mem_queue * @brief create mbuf queue - * @param [in] device the logical device id - * @param [in] rtMemQueueAttr attribute of queue + * @param [in] devId the logical device id + * @param [in] queAttr attribute of queue * @param [out] qid queue id * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtMemQueueCreate(int32_t device, const rtMemQueueAttr_t *queueAttr, uint32_t *qid); +RTS_API rtError_t rtMemQueueCreate(int32_t devId, const rtMemQueueAttr_t *queAttr, uint32_t *qid); /** * @ingroup rt_mem_queue * @brief destroy mbuf queue - * @param [in] device the logical device id + * @param [in] devId the logical device id * @param [in] qid queue id * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtMemQueueDestroy(int32_t device, uint32_t qid); +RTS_API rtError_t rtMemQueueDestroy(int32_t devId, uint32_t qid); /** * @ingroup rt_mem_queue * @brief destroy mbuf queue init - * @param [in] device the logical device id + * @param [in] devId the logical device id * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtMemQueueInit(int32_t device); +RTS_API rtError_t rtMemQueueInit(int32_t devId); /** * @ingroup rt_mem_queue * @brief enqueu mbuf - * @param [in] device the logical device id + * @param [in] devId the logical device id * @param [in] qid queue id * @param [in] mbuf enqueue mbuf * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtMemQueueEnQueue(int32_t device, uint32_t qid, void *mbuf); +RTS_API rtError_t rtMemQueueEnQueue(int32_t devId, uint32_t qid, void *mbuf); /** * @ingroup rt_mem_queue * @brief enqueu mbuf - * @param [in] device the logical device id + * @param [in] devId the logical device id * @param [in] qid queue id * @param [out] mbuf dequeue mbuf * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtMemQueueDeQueue(int32_t device, uint32_t qid, void **mbuf); +RTS_API rtError_t rtMemQueueDeQueue(int32_t devId, uint32_t qid, void **mbuf); /** * @ingroup rt_mem_queue * @brief enqueu peek - * @param [in] device the logical device id + * @param [in] devId the logical device id * @param [in] qid queue id * @param [out] bufLen length of mbuf in queue * @param [in] timeout peek timeout (ms), -1: wait all the time until peeking success * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtMemQueuePeek(int32_t device, uint32_t qid, size_t *bufLen, int32_t timeout); +RTS_API rtError_t rtMemQueuePeek(int32_t devId, uint32_t qid, size_t *bufLen, int32_t timeout); /** * @ingroup rt_mem_queue * @brief enqueu buff - * @param [in] 
device the logical device id + * @param [in] devId the logical device id * @param [in] qid queue id * @param [in] inBuf enqueue buff * @param [in] timeout enqueue timeout (ms), -1: wait all the time until enqueue success * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtMemQueueEnQueueBuff(int32_t device, uint32_t qid, rtMemQueueBuff_t *inBuf, int32_t timeout); +RTS_API rtError_t rtMemQueueEnQueueBuff(int32_t devId, uint32_t qid, rtMemQueueBuff_t *inBuf, int32_t timeout); /** * @ingroup rt_mem_queue * @brief enqueu buff - * @param [in] device the logical device id + * @param [in] devId the logical device id * @param [in] qid queue id * @param [out] outBuf dequeue buff * @param [in] timeout dequeue timeout (ms), -1: wait all the time until dequeue success * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtMemQueueDeQueueBuff(int32_t device, uint32_t qid, rtMemQueueBuff_t *outBuf, int32_t timeout); +RTS_API rtError_t rtMemQueueDeQueueBuff(int32_t devId, uint32_t qid, rtMemQueueBuff_t *outBuf, int32_t timeout); /** * @ingroup rt_mem_queue * @brief query current queue info - * @param [in] device the logical device id + * @param [in] devId the logical device id * @param [in] qid queue id - * @param [out] queueInfo current queue info + * @param [out] queInfo current queue info * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtMemQueueQueryInfo(int32_t device, uint32_t qid, rtMemQueueInfo_t *queueInfo); +RTS_API rtError_t rtMemQueueQueryInfo(int32_t devId, uint32_t qid, rtMemQueueInfo_t *queInfo); /** * @ingroup rt_mem_queue * @brief query queue status -* @param [in] device: the logical device id +* @param [in] devId: the logical device id * @param [in] cmd: query cmd * @param [in] inBuff: input buff * @param [in] inLen: the length of input @@ -295,39 +295,39 @@ RTS_API rtError_t rtMemQueueQueryInfo(int32_t device, uint32_t qid, rtMemQueueIn * @param [in|out] outLen: the length of output * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtMemQueueQuery(int32_t device, rtMemQueueQueryCmd_t cmd, const void *inBuff, uint32_t inLen, +RTS_API rtError_t rtMemQueueQuery(int32_t devId, rtMemQueueQueryCmd_t cmd, const void *inBuff, uint32_t inLen, void *outBuff, uint32_t *outLen); /** * @ingroup rt_mem_queue * @brief grant queue -* @param [in] device: logic devid +* @param [in] devId: logic devid * @param [in] qid: queue id * @param [in] pid: pid * @param [in] attr: queue share attr * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtMemQueueGrant(int32_t device, uint32_t qid, int32_t pid, rtMemQueueShareAttr_t *attr); +RTS_API rtError_t rtMemQueueGrant(int32_t devId, uint32_t qid, int32_t pid, rtMemQueueShareAttr_t *attr); /** * @ingroup rt_mem_queue * @brief attach queue -* @param [in] device: logic devid +* @param [in] devId: logic devid * @param [in] qid: queue id * @param [in] timeOut: timeOut * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtMemQueueAttach(int32_t device, uint32_t qid, int32_t timeOut); +RTS_API rtError_t rtMemQueueAttach(int32_t devId, uint32_t qid, int32_t timeOut); /** * @ingroup rt_mem_queue * @brief Commit the event to a specific process -* @param [in] device: logic devid -* @param [in] event: event summary info +* @param [in] devId: logic devid +* @param [in] evt: event summary info * @param [out] ack: event reply info * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtEschedSubmitEventSync(int32_t device, rtEschedEventSummary_t *event, +RTS_API rtError_t rtEschedSubmitEventSync(int32_t devId, rtEschedEventSummary_t *evt, rtEschedEventReply_t 
*ack); /** @@ -411,8 +411,11 @@ typedef struct { int32_t pid; } rtMemGrpQueryByProc_t; // cmd: GRP_QUERY_GROUPS_OF_PROCESS -typedef union { - rtMemGrpQueryByProc_t grpQueryByProc; // cmd: GRP_QUERY_GROUPS_OF_PROCESS +typedef struct { + int32_t cmd; + union { + rtMemGrpQueryByProc_t grpQueryByProc; // cmd: GRP_QUERY_GROUPS_OF_PROCESS + }; } rtMemGrpQueryInput_t; #define RT_MEM_GRP_NAME_LEN 32 // it must be same as driver define BUFF_GRP_NAME_LEN @@ -460,116 +463,115 @@ RTS_API rtError_t rtMemGrpAttach(const char_t *name, int32_t timeout); /** * @ingroup rt_mem_queue * @brief buff group query -* @param [in] cmd, cmd type * @param [in] input, query input * @param [in|out] output, query output * @return 0 for success, others for fail */ -RTS_API rtError_t rtMemGrpQuery(int32_t cmd, const rtMemGrpQueryInput_t *input, rtMemGrpQueryOutput_t *output); +RTS_API rtError_t rtMemGrpQuery(const rtMemGrpQueryInput_t *input, rtMemGrpQueryOutput_t *output); /** * @ingroup rt_mem_queue * @brief buff group query -* @param [in] device, cdevice id +* @param [in] devId, cdevice id * @param [in] name, group name * @param [out] qid, queue id * @return 0 for success, others for fail */ -RTS_API rtError_t rtMemQueueGetQidByName(int32_t device, const char *name, uint32_t *qId); +RTS_API rtError_t rtMemQueueGetQidByName(int32_t devId, const char_t *name, uint32_t *qId); /** * @ingroup rt_mem_queue * @brief esched attach device -* @param [in] device, device id +* @param [in] devId, device id * @return 0 for success, others for fail */ -RTS_API rtError_t rtEschedAttachDevice(int32_t device); +RTS_API rtError_t rtEschedAttachDevice(int32_t devId); /** * @ingroup rt_mem_queue * @brief esched dettach device -* @param [in] device, device id +* @param [in] devId, device id * @return 0 for success, others for fail */ -RTS_API rtError_t rtEschedDettachDevice(int32_t device); +RTS_API rtError_t rtEschedDettachDevice(int32_t devId); /** * @ingroup rt_mem_queue * @brief esched wait event -* @param [in] device, device id +* @param [in] devId, device id * @param [in] grpId, group id * @param [in] threadId, thread id * @param [in] timeout -* @param [in] event +* @param [in] evt * @return 0 for success, others for fail */ -RTS_API rtError_t rtEschedWaitEvent(int32_t device, uint32_t grpId, uint32_t threadId, - int timeout, rtEschedEventSummary_t *event); +RTS_API rtError_t rtEschedWaitEvent(int32_t devId, uint32_t grpId, uint32_t threadId, + int32_t timeout, rtEschedEventSummary_t *evt); /** * @ingroup rt_mem_queue * @brief esched create group -* @param [in] device, device id +* @param [in] devId, device id * @param [in] grpId, group id * @param [in] type, group type * @return 0 for success, others for fail */ -RTS_API rtError_t rtEschedCreateGrp(int32_t device, uint32_t grpId, rtGroupType_t type); +RTS_API rtError_t rtEschedCreateGrp(int32_t devId, uint32_t grpId, rtGroupType_t type); /** * @ingroup rt_mem_queue * @brief esched submit event -* @param [in] device, device id -* @param [in] event +* @param [in] devId, device id +* @param [in] evt * @return 0 for success, others for fail */ -RTS_API rtError_t rtEschedSubmitEvent(int32_t device, rtEschedEventSummary_t *event); +RTS_API rtError_t rtEschedSubmitEvent(int32_t devId, rtEschedEventSummary_t *evt); /** * @ingroup rt_mem_queue * @brief esched submit event -* @param [in] device, device id +* @param [in] devId, device id * @param [in] grpId, group id * @param [in] threadId, thread id * @param [in] eventBitmap * @return 0 for success, others for fail */ -RTS_API rtError_t 
rtEschedSubscribeEvent(int32_t device, uint32_t grpId, uint32_t threadId, uint64_t eventBitmap); +RTS_API rtError_t rtEschedSubscribeEvent(int32_t devId, uint32_t grpId, uint32_t threadId, uint64_t eventBitmap); /** * @ingroup rtEschedAckEvent * @brief esched ack event -* @param [in] device, device id -* @param [in] eventId, event type -* @param [in] subeventId, sub event type +* @param [in] devId, device id +* @param [in] evtId, event type +* @param [in] subEvtId, sub event type * @param [in] msg, message info * @param [in] len, message length * @return 0 for success, others for fail */ -RTS_API rtError_t rtEschedAckEvent(int32_t device, rtEventIdType_t eventId, - uint32_t subeventId, char *msg, uint32_t len); +RTS_API rtError_t rtEschedAckEvent(int32_t devId, rtEventIdType_t evtId, + uint32_t subEvtId, char_t *msg, uint32_t len); /** * @ingroup rtQueueSubF2NFEvent * @brief full to not full event -* @param [in] device, device id +* @param [in] devId, device id * @param [in] qid, queue id * @param [in] groupId, group id * @return 0 for success, others for fail */ -RTS_API rtError_t rtQueueSubF2NFEvent(int32_t device, uint32_t qId, uint32_t groupId); +RTS_API rtError_t rtQueueSubF2NFEvent(int32_t devId, uint32_t qId, uint32_t groupId); /** * @ingroup rtQueueSubscribe * @brief queue subscribe -* @param [in] device, device id +* @param [in] devId, device id * @param [in] qid, queue id * @param [in] groupId, group id * @param [in] type * @return 0 for success, others for fail */ -RTS_API rtError_t rtQueueSubscribe(int32_t device, uint32_t qId, uint32_t groupId, int type); +RTS_API rtError_t rtQueueSubscribe(int32_t devId, uint32_t qId, uint32_t groupId, int32_t type); #if defined(__cplusplus) } diff --git a/third_party/fwkacllib/inc/runtime/rt_model.h b/third_party/fwkacllib/inc/runtime/rt_model.h index 9486639d..2807a705 100644 --- a/third_party/fwkacllib/inc/runtime/rt_model.h +++ b/third_party/fwkacllib/inc/runtime/rt_model.h @@ -1,17 +1,7 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved. 
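The queue interfaces above switch from device to devId with unchanged declarations otherwise. A round-trip sketch; rtMemQueueAttr_t is zero-initialized because its fields are outside this hunk, and the mbuf is assumed to come from the runtime's mbuf allocator:

#include <stdint.h>
#include <stddef.h>
#include "runtime/rt_mem_queue.h"  /* assumed include path */

/* Create a queue on devId, push one mbuf through it, then destroy the queue. */
static rtError_t queue_round_trip(int32_t devId, void *mbuf)
{
    rtMemQueueAttr_t attr = {0};   /* real callers fill in the attribute fields */
    uint32_t qid = 0U;
    void *out = NULL;
    rtError_t ret = rtMemQueueInit(devId);
    if (ret != RT_ERROR_NONE) {
        return ret;
    }
    ret = rtMemQueueCreate(devId, &attr, &qid);
    if (ret != RT_ERROR_NONE) {
        return ret;
    }
    ret = rtMemQueueEnQueue(devId, qid, mbuf);
    if (ret == RT_ERROR_NONE) {
        ret = rtMemQueueDeQueue(devId, qid, &out);
    }
    (void)rtMemQueueDestroy(devId, qid);
    return ret;
}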
+ * Description: rt_model.h + * Create: 2020-01-01 */ #ifndef CCE_RUNTIME_RT_MODEL_H @@ -320,7 +310,7 @@ typedef struct tagLabelDevInfo_t { }u; }rtLabelDevInfo; -typedef rtError_t (*rtTaskGenCallback)(rtModel_t model, rtTaskInfo_t *taskInfo); +typedef rtError_t (*rtTaskGenCallback)(rtModel_t mdl, rtTaskInfo_t *taskInfo); /** * @ingroup rt_model @@ -334,165 +324,165 @@ RTS_API rtError_t rtSetTaskGenCallback(rtTaskGenCallback callback); /** * @ingroup rt_model * @brief create model instance - * @param [out] model created model + * @param [out] mdl created model * @param [in] flag reserved * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtModelCreate(rtModel_t *model, uint32_t flag); +RTS_API rtError_t rtModelCreate(rtModel_t *mdl, uint32_t flag); /** * @ingroup rt_model * @brief destroy model instance - * @param [in] model model to destroy + * @param [in] mdl model to destroy * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtModelDestroy(rtModel_t model); +RTS_API rtError_t rtModelDestroy(rtModel_t mdl); /** * @ingroup rt_model * @brief bind model and stream instance - * @param [in] model binded model - * @param [in] stream binded stream + * @param [in] mdl binded model + * @param [in] stm binded stream * @param [in] flag reserved * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtModelBindStream(rtModel_t model, rtStream_t stream, uint32_t flag); +RTS_API rtError_t rtModelBindStream(rtModel_t mdl, rtStream_t stm, uint32_t flag); /** * @ingroup rt_model * @brief unbind model and stream instance - * @param [in] model unbinded model - * @param [in] stream unbinded stream + * @param [in] mdl unbinded model + * @param [in] stm unbinded stream * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtModelUnbindStream(rtModel_t model, rtStream_t stream); +RTS_API rtError_t rtModelUnbindStream(rtModel_t mdl, rtStream_t stm); /** * @ingroup rt_model * @brief tell runtime Model has been Loaded - * @param [in] model model to execute + * @param [in] mdl model to execute * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtModelLoadComplete(rtModel_t model); +RTS_API rtError_t rtModelLoadComplete(rtModel_t mdl); /** * @ingroup rt_model * @brief execute model instance - * @param [in] model model to execute + * @param [in] mdl model to execute * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtModelExecute(rtModel_t model, rtStream_t stream, uint32_t flag); +RTS_API rtError_t rtModelExecute(rtModel_t mdl, rtStream_t stm, uint32_t flag); /** * @ingroup rt_model * @brief get model the last persist task id - * @param [in] model model to execute + * @param [in] mdl model to execute * @param [out] taskId last task id of the model * @param [out] streamId last steam id of the model * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtModelGetTaskId(rtModel_t model, uint32_t *taskId, uint32_t *streamId); +RTS_API rtError_t rtModelGetTaskId(rtModel_t mdl, uint32_t *taskId, uint32_t *streamId); /** * @ingroup rt_model * @brief add a end graph task to stream - * @param [in] model model to execute + * @param [in] mdl model to execute * @param [in] end graph stream * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtEndGraph(rtModel_t model, rtStream_t 
stream); +RTS_API rtError_t rtEndGraph(rtModel_t mdl, rtStream_t stm); /** * @ingroup rt_model * @brief add a end graph task with flag to stream - * @param [in] model model to execute + * @param [in] mdl model to execute * @param [in] end graph stream * @param [in] flags AICPU datadump * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtEndGraphEx(rtModel_t model, rtStream_t stream, uint32_t flags); +RTS_API rtError_t rtEndGraphEx(rtModel_t mdl, rtStream_t stm, uint32_t flags); /** * @ingroup rt_model * @brief add a end graph task to stream - * @param [in] model model to execute + * @param [in] mdl model to execute * @param [in] flags EXECUTOR_TS | EXECUTOR_AICPU * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtModelExecutorSet(rtModel_t model, uint8_t flags); +RTS_API rtError_t rtModelExecutorSet(rtModel_t mdl, uint8_t flags); /** * @ingroup rt_model * @brief abort model - * @param [in] model model to abort + * @param [in] mdl model to abort * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtModelAbort(rtModel_t model); +RTS_API rtError_t rtModelAbort(rtModel_t mdl); /** * @ingroup rt_model * @brief end graph task to model default stream - * @param [in] model model to execute + * @param [in] mdl model to execute * @param [in] end graph stream * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtModelExit(rtModel_t model, rtStream_t stream); +RTS_API rtError_t rtModelExit(rtModel_t mdl, rtStream_t stm); /** * @ingroup rt_model * @brief bind queue - * @param [in] model model to bind + * @param [in] mdl model to bind * @param [in] queueId queueId to bind * @param [in] flag * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtModelBindQueue(rtModel_t model, uint32_t queueId, rtModelQueueFlag_t flag); +RTS_API rtError_t rtModelBindQueue(rtModel_t mdl, uint32_t queueId, rtModelQueueFlag_t flag); /** * @ingroup rt_model * @brief get model id - * @param [in] model + * @param [in] mdl * @param [out] modelId model id * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtModelGetId(rtModel_t model, uint32_t *modelId); +RTS_API rtError_t rtModelGetId(rtModel_t mdl, uint32_t *modelId); /* * @ingroup rt_model * @brief enable debug for dump overflow exception * @param [in] addr: ddr address of kernel exception dumpped - * @param [in] model: model handle + * @param [in] mdl: model handle * @param [in] flag: debug flag * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtDebugRegister(rtModel_t model, uint32_t flag, const void *addr, +RTS_API rtError_t rtDebugRegister(rtModel_t mdl, uint32_t flag, const void *addr, uint32_t *streamId, uint32_t *taskId); /* * @ingroup rt_model * @brief disable debug for dump overflow exception - * @param [in] model: model handle + * @param [in] mdl: model handle * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtDebugUnRegister(rtModel_t model); +RTS_API rtError_t rtDebugUnRegister(rtModel_t mdl); #if defined(__cplusplus) } #endif -#endif // CCE_RUNTIME_RT_MODEL_H \ No newline at end of file +#endif // CCE_RUNTIME_RT_MODEL_H diff --git a/third_party/fwkacllib/inc/runtime/rt_stars.h b/third_party/fwkacllib/inc/runtime/rt_stars.h index 857006b5..a9c91897 100644 --- 
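The model interfaces above rename model/stream to mdl/stm without touching the declared semantics. A lifecycle sketch; treating rtModel_t as a NULL-initializable handle and passing 0 for the reserved flags are assumptions, and task population between bind and load-complete is omitted:

#include <stdint.h>
#include <stddef.h>
#include "runtime/rt_model.h"  /* assumed include path */

/* Create a model, bind stm, mark loading complete, execute once, tear down. */
static rtError_t run_model_once(rtStream_t stm)
{
    rtModel_t mdl = NULL;
    rtError_t ret = rtModelCreate(&mdl, 0U);       /* flag documented as reserved */
    if (ret != RT_ERROR_NONE) {
        return ret;
    }
    ret = rtModelBindStream(mdl, stm, 0U);
    if (ret == RT_ERROR_NONE) {
        /* ... distribute the model's tasks to the bound stream here ... */
        ret = rtModelLoadComplete(mdl);
    }
    if (ret == RT_ERROR_NONE) {
        ret = rtModelExecute(mdl, stm, 0U);
    }
    (void)rtModelUnbindStream(mdl, stm);
    (void)rtModelDestroy(mdl);
    return ret;
}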
--- a/third_party/fwkacllib/inc/runtime/rt_stars.h
+++ b/third_party/fwkacllib/inc/runtime/rt_stars.h
@@ -1,17 +1,6 @@
-/**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
+ * Description: the definition of stars
  */

 #ifndef CCE_RUNTIME_RT_STARS_H
@@ -29,10 +18,10 @@ extern "C" {
  * used for send star sqe directly.
  * @param [in] taskSqe stars task sqe
  * @param [in] sqeLen stars task sqe length
- * @param [in] stream associated stream
+ * @param [in] stm associated stream
  * @return RT_ERROR_NONE for ok, others failed
  */
-RTS_API rtError_t rtStarsTaskLaunch(const void *taskSqe, uint32_t sqeLen, rtStream_t stream);
+RTS_API rtError_t rtStarsTaskLaunch(const void *taskSqe, uint32_t sqeLen, rtStream_t stm);

 /**
@@ -71,11 +60,11 @@ RTS_API rtError_t rtCdqAllocBatch(const char_t *queName, int32_t timeout, uint32
  * @param [in] cdqeIndex cdqe index
  * @param [in] data cdqe infomation
  * @param [in] dataSize data size
- * @param [in] stream launch task on the stream
+ * @param [in] stm launch task on the stream
  * @return RT_ERROR_NONE for ok, others failed
  */
 RTS_API rtError_t rtCdqEnQueue(const char_t *queName, uint32_t cdqeIndex, void *data, uint32_t dataSize,
-    rtStream_t stream);
+    rtStream_t stm);

 /**
  * @ingroup rt_stars
@@ -85,11 +74,11 @@ RTS_API rtError_t rtCdqEnQueue(const char_t *queName, uint32_t cdqeIndex, void *
  * @param [in] cdqeIndex cdqe index
  * @param [in] data cdqe infomation
  * @param [in] dataSize data size
- * @param [in] stream launch task on the stream
+ * @param [in] stm launch task on the stream
  * @return RT_ERROR_NONE for ok, others failed
  */
 RTS_API rtError_t rtCdqEnQueuePtrMode(const char_t *queName, uint32_t cdqeIndex, const void *ptrAddr,
-    rtStream_t stream);
+    rtStream_t stm);

 #if defined(__cplusplus)
diff --git a/third_party/fwkacllib/inc/runtime/rt_stars_define.h b/third_party/fwkacllib/inc/runtime/rt_stars_define.h
index 861af80b..260809af 100644
--- a/third_party/fwkacllib/inc/runtime/rt_stars_define.h
+++ b/third_party/fwkacllib/inc/runtime/rt_stars_define.h
@@ -1,17 +1,6 @@
-/**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
+ * Description: the definition of stars
  */

 #ifndef CCE_RUNTIME_RT_STARS_DEFINE_H
@@ -83,9 +72,9 @@ typedef struct tagFftsPlusSqe {
     uint16_t aicPrefetchUpper : 5;
     uint16_t reserved12 : 3;
     uint16_t aivPrefetchLower : 5;
-    uint16_t Reserved13 : 3;
+    uint16_t reserved13 : 3;
     uint16_t aivPrefetchUpper : 5;
-    uint16_t Reserved14 : 3;
+    uint16_t reserved14 : 3;
     // 40-47 bytes
     uint32_t contextAddressBaseL;
     uint32_t contextAddressBaseH : 17;
diff --git a/third_party/fwkacllib/inc/runtime/stream.h b/third_party/fwkacllib/inc/runtime/stream.h
index a7ca9ebb..026b0155 100644
--- a/third_party/fwkacllib/inc/runtime/stream.h
+++ b/third_party/fwkacllib/inc/runtime/stream.h
@@ -1,17 +1,7 @@
-/**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved.
+ * Description: stream.h
+ * Create: 2020-01-01
  */

 #ifndef CCE_RUNTIME_STREAM_H
@@ -53,70 +43,70 @@ extern "C" {
 /**
  * @ingroup dvrt_stream
  * @brief create stream instance
- * @param [in|out] stream created stream
+ * @param [in|out] stm created stream
  * @param [in] priority stream priority
  * @return RT_ERROR_NONE for ok
  * @return RT_ERROR_INVALID_VALUE for error input
  */
-RTS_API rtError_t rtStreamCreate(rtStream_t *stream, int32_t priority);
+RTS_API rtError_t rtStreamCreate(rtStream_t *stm, int32_t priority);

 /**
  * @ingroup dvrt_stream
  * @brief create stream instance
- * @param [in|out] stream created stream
+ * @param [in|out] stm created stream
  * @param [in] priority stream priority
  * @param [in] flags stream op flags
  * @return RT_ERROR_NONE for ok
  * @return RT_ERROR_INVALID_VALUE for error input
  */
-RTS_API rtError_t rtStreamCreateWithFlags(rtStream_t *stream, int32_t priority, uint32_t flags);
+RTS_API rtError_t rtStreamCreateWithFlags(rtStream_t *stm, int32_t priority, uint32_t flags);

 /**
  * @ingroup dvrt_stream
  * @brief destroy stream instance.
- * @param [in] stream the stream to destroy
+ * @param [in] stm the stream to destroy
  * @return RT_ERROR_NONE for ok
  * @return RT_ERROR_INVALID_VALUE for error input
  */
-RTS_API rtError_t rtStreamDestroy(rtStream_t stream);
+RTS_API rtError_t rtStreamDestroy(rtStream_t stm);

 /**
  * @ingroup dvrt_stream
  * @brief wait an recorded event for stream
- * @param [in] stream the wait stream
+ * @param [in] stm the wait stream
  * @param [in] event the event to wait
  * @return RT_ERROR_NONE for ok
  * @return RT_ERROR_INVALID_VALUE for error input
  */
-RTS_API rtError_t rtStreamWaitEvent(rtStream_t stream, rtEvent_t event);
+RTS_API rtError_t rtStreamWaitEvent(rtStream_t stm, rtEvent_t evt);

 /**
  * @ingroup dvrt_stream
  * @brief wait stream to be complete
- * @param [in] stream stream to wait
+ * @param [in] stm stream to wait
  * @return RT_ERROR_NONE for ok
  * @return RT_ERROR_INVALID_VALUE for error input
  */
-RTS_API rtError_t rtStreamSynchronize(rtStream_t stream);
+RTS_API rtError_t rtStreamSynchronize(rtStream_t stm);

 /**
  * @ingroup dvrt_stream
  * @brief queries an asynchronous stream for completion status
- * @param [in] stream stream to query
+ * @param [in] stm stream to query
  * @return RT_ERROR_NONE for complete
  * @return RT_ERROR_STREAM_NOT_COMPLETE for not complete
  */
-RTS_API rtError_t rtStreamQuery(rtStream_t stream);
+RTS_API rtError_t rtStreamQuery(rtStream_t stm);

 /**
  * @ingroup dvrt_stream
  * @brief get stream id from a stream handle
- * @param [in] stream stream hadle
+ * @param [in] stm stream hadle
  * @param [in] streamId stream id
  * @return RT_ERROR_NONE for complete
  * @return RT_ERROR_INVALID_VALUE for error input
  */
-RTS_API rtError_t rtGetStreamId(rtStream_t stream, int32_t *streamId);
+RTS_API rtError_t rtGetStreamId(rtStream_t stm, int32_t *streamId);

 /**
  * @ingroup dvrt_stream
@@ -132,26 +122,26 @@ RTS_API rtError_t rtGetMaxStreamAndTask(uint32_t streamType, uint32_t *maxStrCou
 /**
  * @ingroup dvrt_stream
  * @brief Name a stream
- * @param [in] stream stream to be named
+ * @param [in] stm stream to be named
  * @param [in] name identification name
  * @return RT_ERROR_NONE for complete
  * @return RT_ERROR_INVALID_VALUE for error input
  */
-RTS_API rtError_t rtNameStream(rtStream_t stream, const char_t *name);
+RTS_API rtError_t rtNameStream(rtStream_t stm, const char_t *name);

 /**
  * @ingroup dvrt_stream
  * @brief switch to the corresponding stream according to the contents of the ptr
  * @param [in] ptr Determine the address where the value of the true and false branches is located
  * @param [in] condition switch condition
- * @param [in] value switch value
+ * @param [in] val switch value
  * @param [in] trueStream Stream that needs to be activated when the value is non-zero
- * @param [in] stream input stream to init task
+ * @param [in] stm input stream to init task
  * @return RT_ERROR_NONE for complete
  * @return RT_ERROR_INVALID_VALUE for error input
  */
-RTS_API rtError_t rtStreamSwitch(void *ptr, rtCondition_t condition, int64_t value, rtStream_t trueStream,
-    rtStream_t stream);
+RTS_API rtError_t rtStreamSwitch(void *ptr, rtCondition_t condition, int64_t val, rtStream_t trueStream,
+    rtStream_t stm);

 /**
  * @brief execute extensible stream switch task
@@ -159,22 +149,22 @@ RTS_API rtError_t rtStreamSwitch(void *ptr, rtCondition_t condition, int64_t val
  * @param [in] condition judge condition
  * @param [in] value_ptr pointer of target value
  * @param [in] true_stream stream to be activated when value is not zero
- * @param [in] stream stream id
+ * @param [in] stm stream id
  * @param [in] dataType data type of target value
  * @return RT_ERROR_NONE for complete
  */
 RTS_API rtError_t rtStreamSwitchEx(void *ptr, rtCondition_t condition, void *valuePtr, rtStream_t trueStream,
-    rtStream_t stream, rtSwitchDataType_t dataType);
+    rtStream_t stm, rtSwitchDataType_t dataType);

 /**
  * @ingroup dvrt_stream
  * @brief Active a stream
  * @param [in] activeStream stream to be activated
- * @param [in] stream input stream to init task
+ * @param [in] stm input stream to init task
  * @return RT_ERROR_NONE for complete
  * @return RT_ERROR_INVALID_VALUE for error input
  */
-RTS_API rtError_t rtStreamActive(rtStream_t activeStream, rtStream_t stream);
+RTS_API rtError_t rtStreamActive(rtStream_t activeStream, rtStream_t stm);

 /**
  * @brief execute extensible stream case switch task
@@ -183,36 +173,36 @@ RTS_API rtError_t rtStreamActive(rtStream_t activeStream, rtStream_t stream);
  * @param [in] valuePtr pointer of target value, length = size * elementSize
  * @param [in] trueStreamPtr streams to be activated
  * @param [in] elementSize size of to be activated true streams
- * @param [in] stream input stream to init task
+ * @param [in] stm input stream to init task
  * @param [in] dataType data type of target value
  * @return RT_ERROR_NONE for complete
  */
 RTS_API rtError_t rtStreamSwitchN(void *ptr, uint32_t size, void *valuePtr, rtStream_t *trueStreamPtr,
-    uint32_t elementSize, rtStream_t stream, rtSwitchDataType_t dataType);
+    uint32_t elementSize, rtStream_t stm, rtSwitchDataType_t dataType);

 /*
  * @ingroup dvrt_stream
  * @brief enable debug for dump overflow exception with stream
  * @param [in] addr: ddr address of kernel exception dumpped
- * @param [in] stream: stream handle
+ * @param [in] stm: stream handle
  * @param [in] flag: debug flag
  * @return RT_ERROR_NONE for ok
  * @return RT_ERROR_INVALID_VALUE for error input
  */
-RTS_API rtError_t rtDebugRegisterForStream(rtStream_t stream, uint32_t flag, const void *addr,
+RTS_API rtError_t rtDebugRegisterForStream(rtStream_t stm, uint32_t flag, const void *addr,
     uint32_t *streamId, uint32_t *taskId);

 /*
  * @ingroup rt_model
  * @brief disable debug for dump overflow exception with stream
- * @param [in] stream: stream handle
+ * @param [in] stm: stream handle
  * @return RT_ERROR_NONE for ok
  * @return RT_ERROR_INVALID_VALUE for error input
  */
-RTS_API rtError_t rtDebugUnRegisterForStream(rtStream_t stream);
+RTS_API rtError_t rtDebugUnRegisterForStream(rtStream_t stm);

 #if defined(__cplusplus)
 }
 #endif

-#endif // CCE_RUNTIME_STREAM_H
\ No newline at end of file
+#endif // CCE_RUNTIME_STREAM_H
diff --git a/third_party/fwkacllib/inc/soft_dp/ExternalSoftDp.h b/third_party/fwkacllib/inc/soft_dp/ExternalSoftDp.h
deleted file mode 100644
index 975043b3..00000000
--- a/third_party/fwkacllib/inc/soft_dp/ExternalSoftDp.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef EXTERNALSOFTDP_H
-#define EXTERNALSOFTDP_H
-
-#include
-
-extern "C" {
-struct SoftDpProcsessInfo {
-    uint8_t* inputBuffer;
-    uint32_t inputBufferSize;
-
-    uint8_t* outputBuffer;
-    uint32_t outputBufferSize;
-
-    uint32_t outputWidth;
-    uint32_t outputHeight;
-
-    uint32_t reserved;
-};
-
-struct DpCropInfo {
-    uint32_t left;
-    uint32_t right;
-    uint32_t up;
-    uint32_t down;
-};
-
-/*
- * @brief decode and resize interface
- * @param [in] SoftDpProcsessInfo& softDpProcsessInfo : soft dp struct
- * @return success: return 0, fail: return error number
- */
-uint32_t DecodeAndResizeJpeg(SoftDpProcsessInfo& softDpProcsessInfo);
-
-/*
- * @brief decode crop and resize interface
- * @param [in] SoftDpProcsessInfo& softDpProcsessInfo : soft dp struct
- * @param [in] const DpCropInfo& cropInfo: crop struct
- * @return success: return 0, fail: return error number
- */
-uint32_t DecodeAndCropAndResizeJpeg(SoftDpProcsessInfo& softDpProcsessInfo, const DpCropInfo& cropInfo);
-}
-#endif // EXTERNALSOFTDP_H
\ No newline at end of file
diff --git a/third_party/fwkacllib/inc/toolchain/adx_datadump_callback.h b/third_party/fwkacllib/inc/toolchain/adx_datadump_callback.h
index e2391f72..ca428e6a 100644
--- a/third_party/fwkacllib/inc/toolchain/adx_datadump_callback.h
+++ b/third_party/fwkacllib/inc/toolchain/adx_datadump_callback.h
@@ -18,15 +18,15 @@
 #define ADX_DATADUMP_CALLBACK_H
 #include
 namespace Adx {
-    const uint32_t MAX_FILE_PATH_LENGTH = 4096;
-    struct DumpChunk {
-        char fileName[MAX_FILE_PATH_LENGTH];
-        uint32_t bufLen;
-        uint32_t isLastChunk;
-        int64_t offset;
-        int32_t flag;
-        uint8_t dataBuf[0];
-    };
+const uint32_t MAX_FILE_PATH_LENGTH = 4096;
+struct DumpChunk {
+    char fileName[MAX_FILE_PATH_LENGTH];    // file name, absolute path
+    uint32_t bufLen;                        // dataBuf length
+    uint32_t isLastChunk;                   // is last chunk. 0: not 1: yes
+    int64_t offset;                         // Offset in file. -1: append write
+    int32_t flag;                           // flag
+    uint8_t dataBuf[0];                     // data buffer
+};

 int AdxRegDumpProcessCallBack(int (* const messageCallback) (const Adx::DumpChunk *, int));
 void AdxUnRegDumpProcessCallBack();
diff --git a/third_party/fwkacllib/inc/toolchain/prof_acl_api.h b/third_party/fwkacllib/inc/toolchain/prof_acl_api.h
index 80f4baab..8ecd5f14 100644
--- a/third_party/fwkacllib/inc/toolchain/prof_acl_api.h
+++ b/third_party/fwkacllib/inc/toolchain/prof_acl_api.h
@@ -18,62 +18,62 @@
 #define MSPROFILER_API_PROF_ACL_API_H_

 // DataTypeConfig
-#define PROF_ACL_API 0x00000001
-#define PROF_TASK_TIME 0x00000002
-#define PROF_AICORE_METRICS 0x00000004
-#define PROF_AICPU_TRACE 0x00000008
-#define PROF_L2CACHE 0x00000010
-#define PROF_HCCL_TRACE 0x00000020
-#define PROF_TRAINING_TRACE 0x00000040
+#define PROF_ACL_API 0x00000001ULL
+#define PROF_TASK_TIME 0x00000002ULL
+#define PROF_AICORE_METRICS 0x00000004ULL
+#define PROF_AICPU_TRACE 0x00000008ULL
+#define PROF_L2CACHE 0x00000010ULL
+#define PROF_HCCL_TRACE 0x00000020ULL
+#define PROF_TRAINING_TRACE 0x00000040ULL

 // system profilinig switch
-#define PROF_CPU 0x00010000
-#define PROF_HARDWARE_MEMORY 0x00020000
-#define PROF_IO 0x00040000
-#define PROF_INTER_CONNECTION 0x00080000
-#define PROF_DVPP 0x00100000
-#define PROF_SYS_AICORE_SAMPLE 0x00200000
-#define PROF_AIVECTORCORE_SAMPLE 0x00400000
-
-#define PROF_MODEL_EXECUTE 0x0000001000000
-#define PROF_RUNTIME_API 0x0000002000000
-#define PROF_RUNTIME_TRACE 0x0000004000000
-#define PROF_SCHEDULE_TIMELINE 0x0000008000000
-#define PROF_SCHEDULE_TRACE 0x0000010000000
-#define PROF_AIVECTORCORE_METRICS 0x0000020000000
-#define PROF_SUBTASK_TIME 0x0000040000000
-
-#define PROF_TASK_TRACE 0x0000005000062
-
-#define PROF_MODEL_LOAD 0x8000000000000000
+#define PROF_CPU 0x00010000ULL
+#define PROF_HARDWARE_MEMORY 0x00020000ULL
+#define PROF_IO 0x00040000ULL
+#define PROF_INTER_CONNECTION 0x00080000ULL
+#define PROF_DVPP 0x00100000ULL
+#define PROF_SYS_AICORE_SAMPLE 0x00200000ULL
+#define PROF_AIVECTORCORE_SAMPLE 0x00400000ULL
+
+#define PROF_MODEL_EXECUTE 0x0000001000000ULL
+#define PROF_RUNTIME_API 0x0000002000000ULL
+#define PROF_RUNTIME_TRACE 0x0000004000000ULL
+#define PROF_SCHEDULE_TIMELINE 0x0000008000000ULL
+#define PROF_SCHEDULE_TRACE 0x0000010000000ULL
+#define PROF_AIVECTORCORE_METRICS 0x0000020000000ULL
+#define PROF_SUBTASK_TIME 0x0000040000000ULL
+
+#define PROF_TASK_TRACE 0x0000005000062ULL
+
+#define PROF_MODEL_LOAD 0x8000000000000000ULL

 // DataTypeConfig MASK
-#define PROF_ACL_API_MASK 0x00000001
-#define PROF_TASK_TIME_MASK 0x00000002
-#define PROF_AICORE_METRICS_MASK 0x00000004
-#define PROF_AICPU_TRACE_MASK 0x00000008
-#define PROF_L2CACHE_MASK 0x00000010
-#define PROF_HCCL_TRACE_MASK 0x00000020
-#define PROF_TRAINING_TRACE_MASK 0x00000040
+#define PROF_ACL_API_MASK 0x00000001ULL
+#define PROF_TASK_TIME_MASK 0x00000002ULL
+#define PROF_AICORE_METRICS_MASK 0x00000004ULL
+#define PROF_AICPU_TRACE_MASK 0x00000008ULL
+#define PROF_L2CACHE_MASK 0x00000010ULL
+#define PROF_HCCL_TRACE_MASK 0x00000020ULL
+#define PROF_TRAINING_TRACE_MASK 0x00000040ULL

 // system profilinig mask
-#define PROF_CPU_MASK 0x00010000
-#define PROF_HARDWARE_MEMORY_MASK 0x00020000
-#define PROF_IO_MASK 0x00040000
-#define PROF_INTER_CONNECTION_MASK 0x00080000
-#define PROF_DVPP_MASK 0x00100000
-#define PROF_SYS_AICORE_SAMPLE_MASK 0x00200000
-#define PROF_AIVECTORCORE_SAMPLE_MASK 0x00400000
-
-#define PROF_MODEL_EXECUTE_MASK 0x0000001000000
-#define PROF_RUNTIME_API_MASK 0x0000002000000
-#define PROF_RUNTIME_TRACE_MASK 0x0000004000000
-#define PROF_SCHEDULE_TIMELINE_MASK 0x0000008000000
-#define PROF_SCHEDULE_TRACE_MASK 0x0000010000000
-#define PROF_AIVECTORCORE_METRICS_MASK 0x0000020000000
-#define PROF_SUBTASK_TIME_MASK 0x0000040000000
-
-#define PROF_MODEL_LOAD_MASK 0x8000000000000000
+#define PROF_CPU_MASK 0x00010000ULL
+#define PROF_HARDWARE_MEMORY_MASK 0x00020000ULL
+#define PROF_IO_MASK 0x00040000ULL
+#define PROF_INTER_CONNECTION_MASK 0x00080000ULL
+#define PROF_DVPP_MASK 0x00100000ULL
+#define PROF_SYS_AICORE_SAMPLE_MASK 0x00200000ULL
+#define PROF_AIVECTORCORE_SAMPLE_MASK 0x00400000ULL
+
+#define PROF_MODEL_EXECUTE_MASK 0x0000001000000ULL
+#define PROF_RUNTIME_API_MASK 0x0000002000000ULL
+#define PROF_RUNTIME_TRACE_MASK 0x0000004000000ULL
+#define PROF_SCHEDULE_TIMELINE_MASK 0x0000008000000ULL
+#define PROF_SCHEDULE_TRACE_MASK 0x0000010000000ULL
+#define PROF_AIVECTORCORE_METRICS_MASK 0x0000020000000ULL
+#define PROF_SUBTASK_TIME_MASK 0x0000040000000ULL
+
+#define PROF_MODEL_LOAD_MASK 0x8000000000000000ULL

 #if (defined(_WIN32) || defined(_WIN64) || defined(_MSC_VER))
 #define MSVP_PROF_API __declspec(dllexport)
diff --git a/third_party/fwkacllib/inc/toolchain/prof_callback.h b/third_party/fwkacllib/inc/toolchain/prof_callback.h
index cb531417..5f5a151b 100644
--- a/third_party/fwkacllib/inc/toolchain/prof_callback.h
+++ b/third_party/fwkacllib/inc/toolchain/prof_callback.h
@@ -43,7 +43,7 @@ enum MsprofErrorCode {
     MSPROF_ERROR,
 };

-#define MSPROF_ENGINE_MAX_TAG_LEN (31)
+#define MSPROF_ENGINE_MAX_TAG_LEN (63)

 /**
  * @name ReporterData
diff --git a/third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h b/third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h
deleted file mode 100644
index 4ba835b6..00000000
--- a/third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/** @defgroup aoe aoe调优接口 */
-#ifndef TUNE_API_H
-#define TUNE_API_H
-#include
-#include
-#include "ge/ge_api.h"
-#include "aoe_types.h"
-
-/**
- * @ingroup aoe
- * @par 描述: 命令行调优
- *
- * @attention 无
- * @param option [IN] 调优参数
- * @param msg [OUT] 调优异常下返回信息
- * @retval #AOE_SUCCESS 执行成功
- * @retval #AOE_FAILURE 执行失败
- * @par 依赖:
- * @li tune_api.cpp:该接口所属的开发包。
- * @li tune_api.h:该接口声明所在的头文件。
- * @see 无
- * @since
- */
-AoeStatus AoeOfflineTuning(const std::map &option, std::string &msg);
-
-/**
- * @ingroup aoe
- * @par 描述: 调优初始化
- *
- * @attention 无
- * @param session [IN] ge连接会话
- * @param option [IN] 参数集. 包含调优参数及ge参数
- * @retval #AOE_SUCCESS 执行成功
- * @retval #AOE_FAILURE 执行失败
- * @par 依赖:
- * @li tune_api.cpp:该接口所属的开发包。
- * @li tune_api.h:该接口声明所在的头文件。
- * @see 无
- * @since
- */
-extern "C" AoeStatus AoeOnlineInitialize(ge::Session *session, const std::map &option);
-
-/**
- * @ingroup aoe
- * @par 描述: 调优去初始化
- *
- * @attention 无
- * @param 无
- * @retval #AOE_SUCCESS 执行成功
- * @retval #AOE_FAILURE 执行失败
- * @par 依赖:
- * @li tune_api.cpp:该接口所属的开发包。
- * @li tune_api.h:该接口声明所在的头文件。
- * @see 无
- * @since
- */
-extern "C" AoeStatus AoeOnlineFinalize();
-
-/**
- * @ingroup aoe
- * @par 描述: 调优处理
- *
- * @attention 无
- * @param tuningGraph [IN] 调优图
- * @param dependGraph [IN] 调优依赖图
- * @param session [IN] ge连接会话
- * @param option [IN] 参数集. 包含调优参数及ge参数
- * @retval #AOE_SUCCESS 执行成功
- * @retval #AOE_FAILURE 执行失败
- * @par 依赖:
- * @li tune_api.cpp:该接口所属的开发包。
- * @li tune_api.h:该接口声明所在的头文件。
- * @see 无
- * @since
- */
-extern "C" AoeStatus AoeOnlineTuning(ge::Graph &tuningGraph, std::vector &dependGraph,
-    ge::Session *session, const std::map &option);
-#endif
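
The model/stream renames above are cosmetic: every runtime entry point keeps its signature, only the parameter names change from model/stream to mdl/stm, so existing callers rebuild without source changes. The following is a minimal usage sketch against the declarations shown in rt_model.h and stream.h, not code from this patch; the include paths, the flag values of 0, and the assumption that device/context setup and task loading into the model happen elsewhere are all illustrative assumptions.

/* Sketch only: drives one model through create -> bind -> load -> execute -> wait.
 * Assumes the fwkacllib include directory is on the include path and that
 * rtSetDevice()/task generation are handled by the caller. */
#include <stddef.h>
#include "runtime/rt_model.h"
#include "runtime/stream.h"

static rtError_t RunModelOnce(void)
{
    rtModel_t mdl = NULL;
    rtStream_t stm = NULL;

    rtError_t ret = rtModelCreate(&mdl, 0U);          /* flag is reserved, 0 assumed */
    if (ret != RT_ERROR_NONE) {
        return ret;
    }
    ret = rtStreamCreate(&stm, 0);                    /* default priority assumed */
    if (ret != RT_ERROR_NONE) {
        (void)rtModelDestroy(mdl);
        return ret;
    }
    ret = rtModelBindStream(mdl, stm, 0U);            /* bind before loading tasks */
    if (ret == RT_ERROR_NONE) {
        /* ... task generation / loading for the model would happen here ... */
        ret = rtModelLoadComplete(mdl);               /* tell runtime the model is loaded */
    }
    if (ret == RT_ERROR_NONE) {
        ret = rtModelExecute(mdl, stm, 0U);           /* kick off execution on the bound stream */
    }
    if (ret == RT_ERROR_NONE) {
        ret = rtStreamSynchronize(stm);               /* wait for the stream to drain */
    }

    (void)rtModelUnbindStream(mdl, stm);              /* best-effort cleanup in a sketch */
    (void)rtStreamDestroy(stm);
    (void)rtModelDestroy(mdl);
    return ret;
}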